Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

prov/verbs: Add support for IBV_ACCESS_RELAXED_ORDERING #9378

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
10 changes: 10 additions & 0 deletions prov/verbs/configure.m4
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,16 @@ AC_DEFUN([FI_VERBS_CONFIGURE],[
AC_DEFINE_UNQUOTED([VERBS_HAVE_DMABUF_MR],[$VERBS_HAVE_DMABUF_MR],
[Whether infiniband/verbs.h has ibv_reg_dmabuf_mr() support or not])

# Check whether infiniband/verbs.h declares IBV_ACCESS_RELAXED_ORDERING.
# VERBS_HAVE_RELAXED_ORDERING_MR starts at 0 and the declaration is only
# probed when verbs_ibverbs_happy is 1. The result is always written to the
# config header as 0 or 1 so C code can test it with a plain #if.
VERBS_HAVE_RELAXED_ORDERING_MR=0
AS_IF([test $verbs_ibverbs_happy -eq 1],[
AC_CHECK_DECL([IBV_ACCESS_RELAXED_ORDERING],
[VERBS_HAVE_RELAXED_ORDERING_MR=1],[],
[#include <infiniband/verbs.h>])
])
AC_DEFINE_UNQUOTED([VERBS_HAVE_RELAXED_ORDERING_MR],[$VERBS_HAVE_RELAXED_ORDERING_MR],
[Whether infiniband/verbs.h has IBV_ACCESS_RELAXED_ORDERING support or not])

CPPFLAGS=$fi_verbs_configure_save_CPPFLAGS

# Technically, verbs_ibverbs_CPPFLAGS and
Expand Down
12 changes: 9 additions & 3 deletions prov/verbs/src/verbs_domain.c
Original file line number Diff line number Diff line change
Expand Up @@ -246,9 +246,12 @@ static int vrb_open_device_by_name(struct vrb_domain *domain, const char *name)
const char *rdma_name = ibv_get_device_name(dev_list[i]->device);
switch (domain->ep_type) {
case FI_EP_MSG:
ret = domain->ext_flags & VRB_USE_XRC ?
vrb_cmp_xrc_domain_name(name, rdma_name) :
strcmp(name, rdma_name);
if (domain->ext_flags & VRB_USE_XRC)
ret = vrb_cmp_xrc_domain_name(name, rdma_name);
else if (domain->ext_flags & VRB_USE_RO)
ret = vrb_cmp_ro_domain_name(name, rdma_name);
else
ret = strcmp(name, rdma_name);
break;
case FI_EP_DGRAM:
ret = strncmp(name, rdma_name,
Expand Down Expand Up @@ -345,6 +348,9 @@ vrb_domain(struct fid_fabric *fabric, struct fi_info *info,
if (!_domain->info)
goto err2;

if (VRB_RO_ENABLED(info))
_domain->ext_flags |= VRB_USE_RO;

_domain->ep_type = VRB_EP_TYPE(info);
_domain->ext_flags |= vrb_is_xrc_info(info) ? VRB_USE_XRC : 0;

Expand Down
3 changes: 3 additions & 0 deletions prov/verbs/src/verbs_eq.c
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,9 @@ vrb_pep_dev_domain_match(struct fi_info *hints, const char *devname)
if ((VRB_EP_PROTO(hints)) == FI_PROTO_RDMA_CM_IB_XRC)
ret = vrb_cmp_xrc_domain_name(hints->domain_attr->name,
devname);
else if (VRB_RO_ENABLED(hints))
ret = vrb_cmp_ro_domain_name(hints->domain_attr->name,
devname);
else
ret = strcmp(hints->domain_attr->name, devname);

Expand Down
50 changes: 45 additions & 5 deletions prov/verbs/src/verbs_info.c
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,15 @@ const struct fi_rx_attr verbs_rx_attr = {
.total_buffered_recv = 0,
};

/*
 * Receive attributes copied into fi_info entries built for a
 * relaxed-ordering FI_EP_MSG domain. No msg_order bits are set.
 */
const struct fi_rx_attr verbs_ro_rx_attr = {
	/* ordering */
	.msg_order		= 0,
	.comp_order		= FI_ORDER_STRICT,
	/* capabilities and modes */
	.caps			= VERBS_MSG_RX_CAPS,
	.mode			= VERBS_RX_MODE,
	.op_flags		= FI_COMPLETION,
	/* limits */
	.total_buffered_recv	= 0,
};

const struct fi_rx_attr verbs_dgram_rx_attr = {
.caps = VERBS_DGRAM_RX_CAPS,
.mode = VERBS_DGRAM_RX_MODE | VERBS_RX_MODE,
Expand All @@ -132,6 +141,16 @@ const struct fi_tx_attr verbs_tx_attr = {
.rma_iov_limit = 1,
};

/*
 * Transmit attributes copied into fi_info entries built for a
 * relaxed-ordering FI_EP_MSG domain. No msg_order bits are set.
 */
const struct fi_tx_attr verbs_ro_tx_attr = {
	/* ordering */
	.msg_order	= 0,
	.comp_order	= FI_ORDER_STRICT,
	/* capabilities and modes */
	.caps		= VERBS_MSG_TX_CAPS,
	.mode		= 0,
	.op_flags	= VERBS_TX_OP_FLAGS,
	/* limits */
	.inject_size	= 0,
	.rma_iov_limit	= 1,
};

const struct fi_tx_attr verbs_dgram_tx_attr = {
.caps = VERBS_DGRAM_TX_CAPS,
.mode = 0,
Expand All @@ -146,18 +165,28 @@ const struct verbs_ep_domain verbs_msg_domain = {
.suffix = "",
.type = FI_EP_MSG,
.protocol = FI_PROTO_UNSPEC,
.relaxed_ordering = false,
};

/*
 * FI_EP_MSG domain flavour with relaxed ordering; its domain names carry
 * the "-ro" suffix.
 */
const struct verbs_ep_domain verbs_msg_ro_domain = {
	.type			= FI_EP_MSG,
	.protocol		= FI_PROTO_UNSPEC,
	.relaxed_ordering	= true,
	.suffix			= "-ro",
};

/*
 * FI_EP_MSG domain flavour using the FI_PROTO_RDMA_CM_IB_XRC protocol; its
 * domain names carry the "-xrc" suffix. Relaxed ordering is off.
 */
const struct verbs_ep_domain verbs_msg_xrc_domain = {
	.type			= FI_EP_MSG,
	.protocol		= FI_PROTO_RDMA_CM_IB_XRC,
	.relaxed_ordering	= false,
	.suffix			= "-xrc",
};

/*
 * FI_EP_DGRAM domain flavour; its domain names carry the "-dgram" suffix.
 * Relaxed ordering is off.
 */
const struct verbs_ep_domain verbs_dgram_domain = {
	.type			= FI_EP_DGRAM,
	.protocol		= FI_PROTO_UNSPEC,
	.relaxed_ordering	= false,
	.suffix			= "-dgram",
};

/* The list (not thread safe) is populated once when the provider is initialized */
Expand Down Expand Up @@ -770,8 +799,13 @@ static int vrb_alloc_info(struct ibv_context *ctx, struct fi_info **info,
switch (ep_dom->type) {
case FI_EP_MSG:
fi->caps = VERBS_MSG_CAPS;
*(fi->tx_attr) = verbs_tx_attr;
*(fi->rx_attr) = verbs_rx_attr;
if (ep_dom->relaxed_ordering) {
*(fi->tx_attr) = verbs_ro_tx_attr;
*(fi->rx_attr) = verbs_ro_rx_attr;
} else {
*(fi->tx_attr) = verbs_tx_attr;
*(fi->rx_attr) = verbs_rx_attr;
}
fi->addr_format = FI_SOCKADDR_IB;
break;
case FI_EP_DGRAM:
Expand Down Expand Up @@ -1332,7 +1366,7 @@ static int vrb_device_has_ipoib_addr(const char *dev_name)
return 0;
}

#define VERBS_NUM_DOMAIN_TYPES 3
#define VERBS_NUM_DOMAIN_TYPES 4

static int vrb_init_info(const struct fi_info **all_infos)
{
Expand Down Expand Up @@ -1379,12 +1413,14 @@ static int vrb_init_info(const struct fi_info **all_infos)
if (!vrb_gl_data.iface)
vrb_get_sib(&verbs_devs);

if (dlist_empty(&verbs_devs))
if (dlist_empty(&verbs_devs)) {
FI_WARN(&vrb_prov, FI_LOG_FABRIC,
"no valid IPoIB interfaces found, FI_EP_MSG endpoint "
"type would not be available\n");
else
} else {
ep_type[dom_count++] = &verbs_msg_domain;
ep_type[dom_count++] = &verbs_msg_ro_domain;
}

if (!vrb_gl_data.msg.prefer_xrc && VERBS_HAVE_XRC)
ep_type[dom_count++] = &verbs_msg_xrc_domain;
Expand Down Expand Up @@ -1562,6 +1598,10 @@ int vrb_get_matching_info(uint32_t version, const struct fi_info *hints,
"XRC FI_EP_MSG endpoints\n");
continue;
}

if (VRB_RO_ENABLED(hints) && (check_info->tx_attr->msg_order ||
check_info->rx_attr->msg_order))
continue;
}

if ((check_info->ep_attr->type == FI_EP_MSG) && passive) {
Expand Down
3 changes: 3 additions & 0 deletions prov/verbs/src/verbs_mr.c
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,9 @@ vrb_mr_ofi2ibv_access(uint64_t ofi_access, struct vrb_domain *domain)
IBV_ACCESS_REMOTE_WRITE |
IBV_ACCESS_REMOTE_ATOMIC;

if (domain->ext_flags & VRB_USE_RO)
ibv_access |= VRB_ACCESS_RELAXED_ORDERING;

return ibv_access;
}

Expand Down
22 changes: 22 additions & 0 deletions prov/verbs/src/verbs_ofi.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,9 @@
/*
 * Endpoint protocol stored in info->ep_attr, or FI_PROTO_UNSPEC when either
 * info or info->ep_attr is NULL.
 */
#define VRB_EP_PROTO(info)					\
	((!(info) || !(info)->ep_attr) ? FI_PROTO_UNSPEC :	\
	 (info)->ep_attr->protocol)
/*
 * Non-zero when info requests relaxed ordering: both tx_attr and rx_attr are
 * present and neither has any msg_order bit set. A NULL info evaluates to 0,
 * matching the NULL tolerance of VRB_EP_PROTO().
 *
 * NOTE(review): an info whose tx/rx attributes merely leave msg_order at 0
 * also satisfies this test - confirm that treating "unspecified" as
 * "relaxed" is intended.
 */
#define VRB_RO_ENABLED(info)						\
	((info) && (info)->tx_attr && !(info)->tx_attr->msg_order &&	\
	 (info)->rx_attr && !(info)->rx_attr->msg_order)

#define VRB_MEM_ALIGNMENT (64)
#define VRB_BUF_ALIGNMENT (4096) /* TODO: Page or MTU size */
Expand Down Expand Up @@ -366,6 +369,7 @@ struct fi_ops_cm *vrb_pep_ops_cm(struct vrb_pep *pep);
/* Bit flags OR-ed into the ext_flags member of struct vrb_domain. */
enum {
/* Set by vrb_domain() when vrb_is_xrc_info(info) is true. */
VRB_USE_XRC = BIT(0),
/* NOTE(review): where this flag is set and consumed is not visible here. */
VRB_USE_ODP = BIT(1),
/*
 * Set by vrb_domain() when VRB_RO_ENABLED(info) is true; makes
 * vrb_mr_ofi2ibv_access() add VRB_ACCESS_RELAXED_ORDERING to the access mask.
 */
VRB_USE_RO = BIT(2),
};

struct vrb_domain {
Expand Down Expand Up @@ -437,6 +441,12 @@ int vrb_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,
struct fid_cq **cq, void *context);
int vrb_cq_trywait(struct vrb_cq *cq);

/*
 * Access bit for relaxed-ordering memory registrations. When
 * VERBS_HAVE_RELAXED_ORDERING_MR is 0 the macro expands to 0, so OR-ing it
 * into an access mask changes nothing.
 */
#if !VERBS_HAVE_RELAXED_ORDERING_MR
#define VRB_ACCESS_RELAXED_ORDERING 0
#else
#define VRB_ACCESS_RELAXED_ORDERING IBV_ACCESS_RELAXED_ORDERING
#endif

struct vrb_mem_desc {
struct fid_mr mr_fid;
struct ibv_mr *mr;
Expand Down Expand Up @@ -839,10 +849,12 @@ struct verbs_ep_domain {
char *suffix;
enum fi_ep_type type;
uint32_t protocol;
bool relaxed_ordering;
};

extern const struct verbs_ep_domain verbs_dgram_domain;
extern const struct verbs_ep_domain verbs_msg_xrc_domain;
extern const struct verbs_ep_domain verbs_msg_ro_domain;

int vrb_check_ep_attr(const struct fi_info *hints,
const struct fi_info *info);
Expand All @@ -860,6 +872,16 @@ static inline int vrb_cmp_xrc_domain_name(const char *domain_name,
domain_len - suffix_len) : -1;
}

/*
 * Compare a domain name against an RDMA device name for a relaxed-ordering
 * domain.
 *
 * @param domain_name  NUL-terminated domain name, normally the device name
 *                     followed by verbs_msg_ro_domain.suffix.
 * @param rdma_name    NUL-terminated RDMA device name.
 * @return 0 when domain_name refers to rdma_name, non-zero otherwise.
 *
 * The previous implementation compared only the first
 * (strlen(domain_name) - strlen(suffix)) characters and never checked that
 * the suffix was present. A suffix-less "mlx5_0" therefore matched "mlx5_1",
 * and "mlx5_0-ro" matched "mlx5_01". Now:
 *  - if domain_name ends with the suffix, the part before the suffix must
 *    equal rdma_name exactly (same length, same bytes);
 *  - otherwise the whole domain_name must equal rdma_name, because callers
 *    select this comparison from the info attributes, not from the name.
 */
static inline int vrb_cmp_ro_domain_name(const char *domain_name,
					 const char *rdma_name)
{
	const char *suffix = verbs_msg_ro_domain.suffix;
	size_t domain_len = strlen(domain_name);
	size_t suffix_len = strlen(suffix);
	size_t base_len;

	/* No "-ro" suffix on the name: only an exact device name can match. */
	if (domain_len <= suffix_len ||
	    strcmp(domain_name + (domain_len - suffix_len), suffix) != 0)
		return strcmp(domain_name, rdma_name);

	/* Reject devices whose name merely starts with the domain's base. */
	base_len = domain_len - suffix_len;
	if (strlen(rdma_name) != base_len)
		return -1;

	return strncmp(domain_name, rdma_name, base_len);
}

int vrb_cq_signal(struct fid_cq *cq);

struct vrb_eq_entry *vrb_eq_alloc_entry(uint32_t event,
Expand Down