Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

UCP/WIREUP: Increase UCP_MAX_LANES to 64 #9814

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/ucp/core/ucp_ep.inl
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ static inline ucp_lane_index_t ucp_ep_num_lanes(ucp_ep_h ep)

static inline int ucp_ep_is_lane_p2p(ucp_ep_h ep, ucp_lane_index_t lane)
{
return ucp_ep_config(ep)->p2p_lanes & UCS_BIT(lane);
return !!(ucp_ep_config(ep)->p2p_lanes & UCS_BIT(lane));
brminich marked this conversation as resolved.
Show resolved Hide resolved
tvegas1 marked this conversation as resolved.
Show resolved Hide resolved
}

static inline ucp_md_index_t ucp_ep_md_index(ucp_ep_h ep, ucp_lane_index_t lane)
Expand Down Expand Up @@ -239,7 +239,7 @@ static inline ucp_rsc_index_t
ucp_ep_config_get_dst_md_cmpt(const ucp_ep_config_key_t *key,
ucp_md_index_t dst_md_index)
{
unsigned idx = ucs_popcount(key->reachable_md_map & UCS_MASK(dst_md_index));
unsigned idx = ucs_bitmap2idx(key->reachable_md_map, dst_md_index);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unrelated fix?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, a little :)

I just discovered that there is a helper function for that logic, so decided to included it to the patch.


return key->dst_md_cmpts[idx];
}
Expand Down
4 changes: 2 additions & 2 deletions src/ucp/core/ucp_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ UCP_UINT_TYPE(UCP_MD_INDEX_BITS) ucp_md_map_t;


/* Lanes */
#define UCP_MAX_LANES 16
#define UCP_MAX_LANES_LEGACY 16
#define UCP_MAX_LANES 64
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should probably change code that uses ucs_ffs32 on the lane map to use ucs_ffs64
for example in protov2 rndv/put/zcopy

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I changed all the entries that I found, but I haven't found one in rndv/put/zcopy. Maybe they were removed in #9862

#define UCP_MAX_FAST_PATH_LANES 5
#define UCP_MAX_SLOW_PATH_LANES (UCP_MAX_LANES - UCP_MAX_FAST_PATH_LANES)

#define UCP_NULL_LANE ((ucp_lane_index_t)-1)
typedef uint8_t ucp_lane_index_t;
Expand Down
5 changes: 2 additions & 3 deletions src/ucp/core/ucp_worker.c
Original file line number Diff line number Diff line change
Expand Up @@ -3470,8 +3470,7 @@ static int ucp_worker_do_ep_keepalive(ucp_worker_h worker, ucs_time_t now)
ucs_trace("ep %p: do keepalive on lane[%d]=%p ep->flags=0x%x", ep, lane,
uct_ep, ep->flags);

if (ucp_ep_is_am_keepalive(ep, rsc_index,
ucp_ep_config(ep)->p2p_lanes & UCS_BIT(lane))) {
if (ucp_ep_is_am_keepalive(ep, rsc_index, ucp_ep_is_lane_p2p(ep, lane))) {
status = ucp_ep_do_uct_ep_am_keepalive(ep, uct_ep, rsc_index);
} else {
status = uct_ep_check(uct_ep, 0, NULL);
Expand Down Expand Up @@ -3773,7 +3772,7 @@ ucp_wiface_process_for_each_lane(ucp_worker_h worker,

ucs_for_each_bit(lane, lane_map) {
ucs_assertv(lane < UCP_MAX_LANES,
"lane=%" PRIu8 ", lane_map=0x%" PRIx16, lane, lane_map);
"lane=%" PRIu8 ", lane_map=0x%" PRIx64, lane, lane_map);
rsc_index = ep_config->key.lanes[lane].rsc_index;
wiface = ucp_worker_iface(worker, rsc_index);
wiface_process(wiface);
Expand Down
8 changes: 7 additions & 1 deletion src/ucp/proto/proto_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,17 @@ void ucp_proto_common_add_ppln_range(ucp_proto_caps_t *caps,
frag_overhead =
ucs_linear_func_apply(ppln_perf[UCP_PROTO_PERF_TYPE_SINGLE], frag_size) -
ucs_linear_func_apply(ppln_perf[UCP_PROTO_PERF_TYPE_MULTI], frag_size);
ucs_assert(frag_overhead >= 0);

ucs_trace("frag-size: %zd" UCP_PROTO_TIME_FMT(frag_overhead), frag_size,
UCP_PROTO_TIME_ARG(frag_overhead));

/* In certain cases multi perf can be bigger than single (e.g. FAST_CMPL
* with huge send_post_overhead) so the fragment overhead can be negative
* according to the logic above. The simplest estimation is to ignore
* fragment overhead in that particular case.
*/
frag_overhead = ucs_max(frag_overhead, 0);

/* Apply the pipelining effect when sending multiple fragments */
ppln_perf[UCP_PROTO_PERF_TYPE_SINGLE] =
ucs_linear_func_add(ppln_perf[UCP_PROTO_PERF_TYPE_MULTI],
Expand Down
7 changes: 4 additions & 3 deletions src/ucp/proto/proto_multi.inl
Original file line number Diff line number Diff line change
Expand Up @@ -255,9 +255,10 @@ static UCS_F_ALWAYS_INLINE ucs_status_t ucp_proto_multi_lane_map_progress(
ucp_lane_index_t lane;
ucs_status_t status;

ucs_assertv(remaining_lane_map != 0, "req=%p *lane_p=%d lane_map=0x%x", req,
*lane_p, lane_map);
lane = ucs_ffs32(remaining_lane_map);
ucs_assertv(remaining_lane_map != 0,
"req=%p *lane_p=%d lane_map=0x%" PRIx64, req, *lane_p,
lane_map);
lane = ucs_ffs64(remaining_lane_map);

status = send_func(req, lane);
if (ucs_likely(status == UCS_OK)) {
Expand Down
31 changes: 18 additions & 13 deletions src/ucp/rma/flush.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,13 @@ ucp_ep_flush_request_update_uct_comp(ucp_request_t *req, int diff,
&req->send.state.uct_comp, req->send.state.uct_comp.count,
diff);
ucs_assertv(!(req->send.flush.started_lanes & new_started_lanes),
"req=%p started_lanes=0x%x new_started_lanes=0x%x", req,
req->send.flush.started_lanes, new_started_lanes);
"req=%p started_lanes=0x%" PRIx64
" new_started_lanes=0x%" PRIx64,
req, req->send.flush.started_lanes, new_started_lanes);

ucp_trace_req(req,
"flush update ep %p comp_count %d->%d num_lanes %d->%d "
"started_lanes 0x%x->0x%x",
"started_lanes 0x%" PRIx64 "->0x%" PRIx64,
req->send.ep, req->send.state.uct_comp.count,
req->send.state.uct_comp.count + diff,
req->send.flush.num_lanes, ucp_ep_num_lanes(req->send.ep),
Expand Down Expand Up @@ -99,7 +100,8 @@ static void ucp_ep_flush_progress(ucp_request_t *req)
}

ucp_trace_req(req,
"progress ep=%p flush flags=0x%x started_lanes=0x%x count=%d",
"progress ep=%p flush flags=0x%x started_lanes=0x%" PRIx64
" count=%d",
ep, ep->flags, req->send.flush.started_lanes,
req->send.state.uct_comp.count);

Expand Down Expand Up @@ -229,20 +231,22 @@ static void ucp_ep_flush_request_resched(ucp_ep_h ep, ucp_request_t *req)
(ucp_ep_config(ep)->p2p_lanes &&
ep->worker->context->config.ext.proto_request_reset)) {
ucs_assertv(!req->send.flush.started_lanes,
"req=%p flush started_lanes=0x%x", req,
"req=%p flush started_lanes=0x%" PRIx64, req,
req->send.flush.started_lanes);
} else {
ucs_assertv(!(UCS_BIT(req->send.lane) & req->send.flush.started_lanes),
"req=%p lane=%d started_lanes=0x%x", req, req->send.lane,
req->send.flush.started_lanes);
ucs_assertv(!(UCS_BIT(req->send.lane) &
req->send.flush.started_lanes),
"req=%p lane=%d started_lanes=0x%" PRIx64, req,
req->send.lane, req->send.flush.started_lanes);

/* Only lanes connected to iface can be started/flushed before
* wireup is done because connect2iface does not create wireup_ep
* without cm mode */
ucs_assertv(!(req->send.flush.started_lanes &
ucp_ep_config(ep)->p2p_lanes),
"req=%p flush started_lanes=0x%x p2p_lanes=0x%x", req,
req->send.flush.started_lanes,
"req=%p flush started_lanes=0x%" PRIx64
" p2p_lanes=0x%" PRIx64,
req, req->send.flush.started_lanes,
ucp_ep_config(ep)->p2p_lanes);
}

Expand Down Expand Up @@ -348,9 +352,10 @@ void ucp_ep_flush_request_ff(ucp_request_t *req, ucs_status_t status)
int num_comps = req->send.flush.num_lanes -
ucs_popcount(req->send.flush.started_lanes);

ucp_trace_req(req, "fast-forward flush, comp-=%d num_lanes %d started 0x%x",
num_comps, req->send.flush.num_lanes,
req->send.flush.started_lanes);
ucp_trace_req(
req, "fast-forward flush, comp-=%d num_lanes %d started 0x%" PRIx64,
num_comps, req->send.flush.num_lanes,
req->send.flush.started_lanes);

ucp_ep_flush_request_update_uct_comp(req, -num_comps,
UCS_MASK(req->send.flush.num_lanes) &
Expand Down
4 changes: 2 additions & 2 deletions src/ucp/rndv/rndv.c
Original file line number Diff line number Diff line change
Expand Up @@ -519,7 +519,7 @@ static void ucp_rndv_zcopy_next_lane(ucp_request_t *rndv_req)
ucp_lane_map_t lane_map;
lane_map = lanes_map_all & ~UCS_MASK(rndv_req->send.multi_lane_idx + 1);

rndv_req->send.multi_lane_idx = ucs_ffs32((lane_map > 0) ? lane_map :
rndv_req->send.multi_lane_idx = ucs_ffs64((lane_map > 0) ? lane_map :
lanes_map_all);
}

Expand Down Expand Up @@ -694,7 +694,7 @@ static void ucp_rndv_req_init_lanes(ucp_request_t *req,
ucp_lane_map_t lanes_map)
{
req->send.rndv.zcopy.lanes_map_all = lanes_map;
req->send.multi_lane_idx = ucs_ffs32(lanes_map);
req->send.multi_lane_idx = ucs_ffs64(lanes_map);
}

static void ucp_rndv_req_init_zcopy_lane_map(ucp_request_t *rndv_req,
Expand Down
8 changes: 6 additions & 2 deletions src/ucp/rndv/rndv_rkey_ptr.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,12 @@ ucp_rndv_rkey_ptr_query_common(const ucp_proto_query_params_t *params,
const ucp_proto_rndv_rkey_ptr_priv_t *rpriv = params->priv;

ucp_proto_default_query(params, attr);
attr->lane_map = UCS_BIT(rpriv->spriv.super.lane) |
UCS_BIT(rpriv->ack.lane);

ucs_assert(rpriv->spriv.super.lane != UCP_NULL_LANE);
attr->lane_map = UCS_BIT(rpriv->spriv.super.lane);
if (rpriv->ack.lane != UCP_NULL_LANE) {
attr->lane_map |= UCS_BIT(rpriv->ack.lane);
}
}

static void
Expand Down
62 changes: 41 additions & 21 deletions src/ucp/wireup/select.c
Original file line number Diff line number Diff line change
Expand Up @@ -362,11 +362,20 @@ ucp_wireup_init_select_info(double score, unsigned addr_index,
select_info->priority = priority;
}

static size_t ucp_wireup_max_lanes(ucp_lane_type_t lane_type)
static size_t
ucp_wireup_bw_max_lanes(const ucp_wireup_select_params_t *select_params)
{
return (select_params->address->dst_version < 18) ? UCP_MAX_LANES_LEGACY :
UCP_MAX_LANES;
}

static size_t
ucp_wireup_max_lanes(const ucp_wireup_select_params_t *select_params,
ucp_lane_type_t lane_type)
{
return ucp_wireup_lane_type_is_fast_path(lane_type) ?
UCP_MAX_FAST_PATH_LANES :
UCP_MAX_LANES;
ucp_wireup_bw_max_lanes(select_params);
}

/**
Expand Down Expand Up @@ -563,7 +572,8 @@ static UCS_F_NOINLINE ucs_status_t ucp_wireup_select_transport(
continue;
}

if (select_ctx->num_lanes < ucp_wireup_max_lanes(criteria->lane_type)) {
if (select_ctx->num_lanes <
ucp_wireup_max_lanes(select_params, criteria->lane_type)) {
/* If we have not reached the lanes limit, we can select any
combination of rsc_index/addr_index */
rsc_addr_index_map = addr_index_map;
Expand Down Expand Up @@ -697,6 +707,7 @@ ucp_wireup_path_index_is_equal(unsigned path_index1, unsigned path_index2)
}

static UCS_F_NOINLINE ucs_status_t ucp_wireup_add_lane_desc(
const ucp_wireup_select_params_t *select_params,
const ucp_wireup_select_info_t *select_info,
ucp_md_index_t dst_md_index, ucs_sys_device_t dst_sys_dev,
ucp_lane_type_t lane_type, unsigned seg_size,
Expand Down Expand Up @@ -749,7 +760,8 @@ static UCS_F_NOINLINE ucs_status_t ucp_wireup_add_lane_desc(
ucs_assert_always(!ucp_wireup_has_slow_lanes(select_ctx));
}

if (select_ctx->num_lanes >= ucp_wireup_max_lanes(lane_type)) {
if (select_ctx->num_lanes >=
ucp_wireup_max_lanes(select_params, lane_type)) {
log_level = show_error ? UCS_LOG_LEVEL_ERROR : UCS_LOG_LEVEL_DEBUG;
ucs_log(log_level, "cannot add %s lane - reached limit (%d)",
ucp_lane_type_info[lane_type].short_name,
Expand Down Expand Up @@ -797,7 +809,8 @@ ucp_wireup_add_lane(const ucp_wireup_select_params_t *select_params,
ucp_address_entry_t *addr_list = select_params->address->address_list;
unsigned addr_index = select_info->addr_index;

return ucp_wireup_add_lane_desc(select_info, addr_list[addr_index].md_index,
return ucp_wireup_add_lane_desc(select_params, select_info,
addr_list[addr_index].md_index,
addr_list[addr_index].sys_dev, lane_type,
addr_list[addr_index].iface_attr.seg_size,
select_ctx,
Expand Down Expand Up @@ -1095,7 +1108,8 @@ ucp_wireup_add_cm_lane(const ucp_wireup_select_params_t *select_params,
&select_info);

/* server is not a proxy because it can create all lanes connected */
return ucp_wireup_add_lane_desc(&select_info, UCP_NULL_RESOURCE,
return ucp_wireup_add_lane_desc(select_params, &select_info,
UCP_NULL_RESOURCE,
UCS_SYS_DEVICE_ID_UNKNOWN, UCP_LANE_TYPE_CM,
UINT_MAX, select_ctx, 1);
}
Expand Down Expand Up @@ -1541,9 +1555,10 @@ ucp_wireup_add_fast_lanes(ucp_worker_h worker,
return num_lanes;
}

static unsigned
ucp_wireup_get_current_num_lanes(ucp_ep_h ep, ucp_lane_type_t type)
static unsigned ucp_wireup_get_current_num_lanes(
const ucp_wireup_select_params_t *select_params, ucp_lane_type_t type)
{
ucp_ep_h ep = select_params->ep;
unsigned current_num_lanes = 0;
ucp_lane_index_t lane;

Expand All @@ -1552,7 +1567,7 @@ ucp_wireup_get_current_num_lanes(ucp_ep_h ep, ucp_lane_type_t type)
*/
if ((ep->cfg_index == UCP_WORKER_CFG_INDEX_NULL) ||
ucp_ep_has_cm_lane(ep)) {
return UCP_PROTO_MAX_LANES;
return ucp_wireup_bw_max_lanes(select_params);
}

for (lane = 0; lane < ucp_ep_config(ep)->key.num_lanes; ++lane) {
Expand All @@ -1570,12 +1585,11 @@ ucp_wireup_add_bw_lanes(const ucp_wireup_select_params_t *select_params,
ucp_wireup_select_context_t *select_ctx,
unsigned allow_extra_path)
{
ucp_rsc_index_t skip_dev_index = UCP_NULL_RESOURCE;
ucp_ep_h ep = select_params->ep;
ucp_context_h context = ep->worker->context;
ucp_wireup_dev_usage_count dev_count = {};
UCS_ARRAY_DEFINE_ONSTACK(ucp_proto_select_info_array_t, sinfo_array,
UCP_MAX_LANES);
ucp_proto_select_info_array_t sinfo_array = UCS_ARRAY_DYNAMIC_INITIALIZER;
ucp_rsc_index_t skip_dev_index = UCP_NULL_RESOURCE;
ucp_ep_h ep = select_params->ep;
ucp_context_h context = ep->worker->context;
ucp_wireup_dev_usage_count dev_count = {};
const uct_iface_attr_t *iface_attr;
const ucp_address_entry_t *ae;
ucs_status_t status;
Expand All @@ -1585,7 +1599,7 @@ ucp_wireup_add_bw_lanes(const ucp_wireup_select_params_t *select_params,
ucp_rsc_index_t rsc_index;
unsigned addr_index;
ucp_wireup_select_info_t *sinfo;
unsigned max_lanes;
unsigned max_lanes, num_lanes;
unsigned local_num_paths, remote_num_paths;

local_dev_bitmap = bw_info->local_dev_bitmap;
Expand All @@ -1596,7 +1610,7 @@ ucp_wireup_add_bw_lanes(const ucp_wireup_select_params_t *select_params,
* to prevent EP reconfiguration in case of dropping lanes due to low BW.
* TODO: Remove when endpoint reconfiguration is supported.
*/
max_lanes = ucp_wireup_get_current_num_lanes(select_params->ep,
max_lanes = ucp_wireup_get_current_num_lanes(select_params,
bw_info->criteria.lane_type);
max_lanes = ucs_min(max_lanes, bw_info->max_lanes);

Expand All @@ -1605,7 +1619,7 @@ ucp_wireup_add_bw_lanes(const ucp_wireup_select_params_t *select_params,
* memory registration) */
while (ucs_array_length(&sinfo_array) < max_lanes) {
if (excl_lane == UCP_NULL_LANE) {
sinfo = ucs_array_append_fixed(&sinfo_array);
sinfo = ucs_array_append(&sinfo_array, break);
status = ucp_wireup_select_transport(select_ctx, select_params,
&bw_info->criteria, tl_bitmap,
UINT64_MAX, local_dev_bitmap,
Expand Down Expand Up @@ -1665,9 +1679,13 @@ ucp_wireup_add_bw_lanes(const ucp_wireup_select_params_t *select_params,
}

bw_info->criteria.arg = NULL; /* To suppress compiler warning */
num_lanes = ucp_wireup_add_fast_lanes(ep->worker, select_params,
&sinfo_array,
bw_info->criteria.lane_type,
select_ctx);

return ucp_wireup_add_fast_lanes(ep->worker, select_params, &sinfo_array,
bw_info->criteria.lane_type, select_ctx);
ucs_array_cleanup_dynamic(&sinfo_array);
return num_lanes;
}

static ucs_status_t
Expand All @@ -1687,6 +1705,7 @@ ucp_wireup_add_am_bw_lanes(const ucp_wireup_select_params_t *select_params,
(UCP_FEATURE_TAG | UCP_FEATURE_AM)) ||
(ep_init_flags & (UCP_EP_INIT_FLAG_MEM_TYPE |
UCP_EP_INIT_CREATE_AM_LANE_ONLY)) ||
/* TODO: Check MAX_RNDV_LANES here for rndv/am case */
(context->config.ext.max_eager_lanes < 2)) {
return UCS_OK;
}
Expand Down Expand Up @@ -1882,7 +1901,8 @@ ucp_wireup_add_rma_bw_lanes(const ucp_wireup_select_params_t *select_params,

if (context->config.ext.proto_enable &&
(select_params->address->dst_version > UCP_RELEASE_LEGACY)) {
bw_info.max_lanes = UCP_PROTO_MAX_LANES;
bw_info.max_lanes = ucp_wireup_max_lanes(select_params,
bw_info.criteria.lane_type);
} else {
bw_info.max_lanes = context->config.ext.max_rndv_lanes;
}
Expand Down
5 changes: 2 additions & 3 deletions src/ucp/wireup/wireup.c
Original file line number Diff line number Diff line change
Expand Up @@ -1169,7 +1169,7 @@ ucp_wireup_connect_lane(ucp_ep_h ep, unsigned ep_init_flags,
* create a wireup endpoint which will start connection establishment
* protocol using an auxiliary transport.
*/
if (ucp_ep_config(ep)->p2p_lanes & UCS_BIT(lane)) {
if (ucp_ep_is_lane_p2p(ep, lane)) {
return ucp_wireup_connect_lane_to_ep(ep, ep_init_flags, lane,
path_index, rsc_index, wiface,
remote_address);
Expand Down Expand Up @@ -1447,8 +1447,7 @@ ucp_wireup_check_config_intersect(ucp_ep_h ep, ucp_ep_config_key_t *new_key,
ucp_wireup_ep_set_aux(cm_wireup_ep,
ucp_wireup_ep_extract_next_ep(uct_ep),
old_key->lanes[reuse_lane].rsc_index,
ucp_ep_config(ep)->p2p_lanes &
UCS_BIT(reuse_lane));
ucp_ep_is_lane_p2p(ep, reuse_lane));

/* reset the UCT EP from the previous WIREUP lane and destroy its WIREUP EP,
* since it's not needed anymore in the new configuration, UCT EP will be
Expand Down
2 changes: 1 addition & 1 deletion src/ucp/wireup/wireup_cm.c
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,7 @@ static ucs_status_t ucp_cm_ep_init_lanes(ucp_ep_h ep,
ucp_ep_set_lane(ep, lane_idx, uct_ep);

UCS_STATIC_BITMAP_SET(tl_bitmap, rsc_idx);
if (ucp_ep_config(ep)->p2p_lanes & UCS_BIT(lane_idx)) {
if (ucp_ep_is_lane_p2p(ep, lane_idx)) {
path_index = ucp_ep_get_path_index(ep, lane_idx);
status = ucp_wireup_ep_connect(ucp_ep_get_lane(ep, lane_idx), 0,
rsc_idx, path_index, 0, NULL);
Expand Down
2 changes: 1 addition & 1 deletion src/ucs/arch/bitops.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ BEGIN_C_DECLS
((sizeof(_n) <= 4) ? __builtin_clz((uint32_t)(_n)) : __builtin_clzl(_n))

/* Returns the number of bits lower than 'bit_index' that are set in 'mask'
* For example: ucs_idx2bitmap(mask=0xF0, idx=6) returns 2
* For example: ucs_bitmap2idx(mask=0xF0, idx=6) returns 2
*/
static inline unsigned ucs_bitmap2idx(uint64_t mask, unsigned bit_index)
{
Expand Down