Skip to content

Commit

Permalink
UCP/TEST: Improve multi_rail_max + fix am_bw_lanes creation for rndv through am
Browse files Browse the repository at this point in the history

- Enable the multi_rail_max test for protov2
- Set max_lanes to 16 for protov1
- Skip testing the unsupported case (protov1 rndv/am does not respect MAX_RNDV_LANES)
- Relax the requirement on how the message is split across lanes
- Enable am_bw lanes creation to support multi-lane RNDV through AM
  • Loading branch information
ivankochin committed May 16, 2024
1 parent ffcca4a commit 2f16fe7
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 35 deletions.
15 changes: 8 additions & 7 deletions src/ucp/wireup/select.c
Original file line number Diff line number Diff line change
Expand Up @@ -1685,12 +1685,19 @@ ucp_wireup_add_am_bw_lanes(const ucp_wireup_select_params_t *select_params,
ucp_wireup_select_bw_info_t bw_info;
unsigned num_am_bw_lanes;

bw_info.max_lanes = context->config.ext.max_eager_lanes - 1;
/* rndv/am/zcopy proto should take max_rndv_lanes value into account */
if (context->config.ext.proto_enable) {
bw_info.max_lanes = ucs_max(bw_info.max_lanes,
context->config.ext.max_rndv_lanes - 1);
}

/* Check if we need active message BW lanes */
if (!(ucp_ep_get_context_features(ep) &
(UCP_FEATURE_TAG | UCP_FEATURE_AM)) ||
(ep_init_flags & (UCP_EP_INIT_FLAG_MEM_TYPE |
UCP_EP_INIT_CREATE_AM_LANE_ONLY)) ||
(context->config.ext.max_eager_lanes < 2)) {
(bw_info.max_lanes == 0)) {
return UCS_OK;
}

Expand All @@ -1713,12 +1720,6 @@ ucp_wireup_add_am_bw_lanes(const ucp_wireup_select_params_t *select_params,
bw_info.local_dev_bitmap = UINT64_MAX;
bw_info.remote_dev_bitmap = UINT64_MAX;
bw_info.md_map = 0;
bw_info.max_lanes = context->config.ext.max_eager_lanes - 1;
/* rndv/am/zcopy proto should take max_rndv_lanes value into account */
if (context->config.ext.proto_enable) {
bw_info.max_lanes = ucs_max(bw_info.max_lanes,
context->config.ext.max_rndv_lanes - 1);
}

/* am_bw_lane[0] is am_lane, so don't re-select it here */
am_lane = UCP_NULL_LANE;
Expand Down
3 changes: 2 additions & 1 deletion test/gtest/ucp/test_ucp_sockaddr.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1481,7 +1481,8 @@ UCS_TEST_SKIP_COND_P(test_max_lanes_16, lanes_reconf, !cm_use_all_devices())
UCP_INSTANTIATE_TEST_CASE_TLS(test_max_lanes_16, ib, "ib")

using test_max_lanes_64 = test_max_lanes<64>;
UCS_TEST_SKIP_COND_P(test_max_lanes_64, lanes_reconf, !cm_use_all_devices())
UCS_TEST_SKIP_COND_P(test_max_lanes_64, lanes_reconf,
!cm_use_all_devices() || !is_proto_enabled())
{
test_num_lanes();
}
Expand Down
51 changes: 24 additions & 27 deletions test/gtest/ucp/test_ucp_tag_xfer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,6 @@ class test_ucp_tag_xfer : public test_ucp_tag {
modify_config("PROTO_ENABLE", "n");
}

/* Init number of lanes according to test requirement
* (default is 2, for max_lanes test we use max_lanes) */
std::string num_lanes_str(std::to_string(num_lanes()));

modify_config("MAX_EAGER_LANES", num_lanes_str);
modify_config("MAX_RNDV_LANES", num_lanes_str);

test_ucp_tag::init();
}

Expand Down Expand Up @@ -1207,6 +1200,16 @@ class multi_rail_max : public test_ucp_tag_xfer {
public:
/* Fixture setup for multi_rail_max: skip the rndv/am variants when proto is
 * not enabled, apply the lane-count configuration, activate statistics and
 * then run the base test_ucp_tag_xfer initialization. */
void init() override
{
auto var = get_variant_value();
/* Only the two rndv/am variants are skipped, and only when
 * is_proto_enabled() returns false; the reason is stated in the skip
 * message itself */
if ((var == VARIANT_RNDV_AM_ZCOPY || var == VARIANT_RNDV_AM_BCOPY) &&
(!is_proto_enabled())) {
UCS_TEST_SKIP_R("protov1 rndv/am doesn't respect MAX_RNDV_LANES");
}

/* The same value, taken from num_lanes(), is used for both settings below */
std::string num_lanes_str(std::to_string(num_lanes()));
/* NOTE(review): SETENV_IF_NOT_EXIST presumably leaves an already-set
 * IB_NUM_PATHS value untouched - confirm against modify_config() */
modify_config("IB_NUM_PATHS", num_lanes_str, SETENV_IF_NOT_EXIST);
modify_config("MAX_RNDV_LANES", num_lanes_str);

/* NOTE(review): statistics are activated before the base init, presumably
 * so that the per-lane byte counters read by the test exist - confirm */
stats_activate();
test_ucp_tag_xfer::init();
}
Expand All @@ -1229,42 +1232,36 @@ class multi_rail_max : public test_ucp_tag_xfer {

unsigned num_lanes() override
{
return max_lanes;
if (get_variant_value() == VARIANT_PROTO_V1 || !is_proto_enabled()) {
return 16;
}
return 64;
}

const uint32_t max_lanes = 64;
};

UCS_TEST_P(multi_rail_max, max_lanes, "IB_NUM_PATHS?=64", "TM_SW_RNDV=y",
UCS_TEST_P(multi_rail_max, max_lanes, "TM_SW_RNDV=y",
"RNDV_THRESH=1", "MIN_RNDV_CHUNK_SIZE=1", "MULTI_PATH_RATIO=0.0001")
{
if (is_proto_enabled()) {
UCS_TEST_SKIP_R("TM_SW_RNDV has no effect with proto v2");
}

receiver().connect(&sender(), get_ep_params());
test_run_xfer(true, true, true, true, false);

ucp_lane_index_t num_lanes = ucp_ep_num_lanes(sender().ep());
ASSERT_EQ(ucp_ep_num_lanes(receiver().ep()), num_lanes);
ASSERT_EQ(num_lanes, max_lanes);
ucp_lane_index_t current_num_lanes = ucp_ep_num_lanes(sender().ep());
ASSERT_EQ(ucp_ep_num_lanes(receiver().ep()), current_num_lanes);
ASSERT_EQ(current_num_lanes, num_lanes());

size_t chunk_size = get_msg_size() / num_lanes;

for (ucp_lane_index_t lane = 0; lane < num_lanes; ++lane) {
size_t bytes_sent = 0;
for (ucp_lane_index_t lane = 0; lane < num_lanes(); ++lane) {
size_t sender_tx = get_bytes_sent(sender().ep(), lane);
size_t receiver_tx = get_bytes_sent(receiver().ep(), lane);
UCS_TEST_MESSAGE << "lane[" << static_cast<int>(lane) << "] : "
<< "sender " << sender_tx << " receiver " << receiver_tx;

/* Verify that each lane sent something, except the active message lane
that could be used only for control messages */
if (lane == num_lanes - 1) {
EXPECT_GT(sender_tx + receiver_tx, 0); // last lane sends the rest
} else if (lane != ucp_ep_get_am_lane(sender().ep())) {
EXPECT_GE(sender_tx + receiver_tx, chunk_size);
}
EXPECT_GT(sender_tx + receiver_tx, 0);
bytes_sent += sender_tx + receiver_tx;
}

EXPECT_GE(bytes_sent, get_msg_size());
}

UCP_INSTANTIATE_TEST_CASE_TLS(multi_rail_max, rc, "rc")
Expand Down

0 comments on commit 2f16fe7

Please sign in to comment.