Skip to content

Commit

Permalink
Merge pull request #6791 from hzhou/2311_split_res
Browse files Browse the repository at this point in the history
comm: fix split_type error checking and remove shmem_topo

Approved-by: Ken Raffenetti
Approved-by: Lisandro Dalcin
  • Loading branch information
hzhou committed Nov 10, 2023
2 parents bcdc237 + 745392d commit 766a59b
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 66 deletions.
2 changes: 1 addition & 1 deletion src/binding/c/comm_api.txt
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ MPI_Comm_split_type:
The 'split_type' must be non-negative or 'MPI_UNDEFINED'.
*/
{ -- error_check --
CHECKENUM: split_type, splittype, MPI_UNDEFINED MPI_COMM_TYPE_SHARED MPI_COMM_TYPE_HW_GUIDED MPI_COMM_TYPE_HW_UNGUIDED MPIX_COMM_TYPE_NEIGHBORHOOD
CHECKENUM: split_type, splittype, MPI_UNDEFINED MPI_COMM_TYPE_SHARED MPI_COMM_TYPE_HW_GUIDED MPI_COMM_TYPE_HW_UNGUIDED MPI_COMM_TYPE_RESOURCE_GUIDED MPIX_COMM_TYPE_NEIGHBORHOOD
}

MPI_Comm_test_inter:
Expand Down
31 changes: 2 additions & 29 deletions src/mpi/comm/comm_split_type.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
#include "mpiimpl.h"
#include "mpicomm.h"

static const char *SHMEM_INFO_KEY = "shmem_topo";

static int split_type_by_node(MPIR_Comm * comm_ptr, int key, MPIR_Comm ** newcomm_ptr);
static int node_split(MPIR_Comm * comm_ptr, int key, const char *hint_str,
MPIR_Comm ** newcomm_ptr);
Expand Down Expand Up @@ -36,8 +34,8 @@ int MPIR_Comm_split_type(MPIR_Comm * user_comm_ptr, int split_type, int key,

if (split_type == MPI_COMM_TYPE_SHARED) {
/* NOTE: MPIR_Comm_split_impl will typically call device layer function.
* Currently ch4 calls MPIR_Comm_split_type_node_topo, which checks
* the "shmem_topo" infohints. Thus doesn't run the fallback code here.
* Currently ch4 calls MPIR_Comm_split_type_node_topo, thus doesn't run
* the fallback code here.
* On the otherhand, ch3:sock will directly execute code here. */
mpi_errno = MPIR_Comm_split_type_self(comm_ptr, key, newcomm_ptr);
MPIR_ERR_CHECK(mpi_errno);
Expand Down Expand Up @@ -289,31 +287,6 @@ int MPIR_Comm_split_type_node_topo(MPIR_Comm * user_comm_ptr, int key,
mpi_errno = split_type_by_node(user_comm_ptr, key, &comm_ptr);
MPIR_ERR_CHECK(mpi_errno);

if (comm_ptr == NULL) {
/* it is possible with intercomm split */
goto fn_exit;
}

const char *shmem_topo;
mpi_errno = MPII_collect_info_key(comm_ptr, info_ptr, SHMEM_INFO_KEY, &shmem_topo);

/* if all processes do not have the same shmem_topo info or missing the info, skip
* topology-aware comm split */
if (mpi_errno || !shmem_topo)
goto use_node_comm;

/* if hw topology is not initialized, skip topology-aware comm split */
if (!MPIR_hwtopo_is_initialized())
goto use_node_comm;

mpi_errno = node_split(comm_ptr, key, shmem_topo, newcomm_ptr);
MPIR_ERR_CHECK(mpi_errno);

MPIR_Comm_free_impl(comm_ptr);

goto fn_exit;

use_node_comm:
*newcomm_ptr = comm_ptr;

fn_exit:
Expand Down
65 changes: 29 additions & 36 deletions test/mpi/comm/cmsplit_type.c
Original file line number Diff line number Diff line change
Expand Up @@ -116,40 +116,41 @@ int main(int argc, char *argv[])
}
#endif

/* test for topology hints: pass a valid info value, but do not
* expect that the MPI implementation will respect it. */
#if MTEST_HAVE_MIN_MPI_VERSION(4,1)
/* Check to see if MPI_COMM_TYPE_HW_GUIDED works correctly */
for (i = 0; split_topo[i]; i++) {
MPI_Info_create(&info);
MPI_Info_set(info, "shmem_topo", split_topo[i]);
MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, info, &comm);
if (comm != MPI_COMM_NULL) {
int newsize;
MPI_Comm_size(comm, &newsize);
if (newsize > size) {
printf("MPI_COMM_TYPE_SHARED (shmem_topo): comm size (%d) > node size (%d)\n",
newsize, size);
errs++;
}
MPI_Info_set(info, "mpi_hw_resource_type", split_topo[i]);
int ret;
ret = MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_RESOURCE_GUIDED, 0, info, &comm);
/* result will depend on platform and process bindings, just check returns */
if (ret != MPI_SUCCESS) {
printf("MPI_COMM_TYPE_HW_GUIDED (%s) failed\n", split_topo[i]);
errs++;
} else if (comm != MPI_COMM_NULL) {
MPI_Comm_free(&comm);
}
MPI_Info_free(&info);
}
#endif

#if MTEST_HAVE_MIN_MPI_VERSION(4,1)
/* test for topology hints: pass different info values from
* different processes, the hint must be ignored then. */
MPI_Info_create(&info);
if (rank % 2 == 0) {
MPI_Info_set(info, "shmem_topo", split_topo[0]);
MPI_Info_set(info, "mpi_hw_resource_type", split_topo[0]);
} else {
MPI_Info_set(info, "shmem_topo", split_topo[1]);
MPI_Info_set(info, "mpi_hw_resource_type", split_topo[1]);
}
MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, info, &comm);
MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_RESOURCE_GUIDED, 0, info, &comm);
if (comm != MPI_COMM_NULL) {
int newsize;
MPI_Comm_size(comm, &newsize);
if (newsize > size) {
printf("MPI_COMM_TYPE_SHARED (shmem_topo): comm size (%d) > node size (%d)\n",
newsize, size);
printf
("MPI_COMM_TYPE_RESOURCE_GUIDED (mpi_hw_resource_type): comm size (%d) > node size (%d)\n",
newsize, size);
errs++;
}
MPI_Comm_free(&comm);
Expand All @@ -161,16 +162,17 @@ int main(int argc, char *argv[])
* not others. */
if (rank % 2 == 0) {
MPI_Info_create(&info);
MPI_Info_set(info, "shmem_topo", split_topo[0]);
MPI_Info_set(info, "mpi_hw_resource_type", split_topo[0]);
} else
info = MPI_INFO_NULL;
MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, info, &comm);
MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_RESOURCE_GUIDED, 0, info, &comm);
if (comm != MPI_COMM_NULL) {
int newsize;
MPI_Comm_size(comm, &newsize);
if (newsize > size) {
printf("MPI_COMM_TYPE_SHARED (shmem_topo): comm size (%d) > node size (%d)\n",
newsize, size);
printf
("MPI_COMM_TYPE_RESOURCE_GUIDED (mpi_hw_resource_type): comm size (%d) > node size (%d)\n",
newsize, size);
errs++;
}
MPI_Comm_free(&comm);
Expand All @@ -182,24 +184,15 @@ int main(int argc, char *argv[])
/* test for topology hints: pass an invalid info value and make
* sure the behavior is as if no info key was passed. */
MPI_Info_create(&info);
MPI_Info_set(info, "shmem_topo", "__garbage_value__");
MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0, info, &comm);
if (comm == MPI_COMM_NULL) {
printf("MPI_COMM_TYPE_SHARED (shmem_topo): invalid info value result in MPI_COMM_NULL\n");
MPI_Info_set(info, "mpi_hw_resource_type", "__garbage_value__");
MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_RESOURCE_GUIDED, 0, info, &comm);
if (comm != MPI_COMM_NULL) {
printf
("MPI_COMM_TYPE_RESOURCE_GUIDED (mpi_hw_resource_type): invalid info value didn't result in MPI_COMM_NULL\n");
errs++;
} else {
int newsize;

MPI_Comm_size(comm, &newsize);
if (newsize != size) {
printf("MPI_COMM_TYPE_SHARED (shmem_topo): comm size (%d) != node size (%d)\n",
size, size);
errs++;
}
MPI_Comm_free(&comm);
}
MPI_Info_free(&info);

#endif

#if defined(MPIX_COMM_TYPE_NEIGHBORHOOD) && defined(HAVE_MPI_IO)
/* the MPICH-specific MPIX_COMM_TYPE_NEIGHBORHOOD */
Expand Down

0 comments on commit 766a59b

Please sign in to comment.