Skip to content

Commit

Permalink
ch4/ofi: multi-nic setup bugfix
Browse files Browse the repository at this point in the history
This patch fixes an out-of-bounds bug that occurs when more than
MPIDI_OFI_MAX_NICS NICs are discovered, which causes illegal access to
the MPIDI_OFI_global_t.prov_use array.

Signed-off-by: Wenduo Wang <wenduwan@amazon.com>
  • Loading branch information
wenduwan committed Sep 22, 2023
1 parent 2d3a750 commit 919ff2c
Showing 1 changed file with 9 additions and 12 deletions.
21 changes: 9 additions & 12 deletions src/mpid/ch4/netmod/ofi/ofi_nic.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,6 @@ int MPIDI_OFI_init_multi_nic(struct fi_info *prov)
{
int mpi_errno = MPI_SUCCESS;
int nic_count = 0;
int max_nics = MPIR_CVAR_CH4_OFI_MAX_NICS;

if (MPIR_CVAR_CH4_OFI_MAX_NICS == 0 || MPIR_CVAR_CH4_OFI_MAX_NICS <= -2) {
/* Invalid values for the CVAR will default to single nic */
max_nics = 1;
}

/* Count the number of NICs */
struct fi_info *first_prov = NULL;
Expand All @@ -113,10 +107,7 @@ int MPIDI_OFI_init_multi_nic(struct fi_info *prov)
!MPIDI_OFI_nic_already_used(p, MPIDI_OFI_global.prov_use, nic_count)) {
MPIDI_OFI_global.prov_use[nic_count] = fi_dupinfo(p);
MPIR_Assert(MPIDI_OFI_global.prov_use[nic_count]);
nic_count++;
if (nic_count == max_nics) {
break;
}
++nic_count;
}
#endif
}
Expand Down Expand Up @@ -181,12 +172,18 @@ static int setup_multi_nic(int nic_count)
MPIDI_OFI_nic_info_t *nics = MPIDI_OFI_global.nic_info;
MPIR_CHKLMEM_DECL(1);

MPIDI_OFI_global.num_nics = nic_count;

/* Initially sort the NICs by name. This way all intranode ranks have a consistent view. */
qsort(MPIDI_OFI_global.prov_use, MPIDI_OFI_global.num_nics, sizeof(struct fi_info *),
compare_nic_names);

if (nic_count > MPIDI_OFI_MAX_NICS) {
fprintf(stderr, "Warning: Detected %d NICs, but only %d(MPIDI_OFI_MAX_NICS) are considered.\n",
nic_count, MPIDI_OFI_MAX_NICS);
nic_count = MPIDI_OFI_MAX_NICS;
}

MPIDI_OFI_global.num_nics = nic_count;

/* Limit the number of physical NICs depending on the CVAR */
if (MPIR_CVAR_CH4_OFI_MAX_NICS > 0 && MPIDI_OFI_global.num_nics > MPIR_CVAR_CH4_OFI_MAX_NICS) {
for (int i = MPIR_CVAR_CH4_OFI_MAX_NICS; i < MPIDI_OFI_global.num_nics; ++i) {
Expand Down

0 comments on commit 919ff2c

Please sign in to comment.