Skip to content

Commit

Permalink
coll/tuned: Change the bcast default collective algorithm selection
Browse files Browse the repository at this point in the history
The default algorithm selections were out of date and not performing well. After gathering data using the ompi-collectives-tuning package, new default algorithm decisions are selected for bcast.

Signed-off-by: Jessie Yang <jiaxiyan@amazon.com>
  • Loading branch information
jiaxiyan committed Feb 1, 2024
1 parent e53e700 commit 77392aa
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 0 deletions.
1 change: 1 addition & 0 deletions ompi/mca/coll/tuned/coll_tuned.h
Expand Up @@ -136,6 +136,7 @@ int ompi_coll_tuned_barrier_intra_check_forced_init (coll_tuned_force_algorithm_

/* Bcast */
/* Fixed-rule algorithm selection for bcast; hands off to the disjoint variant
 * below when OMPI_COMM_IS_DISJOINT(comm) is true. */
int ompi_coll_tuned_bcast_intra_dec_fixed(BCAST_ARGS);
/* Fixed-rule algorithm selection used for disjoint communicators. */
int ompi_coll_tuned_bcast_intra_disjoint_dec_fixed(BCAST_ARGS);
int ompi_coll_tuned_bcast_intra_dec_dynamic(BCAST_ARGS);
/* Called by the fixed decision functions with the chosen algorithm id and
 * 0 for both faninout and segsize.
 * NOTE(review): presumably executes the bcast algorithm with that id -- confirm. */
int ompi_coll_tuned_bcast_intra_do_this(BCAST_ARGS, int algorithm, int faninout, int segsize);
int ompi_coll_tuned_bcast_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
Expand Down
76 changes: 76 additions & 0 deletions ompi/mca/coll/tuned/coll_tuned_decision_fixed.c
Expand Up @@ -514,6 +514,10 @@ int ompi_coll_tuned_bcast_intra_dec_fixed(void *buff, int count,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
if (OMPI_COMM_IS_DISJOINT(comm)) {
return ompi_coll_tuned_bcast_intra_disjoint_dec_fixed(buff, count, datatype, root, comm, module);
}

size_t total_dsize, dsize;
int communicator_size, alg;
communicator_size = ompi_comm_size(comm);
Expand Down Expand Up @@ -651,6 +655,78 @@ int ompi_coll_tuned_bcast_intra_dec_fixed(void *buff, int count,
alg, 0, 0);
}


/*
 *  bcast_intra_disjoint_dec (fixed decision rules)
 *
 *  Function:  - selects the broadcast algorithm for the communicators that
 *               ompi_coll_tuned_bcast_intra_dec_fixed() redirects here, i.e.
 *               those for which OMPI_COMM_IS_DISJOINT(comm) is true
 *               (inter node communicators)
 *  Accepts:   - same arguments as MPI_Bcast()
 *  Returns:   - MPI_SUCCESS or error code (passed from the bcast implementation)
 */
int ompi_coll_tuned_bcast_intra_disjoint_dec_fixed(void *buff, int count,
                                                   struct ompi_datatype_t *datatype, int root,
                                                   struct ompi_communicator_t *comm,
                                                   mca_coll_base_module_t *module)
{
    /** Algorithms:
     *  {1, "basic_linear"},
     *  {2, "chain"},
     *  {3, "pipeline"},
     *  {4, "split_binary_tree"},
     *  {5, "binary_tree"},
     *  {6, "binomial"},
     *  {7, "knomial"},
     *  {8, "scatter_allgather"},
     *  {9, "scatter_allgather_ring"},
     *
     * Only three of them are ever selected by the rules below.
     */
    const int alg_basic_linear = 1;
    const int alg_binary_tree = 5;
    const int alg_knomial = 7;

    /* Message size (bytes) from which binary_tree is used, for every
     * communicator with at least 4 ranks. */
    const size_t binary_tree_min_bytes = 1048576;

    const int nprocs = ompi_comm_size(comm);

    size_t type_bytes;
    ompi_datatype_type_size(datatype, &type_bytes);
    const size_t msg_bytes = type_bytes * (unsigned long)count;

    OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_bcast_intra_disjoint_dec_fixed"
                 " root %d rank %d com_size %d",
                 root, ompi_comm_rank(comm), nprocs));

    int chosen_alg;
    if (nprocs < 4) {
        /* Tiny communicators: basic_linear regardless of message size. */
        chosen_alg = alg_basic_linear;
    } else if (msg_bytes >= binary_tree_min_bytes) {
        chosen_alg = alg_binary_tree;
    } else {
        /* Below the binary_tree threshold the choice is basic_linear vs.
         * knomial; the switch-over point shrinks as the communicator grows.
         * For fewer than 16 ranks the cutoff equals the binary_tree
         * threshold, so knomial is never picked there. */
        size_t linear_below_bytes;
        if (nprocs < 16) {
            linear_below_bytes = binary_tree_min_bytes;
        } else if (nprocs < 32) {
            linear_below_bytes = 262144;
        } else {
            linear_below_bytes = 65536;
        }
        chosen_alg = (msg_bytes < linear_below_bytes) ? alg_basic_linear : alg_knomial;
    }

    /* faninout and segsize are both passed as 0. */
    return ompi_coll_tuned_bcast_intra_do_this (buff, count, datatype, root,
                                                comm, module,
                                                chosen_alg, 0, 0);
}


/*
* reduce_intra_dec
*
Expand Down

0 comments on commit 77392aa

Please sign in to comment.