Skip to content

Commit

Permalink
coll/tuned: Change the bcast default collective algorithm selection
Browse files Browse the repository at this point in the history
The default algorithm selections were out of date and not performing well. After gathering data using the ompi-collectives-tuning package, new default algorithm decisions are selected for bcast.

Signed-off-by: Jessie Yang <jiaxiyan@amazon.com>
  • Loading branch information
jiaxiyan committed Feb 13, 2024
1 parent 7fc4535 commit 7265e08
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 1 deletion.
1 change: 1 addition & 0 deletions ompi/mca/coll/tuned/coll_tuned.h
Expand Up @@ -136,6 +136,7 @@ int ompi_coll_tuned_barrier_intra_check_forced_init (coll_tuned_force_algorithm_

/* Bcast */
/* Fixed-rule decision function for broadcast; takes the shared BCAST_ARGS parameter list. */
int ompi_coll_tuned_bcast_intra_dec_fixed(BCAST_ARGS);
/* Fixed-rule decision function for broadcast on disjoint communicators
 * (the variant described as targeting inter node communication). */
int ompi_coll_tuned_bcast_intra_disjoint_dec_fixed(BCAST_ARGS);
/* NOTE(review): presumably the counterpart driven by dynamic rules; body not visible here - confirm. */
int ompi_coll_tuned_bcast_intra_dec_dynamic(BCAST_ARGS);
/* Runs the broadcast with an explicitly chosen algorithm id, fan in/out and segment size. */
int ompi_coll_tuned_bcast_intra_do_this(BCAST_ARGS, int algorithm, int faninout, int segsize);
/* NOTE(review): looks like it sets up the MCA parameters for forcing a bcast algorithm - confirm against the definition. */
int ompi_coll_tuned_bcast_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
Expand Down
72 changes: 72 additions & 0 deletions ompi/mca/coll/tuned/coll_tuned_decision_fixed.c
Expand Up @@ -651,6 +651,78 @@ int ompi_coll_tuned_bcast_intra_dec_fixed(void *buff, int count,
alg, 0, 0);
}


/*
 * bcast_intra_disjoint_dec_fixed
 *
 * Fixed broadcast decision for inter node (disjoint) communicators.
 *
 * Function:  - picks a broadcast algorithm id from the communicator size and
 *              the total message size in bytes, then runs it
 * Accepts:   - same arguments as MPI_Bcast(), plus the collective module
 * Returns:   - whatever ompi_coll_tuned_bcast_intra_do_this() returns
 *              (MPI_SUCCESS or an error code from the bcast implementation)
 */
int ompi_coll_tuned_bcast_intra_disjoint_dec_fixed(void *buff, int count,
                                                   struct ompi_datatype_t *datatype, int root,
                                                   struct ompi_communicator_t *comm,
                                                   mca_coll_base_module_t *module)
{
    /* Messages of at least this many bytes (1 MiB) always use binary_tree
     * once the communicator has 4 or more ranks. */
    const size_t binary_tree_from = 1048576;

    size_t type_bytes, msg_bytes, knomial_from;
    int choice;
    int nprocs = ompi_comm_size(comm);

    ompi_datatype_type_size(datatype, &type_bytes);
    msg_bytes = type_bytes * (unsigned long)count;

    OPAL_OUTPUT((ompi_coll_tuned_stream, "ompi_coll_tuned_bcast_intra_disjoint_dec_fixed"
                 " root %d rank %d com_size %d",
                 root, ompi_comm_rank(comm), nprocs));

    /** Algorithms:
     *  {1, "basic_linear"},
     *  {2, "chain"},
     *  {3, "pipeline"},
     *  {4, "split_binary_tree"},
     *  {5, "binary_tree"},
     *  {6, "binomial"},
     *  {7, "knomial"},
     *  {8, "scatter_allgather"},
     *  {9, "scatter_allgather_ring"},
     */
    if (nprocs < 4) {
        /* Tiny communicators: basic_linear regardless of message size. */
        choice = 1;
    } else {
        /* Per size band, the message size at which basic_linear hands over
         * to knomial.  For 4..15 ranks this equals the binary_tree cut-off,
         * so knomial is never picked there. */
        if (nprocs < 16) {
            knomial_from = binary_tree_from;
        } else if (nprocs < 32) {
            knomial_from = 262144;
        } else {
            knomial_from = 65536;
        }

        if (msg_bytes < knomial_from) {
            choice = 1;
        } else if (msg_bytes < binary_tree_from) {
            choice = 7;
        } else {
            choice = 5;
        }
    }

    /* faninout and segsize are both passed as 0, as in the other fixed decision. */
    return ompi_coll_tuned_bcast_intra_do_this (buff, count, datatype, root,
                                                comm, module,
                                                choice, 0, 0);
}


/*
* reduce_intra_dec
*
Expand Down
8 changes: 7 additions & 1 deletion ompi/mca/coll/tuned/coll_tuned_module.c
Expand Up @@ -93,15 +93,21 @@ ompi_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority)
/* By default stick with the fixed version of the tuned collectives. Later on,
* when the module gets enabled, set the correct version based on the availability
* of the dynamic rules.
* For some collectives, we distinguish disjoint communicators so that the
* decision can be specific to inter-node communication.
*/
if (OMPI_COMM_IS_DISJOINT(comm)) {
tuned_module->super.coll_bcast = ompi_coll_tuned_bcast_intra_disjoint_dec_fixed;
} else {
tuned_module->super.coll_bcast = ompi_coll_tuned_bcast_intra_dec_fixed;
}
tuned_module->super.coll_allgather = ompi_coll_tuned_allgather_intra_dec_fixed;
tuned_module->super.coll_allgatherv = ompi_coll_tuned_allgatherv_intra_dec_fixed;
tuned_module->super.coll_allreduce = ompi_coll_tuned_allreduce_intra_dec_fixed;
tuned_module->super.coll_alltoall = ompi_coll_tuned_alltoall_intra_dec_fixed;
tuned_module->super.coll_alltoallv = ompi_coll_tuned_alltoallv_intra_dec_fixed;
tuned_module->super.coll_alltoallw = NULL;
tuned_module->super.coll_barrier = ompi_coll_tuned_barrier_intra_dec_fixed;
tuned_module->super.coll_bcast = ompi_coll_tuned_bcast_intra_dec_fixed;
tuned_module->super.coll_exscan = NULL;
tuned_module->super.coll_gather = ompi_coll_tuned_gather_intra_dec_fixed;
tuned_module->super.coll_gatherv = NULL;
Expand Down

0 comments on commit 7265e08

Please sign in to comment.