Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

coll/han: Add alltoall algorithm #12387

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 2 additions & 1 deletion ompi/mca/coll/han/Makefile.am
Expand Up @@ -2,7 +2,7 @@
# Copyright (c) 2018-2020 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2022 Amazon.com, Inc. or its affiliates. All Rights reserved.
# Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights reserved.
# Copyright (c) 2022 BULL S.A.S. All rights reserved.
# $COPYRIGHT$
#
Expand All @@ -15,6 +15,7 @@ sources = \
coll_han.h \
coll_han_trigger.h \
coll_han_algorithms.h \
coll_han_alltoall.c \
coll_han_dynamic.h \
coll_han_dynamic_file.h \
coll_han_barrier.c \
Expand Down
37 changes: 37 additions & 0 deletions ompi/mca/coll/han/coll_han.h
Expand Up @@ -39,6 +39,7 @@
#include "mpi.h"
#include "ompi/mca/mca.h"
#include "opal/util/output.h"
#include "opal/mca/smsc/smsc.h"
#include "ompi/mca/coll/base/coll_base_functions.h"
#include "coll_han_trigger.h"
#include "ompi/mca/coll/han/coll_han_dynamic.h"
Expand Down Expand Up @@ -194,6 +195,7 @@ typedef struct mca_coll_han_op_module_name_t {
mca_coll_han_op_up_low_module_name_t gatherv;
mca_coll_han_op_up_low_module_name_t scatter;
mca_coll_han_op_up_low_module_name_t scatterv;
mca_coll_han_op_up_low_module_name_t alltoall;
} mca_coll_han_op_module_name_t;

/**
Expand Down Expand Up @@ -249,6 +251,16 @@ typedef struct mca_coll_han_component_t {
uint32_t han_scatterv_up_module;
/* low level module for scatterv */
uint32_t han_scatterv_low_module;

/* low level module for alltoall */
uint32_t han_alltoall_low_module;
/* alltoall: parallel stages */
int32_t han_alltoall_pstages;
/* alltoall: factor to decrease exchange size while increasing rounds */
int32_t han_alltoall_subdivfactor;
uint32_t han_alltoall_algorithm;


/* name of the modules */
mca_coll_han_op_module_name_t han_op_module_name;
/* whether we need reproducible results
Expand Down Expand Up @@ -283,6 +295,7 @@ typedef struct mca_coll_han_component_t {
*/
typedef struct mca_coll_han_single_collective_fallback_s {
union {
mca_coll_base_module_alltoall_fn_t alltoall;
mca_coll_base_module_allgather_fn_t allgather;
mca_coll_base_module_allgatherv_fn_t allgatherv;
mca_coll_base_module_allreduce_fn_t allreduce;
Expand All @@ -303,6 +316,7 @@ typedef struct mca_coll_han_single_collective_fallback_s {
* creation.
*/
typedef struct mca_coll_han_collectives_fallback_s {
mca_coll_han_single_collective_fallback_t alltoall;
mca_coll_han_single_collective_fallback_t allgather;
mca_coll_han_single_collective_fallback_t allgatherv;
mca_coll_han_single_collective_fallback_t allreduce;
Expand Down Expand Up @@ -364,6 +378,9 @@ OBJ_CLASS_DECLARATION(mca_coll_han_module_t);
* Some defines to stick to the naming used in the other components in terms of
* fallback routines
*/
/* alltoall fallback: the module_fn.alltoall entry and the module field of fallback.alltoall */
#define previous_alltoall fallback.alltoall.module_fn.alltoall
#define previous_alltoall_module fallback.alltoall.module

#define previous_allgather fallback.allgather.module_fn.allgather
#define previous_allgather_module fallback.allgather.module

Expand Down Expand Up @@ -419,6 +436,7 @@ OBJ_CLASS_DECLARATION(mca_coll_han_module_t);
HAN_LOAD_FALLBACK_COLLECTIVE(HANM, COMM, allreduce); \
HAN_LOAD_FALLBACK_COLLECTIVE(HANM, COMM, allgather); \
HAN_LOAD_FALLBACK_COLLECTIVE(HANM, COMM, allgatherv); \
HAN_LOAD_FALLBACK_COLLECTIVE(HANM, COMM, alltoall); \
han_module->enabled = false; /* entire module set to pass-through from now on */ \
} while(0)

Expand Down Expand Up @@ -479,6 +497,9 @@ mca_coll_han_get_all_coll_modules(struct ompi_communicator_t *comm,
mca_coll_han_module_t *han_module);

/*
 * Alltoall entry point taking the standard ALLTOALL_BASE_ARGS plus the
 * collective module.
 * NOTE(review): presumably picks the alltoall implementation at run time,
 * by analogy with the other *_intra_dynamic entries declared here -- confirm
 * against the definition.
 */
int
mca_coll_han_alltoall_intra_dynamic(ALLTOALL_BASE_ARGS,
mca_coll_base_module_t *module);
int
mca_coll_han_allgather_intra_dynamic(ALLGATHER_BASE_ARGS,
mca_coll_base_module_t *module);
int
Expand Down Expand Up @@ -526,4 +547,20 @@ coll_han_utils_gcd(const size_t *numerators, const size_t size);
int
coll_han_utils_create_contiguous_datatype(size_t count, const ompi_datatype_t *oldType,
ompi_datatype_t **newType);

/**
 * Return the SMSC endpoint cached on @p proc, creating it on first use.
 *
 * The endpoint lives in proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_SMSC].
 * When that slot is NULL it is filled with the result of
 * mca_smsc->get_endpoint() while mca_coll_han_lock is held. The slot is
 * tested again once the lock has been taken, so it is only assigned if it
 * is still NULL at that point.
 *
 * @param[in,out] proc  process whose endpoint slot is read and, if empty, filled
 *
 * @return the value stored in the slot (NULL if get_endpoint() returned NULL)
 *
 * NOTE(review): mca_smsc is dereferenced without a NULL check -- presumably
 * callers make sure an SMSC component is available first; confirm.
 */
static inline struct mca_smsc_endpoint_t *mca_coll_han_get_smsc_endpoint (struct ompi_proc_t *proc) {
    extern opal_mutex_t mca_coll_han_lock;

    /* Unlocked test first: the mutex is only taken while the slot is empty. */
    if (NULL == proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_SMSC]) {
        OPAL_THREAD_LOCK(&mca_coll_han_lock);
        /* Re-test under the lock before assigning. */
        if (NULL == proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_SMSC]) {
            proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_SMSC] = mca_smsc->get_endpoint(&proc->super);
        }
        OPAL_THREAD_UNLOCK(&mca_coll_han_lock);
    }

    return (struct mca_smsc_endpoint_t *) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_SMSC];
}

#endif /* MCA_COLL_HAN_EXPORT_H */
5 changes: 5 additions & 0 deletions ompi/mca/coll/han/coll_han_algorithms.c
@@ -1,6 +1,7 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2020-2022 Bull S.A.S. All rights reserved.
* Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* $COPYRIGHT$
*
Expand Down Expand Up @@ -77,6 +78,10 @@ mca_coll_han_algorithm_value_t* mca_coll_han_available_algorithms[COLLCOUNT] =
{"simple", (fnptr_t)&mca_coll_han_allgather_intra_simple}, // 2-level
{ 0 }
},
[ALLTOALL] = (mca_coll_han_algorithm_value_t[]){
{"intrasmsc", (fnptr_t)&mca_coll_han_alltoall_using_smsc}, // 2-level
{ 0 }
},
};

int
Expand Down
7 changes: 7 additions & 0 deletions ompi/mca/coll/han/coll_han_algorithms.h
@@ -1,6 +1,7 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2020-2022 Bull S.A.S. All rights reserved.
* Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* $COPYRIGHT$
*
Expand Down Expand Up @@ -208,4 +209,10 @@ mca_coll_han_allgather_intra_simple(const void *sbuf, int scount,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);

/*
 * Alltoall
 *
 * mca_coll_han_alltoall_using_smsc is the implementation listed under the
 * name "intrasmsc" in the ALLTOALL entry of mca_coll_han_available_algorithms.
 * It takes the standard ALLTOALL_BASE_ARGS plus the collective module.
 */
int
mca_coll_han_alltoall_using_smsc(ALLTOALL_BASE_ARGS,
mca_coll_base_module_t *module);


#endif