Skip to content

Commit

Permalink
coll/han: Add alltoall to HAN
Browse files Browse the repository at this point in the history
Add two Alltoall algorithms to coll/han.  Both algorithms use the same
communication pattern.  Each rank on one host is assigned a single
partner on a remote host and vice versa.  Then the rank collects all
the data its partner will need to receive from its host, and sends it
in one large send, and likewise receives its data in one large recv,
then cycles to the next host.

The two algorithms are:
- mca_coll_han_alltoall_using_allgather: gathering data is done once
  and each rank has a copy of all local data.  Only recommended for
  small message sizes.
- mca_coll_han_alltoall_using_smsc: ranks use smsc module to
  direct-map local memory before copying into a packed send buffer.
  Currently only the XPMEM-based smsc module supports this operation.

Signed-off-by: Luke Robison <lrbison@amazon.com>
  • Loading branch information
lrbison committed Mar 20, 2024
1 parent b15619d commit c15396f
Show file tree
Hide file tree
Showing 9 changed files with 691 additions and 2 deletions.
3 changes: 2 additions & 1 deletion ompi/mca/coll/han/Makefile.am
Expand Up @@ -2,7 +2,7 @@
# Copyright (c) 2018-2020 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2022 Amazon.com, Inc. or its affiliates. All Rights reserved.
# Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights reserved.
# Copyright (c) 2022 BULL S.A.S. All rights reserved.
# $COPYRIGHT$
#
Expand All @@ -15,6 +15,7 @@ sources = \
coll_han.h \
coll_han_trigger.h \
coll_han_algorithms.h \
coll_han_alltoall.c \
coll_han_dynamic.h \
coll_han_dynamic_file.h \
coll_han_barrier.c \
Expand Down
21 changes: 21 additions & 0 deletions ompi/mca/coll/han/coll_han.h
Expand Up @@ -4,6 +4,7 @@
* reserved.
* Copyright (c) 2022 IBM Corporation. All rights reserved
* Copyright (c) 2020-2022 Bull S.A.S. All rights reserved.
* Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -191,6 +192,7 @@ typedef struct mca_coll_han_op_module_name_t {
mca_coll_han_op_up_low_module_name_t allgather;
mca_coll_han_op_up_low_module_name_t gather;
mca_coll_han_op_up_low_module_name_t scatter;
mca_coll_han_op_up_low_module_name_t alltoall;
} mca_coll_han_op_module_name_t;

/**
Expand Down Expand Up @@ -238,6 +240,16 @@ typedef struct mca_coll_han_component_t {
uint32_t han_scatter_up_module;
/* low level module for scatter */
uint32_t han_scatter_low_module;

/* low level module for alltoall */
uint32_t han_alltoall_low_module;
/* alltoall: parallel stages */
int32_t han_alltoall_pstages;
/* alltoall: factor to decrease exchange size while increasing rounds */
int32_t han_alltoall_subdivfactor;
uint32_t han_alltoall_algorithm;


/* name of the modules */
mca_coll_han_op_module_name_t han_op_module_name;
/* whether we need reproducible results
Expand Down Expand Up @@ -272,6 +284,7 @@ typedef struct mca_coll_han_component_t {
*/
typedef struct mca_coll_han_single_collective_fallback_s {
union {
mca_coll_base_module_alltoall_fn_t alltoall;
mca_coll_base_module_allgather_fn_t allgather;
mca_coll_base_module_allgatherv_fn_t allgatherv;
mca_coll_base_module_allreduce_fn_t allreduce;
Expand All @@ -290,6 +303,7 @@ typedef struct mca_coll_han_single_collective_fallback_s {
* creation.
*/
typedef struct mca_coll_han_collectives_fallback_s {
mca_coll_han_single_collective_fallback_t alltoall;
mca_coll_han_single_collective_fallback_t allgather;
mca_coll_han_single_collective_fallback_t allgatherv;
mca_coll_han_single_collective_fallback_t allreduce;
Expand Down Expand Up @@ -349,6 +363,9 @@ OBJ_CLASS_DECLARATION(mca_coll_han_module_t);
* Some defines to stick to the naming used in the other components in terms of
* fallback routines
*/
#define previous_alltoall fallback.alltoall.module_fn.alltoall
#define previous_alltoall_module fallback.alltoall.module

#define previous_allgather fallback.allgather.module_fn.allgather
#define previous_allgather_module fallback.allgather.module

Expand Down Expand Up @@ -397,6 +414,7 @@ OBJ_CLASS_DECLARATION(mca_coll_han_module_t);
HAN_LOAD_FALLBACK_COLLECTIVE(HANM, COMM, allreduce); \
HAN_LOAD_FALLBACK_COLLECTIVE(HANM, COMM, allgather); \
HAN_LOAD_FALLBACK_COLLECTIVE(HANM, COMM, allgatherv); \
HAN_LOAD_FALLBACK_COLLECTIVE(HANM, COMM, alltoall); \
han_module->enabled = false; /* entire module set to pass-through from now on */ \
} while(0)

Expand Down Expand Up @@ -452,6 +470,9 @@ mca_coll_han_get_all_coll_modules(struct ompi_communicator_t *comm,
mca_coll_han_module_t *han_module);

int
mca_coll_han_alltoall_intra_dynamic(ALLTOALL_BASE_ARGS,
mca_coll_base_module_t *module);
int
mca_coll_han_allgather_intra_dynamic(ALLGATHER_BASE_ARGS,
mca_coll_base_module_t *module);
int
Expand Down
6 changes: 6 additions & 0 deletions ompi/mca/coll/han/coll_han_algorithms.c
@@ -1,6 +1,7 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2020-2022 Bull S.A.S. All rights reserved.
* Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* $COPYRIGHT$
*
Expand Down Expand Up @@ -69,6 +70,11 @@ mca_coll_han_algorithm_value_t* mca_coll_han_available_algorithms[COLLCOUNT] =
{"simple", (fnptr_t)&mca_coll_han_allgather_intra_simple}, // 2-level
{ 0 }
},
[ALLTOALL] = (mca_coll_han_algorithm_value_t[]){
{"intrasmall", (fnptr_t)&mca_coll_han_alltoall_using_allgather}, // 2-level
{"intrasmsc", (fnptr_t)&mca_coll_han_alltoall_using_smsc}, // 2-level
{ 0 }
},
};

int
Expand Down
10 changes: 10 additions & 0 deletions ompi/mca/coll/han/coll_han_algorithms.h
@@ -1,6 +1,7 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2020-2022 Bull S.A.S. All rights reserved.
* Copyright (c) 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* $COPYRIGHT$
*
Expand Down Expand Up @@ -191,4 +192,13 @@ mca_coll_han_allgather_intra_simple(const void *sbuf, int scount,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);

/* Alltoall */
/*
 * Alltoall variant registered under the algorithm name "intrasmall".
 * Local data is gathered once so that each rank holds a copy of all
 * local data before exchanging with its partner on each remote host.
 * Only recommended for small message sizes.
 */
int
mca_coll_han_alltoall_using_allgather(ALLTOALL_BASE_ARGS,
mca_coll_base_module_t *module);
/*
 * Alltoall variant registered under the algorithm name "intrasmsc".
 * Ranks use the smsc module to direct-map local memory and copy from it
 * into a packed send buffer.
 * NOTE(review): at the time this was added only the XPMEM-based smsc
 * module supported this operation -- confirm before relying on others.
 */
int
mca_coll_han_alltoall_using_smsc(ALLTOALL_BASE_ARGS,
mca_coll_base_module_t *module);


#endif

0 comments on commit c15396f

Please sign in to comment.