Skip to content

Commit

Permalink
fix performance issues and a bug (use of MPI_COMM_WORLD instead glob_…
Browse files Browse the repository at this point in the history
…comm.np) in new node aware code
  • Loading branch information
solomonik committed Nov 16, 2023
1 parent cbbb96f commit c3f9582
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 29 deletions.
11 changes: 7 additions & 4 deletions src/interface/world.cxx
Expand Up @@ -285,10 +285,13 @@ namespace CTF {
}
cppn = getenv("CTF_PPN");
if (cppn != NULL){
if (rank == 0)
printf("Assuming %d processes per node due to CTF_PPN environment variable\n",
atoi(cppn));
ASSERT(atoi(cppn)>=1);
int icppn = atoi(cppn);
if (rank == 0){
printf("CTF_PPN environment variable set to %d\n",icppn);
if (icppn<1 || all_np % icppn!=0) printf("This CTF_PPN value is invalid given the number of processors, aborting.\n");
}
assert(icppn>=1);
assert(all_np % icppn == 0);
#ifdef BGQ
CTF_int::set_memcap(.75);
#else
Expand Down
76 changes: 51 additions & 25 deletions src/mapping/topology.cxx
Expand Up @@ -131,37 +131,63 @@ namespace CTF_int {
int stride, cut;
double tot_comm_nodes[order];
int rank = glb_comm.rank;
int my_color[order];
/**
* The average number of nodes each processor communicates with, g, is the average number of nodes in each communicator - 1.
* Each set of communicators is associated with a stride s, a communicator size t, and the number of communicator sets v = p/(st).
* Let k be the number of processes per node. If s >= k, g = t-1.
* If s < k, each node boundary adds a node to min(d,s) communicators, where d is the distance between the node boundary and the nearest multiple of s*t.
*/
if (intra_node_lens == NULL) {
stride = 1; cut = 0;
int s = 1, t, v;
for (int i = 0; i < order; i++) {
my_color[i] = rank / (stride * lens[i]) * stride + cut;
stride *= lens[i];
cut = (rank - (rank/stride)*stride);
}
std::vector<int> nodes[order];
for (int r = 0; r < glb_comm.np; r++) {
stride = 1; cut = 0;
for (int i = 0; i < order; i++) {
int color = r / (stride * lens[i]) * stride + cut;
if (color == my_color[i]) {
int node_id = r / ppn;
if (std::find(nodes[i].begin(), nodes[i].end(), node_id) == nodes[i].end()) {
nodes[i].push_back(node_id);
}
if (i>0) s *= lens[i-1];
t = lens[i];
v = glb_comm.np/(s*t);
if (s >= ppn) tot_comm_nodes[i] = t-1;
else {
tot_comm_nodes[i] = 0.;
for (int j=0; j<glb_comm.np/ppn; j++){
int d = std::min((j*ppn)%(s*t),s*t-((j*ppn)%(s*t)));
tot_comm_nodes[i] += ((double)std::min(s,d))/(v*s);
}
stride *= lens[i];
cut = (r - (r/stride)*stride);
}
}
int sum_comm_nodes[order];
for (int i = 0; i < order; i++) {
// number of nodes I need to communicate with
int sz = nodes[i].size() - 1;
MPI_Allreduce(&sz, &sum_comm_nodes[i], 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
tot_comm_nodes[i] = sum_comm_nodes[i] / (double)glb_comm.np;
}
}

// OLD, inefficient code that is equivalent to the computation above; kept because it may be useful for debugging the new code if issues arise
//if (intra_node_lens == NULL) {
// stride = 1; cut = 0;
// for (int i = 0; i < order; i++) {
// my_color[i] = rank / (stride * lens[i]) * stride + cut;
// stride *= lens[i];
// cut = (rank - (rank/stride)*stride);
// }
// std::vector<int> nodes[order];
// for (int r = 0; r < glb_comm.np; r++) {
// stride = 1; cut = 0;
// for (int i = 0; i < order; i++) {
// int color = r / (stride * lens[i]) * stride + cut;
// if (color == my_color[i]) {
// int node_id = r / ppn;
// if (std::find(nodes[i].begin(), nodes[i].end(), node_id) == nodes[i].end()) {
// nodes[i].push_back(node_id);
// }
// }
// stride *= lens[i];
// cut = (r - (r/stride)*stride);
// }
// }
// int sum_comm_nodes[order];
// for (int i = 0; i < order; i++) {
// // number of nodes I need to communicate with
// int sz = nodes[i].size() - 1;
// MPI_Allreduce(&sz, &sum_comm_nodes[i], 1, MPI_INT, MPI_SUM, glb_comm.cm);
// tot_comm_nodes[i] = sum_comm_nodes[i] / (double)glb_comm.np;
// if (std::abs(tot_comm_nodes[i] - tot_comm_nodes_new[i]) > 1.e-6)
// printf("%d %lf %lf\n",i,tot_comm_nodes[i], tot_comm_nodes_new[i]);
// assert(std::abs(tot_comm_nodes[i] - tot_comm_nodes_new[i])<= 1.e-6);
// }
//}
stride = 1; cut = 0;
for (int i=0; i<order; i++){
lda[i] = stride;
Expand Down

0 comments on commit c3f9582

Please sign in to comment.