Skip to content

Commit

Permalink
Use PMPI when appropriate
Browse the repository at this point in the history
  • Loading branch information
huttered40 committed Sep 19, 2023
1 parent e52330f commit f8bf1bd
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 18 deletions.
16 changes: 8 additions & 8 deletions src/interface/common.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ namespace CTF_int {

void CommData::bcast(void * buf, int64_t count, MPI_Datatype mdtype, int root){
#ifdef TUNE
MPI_Barrier(cm);
PMPI_Barrier(cm);

int tsize_;
MPI_Type_size(mdtype, &tsize_);
Expand All @@ -394,7 +394,7 @@ namespace CTF_int {
#endif
MPI_Bcast(buf, count, mdtype, root, cm);
#ifdef TUNE
MPI_Barrier(cm);
PMPI_Barrier(cm);
double exe_time = MPI_Wtime()-st_time;
int tsize;
MPI_Type_size(mdtype, &tsize);
Expand All @@ -405,7 +405,7 @@ namespace CTF_int {

void CommData::allred(void * inbuf, void * outbuf, int64_t count, MPI_Datatype mdtype, MPI_Op op){
#ifdef TUNE
MPI_Barrier(cm);
PMPI_Barrier(cm);
#endif

#ifdef TUNE
Expand All @@ -423,7 +423,7 @@ namespace CTF_int {
double st_time = MPI_Wtime();
MPI_Allreduce(inbuf, outbuf, count, mdtype, op, cm);
#ifdef TUNE
MPI_Barrier(cm);
PMPI_Barrier(cm);
#endif
double exe_time = MPI_Wtime()-st_time;
int tsize;
Expand All @@ -437,7 +437,7 @@ namespace CTF_int {

void CommData::red(void * inbuf, void * outbuf, int64_t count, MPI_Datatype mdtype, MPI_Op op, int root){
#ifdef TUNE
MPI_Barrier(cm);
PMPI_Barrier(cm);

// change-of-observe
int tsize_;
Expand All @@ -454,7 +454,7 @@ namespace CTF_int {
double st_time = MPI_Wtime();
MPI_Reduce(inbuf, outbuf, count, mdtype, op, root, cm);
#ifdef TUNE
MPI_Barrier(cm);
PMPI_Barrier(cm);
#endif
double exe_time = MPI_Wtime()-st_time;
int tsize;
Expand All @@ -476,7 +476,7 @@ namespace CTF_int {
int64_t const * recv_displs){

#ifdef TUNE
MPI_Barrier(cm);
PMPI_Barrier(cm);
// change-of-observe
int64_t tot_sz_ = std::max(send_displs[np-1]+send_counts[np-1], recv_displs[np-1]+recv_counts[np-1])*datum_size;
double tps_[] = {0.0, 1.0, log2(np), (double)tot_sz_};
Expand Down Expand Up @@ -568,7 +568,7 @@ namespace CTF_int {
CTF_int::cdealloc(i32_recv_displs);
}
#ifdef TUNE
MPI_Barrier(cm);
PMPI_Barrier(cm);
#endif
double exe_time = MPI_Wtime()-st_time;
int64_t tot_sz = std::max(send_displs[np-1]+send_counts[np-1], recv_displs[np-1]+recv_counts[np-1])*datum_size;
Expand Down
20 changes: 10 additions & 10 deletions src/shared/model.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ namespace CTF_int {
int tot_nrcol;

//compute the total number of observations over all processors
MPI_Allreduce(&nrcol, &tot_nrcol, 1, MPI_INT, MPI_SUM, cm);
PMPI_Allreduce(&nrcol, &tot_nrcol, 1, MPI_INT, MPI_SUM, cm);

//if there has been more than 16*nparam observations per processor, tune the model
if (tot_nrcol >= 16.*np*nparam){
Expand Down Expand Up @@ -315,7 +315,7 @@ namespace CTF_int {
i_st = nparam;
}
//find the max execution time over all processors
// MPI_Allreduce(MPI_IN_PLACE, &max_time, 1, MPI_DOUBLE, MPI_MAX, cm);
// PMPI_Allreduce(MPI_IN_PLACE, &max_time, 1, MPI_DOUBLE, MPI_MAX, cm);
//double chunk = max_time / 1000.;
//printf("%s chunk = %+1.2e\n",name,chunk);

Expand Down Expand Up @@ -406,7 +406,7 @@ namespace CTF_int {
}
int sub_np = np; //std::min(np,32);
MPI_Comm sub_comm;
MPI_Comm_split(cm, rk<sub_np, rk, &sub_comm);
PMPI_Comm_split(cm, rk<sub_np, rk, &sub_comm);
//use only data from the first 32 processors, so that this doesn't take too long
//FIXME: can be smarter but not clear if necessary
if (rk < sub_np){
Expand All @@ -415,13 +415,13 @@ namespace CTF_int {
//all_b will have the bs from each processor vertically stacked as [b_1^T .. b_32^T]^T
double * all_b = (double*)malloc(sizeof(double)*nparam*sub_np);
//gather all Rs from all the processors
MPI_Allgather(R, nparam*nparam, MPI_DOUBLE, all_R, nparam*nparam, MPI_DOUBLE, sub_comm);
PMPI_Allgather(R, nparam*nparam, MPI_DOUBLE, all_R, nparam*nparam, MPI_DOUBLE, sub_comm);
double * Rs = (double*)malloc(sizeof(double)*nparam*nparam*sub_np);
for (int i=0; i<sub_np; i++){
lda_cpy(sizeof(double), nparam, nparam, nparam, sub_np*nparam, (const char *)(all_R+i*nparam*nparam), (char*)(Rs+i*nparam));
}
//gather all bs from all the processors
MPI_Allgather(b, nparam, MPI_DOUBLE, all_b, nparam, MPI_DOUBLE, sub_comm);
PMPI_Allgather(b, nparam, MPI_DOUBLE, all_b, nparam, MPI_DOUBLE, sub_comm);
free(b);
free(all_R);
free(R);
Expand Down Expand Up @@ -485,7 +485,7 @@ namespace CTF_int {
}
MPI_Comm_free(&sub_comm);
//broadcast new coefficient guess
MPI_Bcast(coeff_guess, nparam, MPI_DOUBLE, 0, cm);
PMPI_Bcast(coeff_guess, nparam, MPI_DOUBLE, 0, cm);
/*for (int i=0; i<nparam; i++){
regularization[i] = coeff_guess[i]*REG_LAMBDA;
}*/
Expand All @@ -497,9 +497,9 @@ namespace CTF_int {
double tot_time_total;
double over_time_total;
double under_time_total;
MPI_Allreduce(&tot_time, &tot_time_total, 1, MPI_DOUBLE, MPI_SUM, cm);
MPI_Allreduce(&over_time, &over_time_total, 1, MPI_DOUBLE, MPI_SUM, cm);
MPI_Allreduce(&under_time, &under_time_total, 1, MPI_DOUBLE, MPI_SUM, cm);
PMPI_Allreduce(&tot_time, &tot_time_total, 1, MPI_DOUBLE, MPI_SUM, cm);
PMPI_Allreduce(&over_time, &over_time_total, 1, MPI_DOUBLE, MPI_SUM, cm);
PMPI_Allreduce(&under_time, &under_time_total, 1, MPI_DOUBLE, MPI_SUM, cm);


// NOTE: to change the minimum number of observations and the threshold,
Expand Down Expand Up @@ -729,7 +729,7 @@ namespace CTF_int {
ofs.close();
}
rank++;
MPI_Barrier(MPI_COMM_WORLD);
PMPI_Barrier(MPI_COMM_WORLD);
}
}

Expand Down

0 comments on commit f8bf1bd

Please sign in to comment.