Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test: add p2p benchmark code #6907

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,6 @@
[submodule "modules/yaksa"]
path = modules/yaksa
url = https://github.com/pmodels/yaksa
[submodule "modules/mydef_boot"]
path = modules/mydef_boot
url = https://github.com/pmodels/mydef_boot
1 change: 1 addition & 0 deletions modules/mydef_boot
Submodule mydef_boot added at ea2d68
4 changes: 4 additions & 0 deletions test/mpi/bench/config
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
module: c
output_dir: out
CC: mpicc
run: mpirun -n 2
88 changes: 88 additions & 0 deletions test/mpi/bench/macros/bench_frame.def
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
/*
* bench_frame : boilerplate for mpi program
* measure(iter) : measures `tf_dur` for $(iter) iterations
* run_stat(N, var) : run N measurements and obtain (avg, std) in sum1, sum2
* warm_up(iter, dur): repeat until measurements (iter, dur) stabilize
* report_latency(N) : print a line of latency result
*/

# bench_frame: page frame that supplies main() for a benchmark page.
# main() initializes MPI, stores the MPI_COMM_WORLD rank and size in the globals
# grank/gsize, allocates a $(MAX_MSG)-byte buffer `buf`, expands the page's
# optional report_title subcode followed by the page's main code, then
# releases the buffer, finalizes MPI and returns errs.
subcode: bench_frame
    $include stdio
    $include stdlib
    $include mpi

    $global grank, gsize: int

    $function main
        int errs = 0;

        MPI_Init(NULL, NULL);

        MPI_Comm_rank(MPI_COMM_WORLD, &grank);
        MPI_Comm_size(MPI_COMM_WORLD, &gsize);

        MPI_Comm comm = MPI_COMM_WORLD;
        char *buf = malloc($(MAX_MSG));
        # without this check a failed allocation only shows up later as a crash inside an MPI call
        $if buf == NULL
            fprintf(stderr, "bench_frame: failed to allocate %ld bytes\n", (long) $(MAX_MSG));
            MPI_Abort(MPI_COMM_WORLD, 1);

        $call @report_title
        $call main

        # release the message buffer before shutting MPI down
        free(buf);
        MPI_Finalize();

        return errs

# Default macros for pages that include this file.
#   use_double: NOTE(review): presumably makes MyDef declare the tf_* temporaries as double - confirm
#   data:       the (buffer, count, datatype) argument triple spliced into MPI send/receive calls
#   MAX_MSG:    byte size of the buffer allocated by bench_frame and the exclusive upper bound of the size loops
macros:
use_double: 1
data: buf, size, MPI_CHAR
MAX_MSG: 5000000

#----------------------------------------
# _autoload: type registration for this file.
# NOTE(review): presumably MyDef expands _autoload automatically when the file is loaded - confirm.
# Associates names with the prefix `comm` with the C type MPI_Comm.
subcode: _autoload
$register_prefix(comm) MPI_Comm

# foreach_size: expands BLOCK inside a loop over message sizes.
# `size` takes the values 0, 1, 2, 4, ... (doubling) while size < $(MAX_MSG).
# The macro MSG_SIZE is set to `size` for the expansion of BLOCK.
subcode: foreach_size
$for int size = 0; size < $(MAX_MSG); size = (size==0)?1:size*2
$(set:MSG_SIZE=size)
BLOCK

# measure(iter): times $(iter) repetitions of BLOCK.
# Records MPI_Wtime() in tf_start before the loop and stores the elapsed
# wall-clock time (MPI_Wtime() difference) in tf_dur after it.
subcode: measure(iter)
tf_start = MPI_Wtime()
$for 0:$(iter)
BLOCK
tf_dur = MPI_Wtime() - tf_start

# run_stat(N, var): expands BLOCK $(N) times and accumulates statistics of $(var).
# BLOCK is expected to assign $(var) on every repetition.
# On exit sum1 holds the mean of $(var) and sum2 its (population) standard deviation.
subcode: run_stat(N, var)
    $my double sum1=0, double sum2=0
    $for 0:$(N)
        BLOCK
        sum1 += $(var)
        sum2 += $(var) * $(var)
    sum1 /= $(N)
    sum2 /= $(N)
    # variance = E[x^2] - E[x]^2; when the samples are nearly identical,
    # floating-point cancellation can leave it slightly negative and
    # sqrt() would then return NaN, so clamp it to zero first
    sum2 -= sum1 * sum1
    $if sum2 < 0
        sum2 = 0
    sum2 = sqrt(sum2)

# warm_up(iter, dur): repeats BLOCK until the measurement settles, leaving the chosen count in $(iter).
# BLOCK is expected to run one measurement of $(iter) iterations and store its duration in $(dur).
# $(iter) starts at 2. MIN_ITER is the textual macro (int) ($(iter) * 0.001 / $(dur)), so it is
# re-evaluated with the current values each time it is used; when $(iter) is below it, $(iter) is
# raised to that value and num_best is reset.
# The loop ends when num_best reaches 10; num_best counts measurements whose $(dur) exceeded last_dur.
# NOTE(review): MIN_ITER divides by $(dur); a zero duration is not guarded against.
subcode: warm_up(iter, dur)
$(set:MIN_ITER=(int) ($(iter) * 0.001 / $(dur)))
$(iter) = 2
$my double last_dur = 1.0
$my int num_best = 0
$while num_best < 10
BLOCK
$if $(iter) < $(MIN_ITER)
$(iter) = $(MIN_ITER)
num_best = 0
continue
# check that the measured duration is no longer monotonically decreasing
$if $(dur) > last_dur
num_best++
last_dur = $(dur)

# report_latency(N): prints one result line from the statistics left in sum1 and sum2.
# tf_latency and tf_sigma are sum1 and sum2, each divided by $(N) and multiplied by 1e6.
# When the macro MSG_SIZE is defined, the line also carries the message size and
# tf_bw = $(MSG_SIZE) / tf_latency; otherwise only latency and sigma are printed.
# NOTE(review): with durations in seconds the 1e6 factor yields microseconds, which would make tf_bw bytes per microsecond - confirm units.
subcode: report_latency(N)
tf_latency = sum1 / ($(N)) * 1e6
tf_sigma = sum2 / ($(N)) * 1e6
$(if:MSG_SIZE)
tf_bw = $(MSG_SIZE) / tf_latency
printf(" %10d %10.3f %6.3f %10.3f\n", $(MSG_SIZE), tf_latency, tf_sigma, tf_bw)
$(else)
printf(" %10.3f %6.3f\n", tf_latency, tf_sigma)

79 changes: 79 additions & 0 deletions test/mpi/bench/macros/bench_p2p.def
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* Defines the following functions:
* bench_p2p
* bench_send, bench_warmup
* bench_recv
*
* For each measurement -
* First sender tells receiver the `iter` parameter. `iter = 0` means to quit.
* For each iteration runs `send_side` and `recv_side` assuming the measurement on sender side represents a latency measurement.
*
* Caller page defines -
* subcode: send_side, recv_side
* macro:
* params: function parameters for bench_p2p etc.
* MSG_SIZE: if defined report_latency will include bw
* MULTIPLICITY: divisor for each measurement
*/

# _autoload: type registrations and constants for the p2p benchmark helpers.
# NOTE(review): presumably MyDef expands _autoload automatically when the file is loaded - confirm.
# Registers the names src and dst as int, and defines the two message tags:
# TAG (0) is used by the pages' send_side/recv_side payload messages,
# SYNC_TAG (100) by the iteration-count handshake between bench_send/send_stop and bench_recv.
subcode: _autoload
$register_name(src) int
$register_name(dst) int
$define TAG 0
$define SYNC_TAG 100

# report_title: expanded inside main() by bench_frame before the page's main code.
# Requires exactly 2 processes; otherwise prints a notice and leaves main with status 0.
# On rank 0 it prints the column header for the result lines.
subcode: report_title
    $if gsize != 2
        printf("! Test $(_pagename) requires 2 processes !\n");
        # bench_frame has already called MPI_Init, so MPI must be finalized before returning from main
        MPI_Finalize();
        return 0
    $if grank == 0
        printf("# Test $(_pagename): msg-size avg-latency sigma avg-bandwidth\n")

# bench_p2p(comm, src, dst, params...): runs one benchmark configuration between ranks src and dst of comm.
# REPEAT defaults to 20 and MULTIPLICITY to 1 when the page does not define them.
# On rank src: obtains an iteration count from bench_warmup, then uses run_stat to take $(REPEAT)
# measurements of bench_send (each divided by iter), prints a line through report_latency with
# $(MULTIPLICITY) as divisor, and finally sends the stop signal (send_stop).
# On rank dst: runs bench_recv, which serves measurements until the stop signal arrives.
# Other ranks do nothing.
fncode: bench_p2p(comm, src, dst, @params)
int rank;
MPI_Comm_rank(comm, &rank)

$(if:!REPEAT)
$(set:REPEAT=20)
$(if:!MULTIPLICITY)
$(set:MULTIPLICITY=1)

$if rank == src
iter = bench_warmup(comm, dst, $(params))
&call run_stat, $(REPEAT), tf_latency
tf_latency = bench_send(iter, comm, dst, $(params))
tf_latency /= iter
$call report_latency, $(MULTIPLICITY)
$call send_stop
$elif rank == dst
bench_recv(comm, src, $(params))

# send_stop: tells the receiver to quit by sending iter = 0 on SYNC_TAG
# (bench_recv leaves its loop when it receives 0).
subcode: send_stop
iter = 0;
MPI_Send(&iter, 1, MPI_INT, dst, SYNC_TAG, comm)

#----------------------------------------
# bench_send(iter, comm, dst, params...): sender half of one measurement.
# Sends `iter` to dst on SYNC_TAG so the receiver runs the same number of iterations,
# then times `iter` expansions of the page's send_side subcode with `measure`.
# Returns tf_dur, the elapsed time of the whole loop (not divided by iter).
fncode: bench_send(int iter, comm, dst, @params)
# synchronize with receiver
MPI_Send(&iter, 1, MPI_INT, dst, SYNC_TAG, comm);

&call measure, iter
$call @send_side

return tf_dur

# bench_recv(comm, src, params...): receiver half of the benchmark.
# Loops forever: receives an iteration count from src on SYNC_TAG, stops when the
# count is 0, otherwise expands the page's recv_side subcode that many times.
fncode: bench_recv(comm, src, @params)
$while 1
int iter;
# synchronize with sender
MPI_Recv(&iter, 1, MPI_INT, src, SYNC_TAG, comm, MPI_STATUS_IGNORE);
$if iter == 0
# time to quit
break
$for i=0:iter
$call @recv_side

# bench_warmup(comm, dst, params...): chooses the iteration count for the measurements.
# Drives the warm_up subcode with bench_send as the measurement (duration stored in tf_dur)
# and returns the resulting iter as an int.
fncode: bench_warmup(comm, dst, @params): int
&call warm_up, iter, tf_dur
tf_dur = bench_send(iter, comm, dst, $(params))
return iter
55 changes: 55 additions & 0 deletions test/mpi/bench/p2p.def
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/* Instructions:
* mydef_page p2p.def # -> p2p_latency.c p2p_bw.c
* mpicc p2p_latency.c && mpirun -n 2 ./a.out
* mpicc p2p_bw.c && mpirun -n 2 ./a.out
*
* Reference the output C code or bench_{frame,p2p}.def.
*/

include: macros/bench_frame.def
include: macros/bench_p2p.def

# _autoload: type registrations for the benchmark parameters used by the pages below.
# NOTE(review): presumably MyDef expands _autoload automatically when the file is loaded - confirm.
# buf is registered as void *, size and batch_size as int.
subcode: _autoload
$register_name(buf) void *
$register_name(size) int
$register_name(batch_size) int

# Page p2p_latency (frame: bench_frame): ping-pong latency between ranks 0 and 1.
# Runs bench_p2p for message size 0 and then for sizes 1, 2, 4, ... below $(MAX_MSG).
# send_side sends $(data) to dst and waits for it to come back; recv_side receives it and sends it back.
# MULTIPLICITY is 2, which report_latency uses as divisor - presumably because one iteration
# is a full round trip (two messages).
page: p2p_latency, bench_frame
params: buf, size
MSG_SIZE: size
MULTIPLICITY: 2

bench_p2p(comm, 0, 1, buf, 0)
$for int size = 1; size < $(MAX_MSG); size *= 2
bench_p2p(comm, 0, 1, buf, size)

subcode: send_side
MPI_Send($(data), dst, TAG, comm);
MPI_Recv($(data), dst, TAG, comm, MPI_STATUS_IGNORE);

subcode: recv_side
MPI_Recv($(data), src, TAG, comm, MPI_STATUS_IGNORE);
MPI_Send($(data), src, TAG, comm);

# Page p2p_bw (frame: bench_frame): streaming bandwidth between ranks 0 and 1.
# For sizes 1, 2, 4, ... below $(MAX_MSG), each iteration posts batch_size nonblocking
# sends (receives on the other side) of $(data), waits for all of them, and then
# exchanges a zero-byte acknowledgement from receiver to sender.
# MULTIPLICITY is batch_size, which report_latency uses as divisor.
# MAX_BATCH_SIZE sizes the request arrays; batch_size must not exceed it.
page: p2p_bw, bench_frame
    params: buf, size, batch_size
    MSG_SIZE: size
    MULTIPLICITY: batch_size
    MAX_BATCH_SIZE: 64

    $for int size = 1; size < $(MAX_MSG); size *= 2
        # pass the macro rather than a literal so the batch can never outgrow reqs[]
        bench_p2p(comm, 0, 1, buf, size, $(MAX_BATCH_SIZE))

    subcode: send_side
        $my MPI_Request reqs[$(MAX_BATCH_SIZE)]
        $for j=0:batch_size
            MPI_Isend($(data), dst, TAG, comm, &reqs[j])
        MPI_Waitall(batch_size, reqs, MPI_STATUSES_IGNORE)
        # zero-byte acknowledgement; a real datatype is used because a null
        # handle is not a valid input argument to MPI_Recv
        MPI_Recv(NULL, 0, MPI_CHAR, dst, TAG, comm, MPI_STATUS_IGNORE)

    subcode: recv_side
        $my MPI_Request reqs[$(MAX_BATCH_SIZE)]
        $for j=0:batch_size
            MPI_Irecv($(data), src, TAG, comm, &reqs[j])
        MPI_Waitall(batch_size, reqs, MPI_STATUSES_IGNORE)
        # zero-byte acknowledgement back to the sender (see send_side)
        MPI_Send(NULL, 0, MPI_CHAR, src, TAG, comm)