Commit

Merge pull request #467 from pdziekan/sharedmem_3d_y_blitz_storage_order_mpi_by_two_threads

Sharedmem 3d y blitz storage order mpi by two threads
pdziekan committed Jul 12, 2021
2 parents 23a8eb1 + 3964132 commit cefa43a
Showing 20 changed files with 518 additions and 140 deletions.
7 changes: 5 additions & 2 deletions libmpdata++/bcond/detail/bcond_common.hpp
@@ -193,6 +193,8 @@ namespace libmpdataxx
virtual void avg_edge_and_halo1_sclr_cyclic(arr_3d_t &, const rng_t &, const rng_t &)
{};

const bool single_threaded;

protected:
// sclr
int
@@ -207,7 +209,7 @@ namespace libmpdataxx
public:

// ctor
bcond_common(const rng_t &i, const std::array<int, n_dims> &) :
bcond_common(const rng_t &i, const std::array<int, n_dims> &, bool single_threaded = false) :
// sclr
left_edge_sclr(
i.first()
@@ -247,7 +249,8 @@ namespace libmpdataxx
rght_intr_vctr(
(i^h^(-1)).last() - (halo - 1),
(i^h^(-1)).last()
)
),
single_threaded(single_threaded)
{}

// the one for use in shared
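
The new single_threaded flag lets a boundary-condition object declare that it should be handled by exactly one thread per process; it defaults to false, so existing conditions are unaffected. Below is a minimal sketch of how a solver loop could honour the flag. The container bconds and the variable rank are illustrative names, not part of this commit.

// sketch only: assumes a collection of bcond_common-derived objects ("bconds")
// and the shared-memory rank of the current thread ("rank")
for (auto &bc : bconds)
{
  // single-threaded conditions (e.g. the MPI ones below) are applied by one designated thread
  if (bc->single_threaded && rank != 0) continue;
  // ... apply the boundary condition as usual ...
}
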
6 changes: 3 additions & 3 deletions libmpdata++/bcond/detail/polar_common.hpp
@@ -37,10 +37,10 @@ namespace libmpdataxx
// ctor
polar_common(
const rng_t &i,
const std::array<int, n_dims> &grid_size
const std::array<int, n_dims> &distmem_grid_size
) :
parent_t(i, grid_size),
pole((grid_size[0] - 1) / 2)
parent_t(i, distmem_grid_size),
pole((distmem_grid_size[0] - 1) / 2)
{}
};
} // namespace detail
116 changes: 116 additions & 0 deletions libmpdata++/bcond/detail/remote_3d_common.hpp
@@ -0,0 +1,116 @@
// common code for ``remote'' MPI boundary conditions for libmpdata++
//
// licensing: GNU GPL v3
// copyright: University of Warsaw

#pragma once

#include <libmpdata++/bcond/detail/remote_common.hpp>

namespace libmpdataxx
{
namespace bcond
{
namespace detail
{
template <typename real_t, int halo, drctn_e dir>
class remote_3d_common : public remote_common<real_t, halo, dir, 3>
{
using parent_t = detail::remote_common<real_t, halo, dir, 3>;

protected:

using arr_t = typename parent_t::arr_t;
using idx_t = typename parent_t::idx_t;

const int thread_rank, thread_size;

private:

const rng_t thread_j;
const int grid_size_y;

// infer the process-wide domain to be exchanged by shifting the j-bounds of this
// thread's idx from its own y slab (thread_j) to the full y extent of the process
idx_t extend_idx(idx_t idx)
{
//std::cerr << "extend idx start idx(1): " << idx.lbound(1) << ", " << idx.ubound(1) << std::endl;
idx.lbound(1) = 0 + idx.lbound(1) - thread_j.first(); // does it have to start at 0?
idx.ubound(1) = (grid_size_y - 1) + idx.ubound(1) - thread_j.last(); // does it have to end at grid_size_y - 1?
//std::cerr << "extend idx end idx(1): " << idx.lbound(1) << ", " << idx.ubound(1) << std::endl;
return idx;
}

public:

void xchng (
const arr_t &a,
const idx_t &idx_send,
const idx_t &idx_recv
)
{
parent_t::xchng(a, extend_idx(idx_send), extend_idx(idx_recv));
}

void send (
const arr_t &a,
const idx_t &idx_send
)
{
parent_t::send(a, extend_idx(idx_send));
}

void recv (
const arr_t &a,
const idx_t &idx_recv
)
{
parent_t::recv(a, extend_idx(idx_recv));
}

// ctor
remote_3d_common(
const rng_t &i,
const std::array<int, 3> &distmem_grid_size,
const rng_t _thread_j,
const int thread_rank,
const int thread_size
) :
parent_t(i, distmem_grid_size, true), // true indicating that this is a bcond done with a single thread
thread_rank(thread_rank),
thread_size(thread_size),
thread_j(_thread_j),
grid_size_y(distmem_grid_size[1])
{
#if defined(USE_MPI)
// only 2 threads do mpi, others don't need buffers
if(thread_rank != 0 && thread_rank != thread_size-1)
{
free(parent_t::buf_send);
free(parent_t::buf_recv);
}
//std::cerr << "remote_3d_common ctor thread_j: " << thread_j.lbound(0) << ", " << thread_j.ubound(0) << std::endl;
//std::cerr << "remote_3d_common ctor _thread_j: " << _thread_j.lbound(0) << ", " << _thread_j.ubound(0) << std::endl;
//std::cerr << "remote_3d_common ctor f-l thread_j: " << thread_j.first() << ", " << thread_j.last() << std::endl;
//std::cerr << "remote_3d_common ctor f-l _thread_j: " << _thread_j.first() << ", " << _thread_j.last() << std::endl;
#endif
}

// dtor
~remote_3d_common()
{
#if defined(USE_MPI)
if(thread_rank == 0 || thread_rank == thread_size-1)
{
free(parent_t::buf_send);
free(parent_t::buf_recv);
}
// set the pointers to nullptr so that the free() calls in ~remote_common become well-defined no-ops (avoids a double free)
parent_t::buf_send = nullptr;
parent_t::buf_recv = nullptr;
#endif
}
};
} // namespace detail
} // namespace bcond
} // namespace libmpdataxx
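
The core of remote_3d_common is extend_idx(): only the two outermost threads of each process (thread_rank 0 and thread_size - 1) talk to MPI, and each of them exchanges the full y extent of the process rather than just its own y slab, so the j-bounds of every index are shifted from the thread's slab to 0 .. grid_size_y - 1. A standalone sketch of that arithmetic with illustrative numbers (none of them come from the commit):

#include <iostream>

int main()
{
  const int grid_size_y    = 128;  // y extent of this MPI process (illustrative)
  const int thread_j_first = 32,   // y slab owned by this thread (illustrative)
            thread_j_last  = 63;

  // j-bounds of the index this thread would exchange on its own (here: exactly its slab)
  int j_lo = 32, j_hi = 63;

  // the same shift as extend_idx(): anchor the bounds to the whole per-process domain
  j_lo = 0                  + j_lo - thread_j_first;  // -> 0
  j_hi = (grid_size_y - 1)  + j_hi - thread_j_last;   // -> 127

  std::cout << j_lo << " " << j_hi << std::endl;      // prints "0 127"
}
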
53 changes: 37 additions & 16 deletions libmpdata++/bcond/detail/remote_common.hpp
@@ -29,21 +29,22 @@ namespace libmpdataxx
using arr_t = blitz::Array<real_t, n_dims>;
using idx_t = blitz::RectDomain<n_dims>;

private:
real_t *buf_send,
*buf_recv;

const int grid_size_0;
private:

#if defined(USE_MPI)
boost::mpi::communicator mpicom;
real_t *buf_send,
*buf_recv;

# if defined(NDEBUG)
static const int n_reqs = 2; // data, reqs for recv only is enough?
static const int n_dbg_reqs = 0;
static const int n_dbg_send_reqs = 0;
static const int n_dbg_tags = 0;
# else
static const int n_reqs = 4; // data + ranges
static const int n_dbg_reqs = 1;
static const int n_dbg_send_reqs = 1;
static const int n_dbg_tags = 2;
# endif

std::array<boost::mpi::request, n_reqs> reqs;
@@ -53,7 +54,6 @@ namespace libmpdataxx
: (mpicom.rank() + 1 ) % mpicom.size();

# if !defined(NDEBUG)
const int debug = 2;
std::pair<int, int> buf_rng;
# endif
#endif
@@ -78,6 +78,12 @@ namespace libmpdataxx
const int
msg_send = dir == left ? left : rght;

// std::cerr << "send_hlpr idx dir " << dir << " : "
// << " (" << idx_send.lbound(0) << ", " << idx_send.ubound(0) << ")"
// << " (" << idx_send.lbound(1) << ", " << idx_send.ubound(1) << ")"
// << " (" << idx_send.lbound(2) << ", " << idx_send.ubound(2) << ")"
// << std::endl;

// arr_send references part of the send buffer that will be used
arr_t arr_send(buf_send, a(idx_send).shape(), blitz::neverDeleteData);
// copying data to be sent
@@ -92,7 +98,7 @@ namespace libmpdataxx

// sending debug information
# if !defined(NDEBUG)
reqs[1] = mpicom.isend(peer, msg_send ^ debug, std::pair<int,int>(
reqs[1] = mpicom.isend(peer, msg_send + n_dbg_tags, std::pair<int,int>(
idx_send[0].first(),
idx_send[0].last()
));
@@ -112,15 +118,21 @@ namespace libmpdataxx
const int
msg_recv = dir == left ? rght : left;

// std::cerr << "recv_hlpr idx dir " << dir << " : "
// << " (" << idx_recv.lbound(0) << ", " << idx_recv.ubound(0) << ")"
// << " (" << idx_recv.lbound(1) << ", " << idx_recv.ubound(1) << ")"
// << " (" << idx_recv.lbound(2) << ", " << idx_recv.ubound(2) << ")"
// << std::endl;


// launching async data transfer
if(a(idx_recv).size()!=0) // TODO: test directly size of idx_recv
{
reqs[1+n_dbg_reqs] = mpicom.irecv(peer, msg_recv, buf_recv, a(idx_recv).size());
reqs[1+n_dbg_send_reqs] = mpicom.irecv(peer, msg_recv, buf_recv, a(idx_recv).size());

// sending debug information
# if !defined(NDEBUG)
reqs[3] = mpicom.irecv(peer, msg_recv ^ debug, buf_rng);
reqs[3] = mpicom.irecv(peer, msg_recv + n_dbg_tags, buf_rng);
# endif
}
#else
@@ -137,7 +149,7 @@ namespace libmpdataxx
send_hlpr(a, idx_send);

// waiting for the transfers to finish
boost::mpi::wait_all(reqs.begin(), reqs.begin() + 1 + n_dbg_reqs); // MPI_Waitall is thread-safe?
boost::mpi::wait_all(reqs.begin(), reqs.begin() + 1 + n_dbg_send_reqs); // MPI_Waitall is thread-safe?
#else
assert(false);
#endif
@@ -153,7 +165,7 @@ namespace libmpdataxx
recv_hlpr(a, idx_recv);

// waiting for the transfers to finish
boost::mpi::wait_all(reqs.begin() + 1 + n_dbg_reqs, reqs.end()); // MPI_Waitall is thread-safe?
boost::mpi::wait_all(reqs.begin() + 1 + n_dbg_send_reqs, reqs.end()); // MPI_Waitall is thread-safe?

// a blitz handler for the used part of the receive buffer
arr_t arr_recv(buf_recv, a(idx_recv).shape(), blitz::neverDeleteData); // TODO: shape directly from idx_recv
@@ -207,13 +219,21 @@ namespace libmpdataxx
// ctor
remote_common(
const rng_t &i,
const std::array<int, n_dims> &grid_size
const std::array<int, n_dims> &distmem_grid_size,
bool single_threaded = false
) :
parent_t(i, grid_size),
grid_size_0(grid_size[0])
parent_t(i, distmem_grid_size, single_threaded)
{
#if defined(USE_MPI)
const int slice_size = n_dims==1 ? 1 : (n_dims==2? grid_size[1]+6 : (grid_size[1]+6) * (grid_size[2]+6) ); // 3 is the max halo size (?), so 6 on both sides

const int slice_size = n_dims==1 ? 1 : (n_dims==2? distmem_grid_size[1]+6 : (distmem_grid_size[1]+6) * (distmem_grid_size[2]+6) ); // 3 is the max halo size (?), so 6 on both sides
//std::cerr << "remote_common ctor, "
// << " distmem_grid_size[0]: " << distmem_grid_size[0]
// << " distmem_grid_size[1]: " << distmem_grid_size[1]
// << " distmem_grid_size[2]: " << distmem_grid_size[2]
// << " slice_size: " << slice_size
// << " halo: " << halo
// << std::endl;
// allocate enough memory in buffers to store largest halos to be sent
buf_send = (real_t *) malloc(halo * slice_size * sizeof(real_t));
buf_recv = (real_t *) malloc(halo * slice_size * sizeof(real_t));
@@ -232,3 +252,4 @@ namespace libmpdataxx
}
} // namespace bcond
} // namespace libmpdataxx

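
For reference, the send/receive buffers above are sized once for the largest slab a process can exchange: an x-normal slice padded by 6 points in each remaining dimension (room for a halo of up to 3 on either side), times the halo depth. A worked example with illustrative numbers, not taken from the commit:

#include <cstdio>

int main()
{
  // illustrative values: distmem_grid_size = {64, 128, 96}, halo = 2, real_t = double
  const int  slice_size = (128 + 6) * (96 + 6);                     // 13668 points per x-normal slice
  const auto bytes      = 2 /* halo */ * slice_size * sizeof(double);
  std::printf("%d points per slice, %zu bytes per buffer\n", slice_size, bytes);
}
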
12 changes: 6 additions & 6 deletions libmpdata++/bcond/open_3d.hpp
@@ -78,15 +78,15 @@ namespace libmpdataxx
// TODO: exactly the same code below!
switch (d) // note: order and lack of breaks intentional!
{
case 0:
case 1:
av[d+2](pi<d>(i, j, (k-h).first())) = 0;
av[d+2](pi<d>(i, j, (k+h).last() )) = 0;

case 1:
case 2:
av[d+1](pi<d>(i, (j-h).first(), k)) = 0;
av[d+1](pi<d>(i, (j+h).last(), k)) = 0;

case 2:
case 0:
break;

default: assert(false);
@@ -190,15 +190,15 @@

switch (d) // note: order and lack of breaks intentional!
{
case 0:
case 1:
av[d+2](pi<d>(i, j, (k-h).first())) = 0;
av[d+2](pi<d>(i, j, (k+h).last() )) = 0;

case 1:
case 2:
av[d+1](pi<d>(i, (j-h).first(), k)) = 0;
av[d+1](pi<d>(i, (j+h).last(), k)) = 0;

case 2:
case 0:
break;

default: assert(false);
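
The reordering of the cases above changes which components of the av array get zeroed for a given d; the intentional fall-through mechanism itself is unchanged. Reading the committed switch, the new flow is:

// d == 1 : zero av[d+2] at the k edges, then fall through and zero av[d+1] at the j edges
// d == 2 : zero av[d+1] at the j edges only
// d == 0 : break immediately; nothing is zeroed
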
