Non-HPX MPI communication in multithreaded/funneled mode, interleaved with channels, freezes HPX. #5919
I checked HPX 1.9.0.
Well, I got it to freeze now :)

//
// Created by jn98zk on 6/12/22.
//
#include <hpx/hpx_init.hpp>
#include <hpx/channel.hpp>
#include <hpx/future.hpp>
#include <hpx/iostream.hpp>
#include <boost/mpi.hpp>
constexpr int number_of_trials = 10'000;
HPX_REGISTER_CHANNEL(int);
int hpx_main() {
    boost::mpi::environment theEnvironment(boost::mpi::threading::multiple);
    boost::mpi::communicator mpi_world;
    int world_size = hpx::get_num_localities().get();
    int data_send = 4;
    int data_receive;
    int my_rank = hpx::get_locality_id();
    int i_am_even = my_rank % 2 == 0 ? 1 : -1;
    // Positive modulo: wrap the neighbour index into [0, world_size)
    int my_neighbour = (((my_rank + i_am_even) % world_size) + world_size) % world_size;
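    // Worked example, assuming world_size == 2 (added for illustration):
    //   rank 0 (even): my_neighbour = ((0 + 1) % 2 + 2) % 2 = 1
    //   rank 1 (odd):  my_neighbour = ((1 - 1) % 2 + 2) % 2 = 0
    // so the two ranks pair up with each other.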
    hpx::distributed::channel<int> send(hpx::find_here());
    auto const iteration_name = hpx::util::format("/data/");
    hpx::register_with_basename(iteration_name, send, my_rank);
    auto recv = hpx::find_from_basename<hpx::distributed::channel<int>>(iteration_name, my_neighbour);
    for (size_t i = 0; i < number_of_trials; ++i) {
        send.set(hpx::launch::async, 4, i);
        auto recv_data = recv.get(hpx::launch::async, i);
        if (my_rank % 2 == 0) {
            mpi_world.send(my_neighbour, 0, &data_send, 1);
            mpi_world.recv(my_neighbour, 0, &data_receive, 1);
        } else {
            mpi_world.recv(my_neighbour, 0, &data_receive, 1);
            mpi_world.send(my_neighbour, 0, &data_send, 1);
        }
        recv_data.get();
        hpx::util::format_to(std::cout, "Running {}\n", i);
    }
// std::cout << hpx::util::format("rank {} will send to {} ",my_rank,my_neighbour) << std::endl;
// std::cout << hpx::util::format("rank {} will recive from to {} ",my_rank,my_neighbour) << std::endl;
// auto recv_data = recv.get(hpx::launch::sync);
return hpx::finalize();
}
// Run with 2 ranks
int main(int argc, char *argv[]) {
    // std::vector<std::string> const cfg = {"hpx.run_hpx_main!=1", "hpx.parcel.mpi.enable!=0"};
    // hpx.run_hpx_main!=1 runs hpx_main on every locality, not just on locality 0
    std::vector<std::string> const cfg = {"hpx.run_hpx_main!=1"};
    // LD_LIBRARY_PATH
    hpx::init_params init_args;
    init_args.cfg = cfg;
    return hpx::init(argc, argv, init_args);
}
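A note on the MPI threading levels involved (context only, not a fix for the freeze): with boost::mpi::threading::funneled the MPI standard only allows MPI calls from the thread that initialized MPI, while HPX is free to run hpx_main on, or resume it after a suspension point on, any of its worker OS threads, so the funneled contract is easy to violate from HPX code. Below is a minimal plain-MPI sketch, with no HPX involved, of the next-stronger level MPI_THREAD_SERIALIZED, where a process-wide mutex is enough to satisfy the contract; the helper names mpi_guard, guarded_send, and guarded_recv are invented for this sketch.

#include <mpi.h>
#include <cstdio>
#include <mutex>

std::mutex mpi_guard; // serializes every MPI call made by this process

// Hypothetical helpers: each takes the lock, so no two MPI calls ever
// overlap, which is all MPI_THREAD_SERIALIZED requires.
void guarded_send(int value, int dest, int tag) {
    std::lock_guard<std::mutex> lock(mpi_guard);
    MPI_Send(&value, 1, MPI_INT, dest, tag, MPI_COMM_WORLD);
}

int guarded_recv(int source, int tag) {
    std::lock_guard<std::mutex> lock(mpi_guard);
    int value = 0;
    MPI_Recv(&value, 1, MPI_INT, source, tag, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    return value;
}

// Run with exactly 2 ranks: mpirun -n 2 ./a.out
int main(int argc, char **argv) {
    int provided = 0;
    MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided);
    if (provided < MPI_THREAD_SERIALIZED) {
        std::fprintf(stderr, "MPI library lacks SERIALIZED support\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    int rank = 0;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    int peer = rank == 0 ? 1 : 0;
    if (rank == 0) {
        guarded_send(42, peer, 0);
        std::printf("rank 0 got back %d\n", guarded_recv(peer, 0));
    } else {
        // echo whatever rank 0 sent
        guarded_send(guarded_recv(peer, 0), peer, 0);
    }
    MPI_Finalize();
    return 0;
}

Requesting boost::mpi::threading::multiple (as the first reproducer does) only removes this constraint if the underlying MPI library was actually built with full thread support, which the provided level reported by MPI_Init_thread will reveal.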
Even funnier: if I use the channel anti-pattern (creating a new channel on each iteration), it survives the test.

//
// Created by jn98zk on 6/12/22.
//
#include <hpx/hpx_init.hpp>
#include <hpx/channel.hpp>
#include <hpx/future.hpp>
#include <hpx/iostream.hpp>
#include <boost/mpi.hpp>
constexpr int number_of_trials = 10'000;
HPX_REGISTER_CHANNEL(int);
int hpx_main() {
    boost::mpi::environment theEnvironment(boost::mpi::threading::funneled);
    boost::mpi::communicator mpi_world;
    int world_size = hpx::get_num_localities().get();
    int data_send = 4;
    int data_receive;
    int my_rank = hpx::get_locality_id();
    int i_am_even = my_rank % 2 == 0 ? 1 : -1;
    // Positive modulo: wrap the neighbour index into [0, world_size)
    int my_neighbour = (((my_rank + i_am_even) % world_size) + world_size) % world_size;
    for (size_t i = 0; i < number_of_trials; ++i) {
        hpx::distributed::channel<int> send(hpx::find_here());
        auto const iteration_name = hpx::util::format("/data/{}", i);
        hpx::register_with_basename(iteration_name, send, my_rank);
        auto recv = hpx::find_from_basename<hpx::distributed::channel<int>>(iteration_name, my_neighbour);
        send.set(hpx::launch::async, 4, i);
        auto recv_data = recv.get(hpx::launch::async, i);
        if (my_rank % 2 == 0) {
            mpi_world.send(my_neighbour, i, &data_send, 1);
            mpi_world.recv(my_neighbour, i, &data_receive, 1);
        } else {
            mpi_world.recv(my_neighbour, i, &data_receive, 1);
            mpi_world.send(my_neighbour, i, &data_send, 1);
        }
        recv_data.get();
        hpx::util::format_to(std::cout, "Running {}\n", i);
    }
// std::cout << hpx::util::format("rank {} will send to {} ",my_rank,my_neighbour) << std::endl;
// std::cout << hpx::util::format("rank {} will recive from to {} ",my_rank,my_neighbour) << std::endl;
// auto recv_data = recv.get(hpx::launch::sync);
return hpx::finalize();
}
int main(int argc, char *argv[]) {
    // std::vector<std::string> const cfg = {"hpx.run_hpx_main!=1", "hpx.parcel.mpi.enable!=0"};
    std::vector<std::string> const cfg = {"hpx.run_hpx_main!=1"};
    // LD_LIBRARY_PATH
    hpx::init_params init_args;
    init_args.cfg = cfg;
    return hpx::init(argc, argv, init_args);
}
This should be fixed by #6213. Would you have the time to try it again?
Thanks for the update; I'll be able to report back by Sunday at the latest.
Unfortunately, it did not work with the latest main branch :(
CMakeLists.txt
@John98Zakaria thanks for testing! Back to the drawing board, then :/
Expected Behavior
Making MPI calls that don't involve HPX shouldn't produce segfaults.
Actual Behavior
Making MPI calls that don't involve HPX produces segfaults.
Steps to Reproduce the Problem
Run the following several times; at some point it will segfault:
mpirun -n 4 ./mpi_conflict --hpx:threads 4
If you use only one HPX thread, it won't segfault:
mpirun -n 4 ./mpi_conflict --hpx:threads 1
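Since the failure is intermittent, a wrapper loop like this (a hypothetical convenience following the commands above, assuming the compiled binary is named mpi_conflict) makes it easier to hit:
for i in $(seq 1 20); do mpirun -n 4 ./mpi_conflict --hpx:threads 4 || break; done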
Stacktrace && HPX config