Merge pull request #630 from csparker247/update-ptl-v3
Compile against PTL v3
carterbox committed Nov 8, 2023
2 parents 32b6497 + b28afd0 commit 87291c1
Showing 16 changed files with 167 additions and 204 deletions.
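
The bulk of this diff is a mechanical migration to the PTL v3 API: free functions and helpers now live in the PTL namespace (PTL::GetEnv, PTL::AutoLock, PTL::TypeMutex, PTL::ConsumeParameters), the thread pool is constructed from a ThreadPool::Config struct rather than a bare thread count, ThreadData::GetInstance() hands back a raw pointer reference instead of being managed through a unique_ptr, and the is_master flag is renamed is_main. A minimal sketch of those patterns, assuming a PTL v3 installation (the header paths and any Config field other than pool_size are assumptions; the symbols themselves are the ones used in the hunks below):

// sketch.cc -- PTL v3 usage patterns mirrored from this commit (not TomoPy code)
#include <iostream>
#include <memory>

#include <PTL/AutoLock.hh>   // header paths are assumptions
#include <PTL/ThreadData.hh>
#include <PTL/ThreadPool.hh>
#include <PTL/Threading.hh>
#include <PTL/Utility.hh>

int main()
{
    // v3 free functions are namespaced
    auto nthreads = PTL::GetEnv("TOMOPY_NUM_THREADS", 4);

    // v3 builds the pool from a Config struct instead of a bare size
    PTL::ThreadPool::Config cfg;
    cfg.pool_size = nthreads;
    auto tp = std::make_unique<PTL::ThreadPool>(cfg);

    // GetInstance() now yields a raw pointer reference that the caller fills in
    // (mirrors common.hh, which also assigns a freshly new'ed instance)
    auto*& td = PTL::ThreadData::GetInstance();
    if(!td)
        td = new PTL::ThreadData(tp.get());
    td->is_main = true;  // renamed from is_master

    // serialized logging, as done in common.hh
    PTL::AutoLock lk(PTL::TypeMutex<decltype(std::cout)>());
    std::cout << "pool size: " << tp->size() << std::endl;
    return 0;
}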
10 changes: 10 additions & 0 deletions azure-pipelines.yml
@@ -82,6 +82,11 @@ jobs:
displayName: List build environment
- script: |
source activate tomopy
export CMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}:${CONDA_PREFIX}"
export CC=$(which gcc)
export CXX=$(which g++)
echo "C compiler is ${CC}"
echo "C++ compiler is ${CXX}"
pip install . --no-deps
cmake -S . -B build -GNinja -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -DCMAKE_BUILD_TYPE=Release -DTOMOPY_USE_MKL:BOOL=$(use.mkl)
cmake --build build
@@ -112,6 +117,11 @@ jobs:
displayName: List build environment
- script: |
source activate tomopy
export CMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}:${CONDA_PREFIX}"
export CC=$(which clang)
export CXX=$(which clang++)
echo "C compiler is ${CC}"
echo "C++ compiler is ${CXX}"
pip install . --no-deps
cmake -S . -B build -GNinja -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -DCMAKE_BUILD_TYPE=Release
cmake --build build
1 change: 0 additions & 1 deletion cmake/Modules/Options.cmake
@@ -71,7 +71,6 @@ add_option(TOMOPY_USE_SANITIZER "Enable sanitizer" OFF)
add_option(TOMOPY_CXX_GRIDREC "Enable gridrec with C++ std::complex"
${_USE_CXX_GRIDREC})
add_option(TOMOPY_USE_COVERAGE "Enable code coverage for C/C++" OFF)
add_option(TOMOPY_USE_PTL "Enable Parallel Tasking Library (PTL)" ON)
add_option(TOMOPY_USE_CLANG_TIDY "Enable clang-tidy (C++ linter)" OFF)
add_option(TOMOPY_USE_CUDA "Enable CUDA option for GPU execution" ${_USE_CUDA})
add_option(TOMOPY_USER_FLAGS
1 change: 0 additions & 1 deletion pyctest_tomopy.py
@@ -198,7 +198,6 @@ def add_option(parser, lc_name, disp_name):
add_bool_opt(args, "TOMOPY_USE_TIMEMORY", args.enable_timemory, args.disable_timemory)
add_bool_opt(args, "TOMOPY_USE_SANITIZER",
args.enable_sanitizer, args.disable_sanitizer)
add_bool_opt(args, "TOMOPY_USE_PTL", args.enable_tasking, args.disable_tasking)

if args.enable_sanitizer:
args.cmake_args.append("-DSANITIZER_TYPE:STRING={}".format(args.sanitizer_type))
28 changes: 10 additions & 18 deletions source/CMakeLists.txt
@@ -20,25 +20,17 @@ set(CMAKE_POSITION_INDEPENDENT_CODE
# PTL submodule
#
# ------------------------------------------------------------------------------#
checkout_git_submodule(
RECURSIVE
TEST_FILE
CMakeLists.txt
RELATIVE_PATH
source/PTL
WORKING_DIRECTORY
${PROJECT_SOURCE_DIR})

if(TOMOPY_USE_PTL)
add_subdirectory(PTL)
if(BUILD_STATIC_LIBS)
list(APPEND TOMOPY_EXTERNAL_LIBRARIES ptl-static)
else()
list(APPEND TOMOPY_EXTERNAL_LIBRARIES ptl-shared)
endif()
list(APPEND ${PROJECT_NAME}_DEFINITIONS TOMOPY_USE_PTL)
if(TOMOPY_USE_OPENCV OR TOMOPY_USE_CUDA)
checkout_git_submodule(
RECURSIVE
TEST_FILE
CMakeLists.txt
RELATIVE_PATH
source/PTL
WORKING_DIRECTORY
${PROJECT_SOURCE_DIR})
add_subdirectory(PTL EXCLUDE_FROM_ALL)
endif()

# ------------------------------------------------------------------------------#
#
# TomoPy Python module
2 changes: 1 addition & 1 deletion source/PTL
Submodule PTL updated 105 files
8 changes: 1 addition & 7 deletions source/libtomo/accel/CMakeLists.txt
@@ -23,12 +23,6 @@ target_include_directories(
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
PRIVATE ${CMAKE_CURRENT_LIST_DIR})

if(TRUE)
# FIXME: Need PTL headers regardless of whether we use PTL
target_include_directories(tomo-accel
PRIVATE ${tomopy_SOURCE_DIR}/source/PTL/source)
endif()

if(TOMOPY_USE_CUDA)

target_sources(tomo-accel PRIVATE gpu/common.cu gpu/mlem.cu gpu/sirt.cu
@@ -54,7 +48,7 @@ if(TOMOPY_USE_CUDA)
endif(TOMOPY_USE_CUDA)

target_link_libraries(tomo-accel PRIVATE ${TOMOPY_EXTERNAL_LIBRARIES}
${TOMOPY_EXTERNAL_PRIVATE_LIBRARIES})
${TOMOPY_EXTERNAL_PRIVATE_LIBRARIES} ptl-static)

target_compile_definitions(tomo-accel PRIVATE ${${PROJECT_NAME}_DEFINITIONS})

51 changes: 26 additions & 25 deletions source/libtomo/accel/common.hh
@@ -58,9 +58,9 @@ CreateThreadPool(unique_thread_pool_t& tp, num_threads_t& pool_size)
auto min_threads = num_threads_t(1);
if(pool_size <= 0)
{
#if defined(TOMOPY_USE_PTL)

// compute some properties (expected python threads, max threads)
auto pythreads = GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);
auto pythreads = PTL::GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);
# if defined(TOMOPY_USE_CUDA)
// general oversubscription when CUDA is enabled
auto max_threads =
@@ -69,11 +69,9 @@ CreateThreadPool(unique_thread_pool_t& tp, num_threads_t& pool_size)
// if known that CPU only, just try to use all cores
auto max_threads = HW_CONCURRENCY / std::max(pythreads, min_threads);
# endif
auto nthreads = std::max(GetEnv("TOMOPY_NUM_THREADS", max_threads), min_threads);
pool_size = nthreads;
#else
pool_size = 1;
#endif
auto nthreads =
std::max(PTL::GetEnv("TOMOPY_NUM_THREADS", max_threads), min_threads);
pool_size = nthreads;
}
// always specify at least one thread even if not creating threads
pool_size = std::max(pool_size, min_threads);
@@ -85,30 +83,30 @@ CreateThreadPool(unique_thread_pool_t& tp, num_threads_t& pool_size)
#endif
// use unique pointer per-thread so manager gets deleted when thread gets deleted
// create the thread-pool instance
tp = unique_thread_pool_t(new tomopy::ThreadPool(pool_size));
tomopy::ThreadPool::Config cfg;
cfg.pool_size = pool_size;
tp = unique_thread_pool_t(new tomopy::ThreadPool(cfg));

#if defined(TOMOPY_USE_PTL)
// ensure this thread is assigned id, assign variable so no unused result warning
auto tid = GetThisThreadID();

// initialize the thread-local data information
auto& thread_data = ThreadData::GetInstance();
auto*& thread_data = PTL::ThreadData::GetInstance();
if(!thread_data)
thread_data.reset(new ThreadData(tp.get()));
thread_data = new PTL::ThreadData(tp.get());

// tell thread that initialized thread-pool to process tasks
// (typically master thread will only wait for other threads)
thread_data->is_master = true;
// (typically main thread will only wait for other threads)
thread_data->is_main = true;

// tell thread that it is not currently within task
thread_data->within_task = false;

// notify
AutoLock l(TypeMutex<decltype(std::cout)>());
PTL::AutoLock l(PTL::TypeMutex<decltype(std::cout)>());
std::cout << "\n"
<< "[" << tid << "] Initialized tasking run manager with " << tp->size()
<< " threads..." << std::endl;
#endif
}

//======================================================================================//
@@ -237,14 +235,14 @@ public:
inline DeviceOption
GetDevice(const std::string& preferred)
{
auto pythreads = GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);
auto pythreads = PTL::GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);
using DeviceOptionList = std::deque<DeviceOption>;
DeviceOptionList options = { };
DeviceOptionList options = {};
std::string default_key = "cpu";

#if defined(TOMOPY_USE_OPENCV)
options.push_back(DeviceOption(0, "cpu", "Run on CPU (OpenCV)"));
# endif
#endif

#if defined(TOMOPY_USE_CUDA)
auto num_devices = cuda_device_count();
@@ -261,12 +259,13 @@ GetDevice(const std::string& preferred)
}
else
{
AutoLock l(TypeMutex<decltype(std::cout)>());
PTL::AutoLock l(PTL::TypeMutex<decltype(std::cout)>());
std::cerr << "\n##### No CUDA device(s) available #####\n" << std::endl;
}
#endif

if (options.empty()){
if(options.empty())
{
throw std::runtime_error("No devices found! Check that TomoPy was "
"compiled with OpenCV or CUDA.");
}
@@ -278,7 +277,8 @@ GetDevice(const std::string& preferred)

//------------------------------------------------------------------------//
// print the options the first time it is encountered
auto print_options = [&]() {
auto print_options = [&]()
{
static std::atomic_uint _once;
auto _count = _once++;
if(_count % pythreads > 0)
@@ -301,12 +301,13 @@ GetDevice(const std::string& preferred)
}
DeviceOption::footer(ss);

AutoLock l(TypeMutex<decltype(std::cout)>());
PTL::AutoLock l(PTL::TypeMutex<decltype(std::cout)>());
std::cout << "\n" << ss.str() << std::endl;
};
//------------------------------------------------------------------------//
// print the option selection first time it is encountered
auto print_selection = [&](DeviceOption& selected_opt) {
auto print_selection = [&](DeviceOption& selected_opt)
{
static std::atomic_uint _once;
auto _count = _once++;
if(_count % pythreads > 0)
@@ -323,7 +324,7 @@ GetDevice(const std::string& preferred)
ss << "Selected device: " << selected_opt << "\n";
DeviceOption::spacer(ss, '-');

AutoLock l(TypeMutex<decltype(std::cout)>());
PTL::AutoLock l(PTL::TypeMutex<decltype(std::cout)>());
std::cout << ss.str() << std::endl;
};
//------------------------------------------------------------------------//
@@ -356,7 +357,7 @@ stream_sync(cudaStream_t _stream)
cudaStreamSynchronize(_stream);
CUDA_CHECK_LAST_STREAM_ERROR(_stream);
#else
ConsumeParameters(_stream);
PTL::ConsumeParameters(_stream);
#endif
}

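For reference, the pool-sizing logic that CreateThreadPool() now runs unconditionally (it was previously guarded by TOMOPY_USE_PTL) divides the hardware threads among the expected Python-level callers and then honors a TOMOPY_NUM_THREADS override. A standalone illustration of just that CPU-only arithmetic, with the HW_CONCURRENCY macro replaced by std::thread::hardware_concurrency() (the PTL header path is an assumption):

// pool_size.cc -- illustration of the CPU-only pool-sizing arithmetic in common.hh
#include <algorithm>
#include <cstdio>
#include <thread>

#include <PTL/Utility.hh>  // assumed location of PTL::GetEnv

int main()
{
    const int hw          = static_cast<int>(std::thread::hardware_concurrency());
    const int min_threads = 1;

    // number of Python threads expected to call into libtomo concurrently
    const int pythreads = PTL::GetEnv("TOMOPY_PYTHON_THREADS", hw);

    // CPU-only path: share the cores among those Python threads
    const int max_threads = hw / std::max(pythreads, min_threads);

    // explicit override, clamped to at least one worker
    const int pool_size =
        std::max(PTL::GetEnv("TOMOPY_NUM_THREADS", max_threads), min_threads);

    std::printf("threads per pool: %d\n", pool_size);
    return 0;
}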
8 changes: 4 additions & 4 deletions source/libtomo/accel/cxx/mlem.cc
@@ -76,7 +76,7 @@ cxx_mlem(const float* data, int dy, int dt, int dx, const float* center,
// local count for the thread
int count = registration.initialize();
// number of threads started at Python level
auto tcount = GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);
auto tcount = PTL::GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);

// configured runtime options
RuntimeOptions opts(pool_size, interp, device, grid_size, block_size);
@@ -109,7 +109,7 @@ cxx_mlem(const float* data, int dy, int dt, int dx, const float* center,
}
catch(std::exception& e)
{
AutoLock l(TypeMutex<decltype(std::cout)>());
PTL::AutoLock l(PTL::TypeMutex<decltype(std::cout)>());
std::cerr << "[TID: " << tid << "] " << e.what()
<< "\nFalling back to CPU algorithm..." << std::endl;
return EXIT_FAILURE;
Expand All @@ -129,7 +129,7 @@ void
mlem_cpu_compute_projection(data_array_t& cpu_data, int p, int dy, int dt, int dx, int nx,
int ny, const float* theta)
{
ConsumeParameters(dy);
PTL::ConsumeParameters(dy);
auto cache = cpu_data[GetThisThreadID() % cpu_data.size()];

// calculate some values
@@ -237,4 +237,4 @@ mlem_cpu(const float* data, int dy, int dt, int dx, const float*, const float* t
printf("\n");
}

#endif // TOMOPY_USE_OPENCV
#endif // TOMOPY_USE_OPENCV
8 changes: 4 additions & 4 deletions source/libtomo/accel/cxx/sirt.cc
@@ -75,7 +75,7 @@ cxx_sirt(const float* data, int dy, int dt, int dx, const float* center,
// local count for the thread
int count = registration.initialize();
// number of threads started at Python level
auto tcount = GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);
auto tcount = PTL::GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);

// configured runtime options
RuntimeOptions opts(pool_size, interp, device, grid_size, block_size);
@@ -108,7 +108,7 @@ cxx_sirt(const float* data, int dy, int dt, int dx, const float* center,
}
catch(std::exception& e)
{
AutoLock l(TypeMutex<decltype(std::cout)>());
PTL::AutoLock l(PTL::TypeMutex<decltype(std::cout)>());
std::cerr << "[TID: " << tid << "] " << e.what()
<< "\nFalling back to CPU algorithm..." << std::endl;
// return failure code
Expand All @@ -130,7 +130,7 @@ void
sirt_cpu_compute_projection(data_array_t& cpu_data, int p, int dy, int dt, int dx, int nx,
int ny, const float* theta)
{
ConsumeParameters(dy);
PTL::ConsumeParameters(dy);
auto cache = cpu_data[GetThisThreadID() % cpu_data.size()];

// calculate some values
@@ -238,4 +238,4 @@ sirt_cpu(const float* data, int dy, int dt, int dx, const float*, const float* t
printf("\n");
}

#endif // TOMOPY_USE_OPENCV
#endif // TOMOPY_USE_OPENCV
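
Both mlem.cc and sirt.cc use the same recovery pattern: the accelerated path runs inside a try block, failures are reported under a PTL::AutoLock so messages from concurrent Python threads do not interleave, and EXIT_FAILURE tells the caller to rerun the plain CPU kernel. A self-contained sketch of that flow (run_accelerated and run_cpu are hypothetical stand-ins, not TomoPy functions; header paths are assumptions):

// fallback.cc -- GPU-to-CPU fallback pattern from cxx_mlem()/cxx_sirt()
#include <cstdlib>
#include <iostream>
#include <stdexcept>

#include <PTL/AutoLock.hh>   // header paths are assumptions
#include <PTL/Threading.hh>

int  run_accelerated() { throw std::runtime_error("no CUDA device available"); }
void run_cpu() { std::cout << "running CPU reconstruction" << std::endl; }

int try_accelerated()
{
    try
    {
        return run_accelerated();
    }
    catch(std::exception& e)
    {
        // serialize the warning, as the hunks above do
        PTL::AutoLock l(PTL::TypeMutex<decltype(std::cout)>());
        std::cerr << e.what() << "\nFalling back to CPU algorithm..." << std::endl;
        return EXIT_FAILURE;
    }
}

int main()
{
    if(try_accelerated() == EXIT_FAILURE)
        run_cpu();  // matches the "Falling back to CPU algorithm" message above
    return 0;
}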
