Merge pull request #630 from csparker247/update-ptl-v3
Compile against PTL v3
carterbox committed Nov 8, 2023
2 parents 32b6497 + b28afd0 commit 87291c1
Showing 16 changed files with 167 additions and 204 deletions.
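
The bulk of this diff is a mechanical migration to the PTL v3 API: free functions and helpers now live in the PTL namespace (PTL::GetEnv, PTL::AutoLock, PTL::TypeMutex, PTL::ConsumeParameters), the thread pool is constructed from a ThreadPool::Config struct rather than a bare thread count, ThreadData::GetInstance() hands back a raw pointer reference instead of being managed through a unique_ptr, and the is_master flag is renamed is_main. A minimal sketch of those patterns, assuming a PTL v3 installation (the header paths and any Config field other than pool_size are assumptions; the symbols themselves are the ones used in the hunks below):

// sketch.cc -- PTL v3 usage patterns mirrored from this commit (not TomoPy code)
#include <iostream>
#include <memory>

#include <PTL/AutoLock.hh>   // header paths are assumptions
#include <PTL/ThreadData.hh>
#include <PTL/ThreadPool.hh>
#include <PTL/Threading.hh>
#include <PTL/Utility.hh>

int main()
{
    // v3 free functions are namespaced
    auto nthreads = PTL::GetEnv("TOMOPY_NUM_THREADS", 4);

    // v3 builds the pool from a Config struct instead of a bare size
    PTL::ThreadPool::Config cfg;
    cfg.pool_size = nthreads;
    auto tp = std::make_unique<PTL::ThreadPool>(cfg);

    // GetInstance() now yields a raw pointer reference that the caller fills in
    // (mirrors common.hh, which also assigns a freshly new'ed instance)
    auto*& td = PTL::ThreadData::GetInstance();
    if(!td)
        td = new PTL::ThreadData(tp.get());
    td->is_main = true;  // renamed from is_master

    // serialized logging, as done in common.hh
    PTL::AutoLock lk(PTL::TypeMutex<decltype(std::cout)>());
    std::cout << "pool size: " << tp->size() << std::endl;
    return 0;
}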
10 changes: 10 additions & 0 deletions azure-pipelines.yml
@@ -82,6 +82,11 @@ jobs:
displayName: List build environment
- script: |
source activate tomopy
export CMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}:${CONDA_PREFIX}"
export CC=$(which gcc)
export CXX=$(which g++)
echo "C compiler is ${CC}"
echo "C++ compiler is ${CXX}"
pip install . --no-deps
cmake -S . -B build -GNinja -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -DCMAKE_BUILD_TYPE=Release -DTOMOPY_USE_MKL:BOOL=$(use.mkl)
cmake --build build
@@ -112,6 +117,11 @@ jobs:
displayName: List build environment
- script: |
source activate tomopy
export CMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}:${CONDA_PREFIX}"
export CC=$(which clang)
export CXX=$(which clang++)
echo "C compiler is ${CC}"
echo "C++ compiler is ${CXX}"
pip install . --no-deps
cmake -S . -B build -GNinja -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -DCMAKE_BUILD_TYPE=Release
cmake --build build
1 change: 0 additions & 1 deletion cmake/Modules/Options.cmake
@@ -71,7 +71,6 @@ add_option(TOMOPY_USE_SANITIZER "Enable sanitizer" OFF)
add_option(TOMOPY_CXX_GRIDREC "Enable gridrec with C++ std::complex"
${_USE_CXX_GRIDREC})
add_option(TOMOPY_USE_COVERAGE "Enable code coverage for C/C++" OFF)
add_option(TOMOPY_USE_PTL "Enable Parallel Tasking Library (PTL)" ON)
add_option(TOMOPY_USE_CLANG_TIDY "Enable clang-tidy (C++ linter)" OFF)
add_option(TOMOPY_USE_CUDA "Enable CUDA option for GPU execution" ${_USE_CUDA})
add_option(TOMOPY_USER_FLAGS
1 change: 0 additions & 1 deletion pyctest_tomopy.py
@@ -198,7 +198,6 @@ def add_option(parser, lc_name, disp_name):
add_bool_opt(args, "TOMOPY_USE_TIMEMORY", args.enable_timemory, args.disable_timemory)
add_bool_opt(args, "TOMOPY_USE_SANITIZER",
args.enable_sanitizer, args.disable_sanitizer)
add_bool_opt(args, "TOMOPY_USE_PTL", args.enable_tasking, args.disable_tasking)

if args.enable_sanitizer:
args.cmake_args.append("-DSANITIZER_TYPE:STRING={}".format(args.sanitizer_type))
28 changes: 10 additions & 18 deletions source/CMakeLists.txt
@@ -20,25 +20,17 @@ set(CMAKE_POSITION_INDEPENDENT_CODE
# PTL submodule
#
# ------------------------------------------------------------------------------#
checkout_git_submodule(
RECURSIVE
TEST_FILE
CMakeLists.txt
RELATIVE_PATH
source/PTL
WORKING_DIRECTORY
${PROJECT_SOURCE_DIR})

if(TOMOPY_USE_PTL)
add_subdirectory(PTL)
if(BUILD_STATIC_LIBS)
list(APPEND TOMOPY_EXTERNAL_LIBRARIES ptl-static)
else()
list(APPEND TOMOPY_EXTERNAL_LIBRARIES ptl-shared)
endif()
list(APPEND ${PROJECT_NAME}_DEFINITIONS TOMOPY_USE_PTL)
if(TOMOPY_USE_OPENCV OR TOMOPY_USE_CUDA)
checkout_git_submodule(
RECURSIVE
TEST_FILE
CMakeLists.txt
RELATIVE_PATH
source/PTL
WORKING_DIRECTORY
${PROJECT_SOURCE_DIR})
add_subdirectory(PTL EXCLUDE_FROM_ALL)
endif()

# ------------------------------------------------------------------------------#
#
# TomoPy Python module
2 changes: 1 addition & 1 deletion source/PTL
Submodule PTL updated 105 files
8 changes: 1 addition & 7 deletions source/libtomo/accel/CMakeLists.txt
@@ -23,12 +23,6 @@ target_include_directories(
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
PRIVATE ${CMAKE_CURRENT_LIST_DIR})

if(TRUE)
# FIXME: Need PTL headers regardless of whether we use PTL
target_include_directories(tomo-accel
PRIVATE ${tomopy_SOURCE_DIR}/source/PTL/source)
endif()

if(TOMOPY_USE_CUDA)

target_sources(tomo-accel PRIVATE gpu/common.cu gpu/mlem.cu gpu/sirt.cu
@@ -54,7 +48,7 @@ if(TOMOPY_USE_CUDA)
endif(TOMOPY_USE_CUDA)

target_link_libraries(tomo-accel PRIVATE ${TOMOPY_EXTERNAL_LIBRARIES}
${TOMOPY_EXTERNAL_PRIVATE_LIBRARIES})
${TOMOPY_EXTERNAL_PRIVATE_LIBRARIES} ptl-static)

target_compile_definitions(tomo-accel PRIVATE ${${PROJECT_NAME}_DEFINITIONS})

51 changes: 26 additions & 25 deletions source/libtomo/accel/common.hh
@@ -58,9 +58,9 @@ CreateThreadPool(unique_thread_pool_t& tp, num_threads_t& pool_size)
auto min_threads = num_threads_t(1);
if(pool_size <= 0)
{
#if defined(TOMOPY_USE_PTL)

// compute some properties (expected python threads, max threads)
auto pythreads = GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);
auto pythreads = PTL::GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);
# if defined(TOMOPY_USE_CUDA)
// general oversubscription when CUDA is enabled
auto max_threads =
@@ -69,11 +69,9 @@ CreateThreadPool(unique_thread_pool_t& tp, num_threads_t& pool_size)
// if known that CPU only, just try to use all cores
auto max_threads = HW_CONCURRENCY / std::max(pythreads, min_threads);
# endif
auto nthreads = std::max(GetEnv("TOMOPY_NUM_THREADS", max_threads), min_threads);
pool_size = nthreads;
#else
pool_size = 1;
#endif
auto nthreads =
std::max(PTL::GetEnv("TOMOPY_NUM_THREADS", max_threads), min_threads);
pool_size = nthreads;
}
// always specify at least one thread even if not creating threads
pool_size = std::max(pool_size, min_threads);
@@ -85,30 +83,30 @@ CreateThreadPool(unique_thread_pool_t& tp, num_threads_t& pool_size)
#endif
// use unique pointer per-thread so manager gets deleted when thread gets deleted
// create the thread-pool instance
tp = unique_thread_pool_t(new tomopy::ThreadPool(pool_size));
tomopy::ThreadPool::Config cfg;
cfg.pool_size = pool_size;
tp = unique_thread_pool_t(new tomopy::ThreadPool(cfg));

#if defined(TOMOPY_USE_PTL)
// ensure this thread is assigned id, assign variable so no unused result warning
auto tid = GetThisThreadID();

// initialize the thread-local data information
auto& thread_data = ThreadData::GetInstance();
auto*& thread_data = PTL::ThreadData::GetInstance();
if(!thread_data)
thread_data.reset(new ThreadData(tp.get()));
thread_data = new PTL::ThreadData(tp.get());

// tell thread that initialized thread-pool to process tasks
// (typically master thread will only wait for other threads)
thread_data->is_master = true;
// (typically main thread will only wait for other threads)
thread_data->is_main = true;

// tell thread that it is not currently within task
thread_data->within_task = false;

// notify
AutoLock l(TypeMutex<decltype(std::cout)>());
PTL::AutoLock l(PTL::TypeMutex<decltype(std::cout)>());
std::cout << "\n"
<< "[" << tid << "] Initialized tasking run manager with " << tp->size()
<< " threads..." << std::endl;
#endif
}

//======================================================================================//
@@ -237,14 +235,14 @@ public:
inline DeviceOption
GetDevice(const std::string& preferred)
{
auto pythreads = GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);
auto pythreads = PTL::GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);
using DeviceOptionList = std::deque<DeviceOption>;
DeviceOptionList options = { };
DeviceOptionList options = {};
std::string default_key = "cpu";

#if defined(TOMOPY_USE_OPENCV)
options.push_back(DeviceOption(0, "cpu", "Run on CPU (OpenCV)"));
# endif
#endif

#if defined(TOMOPY_USE_CUDA)
auto num_devices = cuda_device_count();
@@ -261,12 +259,13 @@ GetDevice(const std::string& preferred)
}
else
{
AutoLock l(TypeMutex<decltype(std::cout)>());
PTL::AutoLock l(PTL::TypeMutex<decltype(std::cout)>());
std::cerr << "\n##### No CUDA device(s) available #####\n" << std::endl;
}
#endif

if (options.empty()){
if(options.empty())
{
throw std::runtime_error("No devices found! Check that TomoPy was "
"compiled with OpenCV or CUDA.");
}
@@ -278,7 +277,8 @@ GetDevice(const std::string& preferred)

//------------------------------------------------------------------------//
// print the options the first time it is encountered
auto print_options = [&]() {
auto print_options = [&]()
{
static std::atomic_uint _once;
auto _count = _once++;
if(_count % pythreads > 0)
@@ -301,12 +301,13 @@ GetDevice(const std::string& preferred)
}
DeviceOption::footer(ss);

AutoLock l(TypeMutex<decltype(std::cout)>());
PTL::AutoLock l(PTL::TypeMutex<decltype(std::cout)>());
std::cout << "\n" << ss.str() << std::endl;
};
//------------------------------------------------------------------------//
// print the option selection first time it is encountered
auto print_selection = [&](DeviceOption& selected_opt) {
auto print_selection = [&](DeviceOption& selected_opt)
{
static std::atomic_uint _once;
auto _count = _once++;
if(_count % pythreads > 0)
@@ -323,7 +324,7 @@ GetDevice(const std::string& preferred)
ss << "Selected device: " << selected_opt << "\n";
DeviceOption::spacer(ss, '-');

AutoLock l(TypeMutex<decltype(std::cout)>());
PTL::AutoLock l(PTL::TypeMutex<decltype(std::cout)>());
std::cout << ss.str() << std::endl;
};
//------------------------------------------------------------------------//
@@ -356,7 +357,7 @@ stream_sync(cudaStream_t _stream)
cudaStreamSynchronize(_stream);
CUDA_CHECK_LAST_STREAM_ERROR(_stream);
#else
ConsumeParameters(_stream);
PTL::ConsumeParameters(_stream);
#endif
}

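For reference, the pool-sizing logic that CreateThreadPool() now runs unconditionally (it was previously guarded by TOMOPY_USE_PTL) divides the hardware threads among the expected Python-level callers and then honors a TOMOPY_NUM_THREADS override. A standalone illustration of just that CPU-only arithmetic, with the HW_CONCURRENCY macro replaced by std::thread::hardware_concurrency() (the PTL header path is an assumption):

// pool_size.cc -- illustration of the CPU-only pool-sizing arithmetic in common.hh
#include <algorithm>
#include <cstdio>
#include <thread>

#include <PTL/Utility.hh>  // assumed location of PTL::GetEnv

int main()
{
    const int hw          = static_cast<int>(std::thread::hardware_concurrency());
    const int min_threads = 1;

    // number of Python threads expected to call into libtomo concurrently
    const int pythreads = PTL::GetEnv("TOMOPY_PYTHON_THREADS", hw);

    // CPU-only path: share the cores among those Python threads
    const int max_threads = hw / std::max(pythreads, min_threads);

    // explicit override, clamped to at least one worker
    const int pool_size =
        std::max(PTL::GetEnv("TOMOPY_NUM_THREADS", max_threads), min_threads);

    std::printf("threads per pool: %d\n", pool_size);
    return 0;
}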
8 changes: 4 additions & 4 deletions source/libtomo/accel/cxx/mlem.cc
@@ -76,7 +76,7 @@ cxx_mlem(const float* data, int dy, int dt, int dx, const float* center,
// local count for the thread
int count = registration.initialize();
// number of threads started at Python level
auto tcount = GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);
auto tcount = PTL::GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);

// configured runtime options
RuntimeOptions opts(pool_size, interp, device, grid_size, block_size);
@@ -109,7 +109,7 @@ cxx_mlem(const float* data, int dy, int dt, int dx, const float* center,
}
catch(std::exception& e)
{
AutoLock l(TypeMutex<decltype(std::cout)>());
PTL::AutoLock l(PTL::TypeMutex<decltype(std::cout)>());
std::cerr << "[TID: " << tid << "] " << e.what()
<< "\nFalling back to CPU algorithm..." << std::endl;
return EXIT_FAILURE;
Expand All @@ -129,7 +129,7 @@ void
mlem_cpu_compute_projection(data_array_t& cpu_data, int p, int dy, int dt, int dx, int nx,
int ny, const float* theta)
{
ConsumeParameters(dy);
PTL::ConsumeParameters(dy);
auto cache = cpu_data[GetThisThreadID() % cpu_data.size()];

// calculate some values
@@ -237,4 +237,4 @@ mlem_cpu(const float* data, int dy, int dt, int dx, const float*, const float* t
printf("\n");
}

#endif // TOMOPY_USE_OPENCV
#endif // TOMOPY_USE_OPENCV
8 changes: 4 additions & 4 deletions source/libtomo/accel/cxx/sirt.cc
@@ -75,7 +75,7 @@ cxx_sirt(const float* data, int dy, int dt, int dx, const float* center,
// local count for the thread
int count = registration.initialize();
// number of threads started at Python level
auto tcount = GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);
auto tcount = PTL::GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);

// configured runtime options
RuntimeOptions opts(pool_size, interp, device, grid_size, block_size);
@@ -108,7 +108,7 @@ cxx_sirt(const float* data, int dy, int dt, int dx, const float* center,
}
catch(std::exception& e)
{
AutoLock l(TypeMutex<decltype(std::cout)>());
PTL::AutoLock l(PTL::TypeMutex<decltype(std::cout)>());
std::cerr << "[TID: " << tid << "] " << e.what()
<< "\nFalling back to CPU algorithm..." << std::endl;
// return failure code
Expand All @@ -130,7 +130,7 @@ void
sirt_cpu_compute_projection(data_array_t& cpu_data, int p, int dy, int dt, int dx, int nx,
int ny, const float* theta)
{
ConsumeParameters(dy);
PTL::ConsumeParameters(dy);
auto cache = cpu_data[GetThisThreadID() % cpu_data.size()];

// calculate some values
@@ -238,4 +238,4 @@ sirt_cpu(const float* data, int dy, int dt, int dx, const float*, const float* t
printf("\n");
}

#endif // TOMOPY_USE_OPENCV
#endif // TOMOPY_USE_OPENCV
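
Both mlem.cc and sirt.cc use the same recovery pattern: the accelerated path runs inside a try block, failures are reported under a PTL::AutoLock so messages from concurrent Python threads do not interleave, and EXIT_FAILURE tells the caller to rerun the plain CPU kernel. A self-contained sketch of that flow (run_accelerated and run_cpu are hypothetical stand-ins, not TomoPy functions; header paths are assumptions):

// fallback.cc -- GPU-to-CPU fallback pattern from cxx_mlem()/cxx_sirt()
#include <cstdlib>
#include <iostream>
#include <stdexcept>

#include <PTL/AutoLock.hh>   // header paths are assumptions
#include <PTL/Threading.hh>

int  run_accelerated() { throw std::runtime_error("no CUDA device available"); }
void run_cpu() { std::cout << "running CPU reconstruction" << std::endl; }

int try_accelerated()
{
    try
    {
        return run_accelerated();
    }
    catch(std::exception& e)
    {
        // serialize the warning, as the hunks above do
        PTL::AutoLock l(PTL::TypeMutex<decltype(std::cout)>());
        std::cerr << e.what() << "\nFalling back to CPU algorithm..." << std::endl;
        return EXIT_FAILURE;
    }
}

int main()
{
    if(try_accelerated() == EXIT_FAILURE)
        run_cpu();  // matches the "Falling back to CPU algorithm" message above
    return 0;
}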
