From a126a8c0117a296f63c9adb5360bc2ddc929322d Mon Sep 17 00:00:00 2001
From: Daniel Ching
Date: Tue, 7 Feb 2023 19:11:14 -0600
Subject: [PATCH 01/13] Update PTL to latest version

---
 source/PTL | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/PTL b/source/PTL
index 79dc8785..9399c3c7 160000
--- a/source/PTL
+++ b/source/PTL
@@ -1 +1 @@
-Subproject commit 79dc8785c3ef39357669250ffa46e4329c67a084
+Subproject commit 9399c3c7296a1c248af3f3fa40e836df8381a215

From cf593e9cdffa12a00919f7dfc4e1f9e75fca94e1 Mon Sep 17 00:00:00 2001
From: Daniel Ching
Date: Tue, 7 Feb 2023 20:15:59 -0600
Subject: [PATCH 02/13] REF: Adapt tomopy to latest PTL API

---
 source/libtomo/accel/common.hh     | 40 +++++++++-------
 source/libtomo/accel/cxx/mlem.cc   |  8 ++--
 source/libtomo/accel/cxx/sirt.cc   |  8 ++--
 source/libtomo/accel/data.hh       | 12 ++---
 source/libtomo/accel/gpu/common.cu |  6 +--
 source/libtomo/accel/gpu/mlem.cu   |  2 +-
 source/libtomo/accel/gpu/sirt.cu   |  2 +-
 source/libtomo/accel/macros.hh     |  8 ++--
 source/libtomo/accel/utils.hh      | 75 +++++++++++++++---------------
 9 files changed, 83 insertions(+), 78 deletions(-)

diff --git a/source/libtomo/accel/common.hh b/source/libtomo/accel/common.hh
index 2bd7ab85..9a5d6522 100644
--- a/source/libtomo/accel/common.hh
+++ b/source/libtomo/accel/common.hh
@@ -60,7 +60,7 @@ CreateThreadPool(unique_thread_pool_t& tp, num_threads_t& pool_size)
 {
 #if defined(TOMOPY_USE_PTL)
     // compute some properties (expected python threads, max threads)
-    auto pythreads = GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);
+    auto pythreads = PTL::GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);
 # if defined(TOMOPY_USE_CUDA)
     // general oversubscription when CUDA is enabled
     auto max_threads =
@@ -69,8 +69,9 @@ CreateThreadPool(unique_thread_pool_t& tp, num_threads_t& pool_size)
     // if known that CPU only, just try to use all cores
     auto max_threads = HW_CONCURRENCY / std::max(pythreads, min_threads);
 # endif
-    auto nthreads = std::max(GetEnv("TOMOPY_NUM_THREADS", max_threads), min_threads);
-    pool_size = nthreads;
+    auto nthreads =
+        std::max(PTL::GetEnv("TOMOPY_NUM_THREADS", max_threads), min_threads);
+    pool_size = nthreads;
 #else
     pool_size = 1;
 #endif
@@ -92,19 +93,19 @@ CreateThreadPool(unique_thread_pool_t& tp, num_threads_t& pool_size)
     auto tid = GetThisThreadID();

     // initialize the thread-local data information
-    auto& thread_data = ThreadData::GetInstance();
+    auto& thread_data = PTL::ThreadData::GetInstance();
     if(!thread_data)
-        thread_data.reset(new ThreadData(tp.get()));
+        thread_data = new PTL::ThreadData(tp.get());

     // tell thread that initialized thread-pool to process tasks
-    // (typically master thread will only wait for other threads)
-    thread_data->is_master = true;
+    // (typically main thread will only wait for other threads)
+    thread_data->is_main = true;

     // tell thread that it is not currently within task
     thread_data->within_task = false;

     // notify
-    AutoLock l(TypeMutex());
+    PTL::AutoLock l(PTL::TypeMutex());
     std::cout << "\n"
               << "[" << tid << "] Initialized tasking run manager with " << tp->size()
               << " threads..." << std::endl;
@@ -237,14 +238,14 @@ public:
 inline DeviceOption
 GetDevice(const std::string& preferred)
 {
-    auto pythreads = GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);
+    auto pythreads = PTL::GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);
     using DeviceOptionList = std::deque;
-    DeviceOptionList options = { };
+    DeviceOptionList options = {};
     std::string default_key = "cpu";

 #if defined(TOMOPY_USE_OPENCV)
     options.push_back(DeviceOption(0, "cpu", "Run on CPU (OpenCV)"));
-# endif
+#endif

 #if defined(TOMOPY_USE_CUDA)
     auto num_devices = cuda_device_count();
@@ -261,12 +262,13 @@ GetDevice(const std::string& preferred)
     }
     else
     {
-        AutoLock l(TypeMutex());
+        PTL::AutoLock l(PTL::TypeMutex());
         std::cerr << "\n##### No CUDA device(s) available #####\n" << std::endl;
     }
 #endif

-    if (options.empty()){
+    if(options.empty())
+    {
         throw std::runtime_error("No devices found! Check that TomoPy was "
                                  "compiled with OpenCV or CUDA.");
     }
@@ -278,7 +280,8 @@ GetDevice(const std::string& preferred)

     //------------------------------------------------------------------------//
     // print the options the first time it is encountered
-    auto print_options = [&]() {
+    auto print_options = [&]()
+    {
         static std::atomic_uint _once;
         auto _count = _once++;
         if(_count % pythreads > 0)
@@ -301,12 +304,13 @@ GetDevice(const std::string& preferred)
         }
         DeviceOption::footer(ss);

-        AutoLock l(TypeMutex());
+        PTL::AutoLock l(PTL::TypeMutex());
         std::cout << "\n" << ss.str() << std::endl;
     };
     //------------------------------------------------------------------------//
     // print the option selection first time it is encountered
-    auto print_selection = [&](DeviceOption& selected_opt) {
+    auto print_selection = [&](DeviceOption& selected_opt)
+    {
         static std::atomic_uint _once;
         auto _count = _once++;
         if(_count % pythreads > 0)
@@ -323,7 +327,7 @@ GetDevice(const std::string& preferred)
         ss << "Selected device: " << selected_opt << "\n";
         DeviceOption::spacer(ss, '-');

-        AutoLock l(TypeMutex());
+        PTL::AutoLock l(PTL::TypeMutex());
         std::cout << ss.str() << std::endl;
     };
     //------------------------------------------------------------------------//
@@ -356,7 +360,7 @@ stream_sync(cudaStream_t _stream)
     cudaStreamSynchronize(_stream);
     CUDA_CHECK_LAST_STREAM_ERROR(_stream);
 #else
-    ConsumeParameters(_stream);
+    PTL::ConsumeParameters(_stream);
 #endif
 }

diff --git a/source/libtomo/accel/cxx/mlem.cc b/source/libtomo/accel/cxx/mlem.cc
index 52fb7843..2261bfd3 100755
--- a/source/libtomo/accel/cxx/mlem.cc
+++ b/source/libtomo/accel/cxx/mlem.cc
@@ -76,7 +76,7 @@ cxx_mlem(const float* data, int dy, int dt, int dx, const float* center,
     // local count for the thread
     int count = registration.initialize();
     // number of threads started at Python level
-    auto tcount = GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);
+    auto tcount = PTL::GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);
     // configured runtime options
     RuntimeOptions opts(pool_size, interp, device, grid_size, block_size);
@@ -109,7 +109,7 @@ cxx_mlem(const float* data, int dy, int dt, int dx, const float* center,
     }
     catch(std::exception& e)
     {
-        AutoLock l(TypeMutex());
+        PTL::AutoLock l(PTL::TypeMutex());
         std::cerr << "[TID: " << tid << "] " << e.what()
                   << "\nFalling back to CPU algorithm..." << std::endl;
         return EXIT_FAILURE;
@@ -129,7 +129,7 @@ void
 mlem_cpu_compute_projection(data_array_t& cpu_data, int p, int dy, int dt, int dx,
                             int nx, int ny, const float* theta)
 {
-    ConsumeParameters(dy);
+    PTL::ConsumeParameters(dy);
     auto cache = cpu_data[GetThisThreadID() % cpu_data.size()];

     // calculate some values
@@ -237,4 +237,4 @@ mlem_cpu(const float* data, int dy, int dt, int dx, const float*, const float* t
     printf("\n");
 }

-#endif  // TOMOPY_USE_OPENCV
+#endif  // TOMOPY_USE_OPENCV

diff --git a/source/libtomo/accel/cxx/sirt.cc b/source/libtomo/accel/cxx/sirt.cc
index beb56ed2..4edb6d33 100755
--- a/source/libtomo/accel/cxx/sirt.cc
+++ b/source/libtomo/accel/cxx/sirt.cc
@@ -75,7 +75,7 @@ cxx_sirt(const float* data, int dy, int dt, int dx, const float* center,
     // local count for the thread
     int count = registration.initialize();
     // number of threads started at Python level
-    auto tcount = GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);
+    auto tcount = PTL::GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);
     // configured runtime options
     RuntimeOptions opts(pool_size, interp, device, grid_size, block_size);
@@ -108,7 +108,7 @@ cxx_sirt(const float* data, int dy, int dt, int dx, const float* center,
     }
     catch(std::exception& e)
     {
-        AutoLock l(TypeMutex());
+        PTL::AutoLock l(PTL::TypeMutex());
         std::cerr << "[TID: " << tid << "] " << e.what()
                   << "\nFalling back to CPU algorithm..." << std::endl;
         // return failure code
@@ -130,7 +130,7 @@ void
 sirt_cpu_compute_projection(data_array_t& cpu_data, int p, int dy, int dt, int dx,
                             int nx, int ny, const float* theta)
 {
-    ConsumeParameters(dy);
+    PTL::ConsumeParameters(dy);
     auto cache = cpu_data[GetThisThreadID() % cpu_data.size()];

     // calculate some values
@@ -238,4 +238,4 @@ sirt_cpu(const float* data, int dy, int dt, int dx, const float*, const float* t
     printf("\n");
 }

-#endif  // TOMOPY_USE_OPENCV
+#endif  // TOMOPY_USE_OPENCV

diff --git a/source/libtomo/accel/data.hh b/source/libtomo/accel/data.hh
index e3c82248..3c75dc8b 100644
--- a/source/libtomo/accel/data.hh
+++ b/source/libtomo/accel/data.hh
@@ -90,7 +90,7 @@ struct RuntimeOptions
     ~RuntimeOptions() {}

     // disable copying and copy assignment
-    RuntimeOptions(const RuntimeOptions&) = delete;
+    RuntimeOptions(const RuntimeOptions&) = delete;
     RuntimeOptions& operator=(const RuntimeOptions&) = delete;

     // create the thread pool -- don't have this in the constructor
@@ -210,7 +210,7 @@ execute(RuntimeOptions* ops, int dt, DataArray& data, Func&& func, Args&&... arg
         std::stringstream ss;
         ss << "\n\nError executing :: " << e.what() << "\n\n";
         {
-            AutoLock l(TypeMutex());
+            PTL::AutoLock l(PTL::TypeMutex());
             std::cerr << e.what() << std::endl;
         }
         throw std::runtime_error(ss.str().c_str());
@@ -244,7 +244,7 @@ execute(RuntimeOptions* ops, int dt, DataArray& data, Func&& func, Args&&... arg
         std::stringstream ss;
         ss << "\n\nError executing :: " << e.what() << "\n\n";
         {
-            AutoLock l(TypeMutex());
+            PTL::AutoLock l(PTL::TypeMutex());
             std::cerr << e.what() << std::endl;
         }
         throw std::runtime_error(ss.str().c_str());
@@ -295,9 +295,9 @@ public:

     int interpolation() const { return m_interp; }

-    Mutex* upd_mutex() const
+    PTL::Mutex* upd_mutex() const
     {
-        static Mutex mtx;
+        static PTL::Mutex mtx;
         return &mtx;
     }
@@ -401,7 +401,7 @@ public:
     GpuData(this_type&&) = default;

     this_type& operator=(const this_type&) = delete;
-    this_type& operator=(this_type&&) = default;
+    this_type& operator=(this_type&&) = default;

 public:
     // access functions

diff --git a/source/libtomo/accel/gpu/common.cu b/source/libtomo/accel/gpu/common.cu
index e3f29b09..53cd66bf 100644
--- a/source/libtomo/accel/gpu/common.cu
+++ b/source/libtomo/accel/gpu/common.cu
@@ -238,7 +238,7 @@ cuda_device_count()
 void
 cuda_device_query()
 {
-    auto pythreads = GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);
+    auto pythreads = PTL::GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);
     static std::atomic _once;
     auto _count = _once++;
     if(_count + 1 == pythreads)
@@ -277,14 +277,14 @@ cuda_device_query()
         return;
     }

-    AutoLock l(TypeMutex());
+    PTL::AutoLock l(PTL::TypeMutex());

     if(deviceCount == 0)
         printf("No available CUDA device(s) detected\n");
     else
         printf("Detected %d CUDA capable devices\n", deviceCount);

-    int specific_device = GetEnv("TOMOPY_DEVICE_NUM", -1);
+    int specific_device = PTL::GetEnv("TOMOPY_DEVICE_NUM", -1);

     for(int dev = 0; dev < deviceCount; ++dev)
     {

diff --git a/source/libtomo/accel/gpu/mlem.cu b/source/libtomo/accel/gpu/mlem.cu
index 7a65e0b1..6f983270 100644
--- a/source/libtomo/accel/gpu/mlem.cu
+++ b/source/libtomo/accel/gpu/mlem.cu
@@ -176,7 +176,7 @@ mlem_cuda(const float* cpu_data, int dy, int dt, int dx, const float*, const flo
     // compute some properties (expected python threads, max threads, device assignment)
     int pythread_num = ntid++;
     int device = pythread_num % cuda_device_count();  // assign to device
-    device = GetEnv("TOMOPY_DEVICE_NUM", device) % cuda_device_count();
+    device = PTL::GetEnv("TOMOPY_DEVICE_NUM", device) % cuda_device_count();

     TIMEMORY_AUTO_TIMER("");

diff --git a/source/libtomo/accel/gpu/sirt.cu b/source/libtomo/accel/gpu/sirt.cu
index c672161b..574fc78f 100644
--- a/source/libtomo/accel/gpu/sirt.cu
+++ b/source/libtomo/accel/gpu/sirt.cu
@@ -173,7 +173,7 @@ sirt_cuda(const float* cpu_data, int dy, int dt, int dx, const float* center,
     // compute some properties (expected python threads, max threads, device assignment)
     int pythread_num = ntid++;
     int device = pythread_num % cuda_device_count();  // assign to device
-    device = GetEnv("TOMOPY_DEVICE_NUM", device) % cuda_device_count();
+    device = PTL::GetEnv("TOMOPY_DEVICE_NUM", device) % cuda_device_count();

     TIMEMORY_AUTO_TIMER("");

diff --git a/source/libtomo/accel/macros.hh b/source/libtomo/accel/macros.hh
index 6d8cbcbe..7e2988d2 100644
--- a/source/libtomo/accel/macros.hh
+++ b/source/libtomo/accel/macros.hh
@@ -151,7 +151,7 @@ inline uintmax_t
 GetThisThreadID()
 {
 #if defined(TOMOPY_USE_PTL)
-    return ThreadPool::GetThisThreadID();
+    return PTL::ThreadPool::get_this_thread_id();
 #else
     static std::atomic tcounter;
     static thread_local auto tid = tcounter++;
@@ -403,15 +403,15 @@ struct cuda_algorithms

 //--------------------------------------------------------------------------------------//

-using ThreadPool = ::ThreadPool;
+using ThreadPool = PTL::ThreadPool;
 template
-using TaskGroup = ::TaskGroup<_Ret, _Arg>;
+using TaskGroup = PTL::TaskGroup<_Ret, _Arg>;

 //--------------------------------------------------------------------------------------//
 // when compiled with PTL, mark tomopy::ThreadPool as implemented
 template <>
-struct implementation_available : std::true_type
+struct implementation_available : std::true_type
 {
 };

diff --git a/source/libtomo/accel/utils.hh b/source/libtomo/accel/utils.hh
index 464492ae..39e10bbe 100644
--- a/source/libtomo/accel/utils.hh
+++ b/source/libtomo/accel/utils.hh
@@ -54,11 +54,11 @@ END_EXTERN_C

 #if defined(TOMOPY_USE_OPENCV)

-#define CPU_NN CV_INTER_NN
-#define CPU_LINEAR CV_INTER_LINEAR
-#define CPU_AREA CV_INTER_AREA
-#define CPU_CUBIC CV_INTER_CUBIC
-#define CPU_LANCZOS CV_INTER_LANCZOS4
+# define CPU_NN CV_INTER_NN
+# define CPU_LINEAR CV_INTER_LINEAR
+# define CPU_AREA CV_INTER_AREA
+# define CPU_CUBIC CV_INTER_CUBIC
+# define CPU_LANCZOS CV_INTER_LANCZOS4

 //--------------------------------------------------------------------------------------//

@@ -73,16 +73,16 @@ struct OpenCVDataType
     }
 };

-#define DEFINE_OPENCV_DATA_TYPE(pod_type, opencv_type) \
-    template <> \
-    struct OpenCVDataType \
-    { \
-        template \
-        static constexpr int value() \
+# define DEFINE_OPENCV_DATA_TYPE(pod_type, opencv_type) \
+     template <> \
+     struct OpenCVDataType \
         { \
-            return opencv_type; \
-        } \
-    };
+         template \
+         static constexpr int value() \
+         { \
+             return opencv_type; \
+         } \
+     };

 // floating point types
 DEFINE_OPENCV_DATA_TYPE(float, CV_32F)
@@ -97,19 +97,19 @@ DEFINE_OPENCV_DATA_TYPE(int32_t, CV_32S)
 DEFINE_OPENCV_DATA_TYPE(uint8_t, CV_8U)
 DEFINE_OPENCV_DATA_TYPE(uint16_t, CV_16U)

-#undef DEFINE_OPENCV_DATA_TYPE  // don't pollute
+# undef DEFINE_OPENCV_DATA_TYPE  // don't pollute

 //--------------------------------------------------------------------------------------//

 inline int
 GetOpenCVInterpolationMode(const std::string& preferred)
 {
-    EnvChoiceList choices = {
-        EnvChoice(CPU_NN, "NN", "nearest neighbor interpolation"),
-        EnvChoice(CPU_LINEAR, "LINEAR", "bilinear interpolation"),
-        EnvChoice(CPU_CUBIC, "CUBIC", "bicubic interpolation")
+    PTL::EnvChoiceList choices = {
+        PTL::EnvChoice(CPU_NN, "NN", "nearest neighbor interpolation"),
+        PTL::EnvChoice(CPU_LINEAR, "LINEAR", "bilinear interpolation"),
+        PTL::EnvChoice(CPU_CUBIC, "CUBIC", "bicubic interpolation")
     };
-    return GetChoice(choices, preferred);
+    return PTL::GetChoice(choices, preferred);
 }

 //--------------------------------------------------------------------------------------//

@@ -158,7 +158,8 @@ cxx_rotate(const _Tp* src, double theta, const intmax_t& nx, const intmax_t& ny,
 inline iarray_t
 cxx_compute_sum_dist(int dy, int dt, int dx, int nx, int ny, const float* theta)
 {
-    auto compute = [&](const iarray_t& ones, iarray_t& sum_dist, int p) {
+    auto compute = [&](const iarray_t& ones, iarray_t& sum_dist, int p)
+    {
         for(int s = 0; s < dy; ++s)
         {
             for(int d = 0; d < dx; ++d)
@@ -186,7 +187,7 @@ cxx_compute_sum_dist(int dy, int dt, int dx, int nx, int ny, const float* theta)
     return sum_dist;
 }

-#endif  // TOMOPY_USE_OPENCV
+#endif  // TOMOPY_USE_OPENCV

 //======================================================================================//
 //
@@ -203,12 +204,12 @@ cxx_compute_sum_dist(int dy, int dt, int dx, int nx, int ny, const float* theta)
 inline int
 GetNppInterpolationMode(const std::string& preferred)
 {
-    EnvChoiceList choices = {
-        EnvChoice(GPU_NN, "NN", "nearest neighbor interpolation"),
-        EnvChoice(GPU_LINEAR, "LINEAR", "bilinear interpolation"),
-        EnvChoice(GPU_CUBIC, "CUBIC", "bicubic interpolation")
+    PTL::EnvChoiceList choices = {
+        PTL::EnvChoice(GPU_NN, "NN", "nearest neighbor interpolation"),
+        PTL::EnvChoice(GPU_LINEAR, "LINEAR", "bilinear interpolation"),
+        PTL::EnvChoice(GPU_CUBIC, "CUBIC", "bicubic interpolation")
     };
-    return GetChoice(choices, preferred);
+    return PTL::GetChoice(choices, preferred);
 }

 //======================================================================================//
@@ -220,7 +221,7 @@ GetNppInterpolationMode(const std::string& preferred)
 inline int
 GetBlockSize(const int& init = 32)
 {
-    static thread_local int _instance = GetEnv("TOMOPY_BLOCK_SIZE", init);
+    static thread_local int _instance = PTL::GetEnv("TOMOPY_BLOCK_SIZE", init);
     return _instance;
 }
@@ -230,7 +231,7 @@ inline int
 GetGridSize(const int& init = 0)
 {
     // default value of zero == calculated according to block and loop size
-    static thread_local int _instance = GetEnv("TOMOPY_GRID_SIZE", init);
+    static thread_local int _instance = PTL::GetEnv("TOMOPY_GRID_SIZE", init);
     return _instance;
 }
@@ -247,9 +248,9 @@ ComputeGridSize(const int& size, const int& block_size = GetBlockSize())
 inline dim3
 GetBlockDims(const dim3& init = dim3(32, 32, 1))
 {
-    int _x = GetEnv("TOMOPY_BLOCK_SIZE_X", init.x);
-    int _y = GetEnv("TOMOPY_BLOCK_SIZE_Y", init.y);
-    int _z = GetEnv("TOMOPY_BLOCK_SIZE_Z", init.z);
+    int _x = PTL::GetEnv("TOMOPY_BLOCK_SIZE_X", init.x);
+    int _y = PTL::GetEnv("TOMOPY_BLOCK_SIZE_Y", init.y);
+    int _z = PTL::GetEnv("TOMOPY_BLOCK_SIZE_Z", init.z);
     return dim3(_x, _y, _z);
 }
@@ -259,9 +260,9 @@ inline dim3
 GetGridDims(const dim3& init = dim3(0, 0, 0))
 {
     // default value of zero == calculated according to block and loop size
-    int _x = GetEnv("TOMOPY_GRID_SIZE_X", init.x);
-    int _y = GetEnv("TOMOPY_GRID_SIZE_Y", init.y);
-    int _z = GetEnv("TOMOPY_GRID_SIZE_Z", init.z);
+    int _x = PTL::GetEnv("TOMOPY_GRID_SIZE_X", init.x);
+    int _y = PTL::GetEnv("TOMOPY_GRID_SIZE_Y", init.y);
+    int _z = PTL::GetEnv("TOMOPY_GRID_SIZE_Z", init.z);
     return dim3(_x, _y, _z);
 }
@@ -501,8 +502,8 @@ reduce(float* _in, float* _out, int size);

 //======================================================================================//

 DLL int32_t*
-    cuda_rotate(const int32_t* src, const float theta_rad, const float theta_deg,
-                const int nx, const int ny, cudaStream_t stream, const int eInterp);
+cuda_rotate(const int32_t* src, const float theta_rad, const float theta_deg,
+            const int nx, const int ny, cudaStream_t stream, const int eInterp);

 //--------------------------------------------------------------------------------------//

From 6e2acdadf3d1901907696831a7a2fc3759673981 Mon Sep 17 00:00:00 2001
From: Daniel Ching
Date: Tue, 7 Feb 2023 20:28:15 -0600
Subject: [PATCH 03/13] BUG: Add missing CXX standard for gridrec module

---
 source/libtomo/gridrec/CMakeLists.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/source/libtomo/gridrec/CMakeLists.txt b/source/libtomo/gridrec/CMakeLists.txt
index 4f9a64ba..ada28f3d 100644
--- a/source/libtomo/gridrec/CMakeLists.txt
+++ b/source/libtomo/gridrec/CMakeLists.txt
@@ -44,6 +44,8 @@ target_compile_options(
   tomo-gridrec PRIVATE $<$:${${PROJECT_NAME}_C_FLAGS}>
                        $<$:${${PROJECT_NAME}_CXX_FLAGS}>)

+set_property(TARGET tomo-gridrec PROPERTY CXX_STANDARD 14)
+
 install(TARGETS tomo-gridrec EXPORT libtomoTargets)

 install(

From 895e19d7e437f400fab1f2957e8019312457f109 Mon Sep 17 00:00:00 2001
From: Daniel Ching
Date: Tue, 7 Feb 2023 21:20:28 -0600
Subject: [PATCH 04/13] REF: Always statically link to PTL if using accel module

---
 source/CMakeLists.txt               | 28 ++++++++++------------------
 source/libtomo/accel/CMakeLists.txt | 10 ++--------
 2 files changed, 12 insertions(+), 26 deletions(-)

diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
index b639d8e1..70e11db9 100644
--- a/source/CMakeLists.txt
+++ b/source/CMakeLists.txt
@@ -20,25 +20,17 @@ set(CMAKE_POSITION_INDEPENDENT_CODE
 # PTL submodule
 #
 # ------------------------------------------------------------------------------#
-checkout_git_submodule(
-  RECURSIVE
-  TEST_FILE
-  CMakeLists.txt
-  RELATIVE_PATH
-  source/PTL
-  WORKING_DIRECTORY
-  ${PROJECT_SOURCE_DIR})
-
-if(TOMOPY_USE_PTL)
-  add_subdirectory(PTL)
-  if(BUILD_STATIC_LIBS)
-    list(APPEND TOMOPY_EXTERNAL_LIBRARIES ptl-static)
-  else()
-    list(APPEND TOMOPY_EXTERNAL_LIBRARIES ptl-shared)
-  endif()
-  list(APPEND ${PROJECT_NAME}_DEFINITIONS TOMOPY_USE_PTL)
+if(TOMOPY_USE_OPENCV OR TOMOPY_USE_CUDA)
+  checkout_git_submodule(
+    RECURSIVE
+    TEST_FILE
+    CMakeLists.txt
+    RELATIVE_PATH
+    source/PTL
+    WORKING_DIRECTORY
+    ${PROJECT_SOURCE_DIR})
+  add_subdirectory(PTL EXCLUDE_FROM_ALL)
 endif()
-
 # ------------------------------------------------------------------------------#
 #
 # TomoPy Python module

diff --git a/source/libtomo/accel/CMakeLists.txt b/source/libtomo/accel/CMakeLists.txt
index 91dd8cf9..93f1042e 100644
--- a/source/libtomo/accel/CMakeLists.txt
+++ b/source/libtomo/accel/CMakeLists.txt
@@ -23,12 +23,6 @@ target_include_directories(
          $
   PRIVATE ${CMAKE_CURRENT_LIST_DIR})

-if(TRUE)
-  # FIXME: Need PTL headers regardless of whether we use PTL
-  target_include_directories(tomo-accel
-                             PRIVATE ${tomopy_SOURCE_DIR}/source/PTL/source)
-endif()
-
 if(TOMOPY_USE_CUDA)
   target_sources(tomo-accel PRIVATE gpu/common.cu gpu/mlem.cu gpu/sirt.cu
@@ -54,9 +48,9 @@ endif(TOMOPY_USE_CUDA)

 target_link_libraries(tomo-accel PRIVATE ${TOMOPY_EXTERNAL_LIBRARIES}
-                                         ${TOMOPY_EXTERNAL_PRIVATE_LIBRARIES})
+                                         ${TOMOPY_EXTERNAL_PRIVATE_LIBRARIES} ptl-static)

-target_compile_definitions(tomo-accel PRIVATE ${${PROJECT_NAME}_DEFINITIONS})
+target_compile_definitions(tomo-accel PRIVATE ${${PROJECT_NAME}_DEFINITIONS} TOMOPY_USE_PTL)

 target_compile_options(
   tomo-accel

From 7c2649b064fdf2f2a4033ea61bec9a9b276184c7 Mon Sep 17 00:00:00 2001
From: Daniel Ching
Date: Tue, 7 Feb 2023 21:30:01 -0600
Subject: [PATCH 05/13] BLD: PTL no longer optional for accel module

---
 cmake/Modules/Options.cmake | 1 -
 1 file changed, 1 deletion(-)

diff --git a/cmake/Modules/Options.cmake b/cmake/Modules/Options.cmake
index 769e79e8..e684e6c8 100644
--- a/cmake/Modules/Options.cmake
+++ b/cmake/Modules/Options.cmake
@@ -71,7 +71,6 @@ add_option(TOMOPY_USE_SANITIZER "Enable sanitizer" OFF)
 add_option(TOMOPY_CXX_GRIDREC "Enable gridrec with C++ std::complex" ${_USE_CXX_GRIDREC})
 add_option(TOMOPY_USE_COVERAGE "Enable code coverage for C/C++" OFF)
-add_option(TOMOPY_USE_PTL "Enable Parallel Tasking Library (PTL)" ON)
 add_option(TOMOPY_USE_CLANG_TIDY "Enable clang-tidy (C++ linter)" OFF)
 add_option(TOMOPY_USE_CUDA "Enable CUDA option for GPU execution" ${_USE_CUDA})
 add_option(TOMOPY_USER_FLAGS

From 87d21745252a8d86bb7b21df7aad0bc9597553d9 Mon Sep 17 00:00:00 2001
From: Daniel Ching
Date: Wed, 8 Feb 2023 15:14:55 -0600
Subject: [PATCH 06/13] CI: Show which compilers are being used

---
 azure-pipelines.yml | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index e1940353..a3d044b7 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -82,6 +82,11 @@ jobs:
         displayName: List build environment
       - script: |
           source activate tomopy
+          export CMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}:${CONDA_PREFIX}"
+          export CC=$(which gcc)
+          export CXX=$(which g++)
+          echo "C compiler is ${CC}"
+          echo "C++ compiler is ${CXX}"
           pip install . --no-deps
           cmake -S . -B build -GNinja -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -DCMAKE_BUILD_TYPE=Release -DTOMOPY_USE_MKL:BOOL=$(use.mkl)
           cmake --build build
@@ -112,6 +117,11 @@ jobs:
         displayName: List build environment
       - script: |
          source activate tomopy
+          export CMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}:${CONDA_PREFIX}"
+          export CC=$(which clang)
+          export CXX=$(which clang++)
+          echo "C compiler is ${CC}"
+          echo "C++ compiler is ${CXX}"
           pip install . --no-deps
           cmake -S . -B build -GNinja -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -DCMAKE_BUILD_TYPE=Release
           cmake --build build

From e6e18bd6ccdc02a14c659bb3f71aed6d416505a4 Mon Sep 17 00:00:00 2001
From: Daniel Ching
Date: Wed, 8 Feb 2023 13:20:33 -0600
Subject: [PATCH 07/13] BUG: Pull latest (unreleased) PTL for missing header in MacOS

---
 source/PTL | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/PTL b/source/PTL
index 9399c3c7..3a258fd8 160000
--- a/source/PTL
+++ b/source/PTL
@@ -1 +1 @@
-Subproject commit 9399c3c7296a1c248af3f3fa40e836df8381a215
+Subproject commit 3a258fd8a047d8113e211debcd7999bb5d49bb7e

From 012e4ae9bf1c70d81b5e4c029fa9f4418e0be98f Mon Sep 17 00:00:00 2001
From: Seth Parker
Date: Thu, 26 Oct 2023 14:44:47 -0400
Subject: [PATCH 08/13] Update PTL ref

---
 source/PTL | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/PTL b/source/PTL
index 3a258fd8..f892a93d 160000
--- a/source/PTL
+++ b/source/PTL
@@ -1 +1 @@
-Subproject commit 3a258fd8a047d8113e211debcd7999bb5d49bb7e
+Subproject commit f892a93d79615ed8f51c1b9c71f0f7b771dd8223

From 4da5d46f9e89d6a84927a715a6dd930c66aeb59d Mon Sep 17 00:00:00 2001
From: Seth Parker
Date: Thu, 26 Oct 2023 20:29:31 -0400
Subject: [PATCH 09/13] Compile against PTLv3

---
 source/libtomo/accel/common.hh |  4 +-
 source/libtomo/accel/macros.hh |  2 +-
 source/libtomo/accel/utils.hh  | 77 +++++++++++++++++++++++++++++-----
 3 files changed, 71 insertions(+), 12 deletions(-)

diff --git a/source/libtomo/accel/common.hh b/source/libtomo/accel/common.hh
index 9a5d6522..ee166aa6 100644
--- a/source/libtomo/accel/common.hh
+++ b/source/libtomo/accel/common.hh
@@ -86,7 +86,9 @@ CreateThreadPool(unique_thread_pool_t& tp, num_threads_t& pool_size)
 #endif
     // use unique pointer per-thread so manager gets deleted when thread gets deleted
     // create the thread-pool instance
-    tp = unique_thread_pool_t(new tomopy::ThreadPool(pool_size));
+    tomopy::ThreadPool::Config cfg;
+    cfg.pool_size = pool_size;
+    tp = unique_thread_pool_t(new tomopy::ThreadPool(cfg));

 #if defined(TOMOPY_USE_PTL)
     // ensure this thread is assigned id, assign variable so no unused result warning

diff --git a/source/libtomo/accel/macros.hh b/source/libtomo/accel/macros.hh
index 7e2988d2..1530a7f2 100644
--- a/source/libtomo/accel/macros.hh
+++ b/source/libtomo/accel/macros.hh
@@ -97,7 +97,7 @@
 //
 #include "PTL/AutoLock.hh"
 #include "PTL/Types.hh"
-#include "PTL/Utility.hh"
+#include "PTL/GetEnv.hh"

 //--------------------------------------------------------------------------------------//
 // contain compiled implementations

diff --git a/source/libtomo/accel/utils.hh b/source/libtomo/accel/utils.hh
index 39e10bbe..b1b3ec0a 100644
--- a/source/libtomo/accel/utils.hh
+++ b/source/libtomo/accel/utils.hh
@@ -42,6 +42,7 @@
 //--------------------------------------------------------------------------------------//

 #include "macros.hh"
+#include "typedefs.hh"

 BEGIN_EXTERN_C
 #include "libtomo/accel.h"
@@ -62,6 +63,62 @@ END_EXTERN_C

 //--------------------------------------------------------------------------------------//

+// a non-string environment option with a string identifier
+template
+using EnvChoice = std::tuple;
+
+//--------------------------------------------------------------------------------------//
+// list of environment choices with non-string and string identifiers
+template
+using EnvChoiceList = std::set>;
+
+//--------------------------------------------------------------------------------------//
+
+template
+Tp
+GetChoice(const EnvChoiceList& _choices, const std::string& str_var)
+{
+    auto asupper = [](std::string var) {
+        for(auto& itr : var)
+            itr = toupper(itr);
+        return var;
+    };
+
+    std::string upp_var = asupper(str_var);
+    Tp          var     = Tp();
+    // check to see if string matches a choice
+    for(const auto& itr : _choices)
+    {
+        if(asupper(std::get<1>(itr)) == upp_var)
+        {
+            // record value defined by environment
+            return std::get<0>(itr);
+        }
+    }
+    std::istringstream iss(str_var);
+    iss >> var;
+    // check to see if string matches a choice
+    for(const auto& itr : _choices)
+    {
+        if(var == std::get<0>(itr))
+        {
+            // record value defined by environment
+            return var;
+        }
+    }
+    // the value set in env did not match any choices
+    std::stringstream ss;
+    ss << "\n### Environment setting error @ " << __FUNCTION__ << " (line " << __LINE__
+       << ")! Invalid selection \"" << str_var << "\". Valid choices are:\n";
+    for(const auto& itr : _choices)
+        ss << "\t\"" << std::get<0>(itr) << "\" or \"" << std::get<1>(itr) << "\" ("
+           << std::get<2>(itr) << ")\n";
+    std::cerr << ss.str() << std::endl;
+    abort();
+}
+
+//--------------------------------------------------------------------------------------//
+
 template
 struct OpenCVDataType
 {
@@ -104,12 +161,12 @@ DEFINE_OPENCV_DATA_TYPE(uint16_t, CV_16U)
 inline int
 GetOpenCVInterpolationMode(const std::string& preferred)
 {
-    PTL::EnvChoiceList choices = {
-        PTL::EnvChoice(CPU_NN, "NN", "nearest neighbor interpolation"),
-        PTL::EnvChoice(CPU_LINEAR, "LINEAR", "bilinear interpolation"),
-        PTL::EnvChoice(CPU_CUBIC, "CUBIC", "bicubic interpolation")
+    EnvChoiceList choices = {
+        EnvChoice(CPU_NN, "NN", "nearest neighbor interpolation"),
+        EnvChoice(CPU_LINEAR, "LINEAR", "bilinear interpolation"),
+        EnvChoice(CPU_CUBIC, "CUBIC", "bicubic interpolation")
     };
-    return PTL::GetChoice(choices, preferred);
+    return GetChoice(choices, preferred);
 }

 //--------------------------------------------------------------------------------------//
@@ -204,12 +261,12 @@ cxx_compute_sum_dist(int dy, int dt, int dx, int nx, int ny, const float* theta)
 inline int
 GetNppInterpolationMode(const std::string& preferred)
 {
-    PTL::EnvChoiceList choices = {
-        PTL::EnvChoice(GPU_NN, "NN", "nearest neighbor interpolation"),
-        PTL::EnvChoice(GPU_LINEAR, "LINEAR", "bilinear interpolation"),
-        PTL::EnvChoice(GPU_CUBIC, "CUBIC", "bicubic interpolation")
+    EnvChoiceList choices = {
+        EnvChoice(GPU_NN, "NN", "nearest neighbor interpolation"),
+        EnvChoice(GPU_LINEAR, "LINEAR", "bilinear interpolation"),
+        EnvChoice(GPU_CUBIC, "CUBIC", "bicubic interpolation")
     };
-    return PTL::GetChoice(choices, preferred);
+    return GetChoice(choices, preferred);
 }

 //======================================================================================//

From e92a85fce443c1a73bc018fa93d60546de86640d Mon Sep 17 00:00:00 2001
From: Seth Parker
Date: Thu, 26 Oct 2023 20:29:48 -0400
Subject: [PATCH 10/13] Remove remaining TOMOPY_USE_PTL usages

---
 pyctest_tomopy.py                   |  1 -
 source/libtomo/accel/CMakeLists.txt |  2 +-
 source/libtomo/accel/common.hh      |  7 +--
 source/libtomo/accel/data.hh        | 38 ---------------
 source/libtomo/accel/macros.hh      | 73 ++++------------------------
 5 files changed, 12 insertions(+), 109 deletions(-)

diff --git a/pyctest_tomopy.py b/pyctest_tomopy.py
index c7c81eb8..bcd56ad3 100755
--- a/pyctest_tomopy.py
+++ b/pyctest_tomopy.py
@@ -198,7 +198,6 @@ def add_option(parser, lc_name, disp_name):
     add_bool_opt(args, "TOMOPY_USE_TIMEMORY", args.enable_timemory,
                  args.disable_timemory)
     add_bool_opt(args, "TOMOPY_USE_SANITIZER", args.enable_sanitizer,
                  args.disable_sanitizer)
-    add_bool_opt(args, "TOMOPY_USE_PTL", args.enable_tasking, args.disable_tasking)

     if args.enable_sanitizer:
         args.cmake_args.append("-DSANITIZER_TYPE:STRING={}".format(args.sanitizer_type))

diff --git a/source/libtomo/accel/CMakeLists.txt b/source/libtomo/accel/CMakeLists.txt
index 93f1042e..d4170616 100644
--- a/source/libtomo/accel/CMakeLists.txt
+++ b/source/libtomo/accel/CMakeLists.txt
@@ -50,7 +50,7 @@ endif(TOMOPY_USE_CUDA)
 target_link_libraries(tomo-accel PRIVATE ${TOMOPY_EXTERNAL_LIBRARIES}
                                          ${TOMOPY_EXTERNAL_PRIVATE_LIBRARIES} ptl-static)

-target_compile_definitions(tomo-accel PRIVATE ${${PROJECT_NAME}_DEFINITIONS} TOMOPY_USE_PTL)
+target_compile_definitions(tomo-accel PRIVATE ${${PROJECT_NAME}_DEFINITIONS})

 target_compile_options(
   tomo-accel

diff --git a/source/libtomo/accel/common.hh b/source/libtomo/accel/common.hh
index ee166aa6..1cf3a0c2 100644
--- a/source/libtomo/accel/common.hh
+++ b/source/libtomo/accel/common.hh
@@ -58,7 +58,7 @@ CreateThreadPool(unique_thread_pool_t& tp, num_threads_t& pool_size)
     auto min_threads = num_threads_t(1);
     if(pool_size <= 0)
     {
-#if defined(TOMOPY_USE_PTL)
+
         // compute some properties (expected python threads, max threads)
         auto pythreads = PTL::GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);
 # if defined(TOMOPY_USE_CUDA)
@@ -72,9 +72,6 @@ CreateThreadPool(unique_thread_pool_t& tp, num_threads_t& pool_size)
         auto nthreads =
             std::max(PTL::GetEnv("TOMOPY_NUM_THREADS", max_threads), min_threads);
         pool_size = nthreads;
-#else
-        pool_size = 1;
-#endif
     }
     // always specify at least one thread even if not creating threads
     pool_size = std::max(pool_size, min_threads);
@@ -90,7 +87,6 @@ CreateThreadPool(unique_thread_pool_t& tp, num_threads_t& pool_size)
     cfg.pool_size = pool_size;
     tp = unique_thread_pool_t(new tomopy::ThreadPool(cfg));

-#if defined(TOMOPY_USE_PTL)
     // ensure this thread is assigned id, assign variable so no unused result warning
     auto tid = GetThisThreadID();

@@ -111,7 +107,6 @@ CreateThreadPool(unique_thread_pool_t& tp, num_threads_t& pool_size)
     std::cout << "\n"
               << "[" << tid << "] Initialized tasking run manager with " << tp->size()
               << " threads..." << std::endl;
-#endif
 }

 //======================================================================================//

diff --git a/source/libtomo/accel/data.hh b/source/libtomo/accel/data.hh
index 3c75dc8b..45d912f8 100644
--- a/source/libtomo/accel/data.hh
+++ b/source/libtomo/accel/data.hh
@@ -180,8 +180,6 @@ struct Registration

 //======================================================================================//

-#if defined(TOMOPY_USE_PTL)
-
 //--------------------------------------------------------------------------------------//
 // when PTL thread-pool is available
 //
@@ -217,42 +215,6 @@ execute(RuntimeOptions* ops, int dt, DataArray& data, Func&& func, Args&&... arg
     }
 }

-#else
-
-//--------------------------------------------------------------------------------------//
-// when PTL thread-pool is not available
-//
-template
-void
-execute(RuntimeOptions* ops, int dt, DataArray& data, Func&& func, Args&&... args)
-{
-    // sync streams
-    auto join = [&]() { stream_sync(0); };
-
-    try
-    {
-        for(int p = 0; p < dt; ++p)
-        {
-            auto _func = std::bind(std::forward(func), std::ref(data),
-                                   std::forward(p), std::forward(args)...);
-            _func();
-        }
-        join();
-    }
-    catch(const std::exception& e)
-    {
-        std::stringstream ss;
-        ss << "\n\nError executing :: " << e.what() << "\n\n";
-        {
-            PTL::AutoLock l(PTL::TypeMutex());
-            std::cerr << e.what() << std::endl;
-        }
-        throw std::runtime_error(ss.str().c_str());
-    }
-}
-
-#endif
-
 //======================================================================================//

 class CpuData

diff --git a/source/libtomo/accel/macros.hh b/source/libtomo/accel/macros.hh
index 1530a7f2..0e923dd9 100644
--- a/source/libtomo/accel/macros.hh
+++ b/source/libtomo/accel/macros.hh
@@ -102,16 +102,15 @@
 //--------------------------------------------------------------------------------------//
 // contain compiled implementations
 //
-#if defined(TOMOPY_USE_PTL)
-# include "PTL/TBBTaskGroup.hh"
-# include "PTL/Task.hh"
-# include "PTL/TaskGroup.hh"
-# include "PTL/TaskManager.hh"
-# include "PTL/TaskRunManager.hh"
-# include "PTL/ThreadData.hh"
-# include "PTL/ThreadPool.hh"
-# include "PTL/Threading.hh"
-#endif
+
+#include "PTL/Task.hh"
+#include "PTL/TaskGroup.hh"
+#include "PTL/TaskManager.hh"
+#include "PTL/TaskRunManager.hh"
+#include "PTL/ThreadData.hh"
+#include "PTL/ThreadPool.hh"
+#include "PTL/Threading.hh"
+

 //--------------------------------------------------------------------------------------//
 // CUDA headers
@@ -150,13 +149,7 @@
 inline uintmax_t
 GetThisThreadID()
 {
-#if defined(TOMOPY_USE_PTL)
     return PTL::ThreadPool::get_this_thread_id();
-#else
-    static std::atomic tcounter;
-    static thread_local auto tid = tcounter++;
-    return tid;
-#endif
 }

 //======================================================================================//
@@ -394,12 +387,7 @@ struct cuda_algorithms
 //--------------------------------------------------------------------------------------//

 // Create a ThreadPool class in so we can refer to it safely when PTL is
-// not enabled. Do this within a namespace in case a header later includes
-// "PTL/ThreadPool.hh" and PTL is not enabled.
-// --> When PTL is enabled, tomopy::ThreadPool is an alias to PTL ThreadPool
-// --> When PTL is disabled, tomopy::ThreadPool is an alias to dummy class
-
-#if defined(TOMOPY_USE_PTL)
+// not enabled.

 //--------------------------------------------------------------------------------------//

@@ -417,47 +405,6 @@ struct implementation_available : std::true_type

 //--------------------------------------------------------------------------------------//

-#else
-
-//--------------------------------------------------------------------------------------//
-// dummy thread pool impl
-
-class ThreadPool
-{
-public:
-    ThreadPool(intmax_t = 1, bool = false) {}
-    intmax_t size() const { return 1; }
-    void destroy_threadpool() {}
-};
-
-template
-class TaskGroup
-{
-public:
-    template
-    TaskGroup(_Func&& _join, ThreadPool* = nullptr)
-    : m_join(std::forward<_Func>(_join))
-    {
-    }
-
-    template
-    void run(_Func&& func, _Args&&... args)
-    {
-        std::forward<_Func>(func)(std::forward<_Args>(args)...);
-    }
-
-    void join() { m_join(); }
-
-private:
-    std::function m_join;
-};
-
-//--------------------------------------------------------------------------------------//
-
-#endif
-
-//--------------------------------------------------------------------------------------//
-
 #if defined(TOMOPY_USE_CUDA)

 template <>

From c434475ead5e418de1a3e62b55fb4cd8018e2fd2 Mon Sep 17 00:00:00 2001
From: Seth Parker
Date: Thu, 26 Oct 2023 20:36:37 -0400
Subject: [PATCH 11/13] Remove unnecessary include

---
 source/libtomo/accel/utils.hh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/source/libtomo/accel/utils.hh b/source/libtomo/accel/utils.hh
index b1b3ec0a..f14dbcea 100644
--- a/source/libtomo/accel/utils.hh
+++ b/source/libtomo/accel/utils.hh
@@ -42,7 +42,6 @@
 //--------------------------------------------------------------------------------------//

 #include "macros.hh"
-#include "typedefs.hh"

 BEGIN_EXTERN_C
 #include "libtomo/accel.h"

From 6027ce03616766d92f17e61e33cdd6c1dfae848f Mon Sep 17 00:00:00 2001
From: Seth Parker
Date: Thu, 26 Oct 2023 20:57:31 -0400
Subject: [PATCH 12/13] Clarify thread_data auto type

---
 source/libtomo/accel/common.hh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/libtomo/accel/common.hh b/source/libtomo/accel/common.hh
index 1cf3a0c2..aa0729bb 100644
--- a/source/libtomo/accel/common.hh
+++ b/source/libtomo/accel/common.hh
@@ -91,7 +91,7 @@ CreateThreadPool(unique_thread_pool_t& tp, num_threads_t& pool_size)
     auto tid = GetThisThreadID();

     // initialize the thread-local data information
-    auto& thread_data = PTL::ThreadData::GetInstance();
+    auto*& thread_data = PTL::ThreadData::GetInstance();
     if(!thread_data)
         thread_data = new PTL::ThreadData(tp.get());

From b28afd01d6206f15ac06d1db0e107c11aab4c923 Mon Sep 17 00:00:00 2001
From: Seth Parker
Date: Tue, 7 Nov 2023 19:03:18 -0500
Subject: [PATCH 13/13] Update PTL ref to tomopy/PTL/3.x

---
 source/PTL | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/PTL b/source/PTL
index f892a93d..7cc409ee 160000
--- a/source/PTL
+++ b/source/PTL
@@ -1 +1 @@
-Subproject commit f892a93d79615ed8f51c1b9c71f0f7b771dd8223
+Subproject commit 7cc409eeef9de1e7ac3ec01c8b76f640deec36bd