diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index e19403537..a3d044b71 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -82,6 +82,11 @@ jobs:
     displayName: List build environment
   - script: |
       source activate tomopy
+      export CMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}:${CONDA_PREFIX}"
+      export CC=$(which gcc)
+      export CXX=$(which g++)
+      echo "C compiler is ${CC}"
+      echo "C++ compiler is ${CXX}"
       pip install . --no-deps
       cmake -S . -B build -GNinja -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -DCMAKE_BUILD_TYPE=Release -DTOMOPY_USE_MKL:BOOL=$(use.mkl)
       cmake --build build
@@ -112,6 +117,11 @@ jobs:
     displayName: List build environment
   - script: |
       source activate tomopy
+      export CMAKE_PREFIX_PATH="${CMAKE_PREFIX_PATH}:${CONDA_PREFIX}"
+      export CC=$(which clang)
+      export CXX=$(which clang++)
+      echo "C compiler is ${CC}"
+      echo "C++ compiler is ${CXX}"
       pip install . --no-deps
       cmake -S . -B build -GNinja -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -DCMAKE_BUILD_TYPE=Release
       cmake --build build
diff --git a/cmake/Modules/Options.cmake b/cmake/Modules/Options.cmake
index 769e79e8c..e684e6c84 100644
--- a/cmake/Modules/Options.cmake
+++ b/cmake/Modules/Options.cmake
@@ -71,7 +71,6 @@ add_option(TOMOPY_USE_SANITIZER "Enable sanitizer" OFF)
 add_option(TOMOPY_CXX_GRIDREC "Enable gridrec with C++ std::complex" ${_USE_CXX_GRIDREC})
 add_option(TOMOPY_USE_COVERAGE "Enable code coverage for C/C++" OFF)
-add_option(TOMOPY_USE_PTL "Enable Parallel Tasking Library (PTL)" ON)
 add_option(TOMOPY_USE_CLANG_TIDY "Enable clang-tidy (C++ linter)" OFF)
 add_option(TOMOPY_USE_CUDA "Enable CUDA option for GPU execution" ${_USE_CUDA})
 add_option(TOMOPY_USER_FLAGS
diff --git a/pyctest_tomopy.py b/pyctest_tomopy.py
index c7c81eb83..bcd56ad3f 100755
--- a/pyctest_tomopy.py
+++ b/pyctest_tomopy.py
@@ -198,7 +198,6 @@ def add_option(parser, lc_name, disp_name):
     add_bool_opt(args, "TOMOPY_USE_TIMEMORY", args.enable_timemory,
                  args.disable_timemory)
     add_bool_opt(args, "TOMOPY_USE_SANITIZER", args.enable_sanitizer,
                  args.disable_sanitizer)
-    add_bool_opt(args, "TOMOPY_USE_PTL", args.enable_tasking, args.disable_tasking)
     if args.enable_sanitizer:
         args.cmake_args.append("-DSANITIZER_TYPE:STRING={}".format(args.sanitizer_type))
diff --git a/source/CMakeLists.txt b/source/CMakeLists.txt
index b639d8e11..70e11db9f 100644
--- a/source/CMakeLists.txt
+++ b/source/CMakeLists.txt
@@ -20,25 +20,17 @@ set(CMAKE_POSITION_INDEPENDENT_CODE
 # PTL submodule
 #
 # ------------------------------------------------------------------------------#
-checkout_git_submodule(
-  RECURSIVE
-  TEST_FILE
-  CMakeLists.txt
-  RELATIVE_PATH
-  source/PTL
-  WORKING_DIRECTORY
-  ${PROJECT_SOURCE_DIR})
-
-if(TOMOPY_USE_PTL)
-  add_subdirectory(PTL)
-  if(BUILD_STATIC_LIBS)
-    list(APPEND TOMOPY_EXTERNAL_LIBRARIES ptl-static)
-  else()
-    list(APPEND TOMOPY_EXTERNAL_LIBRARIES ptl-shared)
-  endif()
-  list(APPEND ${PROJECT_NAME}_DEFINITIONS TOMOPY_USE_PTL)
+if(TOMOPY_USE_OPENCV OR TOMOPY_USE_CUDA)
+  checkout_git_submodule(
+    RECURSIVE
+    TEST_FILE
+    CMakeLists.txt
+    RELATIVE_PATH
+    source/PTL
+    WORKING_DIRECTORY
+    ${PROJECT_SOURCE_DIR})
+  add_subdirectory(PTL EXCLUDE_FROM_ALL)
 endif()
-
 # ------------------------------------------------------------------------------#
 #
 # TomoPy Python module
diff --git a/source/PTL b/source/PTL
index 79dc8785c..7cc409eee 160000
--- a/source/PTL
+++ b/source/PTL
@@ -1 +1 @@
-Subproject commit 79dc8785c3ef39357669250ffa46e4329c67a084
+Subproject commit 7cc409eeef9de1e7ac3ec01c8b76f640deec36bd
diff --git a/source/libtomo/accel/CMakeLists.txt b/source/libtomo/accel/CMakeLists.txt
index 91dd8cf98..d4170616e 100644
--- a/source/libtomo/accel/CMakeLists.txt
+++ b/source/libtomo/accel/CMakeLists.txt
@@ -23,12 +23,6 @@ target_include_directories(
   $
   PRIVATE ${CMAKE_CURRENT_LIST_DIR})
 
-if(TRUE)
-  # FIXME: Need PTL headers regardless of whether we use PTL
-  target_include_directories(tomo-accel
-                             PRIVATE ${tomopy_SOURCE_DIR}/source/PTL/source)
-endif()
-
 if(TOMOPY_USE_CUDA)
   target_sources(tomo-accel PRIVATE gpu/common.cu gpu/mlem.cu gpu/sirt.cu
@@ -54,7 +48,7 @@ if(TOMOPY_USE_CUDA)
 endif(TOMOPY_USE_CUDA)
 
 target_link_libraries(tomo-accel PRIVATE ${TOMOPY_EXTERNAL_LIBRARIES}
-                                         ${TOMOPY_EXTERNAL_PRIVATE_LIBRARIES})
+                                         ${TOMOPY_EXTERNAL_PRIVATE_LIBRARIES} ptl-static)
 
 target_compile_definitions(tomo-accel PRIVATE ${${PROJECT_NAME}_DEFINITIONS})
diff --git a/source/libtomo/accel/common.hh b/source/libtomo/accel/common.hh
index 2bd7ab85d..aa0729bb6 100644
--- a/source/libtomo/accel/common.hh
+++ b/source/libtomo/accel/common.hh
@@ -58,9 +58,9 @@ CreateThreadPool(unique_thread_pool_t& tp, num_threads_t& pool_size)
     auto min_threads = num_threads_t(1);
     if(pool_size <= 0)
     {
-#if defined(TOMOPY_USE_PTL)
+
         // compute some properties (expected python threads, max threads)
-        auto pythreads = GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);
+        auto pythreads = PTL::GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);
 #    if defined(TOMOPY_USE_CUDA)
         // general oversubscription when CUDA is enabled
         auto max_threads =
@@ -69,11 +69,9 @@ CreateThreadPool(unique_thread_pool_t& tp, num_threads_t& pool_size)
         // if known that CPU only, just try to use all cores
         auto max_threads = HW_CONCURRENCY / std::max(pythreads, min_threads);
 #    endif
-        auto nthreads = std::max(GetEnv("TOMOPY_NUM_THREADS", max_threads), min_threads);
-        pool_size = nthreads;
-#else
-        pool_size = 1;
-#endif
+        auto nthreads =
+            std::max(PTL::GetEnv("TOMOPY_NUM_THREADS", max_threads), min_threads);
+        pool_size = nthreads;
     }
     // always specify at least one thread even if not creating threads
     pool_size = std::max(pool_size, min_threads);
@@ -85,30 +83,30 @@ CreateThreadPool(unique_thread_pool_t& tp, num_threads_t& pool_size)
 #endif
     // use unique pointer per-thread so manager gets deleted when thread gets deleted
     // create the thread-pool instance
-    tp = unique_thread_pool_t(new tomopy::ThreadPool(pool_size));
+    tomopy::ThreadPool::Config cfg;
+    cfg.pool_size = pool_size;
+    tp = unique_thread_pool_t(new tomopy::ThreadPool(cfg));
 
-#if defined(TOMOPY_USE_PTL)
     // ensure this thread is assigned id, assign variable so no unused result warning
     auto tid = GetThisThreadID();
 
     // initialize the thread-local data information
-    auto& thread_data = ThreadData::GetInstance();
+    auto*& thread_data = PTL::ThreadData::GetInstance();
     if(!thread_data)
-        thread_data.reset(new ThreadData(tp.get()));
+        thread_data = new PTL::ThreadData(tp.get());
 
     // tell thread that initialized thread-pool to process tasks
-    // (typically master thread will only wait for other threads)
-    thread_data->is_master = true;
+    // (typically main thread will only wait for other threads)
+    thread_data->is_main = true;
 
     // tell thread that it is not currently within task
     thread_data->within_task = false;
 
     // notify
-    AutoLock l(TypeMutex());
+    PTL::AutoLock l(PTL::TypeMutex());
     std::cout << "\n"
               << "[" << tid << "] Initialized tasking run manager with " << tp->size()
               << " threads..."
              << std::endl;
-#endif
 }
 
 //======================================================================================//
 
@@ -237,14 +235,14 @@ public:
 inline DeviceOption
 GetDevice(const std::string& preferred)
 {
-    auto pythreads = GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);
+    auto pythreads = PTL::GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);
     using DeviceOptionList = std::deque<DeviceOption>;
-    DeviceOptionList options = { };
+    DeviceOptionList options = {};
     std::string default_key = "cpu";
 
 #if defined(TOMOPY_USE_OPENCV)
     options.push_back(DeviceOption(0, "cpu", "Run on CPU (OpenCV)"));
-# endif
+#endif
 
 #if defined(TOMOPY_USE_CUDA)
     auto num_devices = cuda_device_count();
@@ -261,12 +259,13 @@ GetDevice(const std::string& preferred)
     }
     else
     {
-        AutoLock l(TypeMutex());
+        PTL::AutoLock l(PTL::TypeMutex());
        std::cerr << "\n##### No CUDA device(s) available #####\n" << std::endl;
     }
 #endif
 
-    if (options.empty()){
+    if(options.empty())
+    {
        throw std::runtime_error("No devices found! Check that TomoPy was "
                                 "compiled with OpenCV or CUDA.");
     }
@@ -278,7 +277,8 @@ GetDevice(const std::string& preferred)
 
     //------------------------------------------------------------------------//
     // print the options the first time it is encountered
-    auto print_options = [&]() {
+    auto print_options = [&]()
+    {
         static std::atomic_uint _once;
         auto _count = _once++;
         if(_count % pythreads > 0)
@@ -301,12 +301,13 @@ GetDevice(const std::string& preferred)
         }
         DeviceOption::footer(ss);
 
-        AutoLock l(TypeMutex());
+        PTL::AutoLock l(PTL::TypeMutex());
         std::cout << "\n" << ss.str() << std::endl;
     };
     //------------------------------------------------------------------------//
     // print the option selection first time it is encountered
-    auto print_selection = [&](DeviceOption& selected_opt) {
+    auto print_selection = [&](DeviceOption& selected_opt)
+    {
         static std::atomic_uint _once;
         auto _count = _once++;
         if(_count % pythreads > 0)
@@ -323,7 +324,7 @@ GetDevice(const std::string& preferred)
         ss << "Selected device: " << selected_opt << "\n";
         DeviceOption::spacer(ss, '-');
 
-        AutoLock l(TypeMutex());
+        PTL::AutoLock l(PTL::TypeMutex());
         std::cout << ss.str() << std::endl;
     };
     //------------------------------------------------------------------------//
@@ -356,7 +357,7 @@ stream_sync(cudaStream_t _stream)
     cudaStreamSynchronize(_stream);
     CUDA_CHECK_LAST_STREAM_ERROR(_stream);
 #else
-    ConsumeParameters(_stream);
+    PTL::ConsumeParameters(_stream);
 #endif
 }
diff --git a/source/libtomo/accel/cxx/mlem.cc b/source/libtomo/accel/cxx/mlem.cc
index 52fb78432..2261bfd3e 100755
--- a/source/libtomo/accel/cxx/mlem.cc
+++ b/source/libtomo/accel/cxx/mlem.cc
@@ -76,7 +76,7 @@ cxx_mlem(const float* data, int dy, int dt, int dx, const float* center,
     // local count for the thread
     int count = registration.initialize();
     // number of threads started at Python level
-    auto tcount = GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);
+    auto tcount = PTL::GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);
 
     // configured runtime options
     RuntimeOptions opts(pool_size, interp, device, grid_size, block_size);
@@ -109,7 +109,7 @@ cxx_mlem(const float* data, int dy, int dt, int dx, const float* center,
     }
     catch(std::exception& e)
     {
-        AutoLock l(TypeMutex());
+        PTL::AutoLock l(PTL::TypeMutex());
         std::cerr << "[TID: " << tid << "] " << e.what()
                   << "\nFalling back to CPU algorithm..." << std::endl;
         return EXIT_FAILURE;
     }
@@ -129,7 +129,7 @@ void
 mlem_cpu_compute_projection(data_array_t& cpu_data, int p, int dy, int dt, int dx,
                             int nx, int ny, const float* theta)
 {
-    ConsumeParameters(dy);
+    PTL::ConsumeParameters(dy);
     auto cache = cpu_data[GetThisThreadID() % cpu_data.size()];
 
     // calculate some values
@@ -237,4 +237,4 @@ mlem_cpu(const float* data, int dy, int dt, int dx, const float*, const float* t
     printf("\n");
 }
 
-#endif  // TOMOPY_USE_OPENCV
+#endif  // TOMOPY_USE_OPENCV
diff --git a/source/libtomo/accel/cxx/sirt.cc b/source/libtomo/accel/cxx/sirt.cc
index beb56ed23..4edb6d336 100755
--- a/source/libtomo/accel/cxx/sirt.cc
+++ b/source/libtomo/accel/cxx/sirt.cc
@@ -75,7 +75,7 @@ cxx_sirt(const float* data, int dy, int dt, int dx, const float* center,
     // local count for the thread
     int count = registration.initialize();
     // number of threads started at Python level
-    auto tcount = GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);
+    auto tcount = PTL::GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);
 
     // configured runtime options
     RuntimeOptions opts(pool_size, interp, device, grid_size, block_size);
@@ -108,7 +108,7 @@ cxx_sirt(const float* data, int dy, int dt, int dx, const float* center,
     }
     catch(std::exception& e)
     {
-        AutoLock l(TypeMutex());
+        PTL::AutoLock l(PTL::TypeMutex());
         std::cerr << "[TID: " << tid << "] " << e.what()
                   << "\nFalling back to CPU algorithm..." << std::endl;
         // return failure code
@@ -130,7 +130,7 @@ void
 sirt_cpu_compute_projection(data_array_t& cpu_data, int p, int dy, int dt, int dx,
                             int nx, int ny, const float* theta)
 {
-    ConsumeParameters(dy);
+    PTL::ConsumeParameters(dy);
     auto cache = cpu_data[GetThisThreadID() % cpu_data.size()];
 
     // calculate some values
@@ -238,4 +238,4 @@ sirt_cpu(const float* data, int dy, int dt, int dx, const float*, const float* t
     printf("\n");
 }
 
-#endif  // TOMOPY_USE_OPENCV
+#endif  // TOMOPY_USE_OPENCV
diff --git a/source/libtomo/accel/data.hh b/source/libtomo/accel/data.hh
index e3c822484..45d912f8b 100644
--- a/source/libtomo/accel/data.hh
+++ b/source/libtomo/accel/data.hh
@@ -90,7 +90,7 @@ struct RuntimeOptions
     ~RuntimeOptions() {}
 
     // disable copying and copy assignment
-    RuntimeOptions(const RuntimeOptions&) = delete;
+    RuntimeOptions(const RuntimeOptions&) = delete;
     RuntimeOptions& operator=(const RuntimeOptions&) = delete;
 
     // create the thread pool -- don't have this in the constructor
@@ -180,8 +180,6 @@ struct Registration
 
 //======================================================================================//
 
-#if defined(TOMOPY_USE_PTL)
-
 //--------------------------------------------------------------------------------------//
 //  when PTL thread-pool is available
 //
@@ -210,49 +208,13 @@ execute(RuntimeOptions* ops, int dt, DataArray& data, Func&& func, Args&&... arg
         std::stringstream ss;
         ss << "\n\nError executing :: " << e.what() << "\n\n";
         {
-            AutoLock l(TypeMutex());
-            std::cerr << e.what() << std::endl;
-        }
-        throw std::runtime_error(ss.str().c_str());
-    }
-}
-
-#else
-
-//--------------------------------------------------------------------------------------//
-//  when PTL thread-pool is not available
-//
-template <typename DataArray, typename Func, typename... Args>
-void
-execute(RuntimeOptions* ops, int dt, DataArray& data, Func&& func, Args&&... args)
-{
-    // sync streams
-    auto join = [&]() { stream_sync(0); };
-
-    try
-    {
-        for(int p = 0; p < dt; ++p)
-        {
-            auto _func = std::bind(std::forward<Func>(func), std::ref(data),
-                                   std::forward<int>(p), std::forward<Args>(args)...);
-            _func();
-        }
-        join();
-    }
-    catch(const std::exception& e)
-    {
-        std::stringstream ss;
-        ss << "\n\nError executing :: " << e.what() << "\n\n";
-        {
-            AutoLock l(TypeMutex());
+            PTL::AutoLock l(PTL::TypeMutex());
             std::cerr << e.what() << std::endl;
         }
         throw std::runtime_error(ss.str().c_str());
     }
 }
 
-#endif
-
 //======================================================================================//
 
 class CpuData
@@ -295,9 +257,9 @@ public:
 
     int interpolation() const { return m_interp; }
 
-    Mutex* upd_mutex() const
+    PTL::Mutex* upd_mutex() const
     {
-        static Mutex mtx;
+        static PTL::Mutex mtx;
         return &mtx;
     }
 
@@ -401,7 +363,7 @@ public:
     GpuData(this_type&&) = default;
 
     this_type& operator=(const this_type&) = delete;
-    this_type& operator=(this_type&&) = default;
+    this_type& operator=(this_type&&) = default;
 
 public:
     // access functions
diff --git a/source/libtomo/accel/gpu/common.cu b/source/libtomo/accel/gpu/common.cu
index e3f29b092..53cd66bf2 100644
--- a/source/libtomo/accel/gpu/common.cu
+++ b/source/libtomo/accel/gpu/common.cu
@@ -238,7 +238,7 @@ cuda_device_count()
 void
 cuda_device_query()
 {
-    auto pythreads = GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);
+    auto pythreads = PTL::GetEnv("TOMOPY_PYTHON_THREADS", HW_CONCURRENCY);
     static std::atomic _once;
     auto _count = _once++;
     if(_count + 1 == pythreads)
@@ -277,14 +277,14 @@ cuda_device_query()
         return;
     }
 
-    AutoLock l(TypeMutex());
+    PTL::AutoLock l(PTL::TypeMutex());
 
     if(deviceCount == 0)
         printf("No available CUDA device(s) detected\n");
     else
         printf("Detected %d CUDA capable devices\n", deviceCount);
 
-    int specific_device = GetEnv("TOMOPY_DEVICE_NUM", -1);
+    int specific_device = PTL::GetEnv("TOMOPY_DEVICE_NUM", -1);
 
     for(int dev = 0; dev < deviceCount; ++dev)
     {
diff --git a/source/libtomo/accel/gpu/mlem.cu b/source/libtomo/accel/gpu/mlem.cu
index 7a65e0b1e..6f983270e 100644
--- a/source/libtomo/accel/gpu/mlem.cu
+++ b/source/libtomo/accel/gpu/mlem.cu
@@ -176,7 +176,7 @@ mlem_cuda(const float* cpu_data, int dy, int dt, int dx, const float*, const flo
     // compute some properties (expected python threads, max threads, device assignment)
     int pythread_num = ntid++;
     int device = pythread_num % cuda_device_count();  // assign to device
-    device = GetEnv("TOMOPY_DEVICE_NUM", device) % cuda_device_count();
+    device = PTL::GetEnv("TOMOPY_DEVICE_NUM", device) % cuda_device_count();
 
     TIMEMORY_AUTO_TIMER("");
diff --git a/source/libtomo/accel/gpu/sirt.cu b/source/libtomo/accel/gpu/sirt.cu
index c672161b7..574fc78fd 100644
--- a/source/libtomo/accel/gpu/sirt.cu
+++ b/source/libtomo/accel/gpu/sirt.cu
@@ -173,7 +173,7 @@ sirt_cuda(const float* cpu_data, int dy, int dt, int dx, const float* center,
     // compute some properties (expected python threads, max threads, device assignment)
     int pythread_num = ntid++;
     int device = pythread_num % cuda_device_count();  // assign to device
-    device = GetEnv("TOMOPY_DEVICE_NUM", device) % cuda_device_count();
+    device = PTL::GetEnv("TOMOPY_DEVICE_NUM", device) % cuda_device_count();
 
     TIMEMORY_AUTO_TIMER("");
diff --git a/source/libtomo/accel/macros.hh b/source/libtomo/accel/macros.hh
index 6d8cbcbe5..0e923dd98 100644
--- a/source/libtomo/accel/macros.hh
+++ b/source/libtomo/accel/macros.hh
@@ -97,21 +97,20 @@
 //
 #include "PTL/AutoLock.hh"
 #include "PTL/Types.hh"
-#include "PTL/Utility.hh"
+#include "PTL/GetEnv.hh"
 
 //--------------------------------------------------------------------------------------//
 //  contain compiled implementations
 //
-#if defined(TOMOPY_USE_PTL)
-#    include "PTL/TBBTaskGroup.hh"
-#    include "PTL/Task.hh"
-#    include "PTL/TaskGroup.hh"
-#    include "PTL/TaskManager.hh"
-#    include "PTL/TaskRunManager.hh"
-#    include "PTL/ThreadData.hh"
-#    include "PTL/ThreadPool.hh"
-#    include "PTL/Threading.hh"
-#endif
+
+#include "PTL/Task.hh"
+#include "PTL/TaskGroup.hh"
+#include "PTL/TaskManager.hh"
+#include "PTL/TaskRunManager.hh"
+#include "PTL/ThreadData.hh"
+#include "PTL/ThreadPool.hh"
+#include "PTL/Threading.hh"
+
 //--------------------------------------------------------------------------------------//
 //  CUDA headers
@@ -150,13 +149,7 @@
 inline uintmax_t
 GetThisThreadID()
 {
-#if defined(TOMOPY_USE_PTL)
-    return ThreadPool::GetThisThreadID();
-#else
-    static std::atomic<uintmax_t> tcounter;
-    static thread_local auto tid = tcounter++;
-    return tid;
-#endif
+    return PTL::ThreadPool::get_this_thread_id();
 }
 
 //======================================================================================//
@@ -394,70 +387,24 @@ struct cuda_algorithms
 //--------------------------------------------------------------------------------------//
 //  Create a ThreadPool class in so we can refer to it safely when PTL is
-//  not enabled. Do this within a namespace in case a header later includes
-//  "PTL/ThreadPool.hh" and PTL is not enabled.
-//  --> When PTL is enabled, tomopy::ThreadPool is an alias to PTL ThreadPool
-//  --> When PTL is disabled, tomopy::ThreadPool is an alias to dummy class
-
-#if defined(TOMOPY_USE_PTL)
+//  not enabled.
 
 //--------------------------------------------------------------------------------------//
 
-using ThreadPool = ::ThreadPool;
+using ThreadPool = PTL::ThreadPool;
 
 template <typename _Ret, typename _Arg = _Ret>
-using TaskGroup = ::TaskGroup<_Ret, _Arg>;
+using TaskGroup = PTL::TaskGroup<_Ret, _Arg>;
 
 //--------------------------------------------------------------------------------------//
 //  when compiled with PTL, mark tomopy::ThreadPool as implemented
 template <>
-struct implementation_available : std::true_type
+struct implementation_available : std::true_type
 { };
 
 //--------------------------------------------------------------------------------------//
 
-#else
-
-//--------------------------------------------------------------------------------------//
-//  dummy thread pool impl
-
-class ThreadPool
-{
-public:
-    ThreadPool(intmax_t = 1, bool = false) {}
-    intmax_t size() const { return 1; }
-    void destroy_threadpool() {}
-};
-
-template <typename _Ret, typename _Arg = _Ret>
-class TaskGroup
-{
-public:
-    template <typename _Func>
-    TaskGroup(_Func&& _join, ThreadPool* = nullptr)
-    : m_join(std::forward<_Func>(_join))
-    {
-    }
-
-    template <typename _Func, typename... _Args>
-    void run(_Func&& func, _Args&&... args)
-    {
-        std::forward<_Func>(func)(std::forward<_Args>(args)...);
-    }
-
-    void join() { m_join(); }
-
-private:
-    std::function m_join;
-};
-
-//--------------------------------------------------------------------------------------//
-
-#endif
-
-//--------------------------------------------------------------------------------------//
-
 #if defined(TOMOPY_USE_CUDA)
 
 template <>
diff --git a/source/libtomo/accel/utils.hh b/source/libtomo/accel/utils.hh
index 464492aef..f14dbcea2 100644
--- a/source/libtomo/accel/utils.hh
+++ b/source/libtomo/accel/utils.hh
@@ -54,11 +54,67 @@ END_EXTERN_C
 
 #if defined(TOMOPY_USE_OPENCV)
 
-#define CPU_NN CV_INTER_NN
-#define CPU_LINEAR CV_INTER_LINEAR
-#define CPU_AREA CV_INTER_AREA
-#define CPU_CUBIC CV_INTER_CUBIC
-#define CPU_LANCZOS CV_INTER_LANCZOS4
+#    define CPU_NN CV_INTER_NN
+#    define CPU_LINEAR CV_INTER_LINEAR
+#    define CPU_AREA CV_INTER_AREA
+#    define CPU_CUBIC CV_INTER_CUBIC
+#    define CPU_LANCZOS CV_INTER_LANCZOS4
+
+//--------------------------------------------------------------------------------------//
+
+// a non-string environment option with a string identifier
+template <typename Tp>
+using EnvChoice = std::tuple<Tp, std::string, std::string>;
+
+//--------------------------------------------------------------------------------------//
+// list of environment choices with non-string and string identifiers
+template <typename Tp>
+using EnvChoiceList = std::set<EnvChoice<Tp>>;
+
+//--------------------------------------------------------------------------------------//
+
+template <typename Tp>
+Tp
+GetChoice(const EnvChoiceList<Tp>& _choices, const std::string& str_var)
+{
+    auto asupper = [](std::string var) {
+        for(auto& itr : var)
+            itr = toupper(itr);
+        return var;
+    };
+
+    std::string upp_var = asupper(str_var);
+    Tp var = Tp();
+    // check to see if string matches a choice
+    for(const auto& itr : _choices)
+    {
+        if(asupper(std::get<1>(itr)) == upp_var)
+        {
+            // record value defined by environment
+            return std::get<0>(itr);
+        }
+    }
+    std::istringstream iss(str_var);
+    iss >> var;
+    // check to see if string matches a choice
+    for(const auto& itr : _choices)
+    {
+        if(var == std::get<0>(itr))
+        {
+            // record value defined by environment
+            return var;
+        }
+    }
+    // the value set in env did not match any choices
+    std::stringstream ss;
+    ss << "\n### Environment setting error @ " << __FUNCTION__ << " (line " << __LINE__
+       << ")! Invalid selection \"" << str_var << "\". Valid choices are:\n";
+    for(const auto& itr : _choices)
+        ss << "\t\"" << std::get<0>(itr) << "\" or \"" << std::get<1>(itr) << "\" ("
+           << std::get<2>(itr) << ")\n";
+    std::cerr << ss.str() << std::endl;
+    abort();
+}
 
 //--------------------------------------------------------------------------------------//
@@ -73,16 +129,16 @@ struct OpenCVDataType
     }
 };
 
-#define DEFINE_OPENCV_DATA_TYPE(pod_type, opencv_type)                          \
-    template <>                                                                 \
-    struct OpenCVDataType<pod_type>                                             \
-    {                                                                           \
-        template                                                                \
-        static constexpr int value()                                            \
-        {                                                                       \
-            return opencv_type;                                                 \
-        }                                                                       \
-    };
+#    define DEFINE_OPENCV_DATA_TYPE(pod_type, opencv_type)                      \
+        template <>                                                             \
+        struct OpenCVDataType<pod_type>                                         \
+        {                                                                       \
+            template                                                            \
+            static constexpr int value()                                        \
+            {                                                                   \
+                return opencv_type;                                             \
+            }                                                                   \
+        };
 
 // floating point types
 DEFINE_OPENCV_DATA_TYPE(float, CV_32F)
@@ -97,7 +153,7 @@ DEFINE_OPENCV_DATA_TYPE(int32_t, CV_32S)
 DEFINE_OPENCV_DATA_TYPE(uint8_t, CV_8U)
 DEFINE_OPENCV_DATA_TYPE(uint16_t, CV_16U)
 
-#undef DEFINE_OPENCV_DATA_TYPE  // don't pollute
+#    undef DEFINE_OPENCV_DATA_TYPE  // don't pollute
 
 //--------------------------------------------------------------------------------------//
@@ -158,7 +214,8 @@ cxx_rotate(const _Tp* src, double theta, const intmax_t& nx, const intmax_t& ny,
 inline iarray_t
 cxx_compute_sum_dist(int dy, int dt, int dx, int nx, int ny, const float* theta)
 {
-    auto compute = [&](const iarray_t& ones, iarray_t& sum_dist, int p) {
+    auto compute = [&](const iarray_t& ones, iarray_t& sum_dist, int p)
+    {
         for(int s = 0; s < dy; ++s)
         {
             for(int d = 0; d < dx; ++d)
@@ -186,7 +243,7 @@ cxx_compute_sum_dist(int dy, int dt, int dx, int nx, int ny, const float* theta)
     return sum_dist;
 }
 
-#endif  // TOMOPY_USE_OPENCV
+#endif  // TOMOPY_USE_OPENCV
 
 //======================================================================================//
 //
@@ -220,7 +277,7 @@ GetNppInterpolationMode(const std::string& preferred)
 inline int
 GetBlockSize(const int& init = 32)
 {
-    static thread_local int _instance = GetEnv("TOMOPY_BLOCK_SIZE", init);
+    static thread_local int _instance = PTL::GetEnv("TOMOPY_BLOCK_SIZE", init);
     return _instance;
 }
@@ -230,7 +287,7 @@ inline int
 GetGridSize(const int& init = 0)
 {
     // default value of zero == calculated according to block and loop size
-    static thread_local int _instance = GetEnv("TOMOPY_GRID_SIZE", init);
+    static thread_local int _instance = PTL::GetEnv("TOMOPY_GRID_SIZE", init);
     return _instance;
 }
@@ -247,9 +304,9 @@ ComputeGridSize(const int& size, const int& block_size = GetBlockSize())
 inline dim3
 GetBlockDims(const dim3& init = dim3(32, 32, 1))
 {
-    int _x = GetEnv("TOMOPY_BLOCK_SIZE_X", init.x);
-    int _y = GetEnv("TOMOPY_BLOCK_SIZE_Y", init.y);
-    int _z = GetEnv("TOMOPY_BLOCK_SIZE_Z", init.z);
+    int _x = PTL::GetEnv("TOMOPY_BLOCK_SIZE_X", init.x);
+    int _y = PTL::GetEnv("TOMOPY_BLOCK_SIZE_Y", init.y);
+    int _z = PTL::GetEnv("TOMOPY_BLOCK_SIZE_Z", init.z);
     return dim3(_x, _y, _z);
 }
@@ -259,9 +316,9 @@ inline dim3
 GetGridDims(const dim3& init = dim3(0, 0, 0))
 {
     // default value of zero == calculated according to block and loop size
-    int _x = GetEnv("TOMOPY_GRID_SIZE_X", init.x);
-    int _y = GetEnv("TOMOPY_GRID_SIZE_Y", init.y);
-    int _z = GetEnv("TOMOPY_GRID_SIZE_Z", init.z);
+    int _x = PTL::GetEnv("TOMOPY_GRID_SIZE_X", init.x);
+    int _y = PTL::GetEnv("TOMOPY_GRID_SIZE_Y", init.y);
+    int _z = PTL::GetEnv("TOMOPY_GRID_SIZE_Z", init.z);
     return dim3(_x, _y, _z);
 }
@@ -501,8 +558,8 @@ reduce(float* _in, float* _out, int size);
 //======================================================================================//
 
 DLL int32_t*
-    cuda_rotate(const int32_t* src, const float theta_rad, const float theta_deg,
-                const int nx, const int ny, cudaStream_t stream, const int eInterp);
+cuda_rotate(const int32_t* src, const float theta_rad, const float theta_deg,
+            const int nx, const int ny, cudaStream_t stream, const int eInterp);
 
 //--------------------------------------------------------------------------------------//
diff --git a/source/libtomo/gridrec/CMakeLists.txt b/source/libtomo/gridrec/CMakeLists.txt
index 4f9a64bac..ada28f3d6 100644
--- a/source/libtomo/gridrec/CMakeLists.txt
+++ b/source/libtomo/gridrec/CMakeLists.txt
@@ -44,6 +44,8 @@ target_compile_options(
   tomo-gridrec PRIVATE $<$<COMPILE_LANGUAGE:C>:${${PROJECT_NAME}_C_FLAGS}>
                        $<$<COMPILE_LANGUAGE:CXX>:${${PROJECT_NAME}_CXX_FLAGS}>)
 
+set_property(TARGET tomo-gridrec PROPERTY CXX_STANDARD 14)
+
 install(TARGETS tomo-gridrec EXPORT libtomoTargets)
 
 install(