Merge pull request #1371 from LLNL/v2022.10.2-RC
V2022.10.2 rc
rhornung67 committed Nov 7, 2022
2 parents 2176ef1 + 44e7af9 commit 54a0aaa
Showing 38 changed files with 243 additions and 82 deletions.
21 changes: 8 additions & 13 deletions CMakeLists.txt
@@ -16,7 +16,7 @@ include(CMakeDependentOption)
# Set version number
set(RAJA_VERSION_MAJOR 2022)
set(RAJA_VERSION_MINOR 10)
set(RAJA_VERSION_PATCHLEVEL 1)
set(RAJA_VERSION_PATCHLEVEL 2)

if (RAJA_LOADED AND (NOT RAJA_LOADED STREQUAL "${RAJA_VERSION_MAJOR}.${RAJA_VERSION_MINOR}.${RAJA_VERSION_PATCHLEVEL}"))
message(FATAL_ERROR "You are mixing RAJA versions. Loaded is ${RAJA_LOADED}, expected ${RAJA_VERSION_MAJOR}.${RAJA_VERSION_MINOR}.${RAJA_VERSION_PATCHLEVEL}")
@@ -118,28 +118,23 @@ include(cmake/SetupBasics.cmake)
include(cmake/SetupPackages.cmake)

if (RAJA_ENABLE_CUDA)
if (DEFINED CUDA_ARCH)
if (CUDA_ARCH MATCHES "^sm_*")
if ("${CUDA_ARCH}" STRLESS "sm_35")
message( FATAL_ERROR "RAJA requires minimum CUDA compute architecture of sm_35")
endif()
endif()
if (CUDA_ARCH MATCHES "^compute_*")
if ("${CUDA_ARCH}" STRLESS "compute_35")
message( FATAL_ERROR "RAJA requires minimum CUDA compute architecture of compute_35")
endif()
if (DEFINED CMAKE_CUDA_ARCHITECTURES)
if ("${CMAKE_CUDA_ARCHITECTURES}" STRLESS "35")
message( FATAL_ERROR "RAJA requires minimum CUDA compute architecture of 35")
endif()
else()
message(STATUS "CUDA compute architecture set to RAJA default sm_35 since it was not specified")
set(CUDA_ARCH "sm_35" CACHE STRING "Set CUDA_ARCH to RAJA minimum supported" FORCE)
message(STATUS "CUDA compute architecture set to RAJA default 35 since it was not specified")
set(CMAKE_CUDA_ARCHITECTURES "35" CACHE STRING "Set CMAKE_CUDA_ARCHITECTURES to RAJA minimum supported" FORCE)
endif()
message(STATUS "CMAKE_CUDA_ARCHITECTURES set to ${CMAKE_CUDA_ARCHITECTURES}")
if ( (CMAKE_CXX_COMPILER_ID MATCHES GNU) AND (CMAKE_SYSTEM_PROCESSOR MATCHES ppc64le) )
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 8.0)
set (CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -mno-float128")
endif ()
endif ()
endif()


# Setup vendor-specific compiler flags
include(cmake/SetupCompilers.cmake)
# Macros for building executables and libraries
28 changes: 26 additions & 2 deletions RELEASE_NOTES.md
@@ -20,6 +20,29 @@ Notable changes include:
* Bug fixes/improvements:


Version 2022.10.2 -- Release date 2022-11-08
============================================

This release fixes a few issues that were found after the v2022.10.1 patch
release and updates a few things. Sorry for the churn, folks.

Notable changes include:

* Update desul submodule to commit e4b65e00.

* The CUDA compute architecture must now be set using the
  'CMAKE_CUDA_ARCHITECTURES' CMake variable. For example, passing
  '-DCMAKE_CUDA_ARCHITECTURES=70' to CMake selects the 'sm_70' architecture.
  Using '-DCUDA_ARCH=sm_*' will no longer do the right thing. Please
  see the RAJA User Guide for more information.
* A linking bug was fixed related to usage of the new RAJA::KernelName
  capability (a usage sketch follows this list).
* A compilation bug was fixed in the new reduction interface support for
OpenMP target offload.
* An issue was fixed in AVX compiler checking logic for RAJA vectorization
intrinsics capabilities.
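
As an illustrative sketch only (not part of this diff), the kernel-naming
capability is typically passed to RAJA::forall alongside the new reduction
parameters. The policy, namespaces, and helper names below are assumptions
drawn from the expt params headers (the note above spells the capability
RAJA::KernelName) and should be checked against your RAJA version:

    #include "RAJA/RAJA.hpp"

    // Sum an array with the new reduction-parameter interface and attach a
    // kernel name that profiling/annotation hooks can pick up.
    void named_sum(const double* x, int N, double& result)
    {
      double sum = 0.0;
      RAJA::forall<RAJA::seq_exec>(
          RAJA::RangeSegment(0, N),
          RAJA::expt::Reduce<RAJA::operators::plus>(&sum),
          RAJA::expt::KernelName("named_sum"),
          [=](int i, double& local_sum) { local_sum += x[i]; });
      result = sum;
    }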


Version 2022.10.1 -- Release date 2022-10-31
============================================

@@ -55,8 +78,9 @@ Notable changes include:
code and then select which to use at run time. There is no discussion
of this in the RAJA User Guide yet. However, there are a couple of
example codes in files RAJA/examples/*dynamic-forall*.cpp.
* The RAJA::launch framework has been moved out of the experimental namespace, into the RAJA:: namespace, which introduces an API change.
* Add support for all RAJA segment types in the RAJA::launch framework.
* The RAJA::launch framework has been moved out of the experimental
  namespace into the RAJA:: namespace, which introduces an API change
  (a usage sketch follows this section of the notes).
* Add support for all RAJA segment types in the RAJA::launch framework.
* Add SYCL back-end support for RAJA::launch and dynamic shared memory
for all back-ends in RAJA::launch. These changes introduce API changes.
* Add additional policies to WorkGroup construct that allow for different
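
As context for the RAJA::launch item above (a sketch under assumptions, not
code from this commit), a minimal host-side use of the non-experimental API
looks roughly like this; the policy and parameter type names should be
verified against the RAJA User Guide:

    #include "RAJA/RAJA.hpp"

    using launch_policy = RAJA::LaunchPolicy<RAJA::seq_launch_t>;
    using loop_policy   = RAJA::LoopPolicy<RAJA::loop_exec>;

    // Scale an array inside a sequential launch region; swapping the launch
    // and loop policies retargets the same lambda to a device back-end.
    void scale(double* x, double a, int N)
    {
      RAJA::launch<launch_policy>(
          RAJA::LaunchParams(RAJA::Teams(1), RAJA::Threads(1)),
          [=] RAJA_HOST_DEVICE (RAJA::LaunchContext ctx) {
            RAJA::loop<loop_policy>(ctx, RAJA::RangeSegment(0, N),
                                    [&](int i) { x[i] *= a; });
          });
    }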
2 changes: 1 addition & 1 deletion cmake/SetupCompilers.cmake
@@ -43,7 +43,7 @@ endif()

if (RAJA_ENABLE_CUDA)
set(CMAKE_CUDA_STANDARD 14)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -restrict -arch ${CUDA_ARCH} --expt-extended-lambda --expt-relaxed-constexpr -Xcudafe \"--display_error_number\"")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -restrict --expt-extended-lambda --expt-relaxed-constexpr -Xcudafe \"--display_error_number\"")

if (NOT RAJA_HOST_CONFIG_LOADED)
set(CMAKE_CUDA_FLAGS_RELEASE "-O2")
2 changes: 1 addition & 1 deletion docs/conf.py
@@ -88,7 +88,7 @@
# The short X.Y version.
version = u'2022.10'
# The full version, including alpha/beta/rc tags.
release = u'2022.10.1'
release = u'2022.10.2'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
1 change: 0 additions & 1 deletion docs/sphinx/user_guide/config_options.rst
@@ -206,7 +206,6 @@ Other programming model specific compilation options are also available:
must be on too!)
RAJA_ENABLE_EXTERNAL_CUB Off
RAJA_ENABLE_NV_TOOLS_EXT Off
CUDA_ARCH sm_35 (based on hardware support)
RAJA_ENABLE_EXTERNAL_ROCPRIM Off
RAJA_ENABLE_ROCTX Off
====================================== =================================
16 changes: 9 additions & 7 deletions docs/sphinx/user_guide/getting_started.rst
@@ -229,8 +229,9 @@ compiler options to nvcc.
``-Xcompiler`` directive to properly propagate.

To set the CUDA compute architecture, which should be chosen based on the
NVIDIA GPU hardware you are using, you can use the ``CUDA_ARCH`` CMake
variable. For example, the CMake option ``-DCUDA_ARCH=sm_70`` will tell the
NVIDIA GPU hardware you are using, you can use the ``CMAKE_CUDA_ARCHITECTURES``
CMake variable. For example, the CMake option
``-DCMAKE_CUDA_ARCHITECTURES=70`` will tell the
compiler to use the `sm_70` SASS architecture in its second stage of
compilation. The compiler will pick the PTX architecture to use in the first
stage of compilation that is suitable for the SASS architecture you specify.
@@ -244,12 +245,13 @@ appropriate nvcc options in the ``CMAKE_CUDA_FLAGS_*`` variables.
implemented inside RAJA. This is described in
:ref:`feat-atomics-label`.

* If you do not specify a value for ``CUDA_ARCH``, it will be set to
`sm_35` by default and CMake will emit a status message
indicating this choice was made.
* If you do not specify a value for ``CMAKE_CUDA_ARCHITECTURES``,
it will be set to `35` by default and CMake will emit a status
message indicating this choice was made.

* If you give a ``CUDA_ARCH`` value less than `sm_35` (e.g., `sm_30`),
CMake will report this as an error and stop processing.
* If you give a ``CMAKE_CUDA_ARCHITECTURES`` value less than `35`
(e.g., `30`), CMake will report this as an error and stop
processing.

Also, RAJA relies on the CUB CUDA utilities library, mentioned earlier, for
some CUDA back-end functionality. The CUB version included in the CUDA toolkit
2 changes: 1 addition & 1 deletion include/RAJA/pattern/params/kernel_name.hpp
@@ -18,7 +18,7 @@ namespace detail

} // namespace detail

auto KernelName(const char * n)
inline auto KernelName(const char * n)
{
return detail::KernelName(n);
}
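
For context (not part of the diff): the added inline keyword matters because
this free function is defined in a header that many translation units include.
A minimal sketch of the failure mode the change fixes:

    // header.hpp -- a function *defined* in a header must be declared inline,
    // otherwise every .cpp that includes the header emits its own external
    // definition and the link fails with "multiple definition" errors.
    inline int answer() { return 42; }   // OK: inline permits repeated definitions
    // int answer() { return 42; }       // breaks once two .cpp files include this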
4 changes: 2 additions & 2 deletions include/RAJA/policy/desul/atomic.hpp
@@ -87,7 +87,7 @@ RAJA_INLINE T atomicInc(AtomicPolicy, T volatile *acc, T val)
{
// See:
// http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomicinc
return desul::atomic_wrapping_fetch_inc(const_cast<T*>(acc),
return desul::atomic_fetch_inc_mod(const_cast<T*>(acc),
val,
raja_default_desul_order{},
raja_default_desul_scope{});
@@ -110,7 +110,7 @@ RAJA_INLINE T atomicDec(AtomicPolicy, T volatile *acc, T val)
{
// See:
// http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomicdec
return desul::atomic_wrapping_fetch_dec(const_cast<T*>(acc),
return desul::atomic_fetch_dec_mod(const_cast<T*>(acc),
val,
raja_default_desul_order{},
raja_default_desul_scope{});
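
The two wrappers above forward to desul's renamed wrapping atomics. As a plain
C++ reference model only (not the RAJA or desul implementation), the intended
semantics mirror CUDA's atomicInc/atomicDec:

    #include <cstdio>

    // Non-atomic model: increment wraps to 0 once the old value reaches the
    // bound; decrement wraps to the bound when the old value is 0 (or above it).
    unsigned wrapping_inc(unsigned* acc, unsigned val)
    {
      unsigned old = *acc;
      *acc = (old >= val) ? 0u : old + 1u;
      return old;
    }

    unsigned wrapping_dec(unsigned* acc, unsigned val)
    {
      unsigned old = *acc;
      *acc = (old == 0u || old > val) ? val : old - 1u;
      return old;
    }

    int main()
    {
      unsigned c = 9u;
      wrapping_inc(&c, 9u);                // c wraps to 0
      wrapping_dec(&c, 9u);                // c wraps back to 9
      std::printf("c = %u\n", c);          // prints "c = 9"
    }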
6 changes: 3 additions & 3 deletions include/RAJA/policy/openmp/params/forall.hpp
@@ -11,7 +11,7 @@
#define RAJA_OMP_DECLARE_REDUCTION_COMBINE \
_Pragma(" omp declare reduction( combine \
: typename std::remove_reference<decltype(f_params)>::type \
: RAJA::expt::ParamMultiplexer::combine<EXEC_POL>(omp_out, omp_in) ) \
: RAJA::expt::ParamMultiplexer::combine<EXEC_POL>(omp_out, omp_in) ) ")\
//initializer(omp_priv = omp_in) ")

namespace RAJA
@@ -38,7 +38,7 @@ namespace expt
ForallParam&& f_params)
{
using EXEC_POL = typename std::decay<decltype(p)>::type;
RAJA::expt::ParamMultiplexer::init<ExecPol>(f_params);
RAJA::expt::ParamMultiplexer::init<EXEC_POL>(f_params);
RAJA_OMP_DECLARE_REDUCTION_COMBINE;

RAJA_EXTRACT_BED_IT(iter);
@@ -47,7 +47,7 @@ namespace expt
RAJA::expt::invoke_body(f_params, loop_body, begin_it[i]);
}

RAJA::expt::ParamMultiplexer::resolve<ExecPol>(f_params);
RAJA::expt::ParamMultiplexer::resolve<EXEC_POL>(f_params);
}

//
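
The f_params object threaded through this file carries reducers created with
the new reduction interface. A usage sketch that would exercise this OpenMP
path (an assumption-based example, not code from this commit):

    #include "RAJA/RAJA.hpp"

    // Dot product with the new reduction-parameter interface on the OpenMP
    // back-end; partial results are merged by the declare-reduction combine.
    void dot(const double* a, const double* b, int N, double& result)
    {
      double sum = 0.0;
      RAJA::forall<RAJA::omp_parallel_for_exec>(
          RAJA::RangeSegment(0, N),
          RAJA::expt::Reduce<RAJA::operators::plus>(&sum),
          [=](int i, double& local_sum) { local_sum += a[i] * b[i]; });
      result = sum;
    }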
2 changes: 1 addition & 1 deletion include/RAJA/policy/openmp_target/forall.hpp
@@ -76,7 +76,7 @@ forall_impl(resources::Omp omp_res,
auto i = distance_it;

#pragma omp target teams distribute parallel for num_teams(numteams) \
schedule(static, 1) map(to : body,begin_it) redcution(combine: f_params)
schedule(static, 1) map(to : body,begin_it) reduction(combine: f_params)
for (i = 0; i < distance_it; ++i) {
Body ib = body;
RAJA::expt::invoke_body(f_params, ib, begin_it[i]);
4 changes: 2 additions & 2 deletions include/RAJA/policy/tensor/arch/avx512/avx512_int64.hpp
@@ -126,7 +126,7 @@ namespace expt
RAJA_INLINE
self_type &load_packed(element_type const *ptr){
// AVX512F
#if (defined(__GNUC__) && ((__GNUC__ >= 7) && (__GNUC__ <= 10))) \
#if (defined(__GNUC__) && ((__GNUC__ >= 7) && (__GNUC__ <= 10))) || \
(!defined(SYCL_LANGUAGE_VERSION) && defined(__INTEL_LLVM_COMPILER)) // Check for oneapi's icpx.
m_value = _mm512_maskz_loadu_epi64(~0, ptr); // May cause slowdown due to looping over 8 bytes, one at a time.
#else
@@ -185,7 +185,7 @@ namespace expt
RAJA_INLINE
self_type const &store_packed(element_type *ptr) const{
// AVX512F
#if (defined(__GNUC__) && ((__GNUC__ >= 7) && (__GNUC__ <= 10))) \
#if (defined(__GNUC__) && ((__GNUC__ >= 7) && (__GNUC__ <= 10))) || \
(!defined(SYCL_LANGUAGE_VERSION) && defined(__INTEL_LLVM_COMPILER)) // Check for oneapi's icpx.
_mm512_mask_storeu_epi64(ptr, ~0, m_value); // May cause slowdown due to looping over 8 bytes, one at a time.
#else
3 changes: 2 additions & 1 deletion scripts/lc-builds/blueos_clang.sh
@@ -22,7 +22,7 @@ shift 1
BUILD_SUFFIX=lc_blueos-clang-${COMP_VER}

echo
echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it"
echo "Creating build directory build_${BUILD_SUFFIX} and generating configuration in it"
echo "Configuration extra arguments:"
echo " $@"
echo
@@ -35,6 +35,7 @@ module load cmake/3.20.2
cmake \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_CXX_COMPILER=/usr/tce/packages/clang/clang-${COMP_VER}/bin/clang++ \
-DBLT_CXX_STD=c++14 \
-C ../host-configs/lc-builds/blueos/clang_X.cmake \
-DENABLE_OPENMP=On \
-DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \
3 changes: 2 additions & 1 deletion scripts/lc-builds/blueos_clang_omptarget.sh
@@ -22,7 +22,7 @@ shift 1
BUILD_SUFFIX=lc_blueos-clang-${COMP_VER}_omptarget

echo
echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it"
echo "Creating build directory build_${BUILD_SUFFIX} and generating configuration in it"
echo "Configuration extra arguments:"
echo " $@"
echo
@@ -35,6 +35,7 @@ module load cmake/3.20.2
cmake \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_CXX_COMPILER=/usr/tce/packages/clang/clang-${COMP_VER}/bin/clang++ \
-DBLT_CXX_STD=c++14 \
-C ../host-configs/lc-builds/blueos/clang_X.cmake \
-DENABLE_OPENMP=On \
-DENABLE_CUDA=Off \
3 changes: 2 additions & 1 deletion scripts/lc-builds/blueos_clangcuda.sh
@@ -27,7 +27,7 @@ shift 3
BUILD_SUFFIX=lc_blueos-clangcuda${COMP_CLANG_VER}_cuda${TOOLKIT_CUDA_VER}-${CUDA_ARCH}

echo
echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it"
echo "Creating build directory build_${BUILD_SUFFIX} and generating configuration in it"
echo "Configuration extra arguments:"
echo " $@"
echo
@@ -43,6 +43,7 @@ cmake \
-DCMAKE_CXX_COMPILER=/usr/tce/packages/clang/clang-${COMP_CLANG_VER}/bin/clang++ \
-DCMAKE_C_COMPILER=/usr/tce/packages/clang/clang-${COMP_CLANG_VER}/bin/clang \
-DCUDA_TOOLKIT_ROOT_DIR=/usr/tce/packages/cuda/cuda-${TOOLKIT_CUDA_VER} \
-DBLT_CXX_STD=c++14 \
-C ../host-configs/lc-builds/blueos/clangcuda_X.cmake \
-DENABLE_OPENMP=Off \
-DENABLE_CLANG_CUDA=On \
3 changes: 2 additions & 1 deletion scripts/lc-builds/blueos_gcc.sh
@@ -20,7 +20,7 @@ shift 1
BUILD_SUFFIX=lc_blueos-gcc-${COMP_VER}

echo
echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it"
echo "Creating build directory build_${BUILD_SUFFIX} and generating configuration in it"
echo "Configuration extra arguments:"
echo " $@"
echo
@@ -33,6 +33,7 @@ module load cmake/3.20.2
cmake \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_CXX_COMPILER=/usr/tce/packages/gcc/gcc-${COMP_VER}/bin/g++ \
-DBLT_CXX_STD=c++14 \
-C ../host-configs/lc-builds/blueos/gcc_X.cmake \
-DENABLE_OPENMP=On \
-DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \
11 changes: 6 additions & 5 deletions scripts/lc-builds/blueos_nvcc_clang.sh
@@ -11,11 +11,11 @@ if [[ $# -lt 3 ]]; then
echo
echo "You must pass 3 arguments to the script (in this order): "
echo " 1) compiler version number for nvcc"
echo " 2) CUDA compute architecture"
echo " 3) compiler version number for clang. "
echo " 2) CUDA compute architecture (number only, not 'sm_70' for example)"
echo " 3) compiler version number for clang"
echo
echo "For example: "
echo " blueos_nvcc_clang.sh 10.2.89 sm_70 10.0.1"
echo " blueos_nvcc_clang.sh 10.2.89 70 10.0.1"
exit
fi

@@ -27,7 +27,7 @@ shift 3
BUILD_SUFFIX=lc_blueos-nvcc${COMP_NVCC_VER}-${COMP_ARCH}-clang${COMP_CLANG_VER}

echo
echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it"
echo "Creating build directory build_${BUILD_SUFFIX} and generating configuration in it"
echo "Configuration extra arguments:"
echo " $@"
echo
@@ -40,12 +40,13 @@ module load cmake/3.20.2
cmake \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_CXX_COMPILER=/usr/tce/packages/clang/clang-${COMP_CLANG_VER}/bin/clang++ \
-DBLT_CXX_STD=c++14 \
-C ../host-configs/lc-builds/blueos/nvcc_clang_X.cmake \
-DENABLE_OPENMP=On \
-DENABLE_CUDA=On \
-DCUDA_TOOLKIT_ROOT_DIR=/usr/tce/packages/cuda/cuda-${COMP_NVCC_VER} \
-DCMAKE_CUDA_COMPILER=/usr/tce/packages/cuda/cuda-${COMP_NVCC_VER}/bin/nvcc \
-DCUDA_ARCH=${COMP_ARCH} \
-DCMAKE_CUDA_ARCHITECTURES=${COMP_ARCH} \
-DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \
"$@" \
..
11 changes: 6 additions & 5 deletions scripts/lc-builds/blueos_nvcc_gcc.sh
@@ -11,11 +11,11 @@ if [[ $# -lt 3 ]]; then
echo
echo "You must pass 3 arguments to the script (in this order): "
echo " 1) compiler version number for nvcc"
echo " 2) CUDA compute architecture"
echo " 3) compiler version number for gcc. "
echo " 2) CUDA compute architecture (number only, not 'sm_70' for example)"
echo " 3) compiler version number for gcc"
echo
echo "For example: "
echo " blueos_nvcc_gcc.sh 10.2.89 sm_70 8.3.1"
echo " blueos_nvcc_gcc.sh 10.2.89 70 8.3.1"
exit
fi

@@ -27,7 +27,7 @@ shift 3
BUILD_SUFFIX=lc_blueos-nvcc${COMP_NVCC_VER}-${COMP_ARCH}-gcc${COMP_GCC_VER}

echo
echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it"
echo "Creating build directory build_${BUILD_SUFFIX} and generating configuration in it"
echo "Configuration extra arguments:"
echo " $@"
echo
@@ -40,12 +40,13 @@ module load cmake/3.20.2
cmake \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_CXX_COMPILER=/usr/tce/packages/gcc/gcc-${COMP_GCC_VER}/bin/g++ \
-DBLT_CXX_STD=c++14 \
-C ../host-configs/lc-builds/blueos/nvcc_gcc_X.cmake \
-DENABLE_OPENMP=On \
-DENABLE_CUDA=On \
-DCUDA_TOOLKIT_ROOT_DIR=/usr/tce/packages/cuda/cuda-${COMP_NVCC_VER} \
-DCMAKE_CUDA_COMPILER=/usr/tce/packages/cuda/cuda-${COMP_NVCC_VER}/bin/nvcc \
-DCUDA_ARCH=${COMP_ARCH} \
-DCMAKE_CUDA_ARCHITECTURES=${COMP_ARCH} \
-DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \
"$@" \
..
