Merge pull request #1405 from LLNL/rc-v2022.10.4
Rc v2022.10.4
artv3 committed Dec 15, 2022
2 parents a83a448 + 887c9e0 commit c2a6b17
Showing 5 changed files with 31 additions and 11 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -16,7 +16,7 @@ include(CMakeDependentOption)
# Set version number
set(RAJA_VERSION_MAJOR 2022)
set(RAJA_VERSION_MINOR 10)
-set(RAJA_VERSION_PATCHLEVEL 3)
+set(RAJA_VERSION_PATCHLEVEL 4)

if (RAJA_LOADED AND (NOT RAJA_LOADED STREQUAL "${RAJA_VERSION_MAJOR}.${RAJA_VERSION_MINOR}.${RAJA_VERSION_PATCHLEVEL}"))
message(FATAL_ERROR "You are mixing RAJA versions. Loaded is ${RAJA_LOADED}, expected ${RAJA_VERSION_MAJOR}.${RAJA_VERSION_MINOR}.${RAJA_VERSION_PATCHLEVEL}")
7 changes: 7 additions & 0 deletions RELEASE_NOTES.md
@@ -19,6 +19,13 @@ Notable changes include:

* Bug fixes/improvements:

Version 2022.10.4 -- Release date 2022-12-15
============================================

This release fixes an issue that was found after the v2022.10.3 release.

* Fixes a device alignment bug in workgroups which led to missing symbol errors
  with the AMD clang compiler.

Version 2022.10.3 -- Release date 2022-12-01
============================================
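For context on the fix noted above, here is a minimal standalone sketch of how a host/device disagreement over alignof(std::max_align_t) surfaces as missing symbols (illustrative only, not code from this PR; Holder, FragileHolder, StableHolder, and kMaxAlign are hypothetical names): under CUDA/HIP, long double is demoted to double in the device pass, so a template instantiated on that alignment mangles differently on host and device.

// Illustrative sketch; compiles as ordinary C++.
#include <cstddef>

template <size_t Align>
struct Holder {
  alignas(Align) unsigned char storage[64];
};

// The host pass may see Holder<16> while the device pass sees Holder<8>;
// the mangled names differ, so a device symbol referenced from host-side
// launch code may simply not exist -> missing symbol errors at link/load.
using FragileHolder = Holder<alignof(std::max_align_t)>;

// The fix in this release: pin one constant for both passes.
constexpr size_t kMaxAlign = 16;  // plays the role of RAJA_MAX_ALIGN
using StableHolder = Holder<kMaxAlign>;

int main() {
  FragileHolder f;
  StableHolder s;
  (void)f; (void)s;
  return 0;
}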
2 changes: 1 addition & 1 deletion docs/conf.py
@@ -88,7 +88,7 @@
# The short X.Y version.
version = u'2022.10'
# The full version, including alpha/beta/rc tags.
-release = u'2022.10.3'
+release = u'2022.10.4'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
26 changes: 20 additions & 6 deletions include/RAJA/config.hpp.in
@@ -32,6 +32,7 @@
#define RAJA_config_HPP

#include <utility>
#include <cstddef>
#include <type_traits>

#if defined(_MSVC_LANG)
@@ -239,6 +240,15 @@ static_assert(RAJA_HAS_SOME_CXX14,
#define RAJA_PRAGMA(x) _Pragma(RAJA_STRINGIFY(x))
#endif


/* NOTE: Below we define RAJA_MAX_ALIGN for each compiler. Currently it is set to 16 bytes
for all cases except MSVC. Previously it was set by alignof(std::max_align_t), which, in Clang,
is based on sizeof(long double). This causes an inconsistency, as CUDA/HIP long doubles
are demoted to doubles, making alignof(std::max_align_t) return 8 bytes on the device and
16 bytes on the host. We therefore set a standard size and ensure validity through a
static_assert.
*/

namespace RAJA {

#if defined(RAJA_ENABLE_OPENMP) && !defined(__HIP_DEVICE_COMPILE__)
@@ -374,7 +384,7 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
//
// Configuration options for Intel compilers
//

#define RAJA_MAX_ALIGN 16
#if defined (RAJA_ENABLE_FORCEINLINE_RECURSIVE)
#define RAJA_FORCEINLINE_RECURSIVE RAJA_PRAGMA(forceinline recursive)
#else
@@ -387,6 +397,7 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
#define RAJA_INLINE inline __attribute__((always_inline))
#endif


#define RAJA_UNROLL RAJA_PRAGMA(unroll)
#define RAJA_UNROLL_COUNT(N) RAJA_PRAGMA(unroll(N))

@@ -412,9 +423,9 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
//
// Configuration options for GNU compilers
//
#define RAJA_MAX_ALIGN 16
#define RAJA_FORCEINLINE_RECURSIVE
#define RAJA_INLINE inline __attribute__((always_inline))

#if !defined(__NVCC__)
#define RAJA_UNROLL RAJA_PRAGMA(GCC unroll 10000)
#define RAJA_UNROLL_COUNT(N) RAJA_PRAGMA(GCC unroll N)
@@ -446,11 +457,11 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
//
// Configuration options for xlc compiler (i.e., bgq/sequoia).
//
#define RAJA_MAX_ALIGN 16
#define RAJA_FORCEINLINE_RECURSIVE
#define RAJA_INLINE inline __attribute__((always_inline))
#define RAJA_UNROLL
#define RAJA_UNROLL_COUNT(N)

// FIXME: alignx is breaking CUDA+xlc
#if defined(RAJA_ENABLE_CUDA)
#define RAJA_ALIGN_DATA(d) d
@@ -476,12 +487,11 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
//
// Configuration options for clang compilers
//
#define RAJA_MAX_ALIGN 16
#define RAJA_FORCEINLINE_RECURSIVE
#define RAJA_INLINE inline __attribute__((always_inline))
#define RAJA_UNROLL RAJA_PRAGMA(clang loop unroll(enable))
#define RAJA_UNROLL_COUNT(N) RAJA_PRAGMA(clang loop unroll_count(N))


// note that neither nvcc nor the Apple Clang compiler currently supports
// the __builtin_assume_aligned attribute
#if defined(RAJA_ENABLE_CUDA) || defined(__APPLE__)
@@ -514,7 +524,7 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;

// This is the same as undefined compiler, but squelches the warning message
#elif defined(RAJA_COMPILER_MSVC)

#define RAJA_MAX_ALIGN alignof(std::max_align_t)
#define RAJA_FORCEINLINE_RECURSIVE
#define RAJA_INLINE inline
#define RAJA_ALIGN_DATA(d) d
@@ -526,6 +536,7 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;
#else

#pragma message("RAJA_COMPILER unknown, using default empty macros.")
#define RAJA_MAX_ALIGN 16
#define RAJA_FORCEINLINE_RECURSIVE
#define RAJA_INLINE inline
#define RAJA_ALIGN_DATA(d) d
@@ -536,6 +547,9 @@ const int DATA_ALIGN = @RAJA_DATA_ALIGN@;

#endif

static_assert(RAJA_MAX_ALIGN >= alignof(std::max_align_t) && (RAJA_MAX_ALIGN/alignof(std::max_align_t))*alignof(std::max_align_t) == RAJA_MAX_ALIGN,
"Inconsistent RAJA_MAX_ALIGN size");

#cmakedefine RAJA_HAVE_POSIX_MEMALIGN
#cmakedefine RAJA_HAVE_ALIGNED_ALLOC
#cmakedefine RAJA_HAVE_MM_MALLOC
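Taken together, the config.hpp.in changes reduce to a small pattern, sketched standalone below (a sketch under assumptions: plain C++ without CMake substitution; MY_MAX_ALIGN is a hypothetical stand-in for RAJA_MAX_ALIGN). Since practical alignments are powers of two, the >= clause already implies the multiple-of clause; the latter is defensive.

// Standalone sketch of the per-compiler define plus trailing static_assert.
#include <cstddef>

#if defined(_MSC_VER)
// MSVC keeps the language's own notion of maximal alignment.
#define MY_MAX_ALIGN alignof(std::max_align_t)
#else
// Everyone else gets a fixed 16 bytes so host and device passes agree.
#define MY_MAX_ALIGN 16
#endif

// Reject any configuration where the chosen value under-aligns relative
// to the platform's fundamental alignment.
static_assert(MY_MAX_ALIGN >= alignof(std::max_align_t) &&
                  (MY_MAX_ALIGN / alignof(std::max_align_t)) *
                          alignof(std::max_align_t) == MY_MAX_ALIGN,
              "Inconsistent MY_MAX_ALIGN size");

// Example use: a buffer suitably aligned for any fundamental type.
alignas(MY_MAX_ALIGN) static unsigned char scratch[256];

int main() { (void)scratch; return 0; }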
5 changes: 2 additions & 3 deletions include/RAJA/pattern/WorkGroup/WorkStruct.hpp
@@ -45,7 +45,7 @@ struct WorkStruct;
* sizeof(GenericWorkStruct) <= sizeof(WorkStruct<size>)
*/
template < typename Dispatcher_T >
-using GenericWorkStruct = WorkStruct<alignof(std::max_align_t), Dispatcher_T>;
+using GenericWorkStruct = WorkStruct<RAJA_MAX_ALIGN, Dispatcher_T>;

template < size_t size, Platform platform, typename dispatch_policy, typename DispatcherID, typename ... CallArgs >
struct WorkStruct<size, Dispatcher<platform, dispatch_policy, DispatcherID, CallArgs...>>
@@ -71,7 +71,6 @@ struct WorkStruct<size, Dispatcher<platform, dispatch_policy, DispatcherID, Call
"WorkStruct and GenericWorkStruct must have obj at the same offset");
static_assert(sizeof(value_type) <= sizeof(true_value_type),
"WorkStruct must not be smaller than GenericWorkStruct");

true_value_type* value_ptr = static_cast<true_value_type*>(ptr);

value_ptr->dispatcher = dispatcher;
@@ -112,7 +111,7 @@ struct WorkStruct<size, Dispatcher<platform, dispatch_policy, DispatcherID, Call

const dispatcher_type* dispatcher;
typename dispatcher_type::invoker_type invoke;
-typename std::aligned_storage<size, alignof(std::max_align_t)>::type obj;
+typename std::aligned_storage<size, RAJA_MAX_ALIGN>::type obj;
};

} // namespace detail
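To see why the alignment parameter of obj matters, here is a reduced sketch of the same type-erasure idiom (hypothetical names, not RAJA's API; it handles only trivially copyable callables, whereas RAJA's WorkStruct also carries a dispatcher): the callable is placement-constructed into a fixed-size buffer, so the buffer must be at least as aligned as anything stored in it.

// Reduced sketch of the WorkStruct storage idiom; ErasedCall is a
// hypothetical stand-in, not RAJA's API.
#include <cstddef>
#include <cstdio>
#include <new>
#include <type_traits>

constexpr size_t kMaxAlign = 16;  // plays the role of RAJA_MAX_ALIGN

template <size_t Size>
struct ErasedCall {
  void (*invoke)(void*);
  // Equivalent in spirit to std::aligned_storage<Size, kMaxAlign>::type.
  alignas(kMaxAlign) unsigned char obj[Size];

  template <typename F>
  static ErasedCall make(F f) {
    static_assert(sizeof(F) <= Size, "callable too large for the buffer");
    static_assert(alignof(F) <= kMaxAlign, "buffer under-aligned for callable");
    static_assert(std::is_trivially_copyable<F>::value,
                  "sketch only handles trivially copyable callables");
    ErasedCall e;
    ::new (static_cast<void*>(e.obj)) F(f);  // placement-construct into obj
    e.invoke = [](void* p) { (*static_cast<F*>(p))(); };
    return e;
  }

  void operator()() { invoke(obj); }
};

int main() {
  auto call = ErasedCall<32>::make([] { std::puts("invoked"); });
  call();  // prints "invoked"
  return 0;
}

Had obj kept alignof(std::max_align_t), the host and device instantiations of WorkStruct could disagree on layout and mangled name, which is precisely the bug this release fixes.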
