Skip to content

Commit

Permalink
Merge pull request #1628 from LLNL/artv3/sycl-thread-grid-layout-fix
Browse files Browse the repository at this point in the history
Swap ordering of thread configuration in Sycl
  • Loading branch information
artv3 committed Apr 30, 2024
2 parents e7e2477 + aef818c commit c315ddd
Show file tree
Hide file tree
Showing 8 changed files with 42 additions and 26 deletions.
18 changes: 17 additions & 1 deletion docs/sphinx/user_guide/feature/policies.rst
Original file line number Diff line number Diff line change
Expand Up @@ -524,10 +524,26 @@ write more explicit policies.
ignored. For example in cuda_thread_x_direct block_size is
unspecified so a runtime number of threads is used, but grid_size is
ignored so blocks are ignored when getting indices.

GPU Policies for SYCL
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. note:: SYCL uses C++-style ordering in which the right
most index corresponds to having unit stride.
In a three-dimensional compute grid this means
that dimension 2 has the unit stride while
dimension 0 has the longest stride. This is
important to note as the ordering is reverse
compared to the CUDA and HIP programming models.
CUDA and HIP employ a x/y/z ordering in which
dimension x has the unit stride.

When using RAJA::launch, thread and team configuration
follows CUDA and HIP programming models and is always
configured in three-dimensions. This means that dimension
2 always exists and should be used as one would use the
x dimension for CUDA and HIP.

======================================== ============= ==============================
SYCL Execution Policies Works with Brief description
======================================== ============= ==============================
Expand Down
16 changes: 8 additions & 8 deletions include/RAJA/policy/sycl/launch.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,13 @@ struct LaunchExecute<RAJA::sycl_launch_t<async, 0>> {
// Compute the number of blocks and threads
//

const ::sycl::range<3> blockSize(params.threads.value[0],
const ::sycl::range<3> blockSize(params.threads.value[2],
params.threads.value[1],
params.threads.value[2]);
params.threads.value[0]);

const ::sycl::range<3> gridSize(params.threads.value[0] * params.teams.value[0],
const ::sycl::range<3> gridSize(params.threads.value[2] * params.teams.value[2],
params.threads.value[1] * params.teams.value[1],
params.threads.value[2] * params.teams.value[2]);
params.threads.value[0] * params.teams.value[0]);

// Only launch kernel if we have something to iterate over
constexpr size_t zero = 0;
Expand Down Expand Up @@ -138,13 +138,13 @@ struct LaunchExecute<RAJA::sycl_launch_t<async, 0>> {
// Compute the number of blocks and threads
//

const ::sycl::range<3> blockSize(params.threads.value[0],
const ::sycl::range<3> blockSize(params.threads.value[2],
params.threads.value[1],
params.threads.value[2]);
params.threads.value[0]);

const ::sycl::range<3> gridSize(params.threads.value[0] * params.teams.value[0],
const ::sycl::range<3> gridSize(params.threads.value[2] * params.teams.value[2],
params.threads.value[1] * params.teams.value[1],
params.threads.value[2] * params.teams.value[2]);
params.threads.value[0] * params.teams.value[0]);

// Only launch kernel if we have something to iterate over
constexpr size_t zero = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ using Hip_launch_policies = camp::list<hip_direct_policies>;
using sycl_direct_policies =
camp::list<
RAJA::LaunchPolicy<RAJA::sycl_launch_t<true>>,
RAJA::LoopPolicy<RAJA::sycl_group_0_direct>,
RAJA::LoopPolicy<RAJA::sycl_local_0_direct>
RAJA::LoopPolicy<RAJA::sycl_group_2_direct>,
RAJA::LoopPolicy<RAJA::sycl_local_2_direct>
>;

using Sycl_launch_policies = camp::list<sycl_direct_policies>;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,12 +100,12 @@ using Hip_launch_policies = camp::list<hip_direct_policies>;
using sycl_direct_policies =
camp::list<
RAJA::LaunchPolicy<RAJA::sycl_launch_t<true>>,
RAJA::LoopPolicy<RAJA::sycl_group_2_direct>,
RAJA::LoopPolicy<RAJA::sycl_group_0_direct>, //slowest
RAJA::LoopPolicy<RAJA::sycl_group_1_direct>,
RAJA::LoopPolicy<RAJA::sycl_group_0_direct>,
RAJA::LoopPolicy<RAJA::sycl_local_2_direct>,
RAJA::LoopPolicy<RAJA::sycl_group_2_direct>, //fastest
RAJA::LoopPolicy<RAJA::sycl_local_0_direct>,
RAJA::LoopPolicy<RAJA::sycl_local_1_direct>,
RAJA::LoopPolicy<RAJA::sycl_local_0_direct>
RAJA::LoopPolicy<RAJA::sycl_local_2_direct>
>;

using Sycl_launch_policies = camp::list<sycl_direct_policies>;
Expand Down
2 changes: 1 addition & 1 deletion test/include/RAJA_test-launch-execpol.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ using Hip_launch_policies = camp::list<

using sycl_policies = camp::list<
RAJA::LaunchPolicy<RAJA::sycl_launch_t<true>>,
RAJA::LoopPolicy<RAJA::sycl_global_item_0>>;
RAJA::LoopPolicy<RAJA::sycl_global_item_2>>;

using Sycl_launch_policies = camp::list<
sycl_policies
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ using Hip_launch_policies = camp::list<
#if defined(RAJA_ENABLE_SYCL)
using sycl_loop_policies = camp::list<
RAJA::LaunchPolicy<RAJA::sycl_launch_t<true>>,
RAJA::LoopPolicy<RAJA::sycl_group_0_loop>,
RAJA::LoopPolicy<RAJA::sycl_local_0_loop>
RAJA::LoopPolicy<RAJA::sycl_group_2_loop>,
RAJA::LoopPolicy<RAJA::sycl_local_2_loop>
>;

using Sycl_launch_policies = camp::list<
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,12 +95,12 @@ using Hip_launch_policies = camp::list<
#if defined(RAJA_ENABLE_SYCL)
using sycl_loop_policies = camp::list<
RAJA::LaunchPolicy<RAJA::sycl_launch_t<true>>,
RAJA::LoopPolicy<RAJA::sycl_group_2_loop>,
RAJA::LoopPolicy<RAJA::sycl_group_0_loop>, //slowest index
RAJA::LoopPolicy<RAJA::sycl_group_1_loop>,
RAJA::LoopPolicy<RAJA::sycl_group_0_loop>,
RAJA::LoopPolicy<RAJA::sycl_local_2_loop>,
RAJA::LoopPolicy<RAJA::sycl_group_2_loop>, //fastest index
RAJA::LoopPolicy<RAJA::sycl_local_0_loop>,
RAJA::LoopPolicy<RAJA::sycl_local_1_loop>,
RAJA::LoopPolicy<RAJA::sycl_local_0_loop>
RAJA::LoopPolicy<RAJA::sycl_local_2_loop>
>;

using Sycl_launch_policies = camp::list<
Expand Down
8 changes: 4 additions & 4 deletions test/include/RAJA_test-launch-runtime-execpol.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ using Sequential_launch_policies = camp::list<seq_hip_policies>;
using seq_sycl_policies =
camp::list<
RAJA::LaunchPolicy<RAJA::seq_launch_t,RAJA::sycl_launch_t<true>>,
RAJA::LoopPolicy<RAJA::seq_exec, RAJA::sycl_group_0_direct>,
RAJA::LoopPolicy<RAJA::seq_exec,RAJA::sycl_local_0_loop>
RAJA::LoopPolicy<RAJA::seq_exec, RAJA::sycl_group_2_direct>,
RAJA::LoopPolicy<RAJA::seq_exec,RAJA::sycl_local_2_loop>
>;

using Sequential_launch_policies = camp::list<seq_sycl_policies>;
Expand Down Expand Up @@ -110,8 +110,8 @@ using OpenMP_launch_policies = camp::list<omp_hip_policies>;
using omp_sycl_policies =
camp::list<
RAJA::LaunchPolicy<RAJA::omp_launch_t,RAJA::sycl_launch_t<false>>,
RAJA::LoopPolicy<RAJA::omp_for_exec, RAJA::sycl_group_0_direct>,
RAJA::LoopPolicy<RAJA::seq_exec,RAJA::sycl_local_0_loop>
RAJA::LoopPolicy<RAJA::omp_for_exec, RAJA::sycl_group_2_direct>,
RAJA::LoopPolicy<RAJA::seq_exec,RAJA::sycl_local_2_loop>
>;

using OpenMP_launch_policies = camp::list<omp_sycl_policies>;
Expand Down

0 comments on commit c315ddd

Please sign in to comment.