Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Swap ordering of thread configuration in Sycl #1628

Merged
merged 9 commits into from
Apr 30, 2024
18 changes: 17 additions & 1 deletion docs/sphinx/user_guide/feature/policies.rst
Original file line number Diff line number Diff line change
Expand Up @@ -524,10 +524,26 @@ write more explicit policies.
ignored. For example in cuda_thread_x_direct block_size is
unspecified so a runtime number of threads is used, but grid_size is
ignored so blocks are ignored when getting indices.

GPU Policies for SYCL
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. note:: SYCL uses C++-style ordering in which the right
most index corresponds to having unit stride.
In a three-dimensional compute grid this means
that dimension 2 has the unit stride while
dimension 0 has the longest stride. This is
important to note as the ordering is reverse
compared to the CUDA and HIP programming models.
CUDA and HIP employ a x/y/z ordering in which
dimension x has the unit stride.

When using RAJA::launch, thread and team configuration
follows CUDA and HIP programming models and is always
configured in three-dimensions. This means that dimension
2 always exists and should be used as one would use the
x dimension for CUDA and HIP.

======================================== ============= ==============================
SYCL Execution Policies Works with Brief description
======================================== ============= ==============================
Expand Down
16 changes: 8 additions & 8 deletions include/RAJA/policy/sycl/launch.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,13 @@ struct LaunchExecute<RAJA::sycl_launch_t<async, 0>> {
// Compute the number of blocks and threads
//

const ::sycl::range<3> blockSize(params.threads.value[0],
const ::sycl::range<3> blockSize(params.threads.value[2],
params.threads.value[1],
params.threads.value[2]);
params.threads.value[0]);

const ::sycl::range<3> gridSize(params.threads.value[0] * params.teams.value[0],
const ::sycl::range<3> gridSize(params.threads.value[2] * params.teams.value[2],
params.threads.value[1] * params.teams.value[1],
params.threads.value[2] * params.teams.value[2]);
params.threads.value[0] * params.teams.value[0]);

// Only launch kernel if we have something to iterate over
constexpr size_t zero = 0;
Expand Down Expand Up @@ -138,13 +138,13 @@ struct LaunchExecute<RAJA::sycl_launch_t<async, 0>> {
// Compute the number of blocks and threads
//

const ::sycl::range<3> blockSize(params.threads.value[0],
const ::sycl::range<3> blockSize(params.threads.value[2],
params.threads.value[1],
params.threads.value[2]);
params.threads.value[0]);

const ::sycl::range<3> gridSize(params.threads.value[0] * params.teams.value[0],
const ::sycl::range<3> gridSize(params.threads.value[2] * params.teams.value[2],
params.threads.value[1] * params.teams.value[1],
params.threads.value[2] * params.teams.value[2]);
params.threads.value[0] * params.teams.value[0]);

// Only launch kernel if we have something to iterate over
constexpr size_t zero = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,8 @@ using Hip_launch_policies = camp::list<hip_direct_policies>;
using sycl_direct_policies =
camp::list<
RAJA::LaunchPolicy<RAJA::sycl_launch_t<true>>,
RAJA::LoopPolicy<RAJA::sycl_group_0_direct>,
RAJA::LoopPolicy<RAJA::sycl_local_0_direct>
RAJA::LoopPolicy<RAJA::sycl_group_2_direct>,
RAJA::LoopPolicy<RAJA::sycl_local_2_direct>
>;

using Sycl_launch_policies = camp::list<sycl_direct_policies>;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,12 +100,12 @@ using Hip_launch_policies = camp::list<hip_direct_policies>;
using sycl_direct_policies =
camp::list<
RAJA::LaunchPolicy<RAJA::sycl_launch_t<true>>,
RAJA::LoopPolicy<RAJA::sycl_group_2_direct>,
RAJA::LoopPolicy<RAJA::sycl_group_0_direct>, //slowest
RAJA::LoopPolicy<RAJA::sycl_group_1_direct>,
RAJA::LoopPolicy<RAJA::sycl_group_0_direct>,
RAJA::LoopPolicy<RAJA::sycl_local_2_direct>,
RAJA::LoopPolicy<RAJA::sycl_group_2_direct>, //fastest
RAJA::LoopPolicy<RAJA::sycl_local_0_direct>,
RAJA::LoopPolicy<RAJA::sycl_local_1_direct>,
RAJA::LoopPolicy<RAJA::sycl_local_0_direct>
RAJA::LoopPolicy<RAJA::sycl_local_2_direct>
>;

using Sycl_launch_policies = camp::list<sycl_direct_policies>;
Expand Down
2 changes: 1 addition & 1 deletion test/include/RAJA_test-launch-execpol.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ using Hip_launch_policies = camp::list<

using sycl_policies = camp::list<
RAJA::LaunchPolicy<RAJA::sycl_launch_t<true>>,
RAJA::LoopPolicy<RAJA::sycl_global_item_0>>;
RAJA::LoopPolicy<RAJA::sycl_global_item_2>>;

using Sycl_launch_policies = camp::list<
sycl_policies
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ using Hip_launch_policies = camp::list<
#if defined(RAJA_ENABLE_SYCL)
using sycl_loop_policies = camp::list<
RAJA::LaunchPolicy<RAJA::sycl_launch_t<true>>,
RAJA::LoopPolicy<RAJA::sycl_group_0_loop>,
RAJA::LoopPolicy<RAJA::sycl_local_0_loop>
RAJA::LoopPolicy<RAJA::sycl_group_2_loop>,
RAJA::LoopPolicy<RAJA::sycl_local_2_loop>
>;

using Sycl_launch_policies = camp::list<
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,12 +95,12 @@ using Hip_launch_policies = camp::list<
#if defined(RAJA_ENABLE_SYCL)
using sycl_loop_policies = camp::list<
RAJA::LaunchPolicy<RAJA::sycl_launch_t<true>>,
RAJA::LoopPolicy<RAJA::sycl_group_2_loop>,
RAJA::LoopPolicy<RAJA::sycl_group_0_loop>, //slowest index
RAJA::LoopPolicy<RAJA::sycl_group_1_loop>,
RAJA::LoopPolicy<RAJA::sycl_group_0_loop>,
RAJA::LoopPolicy<RAJA::sycl_local_2_loop>,
RAJA::LoopPolicy<RAJA::sycl_group_2_loop>, //fastest index
RAJA::LoopPolicy<RAJA::sycl_local_0_loop>,
RAJA::LoopPolicy<RAJA::sycl_local_1_loop>,
RAJA::LoopPolicy<RAJA::sycl_local_0_loop>
RAJA::LoopPolicy<RAJA::sycl_local_2_loop>
>;

using Sycl_launch_policies = camp::list<
Expand Down
8 changes: 4 additions & 4 deletions test/include/RAJA_test-launch-runtime-execpol.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ using Sequential_launch_policies = camp::list<seq_hip_policies>;
using seq_sycl_policies =
camp::list<
RAJA::LaunchPolicy<RAJA::seq_launch_t,RAJA::sycl_launch_t<true>>,
RAJA::LoopPolicy<RAJA::seq_exec, RAJA::sycl_group_0_direct>,
RAJA::LoopPolicy<RAJA::seq_exec,RAJA::sycl_local_0_loop>
RAJA::LoopPolicy<RAJA::seq_exec, RAJA::sycl_group_2_direct>,
RAJA::LoopPolicy<RAJA::seq_exec,RAJA::sycl_local_2_loop>
>;

using Sequential_launch_policies = camp::list<seq_sycl_policies>;
Expand Down Expand Up @@ -110,8 +110,8 @@ using OpenMP_launch_policies = camp::list<omp_hip_policies>;
using omp_sycl_policies =
camp::list<
RAJA::LaunchPolicy<RAJA::omp_launch_t,RAJA::sycl_launch_t<false>>,
RAJA::LoopPolicy<RAJA::omp_for_exec, RAJA::sycl_group_0_direct>,
RAJA::LoopPolicy<RAJA::seq_exec,RAJA::sycl_local_0_loop>
RAJA::LoopPolicy<RAJA::omp_for_exec, RAJA::sycl_group_2_direct>,
RAJA::LoopPolicy<RAJA::seq_exec,RAJA::sycl_local_2_loop>
>;

using OpenMP_launch_policies = camp::list<omp_sycl_policies>;
Expand Down