From c03ead175d6e2c0b4aafa4f12e2c1ecdb763e026 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" Date: Mon, 19 Sep 2022 14:33:50 +0200 Subject: [PATCH 01/94] Use raw Spack packages for update (from spack@develop) --- scripts/spack_packages/camp/package.py | 92 ++-- scripts/spack_packages/raja/package.py | 556 +++++++++---------------- 2 files changed, 249 insertions(+), 399 deletions(-) diff --git a/scripts/spack_packages/camp/package.py b/scripts/spack_packages/camp/package.py index 3bff14ef61..8f40f6b295 100644 --- a/scripts/spack_packages/camp/package.py +++ b/scripts/spack_packages/camp/package.py @@ -8,6 +8,14 @@ from spack.package import * +def hip_repair_options(options, spec): + # there is only one dir like this, but the version component is unknown + options.append( + "-DHIP_CLANG_INCLUDE_PATH=" + + glob.glob("{}/lib/clang/*/include".format(spec["llvm-amdgpu"].prefix))[0] + ) + + class Camp(CMakePackage, CudaPackage, ROCmPackage): """ Compiler agnostic metaprogramming library providing concepts, @@ -15,69 +23,61 @@ class Camp(CMakePackage, CudaPackage, ROCmPackage): """ homepage = "https://github.com/LLNL/camp" - git = "https://github.com/LLNL/camp.git" - url = "https://github.com/LLNL/camp/archive/v0.1.0.tar.gz" + git = "https://github.com/LLNL/camp.git" + url = "https://github.com/LLNL/camp/archive/v0.1.0.tar.gz" - maintainers = ['trws'] + maintainers = ["trws"] - version('main', branch='main', submodules='True') - version('2022.03.0', sha256='e9090d5ee191ea3a8e36b47a8fe78f3ac95d51804f1d986d931e85b8f8dad721') - version('0.3.0', sha256='129431a049ca5825443038ad5a37a86ba6d09b2618d5fe65d35f83136575afdb') - version('0.2.3', sha256='58a0f3bd5eadb588d7dc83f3d050aff8c8db639fc89e8d6553f9ce34fc2421a7') - version('0.2.2', sha256='194d38b57e50e3494482a7f94940b27f37a2bee8291f2574d64db342b981d819') - version('0.1.0', sha256='fd4f0f2a60b82a12a1d9f943f8893dc6fe770db493f8fae5ef6f7d0c439bebcc') + version("main", branch="main", submodules="True") + version("2022.03.2", sha256="bc4aaeacfe8f2912e28f7a36fc731ab9e481bee15f2c6daf0cb208eed3f201eb") + version("2022.03.0", sha256="e9090d5ee191ea3a8e36b47a8fe78f3ac95d51804f1d986d931e85b8f8dad721") + version("0.3.0", sha256="129431a049ca5825443038ad5a37a86ba6d09b2618d5fe65d35f83136575afdb") + version("0.2.3", sha256="58a0f3bd5eadb588d7dc83f3d050aff8c8db639fc89e8d6553f9ce34fc2421a7") + version("0.2.2", sha256="194d38b57e50e3494482a7f94940b27f37a2bee8291f2574d64db342b981d819") + version("0.1.0", sha256="fd4f0f2a60b82a12a1d9f943f8893dc6fe770db493f8fae5ef6f7d0c439bebcc") # TODO: figure out gtest dependency and then set this default True. - variant('tests', default=False, description='Build tests') - variant('openmp', default=False, description='Build with OpenMP support') + variant("tests", default=False, description="Build tests") + variant("openmp", default=False, description="Build OpenMP support") - depends_on('cub', when='+cuda') + depends_on("cub", when="+cuda") - depends_on('blt') + depends_on("blt") def cmake_args(self): spec = self.spec options = [] - options.append("-DBLT_SOURCE_DIR={0}".format(spec['blt'].prefix)) + options.append("-DBLT_SOURCE_DIR={0}".format(spec["blt"].prefix)) - if '+cuda' in spec: - options.extend([ - '-DENABLE_CUDA=ON', - '-DCUDA_TOOLKIT_ROOT_DIR=%s' % (spec['cuda'].prefix)]) + options.append("-DENABLE_OPENMP=" + ("On" if "+openmp" in spec else "Off")) + if "+cuda" in spec: + options.extend( + ["-DENABLE_CUDA=ON", "-DCUDA_TOOLKIT_ROOT_DIR=%s" % (spec["cuda"].prefix)] + ) - if not spec.satisfies('cuda_arch=none'): - cuda_arch = spec.variants['cuda_arch'].value - options.append('-DCMAKE_CUDA_ARCHITECTURES={0}'.format(cuda_arch[0])) - options.append('-DCUDA_ARCH=sm_{0}'.format(cuda_arch[0])) - flag = '-arch sm_{0}'.format(cuda_arch[0]) - options.append('-DCMAKE_CUDA_FLAGS:STRING={0}'.format(flag)) + if not spec.satisfies("cuda_arch=none"): + cuda_arch = spec.variants["cuda_arch"].value + options.append("-DCMAKE_CUDA_ARCHITECTURES={0}".format(cuda_arch[0])) + options.append("-DCUDA_ARCH=sm_{0}".format(cuda_arch[0])) + flag = "-arch sm_{0}".format(cuda_arch[0]) + options.append("-DCMAKE_CUDA_FLAGS:STRING={0}".format(flag)) else: - options.append('-DENABLE_CUDA=OFF') - - if '+rocm' in spec: - options.extend([ - '-DENABLE_HIP=ON', - '-DHIP_ROOT_DIR={0}'.format(spec['hip'].prefix) - ]) - archs = self.spec.variants['amdgpu_target'].value - if archs != 'none': + options.append("-DENABLE_CUDA=OFF") + + if "+rocm" in spec: + options.extend(["-DENABLE_HIP=ON", "-DHIP_ROOT_DIR={0}".format(spec["hip"].prefix)]) + + hip_repair_options(options, spec) + + archs = self.spec.variants["amdgpu_target"].value + if archs != "none": arch_str = ",".join(archs) - options.append( - '-DHIP_HIPCC_FLAGS=--amdgpu-target={0}'.format(arch_str) - ) - # there is only one dir like this, but the version component is unknown - options.append( - "-DHIP_CLANG_INCLUDE_PATH=" + glob.glob( - "{}/lib/clang/*/include".format(spec['llvm-amdgpu'].prefix) - )[0] - ) + options.append("-DHIP_HIPCC_FLAGS=--amdgpu-target={0}".format(arch_str)) else: - options.append('-DENABLE_HIP=OFF') - - options.append(self.define_from_variant('ENABLE_TESTS', 'tests')) - options.append(self.define_from_variant('ENABLE_OPENMP', 'openmp')) + options.append("-DENABLE_HIP=OFF") + options.append(self.define_from_variant("ENABLE_TESTS", "tests")) return options diff --git a/scripts/spack_packages/raja/package.py b/scripts/spack_packages/raja/package.py index 35b138e447..4850ed1c54 100644 --- a/scripts/spack_packages/raja/package.py +++ b/scripts/spack_packages/raja/package.py @@ -1,382 +1,232 @@ -# Copyright 2013-2020 Lawrence Livermore National Security, LLC and other +# Copyright 2013-2022 Lawrence Livermore National Security, LLC and other # Spack Project Developers. See the top-level COPYRIGHT file for details. # # SPDX-License-Identifier: (Apache-2.0 OR MIT) - -from spack import * - -import glob -import os import socket -from os import environ as env -from os.path import join as pjoin - -import re - -def cmake_cache_entry(name, value, comment=""): - """Generate a string for a cmake cache variable""" - - return 'set(%s "%s" CACHE PATH "%s")\n\n' % (name,value,comment) - - -def cmake_cache_string(name, string, comment=""): - """Generate a string for a cmake cache variable""" - - return 'set(%s "%s" CACHE STRING "%s")\n\n' % (name,string,comment) - - -def cmake_cache_option(name, boolean_value, comment=""): - """Generate a string for a cmake configuration option""" - - value = "ON" if boolean_value else "OFF" - return 'set(%s %s CACHE BOOL "%s")\n\n' % (name,value,comment) - - -def get_spec_path(spec, package_name, path_replacements = {}, use_bin = False) : - """Extracts the prefix path for the given spack package - path_replacements is a dictionary with string replacements for the path. - """ - - if not use_bin: - path = spec[package_name].prefix - else: - path = spec[package_name].prefix.bin - - path = os.path.realpath(path) - - for key in path_replacements: - path = path.replace(key, path_replacements[key]) - - return path - - -class Raja(CMakePackage, CudaPackage, ROCmPackage): - """RAJA Performance Portability Abstractions for C++ HPC Applications.""" - - homepage = "https://github.com/LLNL/RAJA" - git = "https://github.com/LLNL/RAJA.git" - tags = ['radiuss', 'e4s'] - - maintainers = ['davidbeckingsale'] - - version('develop', branch='develop', submodules='True') - version('main', branch='main', submodules='True') - version('0.14.1', tag='v0.14.1', submodules="True") - version('0.14.0', tag='v0.14.0', submodules="True") - version('0.13.0', tag='v0.13.0', submodules="True") - version('0.12.1', tag='v0.12.1', submodules="True") - version('0.12.0', tag='v0.12.0', submodules="True") - version('0.11.0', tag='v0.11.0', submodules="True") - version('0.10.1', tag='v0.10.1', submodules="True") - version('0.10.0', tag='v0.10.0', submodules="True") - version('0.9.0', tag='v0.9.0', submodules="True") - version('0.8.0', tag='v0.8.0', submodules="True") - version('0.7.0', tag='v0.7.0', submodules="True") - version('0.6.0', tag='v0.6.0', submodules="True") - version('0.5.3', tag='v0.5.3', submodules="True") - version('0.5.2', tag='v0.5.2', submodules="True") - version('0.5.1', tag='v0.5.1', submodules="True") - version('0.5.0', tag='v0.5.0', submodules="True") - version('0.4.1', tag='v0.4.1', submodules="True") - version('0.4.0', tag='v0.4.0', submodules="True") - - variant('openmp', default=True, description='Build OpenMP backend') - variant('shared', default=False, description='Build Shared Libs') - variant('libcpp', default=False, description='Uses libc++ instead of libstdc++') - variant('tests', default='basic', values=('none', 'basic', 'benchmarks'), - multi=False, description='Tests to run') - variant('desul', default=False, description='Build Desul Atomics backend') - - depends_on('cmake@3.9:', type='build') - - depends_on('blt@0.4.1', type='build', when='@main') - depends_on('blt@0.4.1:', type='build') - - depends_on('camp') - depends_on('camp@main') # TODO: remove this ASAP - depends_on('camp+rocm', when='+rocm') - depends_on('camp+openmp', when='+openmp') - for val in ROCmPackage.amdgpu_targets: - depends_on('camp amdgpu_target=%s' % val, when='amdgpu_target=%s' % val) - - depends_on('camp+cuda', when='+cuda') - for sm_ in CudaPackage.cuda_arch_values: - depends_on('camp cuda_arch={0}'.format(sm_), - when='cuda_arch={0}'.format(sm_)) - - conflicts('+openmp', when='+rocm') - depends_on('rocprim', when='+rocm') +from spack.package import * +from spack.pkg.builtin.camp import hip_repair_options + + +class Raja(CachedCMakePackage, CudaPackage, ROCmPackage): + """RAJA Parallel Framework.""" + + homepage = "https://software.llnl.gov/RAJA/" + git = "https://github.com/LLNL/RAJA.git" + tags = ["radiuss", "e4s"] + + maintainers = ["davidbeckingsale"] + + version("develop", branch="develop", submodules=False) + version("main", branch="main", submodules=False) + version("2022.03.0", tag="v2022.03.0", submodules=False) + version("0.14.0", tag="v0.14.0", submodules="True") + version("0.13.0", tag="v0.13.0", submodules="True") + version("0.12.1", tag="v0.12.1", submodules="True") + version("0.12.0", tag="v0.12.0", submodules="True") + version("0.11.0", tag="v0.11.0", submodules="True") + version("0.10.1", tag="v0.10.1", submodules="True") + version("0.10.0", tag="v0.10.0", submodules="True") + version("0.9.0", tag="v0.9.0", submodules="True") + version("0.8.0", tag="v0.8.0", submodules="True") + version("0.7.0", tag="v0.7.0", submodules="True") + version("0.6.0", tag="v0.6.0", submodules="True") + version("0.5.3", tag="v0.5.3", submodules="True") + version("0.5.2", tag="v0.5.2", submodules="True") + version("0.5.1", tag="v0.5.1", submodules="True") + version("0.5.0", tag="v0.5.0", submodules="True") + version("0.4.1", tag="v0.4.1", submodules="True") + version("0.4.0", tag="v0.4.0", submodules="True") + + # export targets when building pre-2.4.0 release with BLT 0.4.0+ + patch( + "https://github.com/LLNL/RAJA/commit/eca1124ee4af380d6613adc6012c307d1fd4176b.patch?full_index=1", + sha256="12bb78c00b6683ad3e7fd4e3f87f9776bae074b722431b79696bc862816735ef", + when="@:0.13.0 ^blt@0.4:", + ) + + variant("openmp", default=True, description="Build OpenMP backend") + variant("shared", default=True, description="Build Shared Libs") + variant("examples", default=True, description="Build examples.") + variant("exercises", default=True, description="Build exercises.") + # TODO: figure out gtest dependency and then set this default True + # and remove the +tests conflict below. + variant("tests", default=False, description="Build tests") + + depends_on("blt") + depends_on("blt@0.5.0:", type="build", when="@0.14.1:") + depends_on("blt@0.4.1", type="build", when="@0.14.0") + depends_on("blt@0.4.0:", type="build", when="@0.13.0") + depends_on("blt@0.3.6:", type="build", when="@:0.12.0") + + depends_on("camp@0.2.2:0.2.3", when="@0.14.0") + depends_on("camp@0.1.0", when="@0.10.0:0.13.0") + depends_on("camp@2022.03.2:", when="@2022.03.0:") + depends_on("camp@main", when="@main") + depends_on("camp@main", when="@develop") + depends_on("camp+openmp", when="+openmp") + + depends_on("cmake@:3.20", when="+rocm", type="build") + depends_on("cmake@3.14:", when="@2022.03.0:") + + depends_on("llvm-openmp", when="+openmp %apple-clang") + + depends_on("rocprim", when="+rocm") + with when("+rocm @0.12.0:"): + depends_on("camp+rocm") + for arch in ROCmPackage.amdgpu_targets: + depends_on( + "camp+rocm amdgpu_target={0}".format(arch), when="amdgpu_target={0}".format(arch) + ) + conflicts("+openmp") - phases = ['hostconfig', 'cmake', 'build', 'install'] + with when("+cuda @0.12.0:"): + depends_on("camp+cuda") + for sm_ in CudaPackage.cuda_arch_values: + depends_on("camp +cuda cuda_arch={0}".format(sm_), when="cuda_arch={0}".format(sm_)) def _get_sys_type(self, spec): - sys_type = str(spec.architecture) - # if on llnl systems, we can use the SYS_TYPE + sys_type = spec.architecture if "SYS_TYPE" in env: sys_type = env["SYS_TYPE"] return sys_type - def _get_host_config_path(self, spec): - var='' - if '+cuda' in spec: - var= '-'.join([var,'cuda']) - if '+libcpp' in spec: - var='-'.join([var,'libcpp']) - - host_config_path = "hc-%s-%s-%s%s-%s.cmake" % (socket.gethostname().rstrip('1234567890'), - self._get_sys_type(spec), - spec.compiler, - var, - spec.dag_hash()) - dest_dir = self.stage.source_path - host_config_path = os.path.abspath(pjoin(dest_dir, host_config_path)) - return host_config_path - - def hostconfig(self, spec, prefix, py_site_pkgs_dir=None): - """ - This method creates a 'host-config' file that specifies - all of the options used to configure and build Umpire. - - For more details about 'host-config' files see: - http://software.llnl.gov/conduit/building.html - - Note: - The `py_site_pkgs_dir` arg exists to allow a package that - subclasses this package provide a specific site packages - dir when calling this function. `py_site_pkgs_dir` should - be an absolute path or `None`. - - This is necessary because the spack `site_packages_dir` - var will not exist in the base class. For more details - on this issue see: https://github.com/spack/spack/issues/6261 - """ - - ####################### - # Compiler Info - ####################### - c_compiler = env["SPACK_CC"] - cpp_compiler = env["SPACK_CXX"] - - # Even though we don't have fortran code in our project we sometimes - # use the Fortran compiler to determine which libstdc++ to use - f_compiler = "" - if "SPACK_FC" in env.keys(): - # even if this is set, it may not exist - # do one more sanity check - if os.path.isfile(env["SPACK_FC"]): - f_compiler = env["SPACK_FC"] - - ####################################################################### - # By directly fetching the names of the actual compilers we appear - # to doing something evil here, but this is necessary to create a - # 'host config' file that works outside of the spack install env. - ####################################################################### - - sys_type = self._get_sys_type(spec) - - ############################################## - # Find and record what CMake is used - ############################################## - - cmake_exe = spec['cmake'].command.path - cmake_exe = os.path.realpath(cmake_exe) - - host_config_path = self._get_host_config_path(spec) - cfg = open(host_config_path, "w") - cfg.write("###################\n".format("#" * 60)) - cfg.write("# Generated host-config - Edit at own risk!\n") - cfg.write("###################\n".format("#" * 60)) - cfg.write("# Copyright 2016-22, Lawrence Livermore National Security, LLC\n") - cfg.write("# and RAJA project contributors. See the RAJA/LICENSE file\n") - cfg.write("# for details.\n") - cfg.write("#\n") - cfg.write("# SPDX-License-Identifier: (BSD-3-Clause) \n") - cfg.write("###################\n\n".format("#" * 60)) - - cfg.write("#------------------\n".format("-" * 60)) - cfg.write("# SYS_TYPE: {0}\n".format(sys_type)) - cfg.write("# Compiler Spec: {0}\n".format(spec.compiler)) - cfg.write("# CMake executable path: %s\n" % cmake_exe) - cfg.write("#------------------\n\n".format("-" * 60)) - - cfg.write(cmake_cache_string("CMAKE_BUILD_TYPE", spec.variants['build_type'].value)) - - ####################### - # Compiler Settings - ####################### - - cfg.write("#------------------\n".format("-" * 60)) - cfg.write("# Compilers\n") - cfg.write("#------------------\n\n".format("-" * 60)) - cfg.write(cmake_cache_entry("CMAKE_C_COMPILER", c_compiler)) - cfg.write(cmake_cache_entry("CMAKE_CXX_COMPILER", cpp_compiler)) - - # use global spack compiler flags - cflags = ' '.join(spec.compiler_flags['cflags']) - if "+libcpp" in spec: - cflags += ' '.join([cflags,"-DGTEST_HAS_CXXABI_H_=0"]) - if cflags: - cfg.write(cmake_cache_entry("CMAKE_C_FLAGS", cflags)) - - cxxflags = ' '.join(spec.compiler_flags['cxxflags']) - if "+libcpp" in spec: - cxxflags += ' '.join([cxxflags,"-stdlib=libc++ -DGTEST_HAS_CXXABI_H_=0"]) - if cxxflags: - cfg.write(cmake_cache_entry("CMAKE_CXX_FLAGS", cxxflags)) - - # TODO (bernede1@llnl.gov): Is this useful for RAJA? - if ("gfortran" in f_compiler) and ("clang" in cpp_compiler): - libdir = pjoin(os.path.dirname( - os.path.dirname(f_compiler)), "lib") - flags = "" - for _libpath in [libdir, libdir + "64"]: - if os.path.exists(_libpath): - flags += " -Wl,-rpath,{0}".format(_libpath) - description = ("Adds a missing libstdc++ rpath") - #if flags: - # cfg.write(cmake_cache_string("BLT_EXE_LINKER_FLAGS", flags, - # description)) - - gcc_toolchain_regex = re.compile("--gcc-toolchain=(.*)") - gcc_name_regex = re.compile(".*gcc-name.*") - - using_toolchain = list(filter(gcc_toolchain_regex.match, spec.compiler_flags['cxxflags'])) - if(using_toolchain): - gcc_toolchain_path = gcc_toolchain_regex.match(using_toolchain[0]) - using_gcc_name = list(filter(gcc_name_regex.match, spec.compiler_flags['cxxflags'])) - compilers_using_toolchain = ["pgi", "xl", "icpc"] - if any(compiler in cpp_compiler for compiler in compilers_using_toolchain): - if using_toolchain or using_gcc_name: - cfg.write(cmake_cache_entry("BLT_CMAKE_IMPLICIT_LINK_DIRECTORIES_EXCLUDE", - "/usr/tce/packages/gcc/gcc-4.9.3/lib64;/usr/tce/packages/gcc/gcc-4.9.3/gnu/lib64/gcc/powerpc64le-unknown-linux-gnu/4.9.3;/usr/tce/packages/gcc/gcc-4.9.3/gnu/lib64;/usr/tce/packages/gcc/gcc-4.9.3/lib64/gcc/x86_64-unknown-linux-gnu/4.9.3")) - - compilers_using_cxx14 = ["intel-17", "intel-18", "xl"] - if any(compiler in cpp_compiler for compiler in compilers_using_cxx14): - cfg.write(cmake_cache_entry("BLT_CXX_STD", "c++14")) + @property + def cache_name(self): + hostname = socket.gethostname() + if "SYS_TYPE" in env: + hostname = hostname.rstrip("1234567890") + return "{0}-{1}-{2}@{3}.cmake".format( + hostname, + self._get_sys_type(self.spec), + self.spec.compiler.name, + self.spec.compiler.version, + ) + + def initconfig_hardware_entries(self): + spec = self.spec + entries = super(Raja, self).initconfig_hardware_entries() + + entries.append(cmake_cache_option("ENABLE_OPENMP", "+openmp" in spec)) if "+cuda" in spec: - cfg.write("#------------------{0}\n".format("-" * 60)) - cfg.write("# Cuda\n") - cfg.write("#------------------{0}\n\n".format("-" * 60)) - - cfg.write(cmake_cache_option("ENABLE_CUDA", True)) - - cudatoolkitdir = spec['cuda'].prefix - cfg.write(cmake_cache_entry("CUDA_TOOLKIT_ROOT_DIR", - cudatoolkitdir)) - cudacompiler = "${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc" - cfg.write(cmake_cache_entry("CMAKE_CUDA_COMPILER", - cudacompiler)) - - if ("xl" in cpp_compiler): - cfg.write(cmake_cache_entry("CMAKE_CUDA_FLAGS", "-Xcompiler -O3 -Xcompiler -qxlcompatmacros -Xcompiler -qalias=noansi " + - "-Xcompiler -qsmp=omp -Xcompiler -qhot -Xcompiler -qnoeh -Xcompiler -qsuppress=1500-029 " + - "-Xcompiler -qsuppress=1500-036 -Xcompiler -qsuppress=1500-030")) - cuda_release_flags = "-O3" - cuda_reldebinf_flags = "-O3 -g" - cuda_debug_flags = "-O0 -g" - - cfg.write(cmake_cache_string("BLT_CXX_STD", "c++14")) - elif ("gcc" in cpp_compiler): - cuda_release_flags = "-O3 -Xcompiler -Ofast -Xcompiler -finline-functions -Xcompiler -finline-limit=20000" - cuda_reldebinf_flags = "-O3 -g -Xcompiler -Ofast -Xcompiler -finline-functions -Xcompiler -finline-limit=20000" - cuda_debug_flags = "-O0 -g -Xcompiler -O0 -Xcompiler -finline-functions -Xcompiler -finline-limit=20000" - else: - cuda_release_flags = "-O3 -Xcompiler -Ofast -Xcompiler -finline-functions" - cuda_reldebinf_flags = "-O3 -g -Xcompiler -Ofast -Xcompiler -finline-functions" - cuda_debug_flags = "-O0 -g -Xcompiler -O0 -Xcompiler -finline-functions" - - cfg.write(cmake_cache_string("CMAKE_CUDA_FLAGS_RELEASE", cuda_release_flags)) - cfg.write(cmake_cache_string("CMAKE_CUDA_FLAGS_RELWITHDEBINFO", cuda_reldebinf_flags)) - cfg.write(cmake_cache_string("CMAKE_CUDA_FLAGS_DEBUG", cuda_debug_flags)) - - if not spec.satisfies('cuda_arch=none'): - cuda_arch = spec.variants['cuda_arch'].value - cfg.write(cmake_cache_string("CUDA_ARCH", 'sm_{0}'.format(cuda_arch[0]))) + entries.append(cmake_cache_option("ENABLE_CUDA", True)) + if not spec.satisfies("cuda_arch=none"): + cuda_arch = spec.variants["cuda_arch"].value + entries.append(cmake_cache_string("CUDA_ARCH", "sm_{0}".format(cuda_arch[0]))) + entries.append( + cmake_cache_string("CMAKE_CUDA_ARCHITECTURES", "{0}".format(cuda_arch[0])) + ) else: - cfg.write(cmake_cache_option("ENABLE_CUDA", False)) + entries.append(cmake_cache_option("ENABLE_CUDA", False)) if "+rocm" in spec: - cfg.write("#------------------{0}\n".format("-" * 60)) - cfg.write("# HIP\n") - cfg.write("#------------------{0}\n\n".format("-" * 60)) - - cfg.write(cmake_cache_option("ENABLE_HIP", True)) - - hip_root = spec['hip'].prefix - rocm_root = hip_root + "/.." - hip_arch = spec.variants['amdgpu_target'].value - cfg.write(cmake_cache_entry("HIP_ROOT_DIR", - hip_root)) - # there is only one dir like this, but the version component is unknown - cfg.write( - cmake_cache_path( - "HIP_CLANG_INCLUDE_PATH", - glob.glob( - "{}/lib/clang/*/include".format(spec['llvm-amdgpu'].prefix) - )[0] + entries.append(cmake_cache_option("ENABLE_HIP", True)) + entries.append(cmake_cache_path("HIP_ROOT_DIR", "{0}".format(spec["hip"].prefix))) + hip_repair_options(entries, spec) + archs = self.spec.variants["amdgpu_target"].value + if archs != "none": + arch_str = ",".join(archs) + entries.append( + cmake_cache_string("HIP_HIPCC_FLAGS", "--amdgpu-target={0}".format(arch_str)) ) - ) - cfg.write(cmake_cache_entry("ROCM_ROOT_DIR", - rocm_root)) - cfg.write(cmake_cache_entry("HIP_PATH", - rocm_root + '/llvm/bin')) - cfg.write(cmake_cache_entry("CMAKE_HIP_ARCHITECTURES", hip_arch[0])) - - if ('%gcc' in spec) or (using_toolchain): - if ('%gcc' in spec): - gcc_bin = os.path.dirname(self.compiler.cxx) - gcc_prefix = join_path(gcc_bin, '..') - else: - gcc_prefix = gcc_toolchain_path.group(1) - cfg.write(cmake_cache_entry("HIP_CLANG_FLAGS", - "--gcc-toolchain={0}".format(gcc_prefix))) - cfg.write(cmake_cache_entry("CMAKE_EXE_LINKER_FLAGS", - " -Wl,-rpath {}/lib64".format(gcc_prefix))) - else: - cfg.write(cmake_cache_option("ENABLE_HIP", False)) - - cfg.write("#------------------{0}\n".format("-" * 60)) - cfg.write("# Other\n") - cfg.write("#------------------{0}\n\n".format("-" * 60)) - - cfg.write(cmake_cache_string("RAJA_RANGE_ALIGN", "4")) - cfg.write(cmake_cache_string("RAJA_RANGE_MIN_LENGTH", "32")) - cfg.write(cmake_cache_string("RAJA_DATA_ALIGN", "64")) - - cfg.write(cmake_cache_option("RAJA_HOST_CONFIG_LOADED", True)) + entries.append(cmake_cache_option("ENABLE_HIP", False)) - # shared vs static libs - cfg.write(cmake_cache_option("BUILD_SHARED_LIBS","+shared" in spec)) - cfg.write(cmake_cache_option("ENABLE_OPENMP","+openmp" in spec)) - cfg.write(cmake_cache_option("RAJA_ENABLE_DESUL_ATOMICS","+desul" in spec)) + return entries - if "+desul" in spec: - cfg.write(cmake_cache_string("BLT_CXX_STD","c++14")) - if "+cuda" in spec: - cfg.write(cmake_cache_string("CMAKE_CUDA_STANDARD", "14")) - - cfg.write(cmake_cache_option("ENABLE_BENCHMARKS", 'tests=benchmarks' in spec)) - cfg.write(cmake_cache_option("ENABLE_TESTS", not 'tests=none' in spec or self.run_tests)) - cfg.write(cmake_cache_string("camp_DIR", spec['camp'].prefix)) + def initconfig_package_entries(self): + spec = self.spec + entries = [] + + option_prefix = "RAJA_" if spec.satisfies("@0.14.0:") else "" + + entries.append(cmake_cache_path("BLT_SOURCE_DIR", spec["blt"].prefix)) + if "camp" in self.spec: + entries.append(cmake_cache_path("camp_DIR", spec["camp"].prefix)) + entries.append(cmake_cache_option("BUILD_SHARED_LIBS", "+shared" in spec)) + entries.append( + cmake_cache_option("{}ENABLE_EXAMPLES".format(option_prefix), "+examples" in spec) + ) + if spec.satisfies("@0.14.0:"): + entries.append( + cmake_cache_option( + "{}ENABLE_EXERCISES".format(option_prefix), "+exercises" in spec + ) + ) + else: + entries.append(cmake_cache_option("ENABLE_EXERCISES", "+exercises" in spec)) - ####################### - # Close and save - ####################### - cfg.write("\n") - cfg.close() + # Work around spack adding -march=ppc64le to SPACK_TARGET_ARGS which + # is used by the spack compiler wrapper. This can go away when BLT + # removes -Werror from GTest flags + if self.spec.satisfies("%clang target=ppc64le:") or not self.run_tests: + entries.append(cmake_cache_option("ENABLE_TESTS", False)) + else: + entries.append(cmake_cache_option("ENABLE_TESTS", True)) - print("OUT: host-config file {0}".format(host_config_path)) + return entries def cmake_args(self): - spec = self.spec - host_config_path = self._get_host_config_path(spec) - options = [] - options.extend(['-C', host_config_path]) - return options + + @property + def build_relpath(self): + """Relative path to the cmake build subdirectory.""" + return join_path("..", self.build_dirname) + + @run_after("install") + def setup_build_tests(self): + """Copy the build test files after the package is installed to a + relative install test subdirectory for use during `spack test run`.""" + # Now copy the relative files + self.cache_extra_test_sources(self.build_relpath) + + # Ensure the path exists since relying on a relative path at the + # same level as the normal stage source path. + mkdirp(self.install_test_root) + + @property + def _extra_tests_path(self): + # TODO: The tests should be converted to re-build and run examples + # TODO: using the installed libraries. + return join_path(self.install_test_root, self.build_relpath, "bin") + + def _test_examples(self): + """Perform very basic checks on a subset of copied examples.""" + checks = [ + ( + "ex5_line-of-sight_solution", + [r"RAJA sequential", r"RAJA OpenMP", r"result -- PASS"], + ), + ( + "ex6_stencil-offset-layout_solution", + [r"RAJA Views \(permuted\)", r"result -- PASS"], + ), + ( + "ex8_tiled-matrix-transpose_solution", + [r"parallel top inner loop", r"collapsed inner loops", r"result -- PASS"], + ), + ("kernel-dynamic-tile", [r"Running index", r"(24,24)"]), + ("plugin-example", [r"Launching host kernel for the 10 time"]), + ("tut_batched-matrix-multiply", [r"result -- PASS"]), + ("wave-eqn", [r"Max Error = 2", r"Evolved solution to time"]), + ] + for exe, expected in checks: + reason = "test: checking output of {0} for {1}".format(exe, expected) + self.run_test( + exe, + [], + expected, + installed=False, + purpose=reason, + skip_missing=True, + work_dir=self._extra_tests_path, + ) + + def test(self): + """Perform smoke tests.""" + self._test_examples() From 891f5a4e3eccc9ead6b78349e2a110d5faba3e39 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" Date: Mon, 19 Sep 2022 14:39:57 +0200 Subject: [PATCH 02/94] Update uberenv config with correct configuration phase --- .uberenv_config.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.uberenv_config.json b/.uberenv_config.json index 2fc700f855..dc692a9045 100644 --- a/.uberenv_config.json +++ b/.uberenv_config.json @@ -1,7 +1,7 @@ { "package_name" : "raja", "package_version" : "develop", -"package_final_phase" : "hostconfig", +"package_final_phase" : "initconfig", "package_source_dir" : "../..", "spack_url": "https://github.com/spack/spack.git", "spack_branch": "v0.18.1", From df4c5d686c4b5ae724e8d827604413a4a355addf Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" Date: Mon, 19 Sep 2022 14:50:20 +0200 Subject: [PATCH 03/94] Retrieve CMake from hostconfig file (same as in umpire) --- scripts/gitlab/build_and_test.sh | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/scripts/gitlab/build_and_test.sh b/scripts/gitlab/build_and_test.sh index eda0c550ef..34acbeae37 100755 --- a/scripts/gitlab/build_and_test.sh +++ b/scripts/gitlab/build_and_test.sh @@ -127,8 +127,7 @@ fi build_dir="${build_root}/build_${hostconfig//.cmake/}" install_dir="${build_root}/install_${hostconfig//.cmake/}" -# TODO: This is from Umpire, could it work with RAJA ? -#cmake_exe=`grep 'CMake executable' ${hostconfig_path} | cut -d ':' -f 2 | xargs` +cmake_exe=`grep 'CMake executable' ${hostconfig_path} | cut -d ':' -f 2 | xargs` # Build if [[ "${option}" != "--deps-only" && "${option}" != "--test-only" ]] @@ -161,17 +160,14 @@ then then module unload rocm fi - - module load cmake/3.20.2 || module load cmake/3.19.2 || module load cmake/3.21.1 - - cmake \ + $cmake_exe \ -C ${hostconfig_path} \ -DCMAKE_INSTALL_PREFIX=${install_dir} \ ${project_dir} - if ! cmake --build . -j ${core_counts[$truehostname]} + if ! $cmake_exe --build . -j ${core_counts[$truehostname]} then echo "ERROR: compilation failed, building with verbose output..." - cmake --build . --verbose -j 1 + $cmake_exe --build . --verbose -j 1 else make install fi @@ -236,8 +232,8 @@ then cd ${install_dir}/examples/RAJA/using-with-cmake mkdir build && cd build - if ! cmake -C ../host-config.cmake ..; then - echo "ERROR: running cmake for using-with-cmake test" && exit 1 + if ! $cmake_exe -C ../host-config.cmake ..; then + echo "ERROR: running $cmake_exe for using-with-cmake test" && exit 1 fi if ! make; then From bf65fd7fde93a46fbf0c641149125e8d887d2977 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" Date: Mon, 19 Sep 2022 15:26:20 +0200 Subject: [PATCH 04/94] add desul option --- scripts/spack_packages/raja/package.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scripts/spack_packages/raja/package.py b/scripts/spack_packages/raja/package.py index 4850ed1c54..08aa43fad1 100644 --- a/scripts/spack_packages/raja/package.py +++ b/scripts/spack_packages/raja/package.py @@ -53,6 +53,7 @@ class Raja(CachedCMakePackage, CudaPackage, ROCmPackage): # TODO: figure out gtest dependency and then set this default True # and remove the +tests conflict below. variant("tests", default=False, description="Build tests") + variant("desul", default=False, description='Build Desul Atomics backend') depends_on("blt") depends_on("blt@0.5.0:", type="build", when="@0.14.1:") @@ -143,6 +144,13 @@ def initconfig_package_entries(self): option_prefix = "RAJA_" if spec.satisfies("@0.14.0:") else "" + entries.append(cmake_cache_option("RAJA_ENABLE_DESUL_ATOMICS", "+desul" in spec)) + + if "+desul" in spec: + entries.append(cmake_cache_string("BLT_CXX_STD","c++14")) + if "+cuda" in spec: + entries.append(cmake_cache_string("CMAKE_CUDA_STANDARD", "14")) + entries.append(cmake_cache_path("BLT_SOURCE_DIR", spec["blt"].prefix)) if "camp" in self.spec: entries.append(cmake_cache_path("camp_DIR", spec["camp"].prefix)) From 1f7e5b73ba0d94dcfa71010c1ca169e0d1bb8079 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" Date: Mon, 19 Sep 2022 18:31:51 +0200 Subject: [PATCH 05/94] add libcpp variant --- scripts/spack_packages/raja/package.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/scripts/spack_packages/raja/package.py b/scripts/spack_packages/raja/package.py index 08aa43fad1..82039dd49d 100644 --- a/scripts/spack_packages/raja/package.py +++ b/scripts/spack_packages/raja/package.py @@ -53,7 +53,8 @@ class Raja(CachedCMakePackage, CudaPackage, ROCmPackage): # TODO: figure out gtest dependency and then set this default True # and remove the +tests conflict below. variant("tests", default=False, description="Build tests") - variant("desul", default=False, description='Build Desul Atomics backend') + variant("libcpp", default=False, description="Uses libc++ instead of libstdc++") + variant("desul", default=False, description="Build Desul Atomics backend") depends_on("blt") depends_on("blt@0.5.0:", type="build", when="@0.14.1:") @@ -94,6 +95,7 @@ def _get_sys_type(self, spec): return sys_type @property + # TODO: name cache file conditionally to cuda and libcpp variants def cache_name(self): hostname = socket.gethostname() if "SYS_TYPE" in env: @@ -146,6 +148,19 @@ def initconfig_package_entries(self): entries.append(cmake_cache_option("RAJA_ENABLE_DESUL_ATOMICS", "+desul" in spec)) + # use global spack compiler flags + cflags = " ".join(spec.compiler_flags["cflags"]) + if "+libcpp" in spec: + cflags += " ".join([cflags,"-DGTEST_HAS_CXXABI_H_=0"]) + if cflags: + entries.append(cmake_cache_entry("CMAKE_C_FLAGS", cflags)) + + cxxflags = " ".join(spec.compiler_flags["cxxflags"]) + if "+libcpp" in spec: + cxxflags += " ".join([cxxflags,"-stdlib=libc++ -DGTEST_HAS_CXXABI_H_=0"]) + if cxxflags: + entries.append(cmake_cache_entry("CMAKE_CXX_FLAGS", cxxflags)) + if "+desul" in spec: entries.append(cmake_cache_string("BLT_CXX_STD","c++14")) if "+cuda" in spec: From 3d39e66612b55429e7308f2945aa0063d3295c8a Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" Date: Mon, 19 Sep 2022 18:54:27 +0200 Subject: [PATCH 06/94] Fix HIP option: resulted in wrong CMake Cache file --- scripts/spack_packages/camp/package.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/scripts/spack_packages/camp/package.py b/scripts/spack_packages/camp/package.py index 8f40f6b295..864fafbd70 100644 --- a/scripts/spack_packages/camp/package.py +++ b/scripts/spack_packages/camp/package.py @@ -10,10 +10,7 @@ def hip_repair_options(options, spec): # there is only one dir like this, but the version component is unknown - options.append( - "-DHIP_CLANG_INCLUDE_PATH=" - + glob.glob("{}/lib/clang/*/include".format(spec["llvm-amdgpu"].prefix))[0] - ) + entries.append(cmake_cache_path("HIP_CLANG_INCLUDE_PATH", glob.glob("{}/lib/clang/*/include".format(spec["llvm-amdgpu"].prefix))[0])) class Camp(CMakePackage, CudaPackage, ROCmPackage): From 7af22d56a7454fbab6e66c188ffd11b639e98df6 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" Date: Mon, 19 Sep 2022 19:05:32 +0200 Subject: [PATCH 07/94] Revert "Fix HIP option: resulted in wrong CMake Cache file" This reverts commit 4dd80d0a7d71df946245679d32880867724dae13. --- scripts/spack_packages/camp/package.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/spack_packages/camp/package.py b/scripts/spack_packages/camp/package.py index 864fafbd70..8f40f6b295 100644 --- a/scripts/spack_packages/camp/package.py +++ b/scripts/spack_packages/camp/package.py @@ -10,7 +10,10 @@ def hip_repair_options(options, spec): # there is only one dir like this, but the version component is unknown - entries.append(cmake_cache_path("HIP_CLANG_INCLUDE_PATH", glob.glob("{}/lib/clang/*/include".format(spec["llvm-amdgpu"].prefix))[0])) + options.append( + "-DHIP_CLANG_INCLUDE_PATH=" + + glob.glob("{}/lib/clang/*/include".format(spec["llvm-amdgpu"].prefix))[0] + ) class Camp(CMakePackage, CudaPackage, ROCmPackage): From 3c27b36c2704f4782200b3087a0efd26ccf4664f Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" Date: Mon, 19 Sep 2022 19:06:04 +0200 Subject: [PATCH 08/94] Fix hip-repaired-option usage: resulted in wrong CMake Cache file --- scripts/spack_packages/raja/package.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/spack_packages/raja/package.py b/scripts/spack_packages/raja/package.py index 82039dd49d..1da2d666a4 100644 --- a/scripts/spack_packages/raja/package.py +++ b/scripts/spack_packages/raja/package.py @@ -4,11 +4,15 @@ # SPDX-License-Identifier: (Apache-2.0 OR MIT) import socket +import glob from spack.package import * -from spack.pkg.builtin.camp import hip_repair_options +def hip_repair_entries(entries, spec): + # there is only one dir like this, but the version component is unknown + entries.append(cmake_cache_path("HIP_CLANG_INCLUDE_PATH", glob.glob("{}/lib/clang/*/include".format(spec["llvm-amdgpu"].prefix))[0])) + class Raja(CachedCMakePackage, CudaPackage, ROCmPackage): """RAJA Parallel Framework.""" @@ -128,7 +132,7 @@ def initconfig_hardware_entries(self): if "+rocm" in spec: entries.append(cmake_cache_option("ENABLE_HIP", True)) entries.append(cmake_cache_path("HIP_ROOT_DIR", "{0}".format(spec["hip"].prefix))) - hip_repair_options(entries, spec) + hip_repair_entries(entries, spec) archs = self.spec.variants["amdgpu_target"].value if archs != "none": arch_str = ",".join(archs) From fd38921999bfe345729d9dd7d709576162031586 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" Date: Mon, 19 Sep 2022 19:18:56 +0200 Subject: [PATCH 09/94] Activate tests variant in all specs --- .gitlab/corona-build-and-test-extra.yml | 2 +- .gitlab/custom-jobs-and-variables.yml | 6 +++--- .gitlab/lassen-build-and-test-extra.yml | 14 +++++++------- .gitlab/ruby-build-and-test-extra.yml | 8 ++++---- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/.gitlab/corona-build-and-test-extra.yml b/.gitlab/corona-build-and-test-extra.yml index a94300f85b..cbe0c4a9f0 100644 --- a/.gitlab/corona-build-and-test-extra.yml +++ b/.gitlab/corona-build-and-test-extra.yml @@ -23,6 +23,6 @@ rocm_5_1_1_clang_13_0_0_desul_atomics: variables: - SPEC: " +rocm ~openmp +desul amdgpu_target=gfx906 %clang@13.0.0 ^hip@5.1.1 ^blt@develop" + SPEC: " +rocm ~openmp +tests +desul amdgpu_target=gfx906 %clang@13.0.0 ^hip@5.1.1 ^blt@develop" extends: .build_and_test_on_corona diff --git a/.gitlab/custom-jobs-and-variables.yml b/.gitlab/custom-jobs-and-variables.yml index b615686b5a..1bb569fcad 100644 --- a/.gitlab/custom-jobs-and-variables.yml +++ b/.gitlab/custom-jobs-and-variables.yml @@ -18,7 +18,7 @@ variables: # Arguments for job level allocation RUBY_BUILD_AND_TEST_JOB_ALLOC: "--time=45 --nodes=1" # Project specific variants for ruby - PROJECT_RUBY_VARIANTS: "+openmp " + PROJECT_RUBY_VARIANTS: "+openmp +tests" # Project specific deps for ruby PROJECT_RUBY_DEPS: "" @@ -28,7 +28,7 @@ variables: # Arguments for job level allocation CORONA_BUILD_AND_TEST_JOB_ALLOC: "--time-limit=45m --nodes=1" # Project specific variants for corona - PROJECT_CORONA_VARIANTS: "~openmp " + PROJECT_CORONA_VARIANTS: "~openmp +tests" # Project specific deps for corona PROJECT_CORONA_DEPS: "^blt@develop " @@ -37,7 +37,7 @@ variables: # Arguments for job level allocation LASSEN_BUILD_AND_TEST_JOB_ALLOC: "1 -W 60" # Project specific variants for lassen - PROJECT_LASSEN_VARIANTS: "+openmp " + PROJECT_LASSEN_VARIANTS: "+openmp +tests" # Project specific deps for lassen PROJECT_LASSEN_DEPS: "" diff --git a/.gitlab/lassen-build-and-test-extra.yml b/.gitlab/lassen-build-and-test-extra.yml index 16a1230515..6dbf25ea2f 100644 --- a/.gitlab/lassen-build-and-test-extra.yml +++ b/.gitlab/lassen-build-and-test-extra.yml @@ -89,7 +89,7 @@ xl_16_1_1_12_gcc_8_3_1_cuda_11_1_0: clang_14_0_5: variables: - SPEC: " +openmp %clang@14.0.5" + SPEC: " +openmp +tests %clang@14.0.5" extends: .build_and_test_on_lassen ########## @@ -98,18 +98,18 @@ clang_14_0_5: clang_12_0_1_cuda_11_5_0: variables: - SPEC: " +openmp +cuda cuda_arch=70 %clang@12.0.1 ^cuda@11.5.0" + SPEC: " +openmp +tests +cuda cuda_arch=70 %clang@12.0.1 ^cuda@11.5.0" extends: .build_and_test_on_lassen gcc_8_3_1_cuda_11_1_0: variables: - SPEC: " +openmp +cuda %gcc@8.3.1 cuda_arch=70 ^cuda@11.1.0" + SPEC: " +openmp +tests +cuda %gcc@8.3.1 cuda_arch=70 ^cuda@11.1.0" extends: .build_and_test_on_lassen gcc_8_3_1_cuda_11_5_0_ats_disabled: extends: .build_and_test_on_lassen variables: - SPEC: " +openmp +cuda %gcc@8.3.1 cuda_arch=70 ^cuda@11.5.0" + SPEC: " +openmp +tests +cuda %gcc@8.3.1 cuda_arch=70 ^cuda@11.5.0" LASSEN_BUILD_AND_TEST_JOB_ALLOC: "1 --atsdisable -W 60" ########## @@ -118,16 +118,16 @@ gcc_8_3_1_cuda_11_5_0_ats_disabled: clang_13_0_1_libcpp: variables: - SPEC: " +openmp %clang@13.0.1+libcpp" + SPEC: " +openmp +tests %clang@13.0.1+libcpp" extends: .build_and_test_on_lassen clang_14_0_5_asan: variables: - SPEC: " +openmp %clang@14.0.5 cxxflags=-fsanitize=address" + SPEC: " +openmp +tests %clang@14.0.5 cxxflags=-fsanitize=address" ASAN_OPTIONS: "detect_leaks=1" extends: .build_and_test_on_lassen gcc_8_3_1_cuda_10_1_168_desul_atomics: variables: - SPEC: " +openmp +cuda +desul %gcc@8.3.1 cuda_arch=70 ^cuda@10.1.168" + SPEC: " +openmp +tests +cuda +desul %gcc@8.3.1 cuda_arch=70 ^cuda@10.1.168" extends: .build_and_test_on_lassen diff --git a/.gitlab/ruby-build-and-test-extra.yml b/.gitlab/ruby-build-and-test-extra.yml index 9bebc62530..1aaceec243 100644 --- a/.gitlab/ruby-build-and-test-extra.yml +++ b/.gitlab/ruby-build-and-test-extra.yml @@ -35,24 +35,24 @@ pgi_20_1_gcc_local_8_3_1: clang_9_0_0_openmp_off: variables: - SPEC: " ~openmp %clang@9.0.0" + SPEC: " ~openmp +tests %clang@9.0.0" extends: .build_and_test_on_ruby gcc_8_1_0_openmp_default: variables: - SPEC: " %gcc@8.1.0" + SPEC: " +tests %gcc@8.1.0" RUBY_BUILD_AND_TEST_JOB_ALLOC: "--time=60 --nodes=1" extends: .build_and_test_on_ruby icpc_19_1_0: variables: - SPEC: " +openmp %intel@19.1.0" + SPEC: " +openmp +tests %intel@19.1.0" RUBY_BUILD_AND_TEST_JOB_ALLOC: "--time=40 --nodes=1" extends: .build_and_test_on_ruby # OTHERS clang_10_0_1_gcc_8_3_1_desul_atomics: variables: - SPEC: " +openmp +desul %clang@10.0.1 cxxflags=--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1 cflags=--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1" + SPEC: " +openmp +tests +desul %clang@10.0.1 cxxflags=--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1 cflags=--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1" extends: .build_and_test_on_ruby From 6fa1fe6c69e9722469784bddfbd030cbd5ef3f1c Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" Date: Mon, 19 Sep 2022 19:21:12 +0200 Subject: [PATCH 10/94] Fix add desul option: wrong function name --- scripts/spack_packages/raja/package.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/spack_packages/raja/package.py b/scripts/spack_packages/raja/package.py index 1da2d666a4..f5dba192e8 100644 --- a/scripts/spack_packages/raja/package.py +++ b/scripts/spack_packages/raja/package.py @@ -157,13 +157,13 @@ def initconfig_package_entries(self): if "+libcpp" in spec: cflags += " ".join([cflags,"-DGTEST_HAS_CXXABI_H_=0"]) if cflags: - entries.append(cmake_cache_entry("CMAKE_C_FLAGS", cflags)) + entries.append(cmake_cache_option("CMAKE_C_FLAGS", cflags)) cxxflags = " ".join(spec.compiler_flags["cxxflags"]) if "+libcpp" in spec: cxxflags += " ".join([cxxflags,"-stdlib=libc++ -DGTEST_HAS_CXXABI_H_=0"]) if cxxflags: - entries.append(cmake_cache_entry("CMAKE_CXX_FLAGS", cxxflags)) + entries.append(cmake_cache_option("CMAKE_CXX_FLAGS", cxxflags)) if "+desul" in spec: entries.append(cmake_cache_string("BLT_CXX_STD","c++14")) From aa98a157a7f1473316379f148ccf62fd329d99dc Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" Date: Mon, 19 Sep 2022 21:31:02 +0200 Subject: [PATCH 11/94] Fix add libcpp: wrong cmake cache function --- scripts/spack_packages/raja/package.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/spack_packages/raja/package.py b/scripts/spack_packages/raja/package.py index f5dba192e8..189e85e41d 100644 --- a/scripts/spack_packages/raja/package.py +++ b/scripts/spack_packages/raja/package.py @@ -157,13 +157,13 @@ def initconfig_package_entries(self): if "+libcpp" in spec: cflags += " ".join([cflags,"-DGTEST_HAS_CXXABI_H_=0"]) if cflags: - entries.append(cmake_cache_option("CMAKE_C_FLAGS", cflags)) + entries.append(cmake_cache_string("CMAKE_C_FLAGS", cflags)) cxxflags = " ".join(spec.compiler_flags["cxxflags"]) if "+libcpp" in spec: cxxflags += " ".join([cxxflags,"-stdlib=libc++ -DGTEST_HAS_CXXABI_H_=0"]) if cxxflags: - entries.append(cmake_cache_option("CMAKE_CXX_FLAGS", cxxflags)) + entries.append(cmake_cache_string("CMAKE_CXX_FLAGS", cxxflags)) if "+desul" in spec: entries.append(cmake_cache_string("BLT_CXX_STD","c++14")) From 59b20b07998502635d31150a0ca2d8d8e713d938 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" Date: Mon, 19 Sep 2022 22:22:12 +0200 Subject: [PATCH 12/94] Let +tests variant actually do activate tests --- scripts/spack_packages/raja/package.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/spack_packages/raja/package.py b/scripts/spack_packages/raja/package.py index 189e85e41d..e8b923aed2 100644 --- a/scripts/spack_packages/raja/package.py +++ b/scripts/spack_packages/raja/package.py @@ -189,7 +189,7 @@ def initconfig_package_entries(self): # Work around spack adding -march=ppc64le to SPACK_TARGET_ARGS which # is used by the spack compiler wrapper. This can go away when BLT # removes -Werror from GTest flags - if self.spec.satisfies("%clang target=ppc64le:") or not self.run_tests: + if self.spec.satisfies("%clang target=ppc64le:") or ( not self.run_tests and not "+tests" in spec): entries.append(cmake_cache_option("ENABLE_TESTS", False)) else: entries.append(cmake_cache_option("ENABLE_TESTS", True)) From ebc9f1ec96605b0d59a4029e0765e9c4911a8c57 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" Date: Tue, 20 Sep 2022 14:49:31 +0200 Subject: [PATCH 13/94] Do not build shared libs --- .gitlab/custom-jobs-and-variables.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitlab/custom-jobs-and-variables.yml b/.gitlab/custom-jobs-and-variables.yml index 1bb569fcad..cc838751d9 100644 --- a/.gitlab/custom-jobs-and-variables.yml +++ b/.gitlab/custom-jobs-and-variables.yml @@ -18,7 +18,7 @@ variables: # Arguments for job level allocation RUBY_BUILD_AND_TEST_JOB_ALLOC: "--time=45 --nodes=1" # Project specific variants for ruby - PROJECT_RUBY_VARIANTS: "+openmp +tests" + PROJECT_RUBY_VARIANTS: "~shared +openmp +tests" # Project specific deps for ruby PROJECT_RUBY_DEPS: "" @@ -28,7 +28,7 @@ variables: # Arguments for job level allocation CORONA_BUILD_AND_TEST_JOB_ALLOC: "--time-limit=45m --nodes=1" # Project specific variants for corona - PROJECT_CORONA_VARIANTS: "~openmp +tests" + PROJECT_CORONA_VARIANTS: "~shared ~openmp +tests" # Project specific deps for corona PROJECT_CORONA_DEPS: "^blt@develop " @@ -37,7 +37,7 @@ variables: # Arguments for job level allocation LASSEN_BUILD_AND_TEST_JOB_ALLOC: "1 -W 60" # Project specific variants for lassen - PROJECT_LASSEN_VARIANTS: "+openmp +tests" + PROJECT_LASSEN_VARIANTS: "~shared +openmp +tests" # Project specific deps for lassen PROJECT_LASSEN_DEPS: "" From d17eaad5e67eed3a6b9211116f75a8791b7636be Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" Date: Tue, 20 Sep 2022 15:00:50 +0200 Subject: [PATCH 14/94] Add missing RAJA_HOST_CONFIG_LOADED to hostconfig file through spack package --- scripts/spack_packages/raja/package.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/spack_packages/raja/package.py b/scripts/spack_packages/raja/package.py index e8b923aed2..122e9d41b8 100644 --- a/scripts/spack_packages/raja/package.py +++ b/scripts/spack_packages/raja/package.py @@ -194,6 +194,8 @@ def initconfig_package_entries(self): else: entries.append(cmake_cache_option("ENABLE_TESTS", True)) + entries.append(cmake_cache_option("RAJA_HOST_CONFIG_LOADED", True)) + return entries def cmake_args(self): From af976e204ad06536e294d999ce890f30dff63432 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" Date: Tue, 20 Sep 2022 15:34:40 +0200 Subject: [PATCH 15/94] Add missing CMAKE_HIP_ARCHITECTURE option to hostconfig through spack package --- scripts/spack_packages/raja/package.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/spack_packages/raja/package.py b/scripts/spack_packages/raja/package.py index 122e9d41b8..7a1f3b1f73 100644 --- a/scripts/spack_packages/raja/package.py +++ b/scripts/spack_packages/raja/package.py @@ -139,6 +139,9 @@ def initconfig_hardware_entries(self): entries.append( cmake_cache_string("HIP_HIPCC_FLAGS", "--amdgpu-target={0}".format(arch_str)) ) + entries.append( + cmake_cache_string("CMAKE_HIP_ARCHITECTURES", arch_str) + ) else: entries.append(cmake_cache_option("ENABLE_HIP", False)) From 0fe3d49c27e38a109b3e9bef7553a4966ba50b34 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" Date: Tue, 20 Sep 2022 15:52:22 +0200 Subject: [PATCH 16/94] WARN: Test that lassen tests pass, but remove a workaround potentially needed with regular spack builds --- scripts/spack_packages/raja/package.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/scripts/spack_packages/raja/package.py b/scripts/spack_packages/raja/package.py index 7a1f3b1f73..365c7743ee 100644 --- a/scripts/spack_packages/raja/package.py +++ b/scripts/spack_packages/raja/package.py @@ -189,10 +189,13 @@ def initconfig_package_entries(self): else: entries.append(cmake_cache_option("ENABLE_EXERCISES", "+exercises" in spec)) - # Work around spack adding -march=ppc64le to SPACK_TARGET_ARGS which - # is used by the spack compiler wrapper. This can go away when BLT - # removes -Werror from GTest flags - if self.spec.satisfies("%clang target=ppc64le:") or ( not self.run_tests and not "+tests" in spec): + ### #TODO: Treat the workaround when building tests with spack wrapper + ### # For now, removing it to test CI, which builds tests outside of wrapper. + ### # Work around spack adding -march=ppc64le to SPACK_TARGET_ARGS which + ### # is used by the spack compiler wrapper. This can go away when BLT + ### # removes -Werror from GTest flags + ### if self.spec.satisfies("%clang target=ppc64le:") or ( not self.run_tests and not "+tests" in spec): + if not self.run_tests and not "+tests" in spec: entries.append(cmake_cache_option("ENABLE_TESTS", False)) else: entries.append(cmake_cache_option("ENABLE_TESTS", True)) From eb97f73f836afa6d49491d3b1938a395880c9bfe Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" Date: Mon, 3 Oct 2022 10:39:44 +0200 Subject: [PATCH 17/94] Add missing ~shared variant to extra specs --- .gitlab/corona-build-and-test-extra.yml | 2 +- .gitlab/lassen-build-and-test-extra.yml | 14 +++++++------- .gitlab/ruby-build-and-test-extra.yml | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.gitlab/corona-build-and-test-extra.yml b/.gitlab/corona-build-and-test-extra.yml index cbe0c4a9f0..29624d700f 100644 --- a/.gitlab/corona-build-and-test-extra.yml +++ b/.gitlab/corona-build-and-test-extra.yml @@ -23,6 +23,6 @@ rocm_5_1_1_clang_13_0_0_desul_atomics: variables: - SPEC: " +rocm ~openmp +tests +desul amdgpu_target=gfx906 %clang@13.0.0 ^hip@5.1.1 ^blt@develop" + SPEC: " ~shared +rocm ~openmp +tests +desul amdgpu_target=gfx906 %clang@13.0.0 ^hip@5.1.1 ^blt@develop" extends: .build_and_test_on_corona diff --git a/.gitlab/lassen-build-and-test-extra.yml b/.gitlab/lassen-build-and-test-extra.yml index 6dbf25ea2f..ab28bf01da 100644 --- a/.gitlab/lassen-build-and-test-extra.yml +++ b/.gitlab/lassen-build-and-test-extra.yml @@ -89,7 +89,7 @@ xl_16_1_1_12_gcc_8_3_1_cuda_11_1_0: clang_14_0_5: variables: - SPEC: " +openmp +tests %clang@14.0.5" + SPEC: " ~shared +openmp +tests %clang@14.0.5" extends: .build_and_test_on_lassen ########## @@ -98,18 +98,18 @@ clang_14_0_5: clang_12_0_1_cuda_11_5_0: variables: - SPEC: " +openmp +tests +cuda cuda_arch=70 %clang@12.0.1 ^cuda@11.5.0" + SPEC: " ~shared +openmp +tests +cuda cuda_arch=70 %clang@12.0.1 ^cuda@11.5.0" extends: .build_and_test_on_lassen gcc_8_3_1_cuda_11_1_0: variables: - SPEC: " +openmp +tests +cuda %gcc@8.3.1 cuda_arch=70 ^cuda@11.1.0" + SPEC: " ~shared +openmp +tests +cuda %gcc@8.3.1 cuda_arch=70 ^cuda@11.1.0" extends: .build_and_test_on_lassen gcc_8_3_1_cuda_11_5_0_ats_disabled: extends: .build_and_test_on_lassen variables: - SPEC: " +openmp +tests +cuda %gcc@8.3.1 cuda_arch=70 ^cuda@11.5.0" + SPEC: " ~shared +openmp +tests +cuda %gcc@8.3.1 cuda_arch=70 ^cuda@11.5.0" LASSEN_BUILD_AND_TEST_JOB_ALLOC: "1 --atsdisable -W 60" ########## @@ -118,16 +118,16 @@ gcc_8_3_1_cuda_11_5_0_ats_disabled: clang_13_0_1_libcpp: variables: - SPEC: " +openmp +tests %clang@13.0.1+libcpp" + SPEC: " ~shared +openmp +tests %clang@13.0.1+libcpp" extends: .build_and_test_on_lassen clang_14_0_5_asan: variables: - SPEC: " +openmp +tests %clang@14.0.5 cxxflags=-fsanitize=address" + SPEC: " ~shared +openmp +tests %clang@14.0.5 cxxflags=-fsanitize=address" ASAN_OPTIONS: "detect_leaks=1" extends: .build_and_test_on_lassen gcc_8_3_1_cuda_10_1_168_desul_atomics: variables: - SPEC: " +openmp +tests +cuda +desul %gcc@8.3.1 cuda_arch=70 ^cuda@10.1.168" + SPEC: " ~shared +openmp +tests +cuda +desul %gcc@8.3.1 cuda_arch=70 ^cuda@10.1.168" extends: .build_and_test_on_lassen diff --git a/.gitlab/ruby-build-and-test-extra.yml b/.gitlab/ruby-build-and-test-extra.yml index 1aaceec243..33faa56442 100644 --- a/.gitlab/ruby-build-and-test-extra.yml +++ b/.gitlab/ruby-build-and-test-extra.yml @@ -35,24 +35,24 @@ pgi_20_1_gcc_local_8_3_1: clang_9_0_0_openmp_off: variables: - SPEC: " ~openmp +tests %clang@9.0.0" + SPEC: " ~shared ~openmp +tests %clang@9.0.0" extends: .build_and_test_on_ruby gcc_8_1_0_openmp_default: variables: - SPEC: " +tests %gcc@8.1.0" + SPEC: " ~shared +tests %gcc@8.1.0" RUBY_BUILD_AND_TEST_JOB_ALLOC: "--time=60 --nodes=1" extends: .build_and_test_on_ruby icpc_19_1_0: variables: - SPEC: " +openmp +tests %intel@19.1.0" + SPEC: " ~shared +openmp +tests %intel@19.1.0" RUBY_BUILD_AND_TEST_JOB_ALLOC: "--time=40 --nodes=1" extends: .build_and_test_on_ruby # OTHERS clang_10_0_1_gcc_8_3_1_desul_atomics: variables: - SPEC: " +openmp +tests +desul %clang@10.0.1 cxxflags=--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1 cflags=--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1" + SPEC: " ~shared +openmp +tests +desul %clang@10.0.1 cxxflags=--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1 cflags=--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1" extends: .build_and_test_on_ruby From 254857868c93e3ca8a2855d62fe7787722aa268a Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" Date: Mon, 3 Oct 2022 19:58:58 +0200 Subject: [PATCH 18/94] Fix incompatible tests variant specification --- .gitlab/lassen-build-and-test-extra.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.gitlab/lassen-build-and-test-extra.yml b/.gitlab/lassen-build-and-test-extra.yml index ab28bf01da..85ced723c9 100644 --- a/.gitlab/lassen-build-and-test-extra.yml +++ b/.gitlab/lassen-build-and-test-extra.yml @@ -67,6 +67,12 @@ xl_16_1_1_12_cuda_11_1_1: allow_failure: true extends: .build_and_test_on_lassen +# Overriding shared spec: Do not run test on that config. This spec will be removed soon. +xl_16_1_1_12_gcc_8_3_1_cuda_11_0_2: + variables: + SPEC: "~shared +openmp +cuda ~tests %xl@16.1.1.12 cxxflags=\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1\" cflags=\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1\" ^cuda@11.0.2 ${PROJECT_LASSEN_DEPS}" + extends: .build_and_test_on_lassen + # Overriding shared spec: Longer allocation + Extra flags + Allow failure + Updated cuda xl_16_1_1_12_gcc_8_3_1_cuda_11_1_0: variables: From cc0810ace3fd9f77fc8daf7f9805fa3e59879eae Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" Date: Mon, 10 Oct 2022 10:56:23 +0200 Subject: [PATCH 19/94] Fix attempt: specify cuda arch --- .gitlab/lassen-build-and-test-extra.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitlab/lassen-build-and-test-extra.yml b/.gitlab/lassen-build-and-test-extra.yml index 85ced723c9..9fdf3ccec2 100644 --- a/.gitlab/lassen-build-and-test-extra.yml +++ b/.gitlab/lassen-build-and-test-extra.yml @@ -67,16 +67,16 @@ xl_16_1_1_12_cuda_11_1_1: allow_failure: true extends: .build_and_test_on_lassen -# Overriding shared spec: Do not run test on that config. This spec will be removed soon. +# Overriding shared spec: Do not run test on that config + specify cuda arch. This spec will be removed soon. xl_16_1_1_12_gcc_8_3_1_cuda_11_0_2: variables: - SPEC: "~shared +openmp +cuda ~tests %xl@16.1.1.12 cxxflags=\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1\" cflags=\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1\" ^cuda@11.0.2 ${PROJECT_LASSEN_DEPS}" + SPEC: "~shared +openmp +cuda ~tests %xl@16.1.1.12 cxxflags=\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1\" cflags=\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1\" cuda_arch=70 ^cuda@11.0.2 ${PROJECT_LASSEN_DEPS}" extends: .build_and_test_on_lassen # Overriding shared spec: Longer allocation + Extra flags + Allow failure + Updated cuda xl_16_1_1_12_gcc_8_3_1_cuda_11_1_0: variables: - SPEC: " ${PROJECT_LASSEN_VARIANTS} +cuda %xl@16.1.1.12 cxxflags=\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1 -qthreaded -std=c++14 -O3 -qstrict -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qhot -qpic -qsmp=omp -qsuppress=1500-029 -qsuppress=1500-036\" cflags=--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1 cuda_arch=70 ^cuda@11.1.0 ^cmake@3.14.5 ${PROJECT_LASSEN_DEPS}" + SPEC: " ${PROJECT_LASSEN_VARIANTS} +cuda %xl@16.1.1.12 cxxflags=\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1 -qthreaded -std=c++14 -O3 -qstrict -qxlcompatmacros -qlanglvl=extended0x -qalias=noansi -qhot -qpic -qsmp=omp -qsuppress=1500-029 -qsuppress=1500-036\" cflags=\"--gcc-toolchain=/usr/tce/packages/gcc/gcc-8.3.1\" cuda_arch=70 ^cuda@11.1.0 ^cmake@3.14.5 ${PROJECT_LASSEN_DEPS}" LASSEN_BUILD_AND_TEST_JOB_ALLOC: "1 -W 120" allow_failure: true extends: .build_and_test_on_lassen From 3c0c12aef84fa2bbb71d0444d14b0ea777650035 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" Date: Wed, 2 Nov 2022 22:33:50 +0100 Subject: [PATCH 20/94] Update radiuss-spack-configs to get newer cmake on corona --- blt | 2 +- scripts/radiuss-spack-configs | 2 +- tpl/camp | 2 +- tpl/desul | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/blt b/blt index 97ea54d892..e35f490a8a 160000 --- a/blt +++ b/blt @@ -1 +1 @@ -Subproject commit 97ea54d892b4b1d56736830575c3db62e3d7674d +Subproject commit e35f490a8a8b1689e99b5f4308b5251f97eb36cf diff --git a/scripts/radiuss-spack-configs b/scripts/radiuss-spack-configs index 1ce0f4421c..6f4b566f03 160000 --- a/scripts/radiuss-spack-configs +++ b/scripts/radiuss-spack-configs @@ -1 +1 @@ -Subproject commit 1ce0f4421cfe6be4200ae9aa8abd113e09ee4c2d +Subproject commit 6f4b566f031a6dd6669c1dfbabf008ad57759055 diff --git a/tpl/camp b/tpl/camp index 9a6b8216a9..3a7486edb8 160000 --- a/tpl/camp +++ b/tpl/camp @@ -1 +1 @@ -Subproject commit 9a6b8216a9c5f6d8f05a77fc1402fa7e91043d5c +Subproject commit 3a7486edb8b1c50ce36ecace56384d32a1009e4f diff --git a/tpl/desul b/tpl/desul index e4b65e00a8..ac4eb0229a 160000 --- a/tpl/desul +++ b/tpl/desul @@ -1 +1 @@ -Subproject commit e4b65e00a8f26cfc7b59cf5f2fb75a24f69111ab +Subproject commit ac4eb0229a75a715b4d80f64ffff56c578ef6f41 From ccee7575447726d4e884c2f84af2c26d406e6591 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" Date: Thu, 3 Nov 2022 10:39:02 +0100 Subject: [PATCH 21/94] Update radiuss-spack-configs to fix cmake versions --- scripts/radiuss-spack-configs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/radiuss-spack-configs b/scripts/radiuss-spack-configs index 6f4b566f03..8d955b4a49 160000 --- a/scripts/radiuss-spack-configs +++ b/scripts/radiuss-spack-configs @@ -1 +1 @@ -Subproject commit 6f4b566f031a6dd6669c1dfbabf008ad57759055 +Subproject commit 8d955b4a49406a12dc44f1a9baab7ab7fa9c68df From f4eac778a19a678e664198077d20f0e9c0a5ddbc Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" Date: Thu, 3 Nov 2022 11:39:37 +0100 Subject: [PATCH 22/94] Add new constraint on CMake version in RAJA package --- scripts/spack_packages/raja/package.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/spack_packages/raja/package.py b/scripts/spack_packages/raja/package.py index 365c7743ee..1288f0a67c 100644 --- a/scripts/spack_packages/raja/package.py +++ b/scripts/spack_packages/raja/package.py @@ -73,8 +73,9 @@ class Raja(CachedCMakePackage, CudaPackage, ROCmPackage): depends_on("camp@main", when="@develop") depends_on("camp+openmp", when="+openmp") - depends_on("cmake@:3.20", when="+rocm", type="build") + depends_on("cmake@3.23:", when="@2022.10.0:") depends_on("cmake@3.14:", when="@2022.03.0:") + depends_on("cmake@:3.20", when="@2022.03.0:2022.03 +rocm", type="build") depends_on("llvm-openmp", when="+openmp %apple-clang") From 294e25dee79e542692b8ed3a88236110a6af8ac5 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" Date: Thu, 3 Nov 2022 12:17:16 +0100 Subject: [PATCH 23/94] More accurate CMake version requirements is RAJA package --- scripts/spack_packages/raja/package.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/spack_packages/raja/package.py b/scripts/spack_packages/raja/package.py index 1288f0a67c..6f1d2196cc 100644 --- a/scripts/spack_packages/raja/package.py +++ b/scripts/spack_packages/raja/package.py @@ -73,8 +73,9 @@ class Raja(CachedCMakePackage, CudaPackage, ROCmPackage): depends_on("camp@main", when="@develop") depends_on("camp+openmp", when="+openmp") - depends_on("cmake@3.23:", when="@2022.10.0:") - depends_on("cmake@3.14:", when="@2022.03.0:") + depends_on("cmake@3.20:", when="@2022.10.0:", type="build") + depends_on("cmake@3.23:", when="@2022.10.0: +rocm", type="build") + depends_on("cmake@3.14:", when="@2022.03.0:", type="build") depends_on("cmake@:3.20", when="@2022.03.0:2022.03 +rocm", type="build") depends_on("llvm-openmp", when="+openmp %apple-clang") From 43b4432939e0ad7dc4f60789ac0f6984b0cbbe14 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" Date: Thu, 3 Nov 2022 14:26:35 +0100 Subject: [PATCH 24/94] Add latest releases --- scripts/spack_packages/raja/package.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/spack_packages/raja/package.py b/scripts/spack_packages/raja/package.py index 6f1d2196cc..73be8b9674 100644 --- a/scripts/spack_packages/raja/package.py +++ b/scripts/spack_packages/raja/package.py @@ -24,6 +24,9 @@ class Raja(CachedCMakePackage, CudaPackage, ROCmPackage): version("develop", branch="develop", submodules=False) version("main", branch="main", submodules=False) + version("2022.10.1", tag="v2022.10.1", submodules=False) + version("2022.10.0", tag="v2022.10.0", submodules=False) + version("2022.03.1", tag="v2022.03.1", submodules=False) version("2022.03.0", tag="v2022.03.0", submodules=False) version("0.14.0", tag="v0.14.0", submodules="True") version("0.13.0", tag="v0.13.0", submodules="True") From d73cdd016201d5b841a24aa0f1a447ece1d4d3b2 Mon Sep 17 00:00:00 2001 From: Jason Burmark Date: Thu, 3 Nov 2022 13:52:52 -0700 Subject: [PATCH 25/94] Fix use of privatizer in tbb forall --- include/RAJA/policy/tbb/forall.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/RAJA/policy/tbb/forall.hpp b/include/RAJA/policy/tbb/forall.hpp index de811a4251..3f8bd966db 100644 --- a/include/RAJA/policy/tbb/forall.hpp +++ b/include/RAJA/policy/tbb/forall.hpp @@ -99,9 +99,9 @@ forall_impl(resources::Host host_res, [=](const brange& r, ForallParam fp) { using RAJA::internal::thread_privatize; auto privatizer = thread_privatize(loop_body); - auto body = privatizer.get_priv(); + auto& body = privatizer.get_priv(); for (auto i = r.begin(); i != r.end(); ++i) - expt::invoke_body(fp, loop_body, b[i]); + expt::invoke_body(fp, body, b[i]); return fp; }, @@ -138,7 +138,7 @@ forall_impl(resources::Host host_res, ::tbb::parallel_for(brange(0, dist, p.grain_size), [=](const brange& r) { using RAJA::internal::thread_privatize; auto privatizer = thread_privatize(loop_body); - auto body = privatizer.get_priv(); + auto& body = privatizer.get_priv(); for (auto i = r.begin(); i != r.end(); ++i) body(b[i]); }); @@ -196,9 +196,9 @@ forall_impl(resources::Host host_res, [=](const brange& r, ForallParam fp) { using RAJA::internal::thread_privatize; auto privatizer = thread_privatize(loop_body); - auto body = privatizer.get_priv(); + auto& body = privatizer.get_priv(); for (auto i = r.begin(); i != r.end(); ++i) - expt::invoke_body(fp, loop_body, b[i]); + expt::invoke_body(fp, body, b[i]); return fp; }, @@ -240,7 +240,7 @@ forall_impl(resources::Host host_res, [=](const brange& r) { using RAJA::internal::thread_privatize; auto privatizer = thread_privatize(loop_body); - auto body = privatizer.get_priv(); + auto& body = privatizer.get_priv(); for (auto i = r.begin(); i != r.end(); ++i) body(b[i]); }, From 5f5f11f81c595d218a9792b5012fc82f7c943bdb Mon Sep 17 00:00:00 2001 From: Jason Burmark Date: Thu, 3 Nov 2022 13:53:30 -0700 Subject: [PATCH 26/94] Add toss4 gcc and icpx scripts --- host-configs/lc-builds/toss4/gcc_X.cmake | 16 +++++++ host-configs/lc-builds/toss4/icpx_X.cmake | 16 +++++++ scripts/lc-builds/toss4_gcc.sh | 41 ++++++++++++++++++ scripts/lc-builds/toss4_icpx.sh | 51 +++++++++++++++++++++++ 4 files changed, 124 insertions(+) create mode 100755 host-configs/lc-builds/toss4/gcc_X.cmake create mode 100755 host-configs/lc-builds/toss4/icpx_X.cmake create mode 100755 scripts/lc-builds/toss4_gcc.sh create mode 100755 scripts/lc-builds/toss4_icpx.sh diff --git a/host-configs/lc-builds/toss4/gcc_X.cmake b/host-configs/lc-builds/toss4/gcc_X.cmake new file mode 100755 index 0000000000..017fabca22 --- /dev/null +++ b/host-configs/lc-builds/toss4/gcc_X.cmake @@ -0,0 +1,16 @@ +############################################################################### +# Copyright (c) 2016-22, Lawrence Livermore National Security, LLC +# and RAJA project contributors. See the RAJA/LICENSE file for details. +# +# SPDX-License-Identifier: (BSD-3-Clause) +############################################################################### + +set(RAJA_COMPILER "RAJA_COMPILER_GNU" CACHE STRING "") + +set(CMAKE_CXX_FLAGS_RELEASE "-Ofast -march=native" CACHE STRING "") +set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-Ofast -march=native -g" CACHE STRING "") +set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g" CACHE STRING "") + +set(RAJA_DATA_ALIGN 64 CACHE STRING "") + +set(RAJA_HOST_CONFIG_LOADED On CACHE BOOL "") diff --git a/host-configs/lc-builds/toss4/icpx_X.cmake b/host-configs/lc-builds/toss4/icpx_X.cmake new file mode 100755 index 0000000000..3b1e6cd194 --- /dev/null +++ b/host-configs/lc-builds/toss4/icpx_X.cmake @@ -0,0 +1,16 @@ +############################################################################### +# Copyright (c) 2016-22, Lawrence Livermore National Security, LLC +# and RAJA project contributors. See the RAJA/LICENSE file for details. +# +# SPDX-License-Identifier: (BSD-3-Clause) +############################################################################### + +set(RAJA_COMPILER "RAJA_COMPILER_ICC" CACHE STRING "") + +set(CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native" CACHE STRING "") +set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -march=native" CACHE STRING "") +set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g" CACHE STRING "") + +set(RAJA_DATA_ALIGN 64 CACHE STRING "") + +set(RAJA_HOST_CONFIG_LOADED On CACHE BOOL "") diff --git a/scripts/lc-builds/toss4_gcc.sh b/scripts/lc-builds/toss4_gcc.sh new file mode 100755 index 0000000000..e33fd2ed83 --- /dev/null +++ b/scripts/lc-builds/toss4_gcc.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +############################################################################### +# Copyright (c) 2016-22, Lawrence Livermore National Security, LLC +# and RAJA project contributors. See the RAJA/LICENSE file for details. +# +# SPDX-License-Identifier: (BSD-3-Clause) +############################################################################### + +if [ "$1" == "" ]; then + echo + echo "You must pass a compiler version number to script. For example," + echo " toss4_gcc.sh 10.3.1" + exit +fi + +COMP_VER=$1 +shift 1 + +BUILD_SUFFIX=lc_toss4-gcc-${COMP_VER} + +echo +echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it" +echo "Configuration extra arguments:" +echo " $@" +echo + +rm -rf build_${BUILD_SUFFIX} 2>/dev/null +mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} + +module load cmake/3.21.1 + +cmake \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_CXX_COMPILER=/usr/tce/packages/gcc/gcc-${COMP_VER}/bin/g++ \ + -DBLT_CXX_STD=c++14 \ + -C ../host-configs/lc-builds/toss4/gcc_X.cmake \ + -DENABLE_OPENMP=On \ + -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \ + "$@" \ + .. diff --git a/scripts/lc-builds/toss4_icpx.sh b/scripts/lc-builds/toss4_icpx.sh new file mode 100755 index 0000000000..dcbe916968 --- /dev/null +++ b/scripts/lc-builds/toss4_icpx.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash + +############################################################################### +# Copyright (c) 2016-22, Lawrence Livermore National Security, LLC +# and RAJA project contributors. See the RAJA/LICENSE file for details. +# +# SPDX-License-Identifier: (BSD-3-Clause) +############################################################################### + +if [ "$1" == "" ]; then + echo + echo "You must pass a compiler version number to script. For example," + echo " toss4_icpx.sh 2022.1.0" + exit +fi + +COMP_VER=$1 +shift 1 + +USE_TBB=On + +BUILD_SUFFIX=lc_toss4-icpx-${COMP_VER} + +echo +echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it" +echo "Configuration extra arguments:" +echo " $@" +echo + +rm -rf build_${BUILD_SUFFIX} 2>/dev/null +mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} + +module load cmake/3.21.1 + +# +# Note: we are using the intel-tce install path as the vanilla intel install +# path is not in /usr/tce/packages +# + +cmake \ + -DBLT_CXX_STD=c++14 \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_CXX_COMPILER=/usr/tce/packages/intel-tce/intel-${COMP_VER}/bin/icpx \ + -DCMAKE_C_COMPILER=/usr/tce/packages/intel-tce/intel-${COMP_VER}/bin/icx \ + -DBLT_CXX_STD=c++14 \ + -C ../host-configs/lc-builds/toss4/icpx_X.cmake \ + -DENABLE_OPENMP=On \ + -DRAJA_ENABLE_TBB=${USE_TBB} \ + -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \ + "$@" \ + .. From ea35c6b3a61cb2fa482d00ccc2cef24162f90bc6 Mon Sep 17 00:00:00 2001 From: Jason Burmark Date: Thu, 3 Nov 2022 16:48:17 -0700 Subject: [PATCH 27/94] Add toss4 icpc script --- host-configs/lc-builds/toss4/icpc_X.cmake | 16 +++++++ scripts/lc-builds/toss4_gcc.sh | 2 +- scripts/lc-builds/toss4_icpc.sh | 52 +++++++++++++++++++++++ scripts/lc-builds/toss4_icpx.sh | 2 +- 4 files changed, 70 insertions(+), 2 deletions(-) create mode 100755 host-configs/lc-builds/toss4/icpc_X.cmake create mode 100755 scripts/lc-builds/toss4_icpc.sh diff --git a/host-configs/lc-builds/toss4/icpc_X.cmake b/host-configs/lc-builds/toss4/icpc_X.cmake new file mode 100755 index 0000000000..9810b093f6 --- /dev/null +++ b/host-configs/lc-builds/toss4/icpc_X.cmake @@ -0,0 +1,16 @@ +############################################################################### +# Copyright (c) 2016-22, Lawrence Livermore National Security, LLC +# and RAJA project contributors. See the RAJA/LICENSE file for details. +# +# SPDX-License-Identifier: (BSD-3-Clause) +############################################################################### + +set(RAJA_COMPILER "RAJA_COMPILER_ICC" CACHE STRING "") + +set(CMAKE_CXX_FLAGS_RELEASE "-O3 -march=native -ansi-alias -diag-disable cpu-dispatch" CACHE STRING "") +set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -march=native -ansi-alias -diag-disable cpu-dispatch" CACHE STRING "") +set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g" CACHE STRING "") + +set(RAJA_DATA_ALIGN 64 CACHE STRING "") + +set(RAJA_HOST_CONFIG_LOADED On CACHE BOOL "") diff --git a/scripts/lc-builds/toss4_gcc.sh b/scripts/lc-builds/toss4_gcc.sh index e33fd2ed83..532d2ff130 100755 --- a/scripts/lc-builds/toss4_gcc.sh +++ b/scripts/lc-builds/toss4_gcc.sh @@ -20,7 +20,7 @@ shift 1 BUILD_SUFFIX=lc_toss4-gcc-${COMP_VER} echo -echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it" +echo "Creating build directory build_${BUILD_SUFFIX} and generating configuration in it" echo "Configuration extra arguments:" echo " $@" echo diff --git a/scripts/lc-builds/toss4_icpc.sh b/scripts/lc-builds/toss4_icpc.sh new file mode 100755 index 0000000000..e4d091a6ae --- /dev/null +++ b/scripts/lc-builds/toss4_icpc.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash + +############################################################################### +# Copyright (c) 2016-22, Lawrence Livermore National Security, LLC +# and RAJA project contributors. See the RAJA/LICENSE file for details. +# +# SPDX-License-Identifier: (BSD-3-Clause) +############################################################################### + +if [ "$1" == "" ]; then + echo + echo "You must pass a compiler version number to script. For example," + echo " toss4_icpc.sh 2021.6.0" + exit +fi + +COMP_VER=$1 +shift 1 + +USE_TBB=On + +BUILD_SUFFIX=lc_toss4-icpc-${COMP_VER} + +echo +echo "Creating build directory build_${BUILD_SUFFIX} and generating configuration in it" +echo "Configuration extra arguments:" +echo " $@" +echo + +rm -rf build_${BUILD_SUFFIX} 2>/dev/null +mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} + +module load cmake/3.21.1 + +## +# CMake option -DRAJA_ENABLE_FORCEINLINE_RECURSIVE=Off used to speed up compile +# times at a potential cost of slower 'forall' execution. +## + +cmake \ + -DBLT_CXX_STD=c++14 \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_CXX_COMPILER=/usr/tce/packages/intel-classic/intel-classic-${COMP_VER}/bin/icpc \ + -DCMAKE_C_COMPILER=/usr/tce/packages/intel-classic/intel-classic-${COMP_VER}/bin/icc \ + -DBLT_CXX_STD=c++14 \ + -C ../host-configs/lc-builds/toss4/icpc_X.cmake \ + -DRAJA_ENABLE_FORCEINLINE_RECURSIVE=Off \ + -DENABLE_OPENMP=On \ + -DRAJA_ENABLE_TBB=${USE_TBB} \ + -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \ + "$@" \ + .. diff --git a/scripts/lc-builds/toss4_icpx.sh b/scripts/lc-builds/toss4_icpx.sh index dcbe916968..37181a4efd 100755 --- a/scripts/lc-builds/toss4_icpx.sh +++ b/scripts/lc-builds/toss4_icpx.sh @@ -22,7 +22,7 @@ USE_TBB=On BUILD_SUFFIX=lc_toss4-icpx-${COMP_VER} echo -echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in it" +echo "Creating build directory build_${BUILD_SUFFIX} and generating configuration in it" echo "Configuration extra arguments:" echo " $@" echo From 31691cd27de76672ad8bdfe3d93e61b556e7eebb Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" Date: Fri, 4 Nov 2022 15:20:01 +0100 Subject: [PATCH 28/94] add latest camp version + update blt dependency --- scripts/spack_packages/camp/package.py | 1 + scripts/spack_packages/raja/package.py | 1 + 2 files changed, 2 insertions(+) diff --git a/scripts/spack_packages/camp/package.py b/scripts/spack_packages/camp/package.py index 8f40f6b295..c0aae50c4e 100644 --- a/scripts/spack_packages/camp/package.py +++ b/scripts/spack_packages/camp/package.py @@ -29,6 +29,7 @@ class Camp(CMakePackage, CudaPackage, ROCmPackage): maintainers = ["trws"] version("main", branch="main", submodules="True") + version('2022.10.0', sha256='3561c3ef00bbcb61fe3183c53d49b110e54910f47e7fc689ad9ccce57e55d6b8') version("2022.03.2", sha256="bc4aaeacfe8f2912e28f7a36fc731ab9e481bee15f2c6daf0cb208eed3f201eb") version("2022.03.0", sha256="e9090d5ee191ea3a8e36b47a8fe78f3ac95d51804f1d986d931e85b8f8dad721") version("0.3.0", sha256="129431a049ca5825443038ad5a37a86ba6d09b2618d5fe65d35f83136575afdb") diff --git a/scripts/spack_packages/raja/package.py b/scripts/spack_packages/raja/package.py index 73be8b9674..e91687bdb1 100644 --- a/scripts/spack_packages/raja/package.py +++ b/scripts/spack_packages/raja/package.py @@ -64,6 +64,7 @@ class Raja(CachedCMakePackage, CudaPackage, ROCmPackage): variant("desul", default=False, description="Build Desul Atomics backend") depends_on("blt") + depends_on("blt@0.5.2:", type="build", when="@2022.10.0:") depends_on("blt@0.5.0:", type="build", when="@0.14.1:") depends_on("blt@0.4.1", type="build", when="@0.14.0") depends_on("blt@0.4.0:", type="build", when="@0.13.0") From ec96ec3bde53aa6856edff66cc22f422ac8de4dc Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" Date: Fri, 4 Nov 2022 15:23:17 +0100 Subject: [PATCH 29/94] Minor cosmetic change --- scripts/spack_packages/camp/package.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/spack_packages/camp/package.py b/scripts/spack_packages/camp/package.py index c0aae50c4e..446f8e8ded 100644 --- a/scripts/spack_packages/camp/package.py +++ b/scripts/spack_packages/camp/package.py @@ -29,7 +29,7 @@ class Camp(CMakePackage, CudaPackage, ROCmPackage): maintainers = ["trws"] version("main", branch="main", submodules="True") - version('2022.10.0', sha256='3561c3ef00bbcb61fe3183c53d49b110e54910f47e7fc689ad9ccce57e55d6b8') + version("2022.10.0", sha256="3561c3ef00bbcb61fe3183c53d49b110e54910f47e7fc689ad9ccce57e55d6b8") version("2022.03.2", sha256="bc4aaeacfe8f2912e28f7a36fc731ab9e481bee15f2c6daf0cb208eed3f201eb") version("2022.03.0", sha256="e9090d5ee191ea3a8e36b47a8fe78f3ac95d51804f1d986d931e85b8f8dad721") version("0.3.0", sha256="129431a049ca5825443038ad5a37a86ba6d09b2618d5fe65d35f83136575afdb") From 18516a8c1aa095190ed846986d5299c4041143e3 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" Date: Fri, 4 Nov 2022 16:50:21 +0100 Subject: [PATCH 30/94] Cosmetic sync from Umpire --- scripts/spack_packages/camp/package.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/scripts/spack_packages/camp/package.py b/scripts/spack_packages/camp/package.py index 446f8e8ded..f5563ffe19 100644 --- a/scripts/spack_packages/camp/package.py +++ b/scripts/spack_packages/camp/package.py @@ -39,7 +39,7 @@ class Camp(CMakePackage, CudaPackage, ROCmPackage): # TODO: figure out gtest dependency and then set this default True. variant("tests", default=False, description="Build tests") - variant("openmp", default=False, description="Build OpenMP support") + variant("openmp", default=False, description="Build with OpenMP support") depends_on("cub", when="+cuda") @@ -52,11 +52,11 @@ def cmake_args(self): options.append("-DBLT_SOURCE_DIR={0}".format(spec["blt"].prefix)) - options.append("-DENABLE_OPENMP=" + ("On" if "+openmp" in spec else "Off")) if "+cuda" in spec: - options.extend( - ["-DENABLE_CUDA=ON", "-DCUDA_TOOLKIT_ROOT_DIR=%s" % (spec["cuda"].prefix)] - ) + options.extend([ + "-DENABLE_CUDA=ON", + "-DCUDA_TOOLKIT_ROOT_DIR=%s" % (spec["cuda"].prefix) + ]) if not spec.satisfies("cuda_arch=none"): cuda_arch = spec.variants["cuda_arch"].value @@ -68,7 +68,10 @@ def cmake_args(self): options.append("-DENABLE_CUDA=OFF") if "+rocm" in spec: - options.extend(["-DENABLE_HIP=ON", "-DHIP_ROOT_DIR={0}".format(spec["hip"].prefix)]) + options.extend([ + "-DENABLE_HIP=ON", + "-DHIP_ROOT_DIR={0}".format(spec["hip"].prefix) + ]) hip_repair_options(options, spec) @@ -79,6 +82,7 @@ def cmake_args(self): else: options.append("-DENABLE_HIP=OFF") + options.append(self.define_from_variant("ENABLE_OPENMP", "openmp")) options.append(self.define_from_variant("ENABLE_TESTS", "tests")) return options From fe27236878e91f3d609a2f16077d0293f20591fe Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" Date: Fri, 4 Nov 2022 17:06:51 +0100 Subject: [PATCH 31/94] Add newest BLT package to get blt@5.2.0 --- scripts/spack_packages/blt/package.py | 40 +++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 scripts/spack_packages/blt/package.py diff --git a/scripts/spack_packages/blt/package.py b/scripts/spack_packages/blt/package.py new file mode 100644 index 0000000000..87fb122dff --- /dev/null +++ b/scripts/spack_packages/blt/package.py @@ -0,0 +1,40 @@ +# Copyright 2013-2022 Lawrence Livermore National Security, LLC and other +# Spack Project Developers. See the top-level COPYRIGHT file for details. +# +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +from spack.package import * + + +class Blt(Package): + """BLT is a streamlined CMake-based foundation for Building, Linking and + Testing large-scale high performance computing (HPC) applications.""" + + homepage = "https://github.com/LLNL/blt" + url = "https://github.com/LLNL/blt/archive/v0.4.0.tar.gz" + git = "https://github.com/LLNL/blt.git" + tags = ["radiuss"] + + maintainers = ["white238", "davidbeckingsale"] + + version("develop", branch="develop") + version("main", branch="main") + # Note: 0.4.0+ contains a breaking change to BLT created targets + # if you export targets this could cause problems in downstream + # projects if not handled properly. More info here: + # https://llnl-blt.readthedocs.io/en/develop/tutorial/exporting_targets.html + version("0.5.2", sha256="95b924cfbb2bddd9b1a92e96603b2fd485a19721d59ddf8ff50baefc1714d7ea") + version("0.5.1", sha256="ff7e87eefc48704a0721b66174612b945955adaa0a56aa69dd0473074fa4badf") + version("0.5.0", sha256="5f680ef922d0e0a7ff1b1a5fc8aa107cd4f543ad888cbc9b12639bea72a6ab1f") + version("0.4.1", sha256="16cc3e067ddcf48b99358107e5035a17549f52dcc701a35cd18a9d9f536826c1") + version("0.4.0", sha256="f3bc45d28b9b2eb6df43b75d4f6f89a1557d73d012da7b75bac1be0574767193") + version("0.3.6", sha256="6276317c29e7ff8524fbea47d9288ddb40ac06e9f9da5e878bf9011e2c99bf71") + version("0.3.5", sha256="68a1c224bb9203461ae6f5ab0ff3c50b4a58dcce6c2d2799489a1811f425fb84") + version("0.3.0", sha256="bb917a67cb7335d6721c997ba9c5dca70506006d7bba5e0e50033dd0836481a5") + version("0.2.5", sha256="3a000f60194e47b3e5623cc528cbcaf88f7fea4d9620b3c7446ff6658dc582a5") + version("0.2.0", sha256="c0cadf1269c2feb189e398a356e3c49170bc832df95e5564e32bdbb1eb0fa1b3") + + depends_on("cmake", type="run") + + def install(self, spec, prefix): + install_tree(".", prefix) From efb6f9a1efbe6bc59634377acdda62c6d4842fb6 Mon Sep 17 00:00:00 2001 From: Jason Burmark Date: Fri, 4 Nov 2022 11:38:23 -0700 Subject: [PATCH 32/94] Remove redundant BLT_CXX_STD --- scripts/lc-builds/toss3_icpc.sh | 1 - scripts/lc-builds/toss4_icpc.sh | 1 - scripts/lc-builds/toss4_icpx.sh | 1 - 3 files changed, 3 deletions(-) diff --git a/scripts/lc-builds/toss3_icpc.sh b/scripts/lc-builds/toss3_icpc.sh index 5f2474bb44..7e0487dacd 100755 --- a/scripts/lc-builds/toss3_icpc.sh +++ b/scripts/lc-builds/toss3_icpc.sh @@ -50,7 +50,6 @@ module load cmake/3.20.2 ## cmake \ - -DBLT_CXX_STD=c++14 \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_CXX_COMPILER=/usr/tce/packages/intel/intel-${COMP_VER}/bin/icpc \ -DCMAKE_C_COMPILER=/usr/tce/packages/intel/intel-${COMP_VER}/bin/icc \ diff --git a/scripts/lc-builds/toss4_icpc.sh b/scripts/lc-builds/toss4_icpc.sh index e4d091a6ae..bc1f9427cf 100755 --- a/scripts/lc-builds/toss4_icpc.sh +++ b/scripts/lc-builds/toss4_icpc.sh @@ -38,7 +38,6 @@ module load cmake/3.21.1 ## cmake \ - -DBLT_CXX_STD=c++14 \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_CXX_COMPILER=/usr/tce/packages/intel-classic/intel-classic-${COMP_VER}/bin/icpc \ -DCMAKE_C_COMPILER=/usr/tce/packages/intel-classic/intel-classic-${COMP_VER}/bin/icc \ diff --git a/scripts/lc-builds/toss4_icpx.sh b/scripts/lc-builds/toss4_icpx.sh index 37181a4efd..7c2a3c4561 100755 --- a/scripts/lc-builds/toss4_icpx.sh +++ b/scripts/lc-builds/toss4_icpx.sh @@ -38,7 +38,6 @@ module load cmake/3.21.1 # cmake \ - -DBLT_CXX_STD=c++14 \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_CXX_COMPILER=/usr/tce/packages/intel-tce/intel-${COMP_VER}/bin/icpx \ -DCMAKE_C_COMPILER=/usr/tce/packages/intel-tce/intel-${COMP_VER}/bin/icx \ From 337ea9ec0b5f003570cffcb7d69511fb64227cfd Mon Sep 17 00:00:00 2001 From: Jason Burmark Date: Fri, 4 Nov 2022 11:49:34 -0700 Subject: [PATCH 33/94] Add icpc omp shared lib message --- scripts/lc-builds/toss4_icpc.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/scripts/lc-builds/toss4_icpc.sh b/scripts/lc-builds/toss4_icpc.sh index bc1f9427cf..90d72d92be 100755 --- a/scripts/lc-builds/toss4_icpc.sh +++ b/scripts/lc-builds/toss4_icpc.sh @@ -49,3 +49,15 @@ cmake \ -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \ "$@" \ .. + +echo +echo "***********************************************************************" +echo +echo "cd into directory build_${BUILD_SUFFIX} and run make to build RAJA" +echo +echo " Please note that you may need to add some intel openmp libraries to your" +echo " LD_LIBRARY_PATH to run with openmp." +echo +echo " LD_LIBRARY_PATH=\$LD_LIBRARY_PATH:/usr/tce/packages/intel-classic-tce/intel-classic-2021.6.0/compiler/lib/intel64_lin" +echo +echo "***********************************************************************" From 8ebec2caede03fc16a9e74411113f2321728e1ca Mon Sep 17 00:00:00 2001 From: Jason Burmark Date: Fri, 4 Nov 2022 15:32:14 -0700 Subject: [PATCH 34/94] use tce wrappers with icpc --- scripts/lc-builds/toss4_icpc.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/lc-builds/toss4_icpc.sh b/scripts/lc-builds/toss4_icpc.sh index 90d72d92be..4c46e80f9c 100755 --- a/scripts/lc-builds/toss4_icpc.sh +++ b/scripts/lc-builds/toss4_icpc.sh @@ -39,8 +39,8 @@ module load cmake/3.21.1 cmake \ -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_CXX_COMPILER=/usr/tce/packages/intel-classic/intel-classic-${COMP_VER}/bin/icpc \ - -DCMAKE_C_COMPILER=/usr/tce/packages/intel-classic/intel-classic-${COMP_VER}/bin/icc \ + -DCMAKE_CXX_COMPILER=/usr/tce/packages/intel-classic-tce/intel-classic-${COMP_VER}/bin/icpc \ + -DCMAKE_C_COMPILER=/usr/tce/packages/intel-classic-tce/intel-classic-${COMP_VER}/bin/icc \ -DBLT_CXX_STD=c++14 \ -C ../host-configs/lc-builds/toss4/icpc_X.cmake \ -DRAJA_ENABLE_FORCEINLINE_RECURSIVE=Off \ From a7130048ba26bf0be1e72ec194b199700a546d46 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" Date: Mon, 7 Nov 2022 15:46:26 +0100 Subject: [PATCH 35/94] Update camp package with latest spack develop + update camp requirement --- scripts/spack_packages/camp/package.py | 10 ++++++++++ scripts/spack_packages/raja/package.py | 1 + 2 files changed, 11 insertions(+) diff --git a/scripts/spack_packages/camp/package.py b/scripts/spack_packages/camp/package.py index f5563ffe19..4e25af8022 100644 --- a/scripts/spack_packages/camp/package.py +++ b/scripts/spack_packages/camp/package.py @@ -16,6 +16,16 @@ def hip_repair_options(options, spec): ) +def hip_repair_cache(options, spec): + # there is only one dir like this, but the version component is unknown + options.append( + cmake_cache_path( + "HIP_CLANG_INCLUDE_PATH", + glob.glob("{}/lib/clang/*/include".format(spec["llvm-amdgpu"].prefix))[0], + ) + ) + + class Camp(CMakePackage, CudaPackage, ROCmPackage): """ Compiler agnostic metaprogramming library providing concepts, diff --git a/scripts/spack_packages/raja/package.py b/scripts/spack_packages/raja/package.py index e91687bdb1..557afbe422 100644 --- a/scripts/spack_packages/raja/package.py +++ b/scripts/spack_packages/raja/package.py @@ -72,6 +72,7 @@ class Raja(CachedCMakePackage, CudaPackage, ROCmPackage): depends_on("camp@0.2.2:0.2.3", when="@0.14.0") depends_on("camp@0.1.0", when="@0.10.0:0.13.0") + depends_on("camp@2022.10.0:", when="@2022.10.0:") depends_on("camp@2022.03.2:", when="@2022.03.0:") depends_on("camp@main", when="@main") depends_on("camp@main", when="@develop") From 77229afe7c98561361127d3defdf323cea228bb0 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" Date: Mon, 7 Nov 2022 16:08:34 +0100 Subject: [PATCH 36/94] Share utility function definition with camp --- scripts/spack_packages/raja/package.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/scripts/spack_packages/raja/package.py b/scripts/spack_packages/raja/package.py index 557afbe422..16e1996a97 100644 --- a/scripts/spack_packages/raja/package.py +++ b/scripts/spack_packages/raja/package.py @@ -7,12 +7,9 @@ import glob from spack.package import * +from spack.pkg.builtin.camp import hip_repair_cache -def hip_repair_entries(entries, spec): - # there is only one dir like this, but the version component is unknown - entries.append(cmake_cache_path("HIP_CLANG_INCLUDE_PATH", glob.glob("{}/lib/clang/*/include".format(spec["llvm-amdgpu"].prefix))[0])) - class Raja(CachedCMakePackage, CudaPackage, ROCmPackage): """RAJA Parallel Framework.""" @@ -139,7 +136,7 @@ def initconfig_hardware_entries(self): if "+rocm" in spec: entries.append(cmake_cache_option("ENABLE_HIP", True)) entries.append(cmake_cache_path("HIP_ROOT_DIR", "{0}".format(spec["hip"].prefix))) - hip_repair_entries(entries, spec) + hip_repair_cache(entries, spec) archs = self.spec.variants["amdgpu_target"].value if archs != "none": arch_str = ",".join(archs) From ab01fa1dee8895c97cbd101fbd8d6f0529b9056f Mon Sep 17 00:00:00 2001 From: Kenny Weiss Date: Mon, 7 Nov 2022 18:56:00 -0800 Subject: [PATCH 37/94] Exports `roctx` when present --- cmake/SetupPackages.cmake | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cmake/SetupPackages.cmake b/cmake/SetupPackages.cmake index 45dadbee4b..9e057f3a50 100644 --- a/cmake/SetupPackages.cmake +++ b/cmake/SetupPackages.cmake @@ -101,13 +101,16 @@ if (RAJA_ENABLE_HIP AND RAJA_ENABLE_ROCTX) include(FindRoctracer) blt_import_library(NAME roctx INCLUDES ${ROCTX_INCLUDE_DIRS} - LIBRARIES ${ROCTX_LIBRARIES}) + LIBRARIES ${ROCTX_LIBRARIES} + EXPORTABLE ON + TREAT_INCLUDES_AS_SYSTEM ON) endif () set(TPL_DEPS) blt_list_append(TO TPL_DEPS ELEMENTS nvtoolsext IF RAJA_ENABLE_NV_TOOLS_EXT) blt_list_append(TO TPL_DEPS ELEMENTS cub IF RAJA_ENABLE_EXTERNAL_CUB) blt_list_append(TO TPL_DEPS ELEMENTS rocPRIM IF RAJA_ENABLE_EXTERNAL_ROCPRIM) +blt_list_append(TO TPL_DEPS ELEMENTS roctx IF RAJA_ENABLE_ROCTX) set(RAJA_NEEDS_BLT_TPLS False) if (RAJA_ENABLE_CUDA OR RAJA_ENABLE_HIP OR RAJA_ENABLE_OPENMP OR RAJA_ENABLE_MPI) From b799a5e52edf77e2c3aed104066de7f23f39ce1a Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" Date: Thu, 10 Nov 2022 11:06:29 +0100 Subject: [PATCH 38/94] Adds a proper compiler entry + attempt to use hip CXX compiler --- scripts/spack_packages/raja/package.py | 52 +++++++++++++++++++------- 1 file changed, 38 insertions(+), 14 deletions(-) diff --git a/scripts/spack_packages/raja/package.py b/scripts/spack_packages/raja/package.py index 16e1996a97..113d8e5956 100644 --- a/scripts/spack_packages/raja/package.py +++ b/scripts/spack_packages/raja/package.py @@ -108,13 +108,50 @@ def cache_name(self): hostname = socket.gethostname() if "SYS_TYPE" in env: hostname = hostname.rstrip("1234567890") - return "{0}-{1}-{2}@{3}.cmake".format( + return "{0}-{1}-{2}@{3}#{4}.cmake".format( hostname, self._get_sys_type(self.spec), self.spec.compiler.name, self.spec.compiler.version, + self.spec.dag_hash ) + def initconfig_compiler_entries(self): + spec = self.spec + # Default entries are already defined in CachedCMakePackage, inherit them: + entries = super(Raja, self).initconfig_compiler_entries() + + # Switch to hip as a CPP compiler. + # adrienbernede-22-11: + # This was only done in upstream Spack raja package. + # I could not find the equivalent logic in Spack source, so keeping it. + if "+rocm" in spec: + entries.insert(0, cmake_cache_path("CMAKE_CXX_COMPILER", spec["hip"].hipcc)) + + # Override CachedCMakePackage CMAKE_C_FLAGS and CMAKE_CXX_FLAGS add + # +libcpp specific flags + flags = spec.compiler_flags + + # use global spack compiler flags + cppflags = " ".join(flags["cppflags"]) + if cppflags: + # avoid always ending up with " " with no flags defined + cppflags += " " + + cflags = cppflags + " ".join(flags["cflags"]) + if "+libcpp" in spec: + cflags += " ".join([cflags,"-DGTEST_HAS_CXXABI_H_=0"]) + if cflags: + entries.append(cmake_cache_string("CMAKE_C_FLAGS", cflags)) + + cxxflags = cppflags + " ".join(flags["cxxflags"]) + if "+libcpp" in spec: + cxxflags += " ".join([cxxflags,"-stdlib=libc++ -DGTEST_HAS_CXXABI_H_=0"]) + if cxxflags: + entries.append(cmake_cache_string("CMAKE_CXX_FLAGS", cxxflags)) + + return entries + def initconfig_hardware_entries(self): spec = self.spec entries = super(Raja, self).initconfig_hardware_entries() @@ -159,19 +196,6 @@ def initconfig_package_entries(self): entries.append(cmake_cache_option("RAJA_ENABLE_DESUL_ATOMICS", "+desul" in spec)) - # use global spack compiler flags - cflags = " ".join(spec.compiler_flags["cflags"]) - if "+libcpp" in spec: - cflags += " ".join([cflags,"-DGTEST_HAS_CXXABI_H_=0"]) - if cflags: - entries.append(cmake_cache_string("CMAKE_C_FLAGS", cflags)) - - cxxflags = " ".join(spec.compiler_flags["cxxflags"]) - if "+libcpp" in spec: - cxxflags += " ".join([cxxflags,"-stdlib=libc++ -DGTEST_HAS_CXXABI_H_=0"]) - if cxxflags: - entries.append(cmake_cache_string("CMAKE_CXX_FLAGS", cxxflags)) - if "+desul" in spec: entries.append(cmake_cache_string("BLT_CXX_STD","c++14")) if "+cuda" in spec: From d76633a08ff7397653da0d5536f617d646335236 Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" Date: Thu, 10 Nov 2022 11:23:22 +0100 Subject: [PATCH 39/94] Fix call to get dag_hash --- scripts/spack_packages/raja/package.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/spack_packages/raja/package.py b/scripts/spack_packages/raja/package.py index 113d8e5956..5798a961e7 100644 --- a/scripts/spack_packages/raja/package.py +++ b/scripts/spack_packages/raja/package.py @@ -113,7 +113,7 @@ def cache_name(self): self._get_sys_type(self.spec), self.spec.compiler.name, self.spec.compiler.version, - self.spec.dag_hash + self.spec.dag_hash(8) ) def initconfig_compiler_entries(self): From c04d7425825ab5460369f439252c6f7568d2fc8c Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" Date: Thu, 10 Nov 2022 11:33:39 +0100 Subject: [PATCH 40/94] Fix "#" not allowed --- scripts/spack_packages/raja/package.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/spack_packages/raja/package.py b/scripts/spack_packages/raja/package.py index 5798a961e7..e414fa70bd 100644 --- a/scripts/spack_packages/raja/package.py +++ b/scripts/spack_packages/raja/package.py @@ -108,7 +108,7 @@ def cache_name(self): hostname = socket.gethostname() if "SYS_TYPE" in env: hostname = hostname.rstrip("1234567890") - return "{0}-{1}-{2}@{3}#{4}.cmake".format( + return "{0}-{1}-{2}@{3}-{4}.cmake".format( hostname, self._get_sys_type(self.spec), self.spec.compiler.name, From 18c14e2162d7d901cff1a66118e40f9251efcb4a Mon Sep 17 00:00:00 2001 From: "Adrien M. BERNEDE" Date: Thu, 10 Nov 2022 15:35:18 +0100 Subject: [PATCH 41/94] Cosmetic reordering + add build type --- scripts/spack_packages/raja/package.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/scripts/spack_packages/raja/package.py b/scripts/spack_packages/raja/package.py index e414fa70bd..875e63107d 100644 --- a/scripts/spack_packages/raja/package.py +++ b/scripts/spack_packages/raja/package.py @@ -194,6 +194,24 @@ def initconfig_package_entries(self): option_prefix = "RAJA_" if spec.satisfies("@0.14.0:") else "" + # TPL locations + entries.append("#------------------{0}".format("-" * 60)) + entries.append("# TPLs") + entries.append("#------------------{0}\n".format("-" * 60)) + + entries.append(cmake_cache_path("BLT_SOURCE_DIR", spec["blt"].prefix)) + if "camp" in self.spec: + entries.append(cmake_cache_path("camp_DIR", spec["camp"].prefix)) + + # Build options + entries.append("#------------------{0}".format("-" * 60)) + entries.append("# Build Options") + entries.append("#------------------{0}\n".format("-" * 60)) + + entries.append(cmake_cache_string( + "CMAKE_BUILD_TYPE", spec.variants["build_type"].value)) + entries.append(cmake_cache_option("BUILD_SHARED_LIBS", "+shared" in spec)) + entries.append(cmake_cache_option("RAJA_ENABLE_DESUL_ATOMICS", "+desul" in spec)) if "+desul" in spec: @@ -201,10 +219,6 @@ def initconfig_package_entries(self): if "+cuda" in spec: entries.append(cmake_cache_string("CMAKE_CUDA_STANDARD", "14")) - entries.append(cmake_cache_path("BLT_SOURCE_DIR", spec["blt"].prefix)) - if "camp" in self.spec: - entries.append(cmake_cache_path("camp_DIR", spec["camp"].prefix)) - entries.append(cmake_cache_option("BUILD_SHARED_LIBS", "+shared" in spec)) entries.append( cmake_cache_option("{}ENABLE_EXAMPLES".format(option_prefix), "+examples" in spec) ) From af59e4f074e967c132e33a00f2636b90c15144d3 Mon Sep 17 00:00:00 2001 From: Jason Burmark Date: Fri, 11 Nov 2022 10:37:12 -0800 Subject: [PATCH 42/94] Add forall_param_pack where missing --- include/RAJA/policy/openmp_target/kernel/For.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/RAJA/policy/openmp_target/kernel/For.hpp b/include/RAJA/policy/openmp_target/kernel/For.hpp index c5d60d01a9..08f64e227a 100644 --- a/include/RAJA/policy/openmp_target/kernel/For.hpp +++ b/include/RAJA/policy/openmp_target/kernel/For.hpp @@ -57,7 +57,7 @@ struct StatementExecutor{}, TypedRangeSegment(0, len), for_wrapper); + forall_impl(r, omp_target_parallel_for_exec{}, TypedRangeSegment(0, len), for_wrapper, RAJA::expt::get_empty_forall_param_pack()); } }; From 3196e70f638fafdcab206d1f7a0ec3570428d597 Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Mon, 14 Nov 2022 10:56:04 -0800 Subject: [PATCH 43/94] initial try to add chai support to launch --- include/RAJA/policy/cuda/launch.hpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/include/RAJA/policy/cuda/launch.hpp b/include/RAJA/policy/cuda/launch.hpp index 9221925176..0f5b79aa50 100644 --- a/include/RAJA/policy/cuda/launch.hpp +++ b/include/RAJA/policy/cuda/launch.hpp @@ -69,11 +69,23 @@ struct LaunchExecute> { static_cast(params.threads.value[1]), static_cast(params.threads.value[2]) }; + std::cout<<"launching "< zero && gridSize.y > zero && gridSize.z > zero && blockSize.x > zero && blockSize.y > zero && blockSize.z > zero ) { + + util::PluginContext context{util::make_context>()}; + util::callPreCapturePlugins(context); + + using RAJA::util::trigger_updates_before; + trigger_updates_before(body_in); + + util::callPostCapturePlugins(context); + + util::callPreLaunchPlugins(context); + RAJA_FT_BEGIN; { // @@ -89,6 +101,9 @@ struct LaunchExecute> { } RAJA_FT_END; + + util::callPostLaunchPlugins(context); + } }; From 14c9f3f23e1f6f74b6840f5f13ca44cdca6db94d Mon Sep 17 00:00:00 2001 From: Jason Burmark Date: Mon, 14 Nov 2022 12:16:18 -0800 Subject: [PATCH 44/94] Use compile time blocksize in forall --- include/RAJA/policy/cuda/forall.hpp | 10 ++++++---- include/RAJA/policy/hip/forall.hpp | 10 ++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/include/RAJA/policy/cuda/forall.hpp b/include/RAJA/policy/cuda/forall.hpp index bc0936c353..ce278c4ee2 100644 --- a/include/RAJA/policy/cuda/forall.hpp +++ b/include/RAJA/policy/cuda/forall.hpp @@ -81,15 +81,17 @@ cuda_dim_t getGridDim(cuda_dim_member_t len, cuda_dim_t blockDim) * ****************************************************************************** */ +template __device__ __forceinline__ unsigned int getGlobalIdx_1D_1D() { unsigned int blockId = blockIdx.x; - unsigned int threadId = blockId * blockDim.x + threadIdx.x; + unsigned int threadId = blockId * BlockSize + threadIdx.x; return threadId; } +template __device__ __forceinline__ unsigned int getGlobalNumThreads_1D_1D() { - unsigned int numThreads = blockDim.x * gridDim.x; + unsigned int numThreads = BlockSize * gridDim.x; return numThreads; } @@ -144,7 +146,7 @@ __launch_bounds__(BlockSize, BlocksPerSM) __global__ using RAJA::internal::thread_privatize; auto privatizer = thread_privatize(loop_body); auto& body = privatizer.get_priv(); - auto ii = static_cast(getGlobalIdx_1D_1D()); + auto ii = static_cast(getGlobalIdx_1D_1D()); if (ii < length) { body(idx[ii]); } @@ -167,7 +169,7 @@ __launch_bounds__(BlockSize, 1) __global__ using RAJA::internal::thread_privatize; auto privatizer = thread_privatize(loop_body); auto& body = privatizer.get_priv(); - auto ii = static_cast(getGlobalIdx_1D_1D()); + auto ii = static_cast(getGlobalIdx_1D_1D()); if ( ii < length ) { RAJA::expt::invoke_body( f_params, body, idx[ii] ); diff --git a/include/RAJA/policy/hip/forall.hpp b/include/RAJA/policy/hip/forall.hpp index ab3109b354..1aa447633e 100644 --- a/include/RAJA/policy/hip/forall.hpp +++ b/include/RAJA/policy/hip/forall.hpp @@ -77,15 +77,17 @@ hip_dim_t getGridDim(hip_dim_member_t len, hip_dim_t blockDim) * ****************************************************************************** */ +template __device__ __forceinline__ unsigned int getGlobalIdx_1D_1D() { unsigned int blockId = blockIdx.x; - unsigned int threadId = blockId * blockDim.x + threadIdx.x; + unsigned int threadId = blockId * BlockSize + threadIdx.x; return threadId; } +template __device__ __forceinline__ unsigned int getGlobalNumThreads_1D_1D() { - unsigned int numThreads = blockDim.x * gridDim.x; + unsigned int numThreads = BlockSize * gridDim.x; return numThreads; } @@ -139,7 +141,7 @@ __launch_bounds__(BlockSize, 1) __global__ using RAJA::internal::thread_privatize; auto privatizer = thread_privatize(loop_body); auto& body = privatizer.get_priv(); - auto ii = static_cast(getGlobalIdx_1D_1D()); + auto ii = static_cast(getGlobalIdx_1D_1D()); if (ii < length) { body(idx[ii]); } @@ -161,7 +163,7 @@ __launch_bounds__(BlockSize, 1) __global__ using RAJA::internal::thread_privatize; auto privatizer = thread_privatize(loop_body); auto& body = privatizer.get_priv(); - auto ii = static_cast(getGlobalIdx_1D_1D()); + auto ii = static_cast(getGlobalIdx_1D_1D()); if ( ii < length ) { RAJA::expt::invoke_body( f_params, body, idx[ii] ); From 1d38664ee5ea35bf405689c275a7b96e2114089e Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Mon, 14 Nov 2022 12:58:33 -0800 Subject: [PATCH 45/94] reorg of plugin calls --- include/RAJA/policy/cuda/launch.hpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/include/RAJA/policy/cuda/launch.hpp b/include/RAJA/policy/cuda/launch.hpp index 0f5b79aa50..308641e80e 100644 --- a/include/RAJA/policy/cuda/launch.hpp +++ b/include/RAJA/policy/cuda/launch.hpp @@ -76,16 +76,6 @@ struct LaunchExecute> { blockSize.x > zero && blockSize.y > zero && blockSize.z > zero ) { - util::PluginContext context{util::make_context>()}; - util::callPreCapturePlugins(context); - - using RAJA::util::trigger_updates_before; - trigger_updates_before(body_in); - - util::callPostCapturePlugins(context); - - util::callPreLaunchPlugins(context); - RAJA_FT_BEGIN; { // @@ -93,17 +83,27 @@ struct LaunchExecute> { // BODY body = RAJA::cuda::make_launch_body(gridSize, blockSize, params.shared_mem_size, cuda_res, std::forward(body_in)); + util::PluginContext context{util::make_context>()}; + util::callPreCapturePlugins(context); + + using RAJA::util::trigger_updates_before; + trigger_updates_before(body); + + util::callPostCapturePlugins(context); + + util::callPreLaunchPlugins(context); + // // Launch the kernel // void *args[] = {(void*)&body}; RAJA::cuda::launch((const void*)func, gridSize, blockSize, args, params.shared_mem_size, cuda_res, async, kernel_name); + + util::callPostLaunchPlugins(context); } RAJA_FT_END; - util::callPostLaunchPlugins(context); - } }; From d8e1edfde04deca756e93c1c38788aa934cd9c1f Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Mon, 14 Nov 2022 13:07:30 -0800 Subject: [PATCH 46/94] working version of launch + chai --- include/RAJA/policy/cuda/launch.hpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/include/RAJA/policy/cuda/launch.hpp b/include/RAJA/policy/cuda/launch.hpp index 308641e80e..0cf23eb9ad 100644 --- a/include/RAJA/policy/cuda/launch.hpp +++ b/include/RAJA/policy/cuda/launch.hpp @@ -69,7 +69,6 @@ struct LaunchExecute> { static_cast(params.threads.value[1]), static_cast(params.threads.value[2]) }; - std::cout<<"launching "< zero && gridSize.y > zero && gridSize.z > zero && @@ -83,11 +82,14 @@ struct LaunchExecute> { // BODY body = RAJA::cuda::make_launch_body(gridSize, blockSize, params.shared_mem_size, cuda_res, std::forward(body_in)); + // + // Configure plugins + // util::PluginContext context{util::make_context>()}; util::callPreCapturePlugins(context); using RAJA::util::trigger_updates_before; - trigger_updates_before(body); + auto p_body = trigger_updates_before(body); util::callPostCapturePlugins(context); @@ -96,7 +98,7 @@ struct LaunchExecute> { // // Launch the kernel // - void *args[] = {(void*)&body}; + void *args[] = {(void*)&p_body}; RAJA::cuda::launch((const void*)func, gridSize, blockSize, args, params.shared_mem_size, cuda_res, async, kernel_name); util::callPostLaunchPlugins(context); From 3643233a3a994477d3c3b2a0b928995d885cc44a Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Mon, 14 Nov 2022 13:46:46 -0800 Subject: [PATCH 47/94] plugin support for cuda and hip --- include/RAJA/policy/cuda/#launch.hpp# | 1202 +++++++++++++++++++++++++ include/RAJA/policy/cuda/launch.hpp | 63 +- include/RAJA/policy/hip/launch.hpp | 68 +- 3 files changed, 1319 insertions(+), 14 deletions(-) create mode 100644 include/RAJA/policy/cuda/#launch.hpp# diff --git a/include/RAJA/policy/cuda/#launch.hpp# b/include/RAJA/policy/cuda/#launch.hpp# new file mode 100644 index 0000000000..d98dd20c5b --- /dev/null +++ b/include/RAJA/policy/cuda/#launch.hpp# @@ -0,0 +1,1202 @@ +/*! + ****************************************************************************** + * + * \file + * + * \brief RAJA header file containing user interface for RAJA::launch::cuda + * + ****************************************************************************** + */ + +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2016-22, Lawrence Livermore National Security, LLC +// and RAJA project contributors. See the RAJA/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#ifndef RAJA_pattern_launch_cuda_HPP +#define RAJA_pattern_launch_cuda_HPP + +#include "RAJA/pattern/launch/launch_core.hpp" +#include "RAJA/pattern/detail/privatizer.hpp" +#include "RAJA/policy/cuda/policy.hpp" +#include "RAJA/policy/cuda/MemUtils_CUDA.hpp" +#include "RAJA/policy/cuda/raja_cudaerrchk.hpp" +#include "RAJA/util/resource.hpp" + +namespace RAJA +{ + +template +__global__ void launch_global_fcn(BODY body_in) +{ + + LaunchContext ctx; + + using RAJA::internal::thread_privatize; + auto privatizer = thread_privatize(body_in); + auto& body = privatizer.get_priv(); + + //Set pointer to shared memory + extern __shared__ char raja_shmem_ptr[]; + ctx.shared_mem_ptr = raja_shmem_ptr; + + body(ctx); +} + +template +struct LaunchExecute> { +// cuda_launch_t num_threads set to 1, but not used in launch of kernel + + template + static void exec(const LaunchParams ¶ms, const char *kernel_name, BODY_IN &&body_in) + { + + using BODY = camp::decay; + + auto func = launch_global_fcn; + + resources::Cuda cuda_res = resources::Cuda::get_default(); + // + // Compute the number of blocks and threads + // + cuda_dim_t gridSize{ static_cast(params.teams.value[0]), + static_cast(params.teams.value[1]), + static_cast(params.teams.value[2]) }; + + cuda_dim_t blockSize{ static_cast(params.threads.value[0]), + static_cast(params.threads.value[1]), + static_cast(params.threads.value[2]) }; + + // Only launch kernel if we have something to iterate over + constexpr cuda_dim_member_t zero = 0; + if ( gridSize.x > zero && gridSize.y > zero && gridSize.z > zero && + blockSize.x > zero && blockSize.y > zero && blockSize.z > zero ) { + + + RAJA_FT_BEGIN; + { + // + // Privatize the loop_body, using make_launch_body to setup reductions + // + BODY body = RAJA::cuda::make_launch_body(gridSize, blockSize, params.shared_mem_size, cuda_res, std::forward(body_in)); + + // + // Configure plugins + // + util::PluginContext context{util::make_context>()}; + util::callPreCapturePlugins(context); + + using RAJA::util::trigger_updates_before; + auto p_body = trigger_updates_before(body); + + util::callPostCapturePlugins(context); + + util::callPreLaunchPlugins(context); + + // + // Launch the kernel + // + void *args[] = {(void*)&p_body}; + RAJA::cuda::launch((const void*)func, gridSize, blockSize, args, params.shared_mem_size, cuda_res, async, kernel_name); + + util::callPostLaunchPlugins(context); + } + + RAJA_FT_END; + + } + + }; + + template + static resources::EventProxy + exec(RAJA::resources::Resource res, const LaunchParams ¶ms, const char *kernel_name, BODY_IN &&body_in) + { + + using BODY = camp::decay; + + auto func = launch_global_fcn; + + // Get the concrete resource + resources::Cuda cuda_res = res.get(); + + // + // Compute the number of blocks and threads + // + + cuda_dim_t gridSize{ static_cast(params.teams.value[0]), + static_cast(params.teams.value[1]), + static_cast(params.teams.value[2]) }; + + cuda_dim_t blockSize{ static_cast(params.threads.value[0]), + static_cast(params.threads.value[1]), + static_cast(params.threads.value[2]) }; + + // Only launch kernel if we have something to iterate over + constexpr cuda_dim_member_t zero = 0; + if ( gridSize.x > zero && gridSize.y > zero && gridSize.z > zero && + blockSize.x > zero && blockSize.y > zero && blockSize.z > zero ) { + + RAJA_FT_BEGIN; + + { + // + // Privatize the loop_body, using make_launch_body to setup reductions + // + BODY body = RAJA::cuda::make_launch_body( + gridSize, blockSize, params.shared_mem_size, cuda_res, std::forward(body_in)); + + // + // Configure plugins + // + util::PluginContext context{util::make_context>()}; + util::callPreCapturePlugins(context); + + using RAJA::util::trigger_updates_before; + auto p_body = trigger_updates_before(body); + + util::callPostCapturePlugins(context); + + util::callPreLaunchPlugins(context); + + // + // Launch the kernel + // + void *args[] = {(void*)&p_body}; + RAJA::cuda::launch((const void*)func, gridSize, blockSize, args, params.shared_mem_size, cuda_res, async, kernel_name); + + util::callPostLaunchPlugins(context); + } + + RAJA_FT_END; + } + + return resources::EventProxy(res); + } + +}; + + +template +__launch_bounds__(num_threads, BLOCKS_PER_SM) __global__ + void launch_global_fcn_fixed(BODY body_in) +{ + + LaunchContext ctx; + + using RAJA::internal::thread_privatize; + auto privatizer = thread_privatize(body_in); + auto& body = privatizer.get_priv(); + + //Set pointer to shared memory + extern __shared__ char raja_shmem_ptr[]; + ctx.shared_mem_ptr = raja_shmem_ptr; + + body(ctx); +} + +template +struct LaunchExecute> { + + template + static void exec(const LaunchParams ¶ms, const char *kernel_name, BODY_IN &&body_in) + { + + using BODY = camp::decay; + + auto func = launch_global_fcn_fixed; + + resources::Cuda cuda_res = resources::Cuda::get_default(); + + // + // Compute the number of blocks and threads + // + + cuda_dim_t gridSize{ static_cast(params.teams.value[0]), + static_cast(params.teams.value[1]), + static_cast(params.teams.value[2]) }; + + cuda_dim_t blockSize{ static_cast(params.threads.value[0]), + static_cast(params.threads.value[1]), + static_cast(params.threads.value[2]) }; + + // Only launch kernel if we have something to iterate over + constexpr cuda_dim_member_t zero = 0; + if ( gridSize.x > zero && gridSize.y > zero && gridSize.z > zero && + blockSize.x > zero && blockSize.y > zero && blockSize.z > zero ) { + + RAJA_FT_BEGIN; + + { + // + // Privatize the loop_body, using make_launch_body to setup reductions + // + BODY body = RAJA::cuda::make_launch_body( + gridSize, blockSize, params.shared_mem_size, cuda_res, std::forward(body_in)); + + // + // Configure plugins + // + util::PluginContext context{util::make_context>()}; + util::callPreCapturePlugins(context); + + using RAJA::util::trigger_updates_before; + auto p_body = trigger_updates_before(body); + + util::callPostCapturePlugins(context); + + util::callPreLaunchPlugins(context); + + // + // Launch the kernel + // + void *args[] = {(void*)&p_body}; + RAJA::cuda::launch((const void*)func, gridSize, blockSize, args, params.shared_mem_size, cuda_res, async, kernel_name); + + util::callPostLaunchPlugins(context); + } + + RAJA_FT_END; + } + + } +} + + template + static resources::EventProxy + exec(RAJA::resources::Resource res, const LaunchParams ¶ms, const char *kernel_name, BODY_IN &&body_in) + { + + using BODY = camp::decay; + + auto func = launch_global_fcn_fixed; + + //Get the concrete resource + resources::Cuda cuda_res = res.get(); + + // + // Compute the number of blocks and threads + // + + cuda_dim_t gridSize{ static_cast(params.teams.value[0]), + static_cast(params.teams.value[1]), + static_cast(params.teams.value[2]) }; + + cuda_dim_t blockSize{ static_cast(params.threads.value[0]), + static_cast(params.threads.value[1]), + static_cast(params.threads.value[2]) }; + + // Only launch kernel if we have something to iterate over + constexpr cuda_dim_member_t zero = 0; + if ( gridSize.x > zero && gridSize.y > zero && gridSize.z > zero && + blockSize.x > zero && blockSize.y > zero && blockSize.z > zero ) { + + RAJA_FT_BEGIN; + + { + // + // Privatize the loop_body, using make_launch_body to setup reductions + // + BODY body = RAJA::cuda::make_launch_body( + gridSize, blockSize, params.shared_mem_size, cuda_res, std::forward(body_in)); + + + // + // Configure plugins + // + util::PluginContext context{util::make_context>()}; + util::callPreCapturePlugins(context); + + using RAJA::util::trigger_updates_before; + auto p_body = trigger_updates_before(body); + + util::callPostCapturePlugins(context); + + util::callPreLaunchPlugins(context); + + // + // Launch the kernel + // + void *args[] = {(void*)&p_body}; + RAJA::cuda::launch((const void*)func, gridSize, blockSize, args, params.shared_mem_size, cuda_res, async, kernel_name); + + util::callPostLaunchPlugins(context); + } + + RAJA_FT_END; + } + + return resources::EventProxy(res); + } + +}; + +/* + CUDA global thread mapping +*/ +template +struct cuda_global_thread; + +using cuda_global_thread_x = cuda_global_thread<0>; +using cuda_global_thread_y = cuda_global_thread<1>; +using cuda_global_thread_z = cuda_global_thread<2>; + +template +struct LoopExecute, SEGMENT> { + + template + static RAJA_INLINE RAJA_DEVICE void exec( + LaunchContext const RAJA_UNUSED_ARG(&ctx), + SEGMENT const &segment, + BODY const &body) + { + + const int len = segment.end() - segment.begin(); + { + const int tx = internal::get_cuda_dim(threadIdx) + + internal::get_cuda_dim(blockDim)*internal::get_cuda_dim(blockIdx); + + if (tx < len) body(*(segment.begin() + tx)); + } + } +}; + +using cuda_global_thread_xy = cuda_global_thread<0,1>; +using cuda_global_thread_xz = cuda_global_thread<0,2>; +using cuda_global_thread_yx = cuda_global_thread<1,0>; +using cuda_global_thread_yz = cuda_global_thread<1,2>; +using cuda_global_thread_zx = cuda_global_thread<2,0>; +using cuda_global_thread_zy = cuda_global_thread<2,1>; + +template +struct LoopExecute, SEGMENT> { + + template + static RAJA_INLINE RAJA_DEVICE void exec( + LaunchContext const RAJA_UNUSED_ARG(&ctx), + SEGMENT const &segment0, + SEGMENT const &segment1, + BODY const &body) + { + const int len1 = segment1.end() - segment1.begin(); + const int len0 = segment0.end() - segment0.begin(); + { + const int tx = internal::get_cuda_dim(threadIdx) + + internal::get_cuda_dim(blockDim)*internal::get_cuda_dim(blockIdx); + + const int ty = internal::get_cuda_dim(threadIdx) + + internal::get_cuda_dim(blockDim)*internal::get_cuda_dim(blockIdx); + + if (tx < len0 && ty < len1) + body(*(segment0.begin() + tx), *(segment1.begin() + ty)); + } + } +}; + +using cuda_global_thread_xyz = cuda_global_thread<0,1,2>; +using cuda_global_thread_xzy = cuda_global_thread<0,2,1>; +using cuda_global_thread_yxz = cuda_global_thread<1,0,2>; +using cuda_global_thread_yzx = cuda_global_thread<1,2,0>; +using cuda_global_thread_zxy = cuda_global_thread<2,0,1>; +using cuda_global_thread_zyx = cuda_global_thread<2,1,0>; + +template +struct LoopExecute, SEGMENT> { + + template + static RAJA_INLINE RAJA_DEVICE void exec( + LaunchContext const RAJA_UNUSED_ARG(&ctx), + SEGMENT const &segment0, + SEGMENT const &segment1, + SEGMENT const &segment2, + BODY const &body) + { + const int len2 = segment2.end() - segment2.begin(); + const int len1 = segment1.end() - segment1.begin(); + const int len0 = segment0.end() - segment0.begin(); + { + const int tx = internal::get_cuda_dim(threadIdx) + + internal::get_cuda_dim(blockDim)*internal::get_cuda_dim(blockIdx); + + const int ty = internal::get_cuda_dim(threadIdx) + + internal::get_cuda_dim(blockDim)*internal::get_cuda_dim(blockIdx); + + const int tz = internal::get_cuda_dim(threadIdx) + + internal::get_cuda_dim(blockDim)*internal::get_cuda_dim(blockIdx); + + if (tx < len0 && ty < len1 && tz < len2) + body(*(segment0.begin() + tx), + *(segment1.begin() + ty), + *(segment1.begin() + ty)); + } + } +}; + +/* +Reshape threads in a block into a 1D iteration space +*/ +template +struct cuda_flatten_block_threads_direct{}; + +using cuda_flatten_block_threads_xy_direct = cuda_flatten_block_threads_direct<0,1>; +using cuda_flatten_block_threads_xz_direct = cuda_flatten_block_threads_direct<0,2>; +using cuda_flatten_block_threads_yx_direct = cuda_flatten_block_threads_direct<1,0>; +using cuda_flatten_block_threads_yz_direct = cuda_flatten_block_threads_direct<1,2>; +using cuda_flatten_block_threads_zx_direct = cuda_flatten_block_threads_direct<2,0>; +using cuda_flatten_block_threads_zy_direct = cuda_flatten_block_threads_direct<2,1>; + +using cuda_flatten_block_threads_xyz_direct = cuda_flatten_block_threads_direct<0,1,2>; +using cuda_flatten_block_threads_xzy_direct = cuda_flatten_block_threads_direct<0,2,1>; +using cuda_flatten_block_threads_yxz_direct = cuda_flatten_block_threads_direct<1,0,2>; +using cuda_flatten_block_threads_yzx_direct = cuda_flatten_block_threads_direct<1,2,0>; +using cuda_flatten_block_threads_zxy_direct = cuda_flatten_block_threads_direct<2,0,1>; +using cuda_flatten_block_threads_zyx_direct = cuda_flatten_block_threads_direct<2,1,0>; + +template +struct cuda_flatten_block_threads_loop{}; + +using cuda_flatten_block_threads_xy_loop = cuda_flatten_block_threads_loop<0,1>; +using cuda_flatten_block_threads_xz_loop = cuda_flatten_block_threads_loop<0,2>; +using cuda_flatten_block_threads_yx_loop = cuda_flatten_block_threads_loop<1,0>; +using cuda_flatten_block_threads_yz_loop = cuda_flatten_block_threads_loop<1,2>; +using cuda_flatten_block_threads_zx_loop = cuda_flatten_block_threads_loop<2,0>; +using cuda_flatten_block_threads_zy_loop = cuda_flatten_block_threads_loop<2,1>; + +using cuda_flatten_block_threads_xyz_loop = cuda_flatten_block_threads_loop<0,1,2>; +using cuda_flatten_block_threads_xzy_loop = cuda_flatten_block_threads_loop<0,2,1>; +using cuda_flatten_block_threads_yxz_loop = cuda_flatten_block_threads_loop<1,0,2>; +using cuda_flatten_block_threads_yzx_loop = cuda_flatten_block_threads_loop<1,2,0>; +using cuda_flatten_block_threads_zxy_loop = cuda_flatten_block_threads_loop<2,0,1>; +using cuda_flatten_block_threads_zyx_loop = cuda_flatten_block_threads_loop<2,1,0>; + +template +struct LoopExecute, SEGMENT> +{ + template + static RAJA_INLINE RAJA_DEVICE void exec( + LaunchContext const RAJA_UNUSED_ARG(&ctx), + SEGMENT const &segment, + BODY const &body) + { + + const int len = segment.end() - segment.begin(); + { + const int tx = internal::get_cuda_dim(threadIdx); + const int ty = internal::get_cuda_dim(threadIdx); + const int bx = internal::get_cuda_dim(blockDim); + const int tid = tx + bx*ty; + + if (tid < len) body(*(segment.begin() + tid)); + } + } +}; + +template +struct LoopExecute, SEGMENT> +{ + template + static RAJA_INLINE RAJA_DEVICE void exec( + LaunchContext const RAJA_UNUSED_ARG(&ctx), + SEGMENT const &segment, + BODY const &body) + { + const int len = segment.end() - segment.begin(); + + const int tx = internal::get_cuda_dim(threadIdx); + const int ty = internal::get_cuda_dim(threadIdx); + + const int bx = internal::get_cuda_dim(blockDim); + const int by = internal::get_cuda_dim(blockDim); + + for(int tid = tx + bx*ty; tid < len; tid += bx*by) { + body(*(segment.begin() + tid)); + } + + } +}; + +template +struct LoopExecute, SEGMENT> +{ + template + static RAJA_INLINE RAJA_DEVICE void exec( + LaunchContext const RAJA_UNUSED_ARG(&ctx), + SEGMENT const &segment, + BODY const &body) + { + const int len = segment.end() - segment.begin(); + { + const int tx = internal::get_cuda_dim(threadIdx); + const int ty = internal::get_cuda_dim(threadIdx); + const int tz = internal::get_cuda_dim(threadIdx); + const int bx = internal::get_cuda_dim(blockDim); + const int by = internal::get_cuda_dim(blockDim); + const int tid = tx + bx*(ty + by*tz); + + if (tid < len) body(*(segment.begin() + tid)); + } + } +}; + +template +struct LoopExecute, SEGMENT> +{ + template + static RAJA_INLINE RAJA_DEVICE void exec( + LaunchContext const RAJA_UNUSED_ARG(&ctx), + SEGMENT const &segment, + BODY const &body) + { + const int len = segment.end() - segment.begin(); + + const int tx = internal::get_cuda_dim(threadIdx); + const int ty = internal::get_cuda_dim(threadIdx); + const int tz = internal::get_cuda_dim(threadIdx); + const int bx = internal::get_cuda_dim(blockDim); + const int by = internal::get_cuda_dim(blockDim); + const int bz = internal::get_cuda_dim(blockDim); + + for(int tid = tx + bx*(ty + by*tz); tid < len; tid += bx*by*bz) { + body(*(segment.begin() + tid)); + } + + } +}; + + +/* + CUDA thread loops with block strides +*/ +template +struct LoopExecute, SEGMENT> { + + template + static RAJA_INLINE RAJA_DEVICE void exec( + LaunchContext const RAJA_UNUSED_ARG(&ctx), + SEGMENT const &segment, + BODY const &body) + { + + const int len = segment.end() - segment.begin(); + + for (int tx = internal::get_cuda_dim(threadIdx); + tx < len; + tx += internal::get_cuda_dim(blockDim) ) + { + body(*(segment.begin() + tx)); + } + } +}; + +/* + CUDA thread direct mappings +*/ +template +struct LoopExecute, SEGMENT> { + + template + static RAJA_INLINE RAJA_DEVICE void exec( + LaunchContext const RAJA_UNUSED_ARG(&ctx), + SEGMENT const &segment, + BODY const &body) + { + + const int len = segment.end() - segment.begin(); + { + const int tx = internal::get_cuda_dim(threadIdx); + if (tx < len) body(*(segment.begin() + tx)); + } + } +}; + + +/* + CUDA block loops with grid strides +*/ +template +struct LoopExecute, SEGMENT> { + + template + static RAJA_INLINE RAJA_DEVICE void exec( + LaunchContext const RAJA_UNUSED_ARG(&ctx), + SEGMENT const &segment, + BODY const &body) + { + + const int len = segment.end() - segment.begin(); + + for (int bx = internal::get_cuda_dim(blockIdx); + bx < len; + bx += internal::get_cuda_dim(gridDim) ) { + body(*(segment.begin() + bx)); + } + } +}; + +/* + CUDA block direct mappings +*/ +template +struct LoopExecute, SEGMENT> { + + template + static RAJA_INLINE RAJA_DEVICE void exec( + LaunchContext const RAJA_UNUSED_ARG(&ctx), + SEGMENT const &segment, + BODY const &body) + { + + const int len = segment.end() - segment.begin(); + { + const int bx = internal::get_cuda_dim(blockIdx); + if (bx < len) body(*(segment.begin() + bx)); + } + } +}; + +/* + CUDA thread loops with block strides + Return Index +*/ +template +struct LoopICountExecute, SEGMENT> { + + template + static RAJA_INLINE RAJA_DEVICE void exec( + LaunchContext const RAJA_UNUSED_ARG(&ctx), + SEGMENT const &segment, + BODY const &body) + { + + const int len = segment.end() - segment.begin(); + + for (int tx = internal::get_cuda_dim(threadIdx); + tx < len; + tx += internal::get_cuda_dim(blockDim) ) + { + body(*(segment.begin() + tx), tx); + } + } +}; + +/* + CUDA thread direct mappings +*/ +template +struct LoopICountExecute, SEGMENT> { + + template + static RAJA_INLINE RAJA_DEVICE void exec( + LaunchContext const RAJA_UNUSED_ARG(&ctx), + SEGMENT const &segment, + BODY const &body) + { + + const int len = segment.end() - segment.begin(); + { + const int tx = internal::get_cuda_dim(threadIdx); + if (tx < len) body(*(segment.begin() + tx), tx); + } + } +}; + +/* + CUDA block loops with grid strides +*/ +template +struct LoopICountExecute, SEGMENT> { + + template + static RAJA_INLINE RAJA_DEVICE void exec( + LaunchContext const RAJA_UNUSED_ARG(&ctx), + SEGMENT const &segment, + BODY const &body) + { + + const int len = segment.end() - segment.begin(); + + for (int bx = internal::get_cuda_dim(blockIdx); + bx < len; + bx += internal::get_cuda_dim(gridDim) ) { + body(*(segment.begin() + bx), bx); + } + } +}; + +/* + CUDA block direct mappings +*/ +template +struct LoopICountExecute, SEGMENT> { + + template + static RAJA_INLINE RAJA_DEVICE void exec( + LaunchContext const RAJA_UNUSED_ARG(&ctx), + SEGMENT const &segment, + BODY const &body) + { + + const int len = segment.end() - segment.begin(); + { + const int bx = internal::get_cuda_dim(blockIdx); + if (bx < len) body(*(segment.begin() + bx), bx); + } + } +}; + +// perfectly nested cuda direct policies +using cuda_block_xy_nested_direct = cuda_block_xyz_direct<0,1>; +using cuda_block_xz_nested_direct = cuda_block_xyz_direct<0,2>; +using cuda_block_yx_nested_direct = cuda_block_xyz_direct<1,0>; +using cuda_block_yz_nested_direct = cuda_block_xyz_direct<1,2>; +using cuda_block_zx_nested_direct = cuda_block_xyz_direct<2,0>; +using cuda_block_zy_nested_direct = cuda_block_xyz_direct<2,1>; + +using cuda_block_xyz_nested_direct = cuda_block_xyz_direct<0,1,2>; +using cuda_block_xzy_nested_direct = cuda_block_xyz_direct<0,2,1>; +using cuda_block_yxz_nested_direct = cuda_block_xyz_direct<1,0,2>; +using cuda_block_yzx_nested_direct = cuda_block_xyz_direct<1,2,0>; +using cuda_block_zxy_nested_direct = cuda_block_xyz_direct<2,0,1>; +using cuda_block_zyx_nested_direct = cuda_block_xyz_direct<2,1,0>; + +template +struct LoopExecute, SEGMENT> { + + template + static RAJA_INLINE RAJA_DEVICE void exec( + LaunchContext const RAJA_UNUSED_ARG(&ctx), + SEGMENT const &segment0, + SEGMENT const &segment1, + BODY const &body) + { + const int len1 = segment1.end() - segment1.begin(); + const int len0 = segment0.end() - segment0.begin(); + { + const int tx = internal::get_cuda_dim(blockIdx); + const int ty = internal::get_cuda_dim(blockIdx); + if (tx < len0 && ty < len1) + body(*(segment0.begin() + tx), *(segment1.begin() + ty)); + } + } +}; + +template +struct LoopExecute, SEGMENT> { + + template + static RAJA_INLINE RAJA_DEVICE void exec( + LaunchContext const RAJA_UNUSED_ARG(&ctx), + SEGMENT const &segment0, + SEGMENT const &segment1, + SEGMENT const &segment2, + BODY const &body) + { + const int len2 = segment2.end() - segment2.begin(); + const int len1 = segment1.end() - segment1.begin(); + const int len0 = segment0.end() - segment0.begin(); + { + const int tx = internal::get_cuda_dim(blockIdx); + const int ty = internal::get_cuda_dim(blockIdx); + const int tz = internal::get_cuda_dim(blockIdx); + if (tx < len0 && ty < len1 && tz < len2) + body(*(segment0.begin() + tx), + *(segment1.begin() + ty), + *(segment2.begin() + tz)); + } + } +}; + +/* + Perfectly nested cuda direct policies + Return local index +*/ +template +struct LoopICountExecute, SEGMENT> { + + template + static RAJA_INLINE RAJA_DEVICE void exec( + LaunchContext const RAJA_UNUSED_ARG(&ctx), + SEGMENT const &segment0, + SEGMENT const &segment1, + BODY const &body) + { + const int len1 = segment1.end() - segment1.begin(); + const int len0 = segment0.end() - segment0.begin(); + { + const int tx = internal::get_cuda_dim(blockIdx); + const int ty = internal::get_cuda_dim(blockIdx); + if (tx < len0 && ty < len1) + body(*(segment0.begin() + tx), *(segment1.begin() + ty), + tx, ty); + } + } +}; + +template +struct LoopICountExecute, SEGMENT> { + + template + static RAJA_INLINE RAJA_DEVICE void exec( + LaunchContext const RAJA_UNUSED_ARG(&ctx), + SEGMENT const &segment0, + SEGMENT const &segment1, + SEGMENT const &segment2, + BODY const &body) + { + const int len2 = segment2.end() - segment2.begin(); + const int len1 = segment1.end() - segment1.begin(); + const int len0 = segment0.end() - segment0.begin(); + { + const int tx = internal::get_cuda_dim(blockIdx); + const int ty = internal::get_cuda_dim(blockIdx); + const int tz = internal::get_cuda_dim(blockIdx); + if (tx < len0 && ty < len1 && tz < len2) + body(*(segment0.begin() + tx), + *(segment1.begin() + ty), + *(segment2.begin() + tz), tx, ty, tz); + } + } +}; + +// perfectly nested cuda loop policies +using cuda_block_xy_nested_loop = cuda_block_xyz_loop<0,1>; +using cuda_block_xz_nested_loop = cuda_block_xyz_loop<0,2>; +using cuda_block_yx_nested_loop = cuda_block_xyz_loop<1,0>; +using cuda_block_yz_nested_loop = cuda_block_xyz_loop<1,2>; +using cuda_block_zx_nested_loop = cuda_block_xyz_loop<2,0>; +using cuda_block_zy_nested_loop = cuda_block_xyz_loop<2,1>; + +using cuda_block_xyz_nested_loop = cuda_block_xyz_loop<0,1,2>; +using cuda_block_xzy_nested_loop = cuda_block_xyz_loop<0,2,1>; +using cuda_block_yxz_nested_loop = cuda_block_xyz_loop<1,0,2>; +using cuda_block_yzx_nested_loop = cuda_block_xyz_loop<1,2,0>; +using cuda_block_zxy_nested_loop = cuda_block_xyz_loop<2,0,1>; +using cuda_block_zyx_nested_loop = cuda_block_xyz_loop<2,1,0>; + +template +struct LoopExecute, SEGMENT> { + + template + static RAJA_INLINE RAJA_DEVICE void exec( + LaunchContext const RAJA_UNUSED_ARG(&ctx), + SEGMENT const &segment0, + SEGMENT const &segment1, + BODY const &body) + { + const int len1 = segment1.end() - segment1.begin(); + const int len0 = segment0.end() - segment0.begin(); + { + + for (int bx = internal::get_cuda_dim(blockIdx); + bx < len0; + bx += internal::get_cuda_dim(gridDim)) + { + for (int by = internal::get_cuda_dim(blockIdx); + by < len1; + by += internal::get_cuda_dim(gridDim)) + { + + body(*(segment0.begin() + bx), *(segment1.begin() + by)); + } + } + } + } +}; + +template +struct LoopExecute, SEGMENT> { + + template + static RAJA_INLINE RAJA_DEVICE void exec( + LaunchContext const RAJA_UNUSED_ARG(&ctx), + SEGMENT const &segment0, + SEGMENT const &segment1, + SEGMENT const &segment2, + BODY const &body) + { + const int len2 = segment2.end() - segment2.begin(); + const int len1 = segment1.end() - segment1.begin(); + const int len0 = segment0.end() - segment0.begin(); + + for (int bx = internal::get_cuda_dim(blockIdx); + bx < len0; + bx += internal::get_cuda_dim(gridDim)) + { + + for (int by = internal::get_cuda_dim(blockIdx); + by < len1; + by += internal::get_cuda_dim(gridDim)) + { + + for (int bz = internal::get_cuda_dim(blockIdx); + bz < len2; + bz += internal::get_cuda_dim(gridDim)) + { + + body(*(segment0.begin() + bx), + *(segment1.begin() + by), + *(segment2.begin() + bz)); + } + } + } + } +}; + +/* + perfectly nested cuda loop policies + returns local index +*/ +template +struct LoopICountExecute, SEGMENT> { + + template + static RAJA_INLINE RAJA_DEVICE void exec( + LaunchContext const RAJA_UNUSED_ARG(&ctx), + SEGMENT const &segment0, + SEGMENT const &segment1, + BODY const &body) + { + const int len1 = segment1.end() - segment1.begin(); + const int len0 = segment0.end() - segment0.begin(); + { + + for (int bx = internal::get_cuda_dim(blockIdx); + bx < len0; + bx += internal::get_cuda_dim(gridDim)) + { + for (int by = internal::get_cuda_dim(blockIdx); + by < len1; + by += internal::get_cuda_dim(gridDim)) + { + + body(*(segment0.begin() + bx), *(segment1.begin() + by), bx, by); + } + } + } + } +}; + + +template +struct LoopICountExecute, SEGMENT> { + + template + static RAJA_INLINE RAJA_DEVICE void exec( + LaunchContext const RAJA_UNUSED_ARG(&ctx), + SEGMENT const &segment0, + SEGMENT const &segment1, + SEGMENT const &segment2, + BODY const &body) + { + const int len2 = segment2.end() - segment2.begin(); + const int len1 = segment1.end() - segment1.begin(); + const int len0 = segment0.end() - segment0.begin(); + + for (int bx = internal::get_cuda_dim(blockIdx); + bx < len0; + bx += internal::get_cuda_dim(gridDim)) + { + + for (int by = internal::get_cuda_dim(blockIdx); + by < len1; + by += internal::get_cuda_dim(gridDim)) + { + + for (int bz = internal::get_cuda_dim(blockIdx); + bz < len2; + bz += internal::get_cuda_dim(gridDim)) + { + + body(*(segment0.begin() + bx), + *(segment1.begin() + by), + *(segment2.begin() + bz), bx, by, bz); + } + } + } + } +}; + + +template +struct TileExecute, SEGMENT> { + + template + static RAJA_INLINE RAJA_DEVICE void exec( + LaunchContext const RAJA_UNUSED_ARG(&ctx), + TILE_T tile_size, + SEGMENT const &segment, + BODY const &body) + { + + const int len = segment.end() - segment.begin(); + + for (int tx = internal::get_cuda_dim(threadIdx) * tile_size; + tx < len; + tx += internal::get_cuda_dim(blockDim) * tile_size) + { + body(segment.slice(tx, tile_size)); + } + } +}; + + +template +struct TileExecute, SEGMENT> { + + template + static RAJA_INLINE RAJA_DEVICE void exec( + LaunchContext const RAJA_UNUSED_ARG(&ctx), + TILE_T tile_size, + SEGMENT const &segment, + BODY const &body) + { + + const int len = segment.end() - segment.begin(); + + int tx = internal::get_cuda_dim(threadIdx) * tile_size; + if(tx < len) + { + body(segment.slice(tx, tile_size)); + } + } +}; + + +template +struct TileExecute, SEGMENT> { + + template + static RAJA_INLINE RAJA_DEVICE void exec( + LaunchContext const RAJA_UNUSED_ARG(&ctx), + TILE_T tile_size, + SEGMENT const &segment, + BODY const &body) + { + + const int len = segment.end() - segment.begin(); + + for (int tx = internal::get_cuda_dim(blockIdx) * tile_size; + + tx < len; + + tx += internal::get_cuda_dim(gridDim) * tile_size) + { + body(segment.slice(tx, tile_size)); + } + } +}; + + +template +struct TileExecute, SEGMENT> { + + template + static RAJA_INLINE RAJA_DEVICE void exec( + LaunchContext const RAJA_UNUSED_ARG(&ctx), + TILE_T tile_size, + SEGMENT const &segment, + BODY const &body) + { + + const int len = segment.end() - segment.begin(); + + int tx = internal::get_cuda_dim(blockIdx) * tile_size; + if(tx < len){ + body(segment.slice(tx, tile_size)); + } + } +}; + +//Tile execute + return index +template +struct TileICountExecute, SEGMENT> { + + template + static RAJA_INLINE RAJA_DEVICE void exec( + LaunchContext const RAJA_UNUSED_ARG(&ctx), + TILE_T tile_size, + SEGMENT const &segment, + BODY const &body) + { + + const int len = segment.end() - segment.begin(); + + for (int tx = internal::get_cuda_dim(threadIdx) * tile_size; + tx < len; + tx += internal::get_cuda_dim(blockDim) * tile_size) + { + body(segment.slice(tx, tile_size), tx/tile_size); + } + } +}; + + +template +struct TileICountExecute, SEGMENT> { + + template + static RAJA_INLINE RAJA_DEVICE void exec( + LaunchContext const RAJA_UNUSED_ARG(&ctx), + TILE_T tile_size, + SEGMENT const &segment, + BODY const &body) + { + + const int len = segment.end() - segment.begin(); + + int tx = internal::get_cuda_dim(threadIdx) * tile_size; + if(tx < len) + { + body(segment.slice(tx, tile_size), tx/tile_size); + } + } +}; + + +template +struct TileICountExecute, SEGMENT> { + + template + static RAJA_INLINE RAJA_DEVICE void exec( + LaunchContext const RAJA_UNUSED_ARG(&ctx), + TILE_T tile_size, + SEGMENT const &segment, + BODY const &body) + { + + const int len = segment.end() - segment.begin(); + + for (int bx = internal::get_cuda_dim(blockIdx) * tile_size; + + bx < len; + + bx += internal::get_cuda_dim(gridDim) * tile_size) + { + body(segment.slice(bx, tile_size), bx/tile_size); + } + } +}; + + +template +struct TileICountExecute, SEGMENT> { + + template + static RAJA_INLINE RAJA_DEVICE void exec( + LaunchContext const RAJA_UNUSED_ARG(&ctx), + TILE_T tile_size, + SEGMENT const &segment, + BODY const &body) + { + + const int len = segment.end() - segment.begin(); + + int bx = internal::get_cuda_dim(blockIdx) * tile_size; + if(bx < len){ + body(segment.slice(bx, tile_size), bx/tile_size); + } + } +}; + +} // namespace RAJA +#endif diff --git a/include/RAJA/policy/cuda/launch.hpp b/include/RAJA/policy/cuda/launch.hpp index 0cf23eb9ad..6c22ced4a9 100644 --- a/include/RAJA/policy/cuda/launch.hpp +++ b/include/RAJA/policy/cuda/launch.hpp @@ -148,13 +148,26 @@ struct LaunchExecute> { BODY body = RAJA::cuda::make_launch_body( gridSize, blockSize, params.shared_mem_size, cuda_res, std::forward(body_in)); + // + // Configure plugins + // + util::PluginContext context{util::make_context>()}; + util::callPreCapturePlugins(context); + + using RAJA::util::trigger_updates_before; + auto p_body = trigger_updates_before(body); + + util::callPostCapturePlugins(context); + + util::callPreLaunchPlugins(context); + // // Launch the kernel // - void *args[] = {(void*)&body}; - { - RAJA::cuda::launch((const void*)func, gridSize, blockSize, args, params.shared_mem_size, cuda_res, async, kernel_name); - } + void *args[] = {(void*)&p_body}; + RAJA::cuda::launch((const void*)func, gridSize, blockSize, args, params.shared_mem_size, cuda_res, async, kernel_name); + + util::callPostLaunchPlugins(context); } RAJA_FT_END; @@ -223,15 +236,31 @@ struct LaunchExecute(body_in)); + // + // Configure plugins + // + util::PluginContext context{util::make_context>()}; + util::callPreCapturePlugins(context); + + using RAJA::util::trigger_updates_before; + auto p_body = trigger_updates_before(body); + + util::callPostCapturePlugins(context); + + util::callPreLaunchPlugins(context); + // // Launch the kernel // - void *args[] = {(void*)&body}; + void *args[] = {(void*)&p_body}; RAJA::cuda::launch((const void*)func, gridSize, blockSize, args, params.shared_mem_size, cuda_res, async, kernel_name); + + util::callPostLaunchPlugins(context); } RAJA_FT_END; - } + + } } template @@ -272,13 +301,27 @@ struct LaunchExecute(body_in)); + + // + // Configure plugins + // + util::PluginContext context{util::make_context>()}; + util::callPreCapturePlugins(context); + + using RAJA::util::trigger_updates_before; + auto p_body = trigger_updates_before(body); + + util::callPostCapturePlugins(context); + + util::callPreLaunchPlugins(context); + // // Launch the kernel // - void *args[] = {(void*)&body}; - { - RAJA::cuda::launch((const void*)func, gridSize, blockSize, args, params.shared_mem_size, cuda_res, async, kernel_name); - } + void *args[] = {(void*)&p_body}; + RAJA::cuda::launch((const void*)func, gridSize, blockSize, args, params.shared_mem_size, cuda_res, async, kernel_name); + + util::callPostLaunchPlugins(context); } RAJA_FT_END; diff --git a/include/RAJA/policy/hip/launch.hpp b/include/RAJA/policy/hip/launch.hpp index edeb6a91b6..d8d3164841 100644 --- a/include/RAJA/policy/hip/launch.hpp +++ b/include/RAJA/policy/hip/launch.hpp @@ -82,11 +82,26 @@ struct LaunchExecute> { BODY body = RAJA::hip::make_launch_body( gridSize, blockSize, params.shared_mem_size, hip_res, std::forward(body_in)); + // + // Configure plugins + // + util::PluginContext context{util::make_context>()}; + util::callPreCapturePlugins(context); + + using RAJA::util::trigger_updates_before; + auto p_body = trigger_updates_before(body); + + util::callPostCapturePlugins(context); + + util::callPreLaunchPlugins(context); + // // Launch the kernel // - void *args[] = {(void*)&body}; + void *args[] = {(void*)&p_body}; RAJA::hip::launch((const void*)func, gridSize, blockSize, args, params.shared_mem_size, hip_res, async, kernel_name); + + util::callPostLaunchPlugins(context); } RAJA_FT_END; @@ -130,11 +145,26 @@ struct LaunchExecute> { BODY body = RAJA::hip::make_launch_body( gridSize, blockSize, params.shared_mem_size, hip_res, std::forward(body_in)); + // + // Configure plugins + // + util::PluginContext context{util::make_context>()}; + util::callPreCapturePlugins(context); + + using RAJA::util::trigger_updates_before; + auto p_body = trigger_updates_before(body); + + util::callPostCapturePlugins(context); + + util::callPreLaunchPlugins(context); + // // Launch the kernel // - void *args[] = {(void*)&body}; + void *args[] = {(void*)&p_body}; RAJA::hip::launch((const void*)func, gridSize, blockSize, args, params.shared_mem_size, hip_res, async, kernel_name); + + util::callPostLaunchPlugins(context); } RAJA_FT_END; @@ -200,11 +230,26 @@ struct LaunchExecute> { BODY body = RAJA::hip::make_launch_body( gridSize, blockSize, params.shared_mem_size, hip_res, std::forward(body_in)); + // + // Configure plugins + // + util::PluginContext context{util::make_context>()}; + util::callPreCapturePlugins(context); + + using RAJA::util::trigger_updates_before; + auto p_body = trigger_updates_before(body); + + util::callPostCapturePlugins(context); + + util::callPreLaunchPlugins(context); + // // Launch the kernel // - void *args[] = {(void*)&body}; + void *args[] = {(void*)&p_body}; RAJA::hip::launch((const void*)func, gridSize, blockSize, args, params.shared_mem_size, hip_res, async, kernel_name); + + util::callPostLaunchPlugins(context); } RAJA_FT_END; @@ -248,11 +293,26 @@ struct LaunchExecute> { BODY body = RAJA::hip::make_launch_body( gridSize, blockSize, params.shared_mem_size, hip_res, std::forward(body_in)); + // + // Configure plugins + // + util::PluginContext context{util::make_context>()}; + util::callPreCapturePlugins(context); + + using RAJA::util::trigger_updates_before; + auto p_body = trigger_updates_before(body); + + util::callPostCapturePlugins(context); + + util::callPreLaunchPlugins(context); + // // Launch the kernel // - void *args[] = {(void*)&body}; + void *args[] = {(void*)&p_body}; RAJA::hip::launch((const void*)func, gridSize, blockSize, args, params.shared_mem_size, hip_res, async, kernel_name); + + util::callPostLaunchPlugins(context); } RAJA_FT_END; From efc103d1d284028021a1b84a50da065fded0c657 Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Mon, 14 Nov 2022 14:53:30 -0800 Subject: [PATCH 48/94] update omp seq launch to support plugins --- include/RAJA/policy/loop/launch.hpp | 37 ++++++++++++++++++++++----- include/RAJA/policy/openmp/launch.hpp | 36 ++++++++++++++++++++++++-- 2 files changed, 65 insertions(+), 8 deletions(-) diff --git a/include/RAJA/policy/loop/launch.hpp b/include/RAJA/policy/loop/launch.hpp index 50d5ff1c0a..06b2d6332a 100644 --- a/include/RAJA/policy/loop/launch.hpp +++ b/include/RAJA/policy/loop/launch.hpp @@ -47,10 +47,24 @@ struct LaunchExecute { ctx.shared_mem_ptr = (char*) malloc(params.shared_mem_size); - body(ctx); + // + // Configure plugins + // + util::PluginContext context{util::make_context()}; + util::callPreCapturePlugins(context); + + using RAJA::util::trigger_updates_before; + auto p_body = trigger_updates_before(body); + + util::callPostCapturePlugins(context); + + util::callPreLaunchPlugins(context); + + p_body(ctx); free(ctx.shared_mem_ptr); ctx.shared_mem_ptr = nullptr; + util::callPostLaunchPlugins(context); } template @@ -59,15 +73,26 @@ struct LaunchExecute { { LaunchContext ctx; + ctx.shared_mem_ptr = (char*) malloc(params.shared_mem_size); - char *kernel_local_mem = new char[params.shared_mem_size]; - ctx.shared_mem_ptr = kernel_local_mem; + // + // Configure plugins + // + util::PluginContext context{util::make_context()}; + util::callPreCapturePlugins(context); - body(ctx); + using RAJA::util::trigger_updates_before; + auto p_body = trigger_updates_before(body); - delete[] kernel_local_mem; - ctx.shared_mem_ptr = nullptr; + util::callPostCapturePlugins(context); + + util::callPreLaunchPlugins(context); + + p_body(ctx); + free(ctx.shared_mem_ptr); + ctx.shared_mem_ptr = nullptr; + util::callPostLaunchPlugins(context); return resources::EventProxy(res); } diff --git a/include/RAJA/policy/openmp/launch.hpp b/include/RAJA/policy/openmp/launch.hpp index 7ec9a7c5ed..191125c33d 100644 --- a/include/RAJA/policy/openmp/launch.hpp +++ b/include/RAJA/policy/openmp/launch.hpp @@ -32,12 +32,26 @@ struct LaunchExecute { template static void exec(LaunchParams const ¶ms, const char *, BODY const &body) { + + // + // Configure plugins + // + util::PluginContext context{util::make_context()}; + util::callPreCapturePlugins(context); + + using RAJA::util::trigger_updates_before; + auto p_body = trigger_updates_before(body); + + util::callPostCapturePlugins(context); + + util::callPreLaunchPlugins(context); + RAJA::region([&]() { LaunchContext ctx; using RAJA::internal::thread_privatize; - auto loop_body = thread_privatize(body); + auto loop_body = thread_privatize(p_body); ctx.shared_mem_ptr = (char*) malloc(params.shared_mem_size); @@ -46,18 +60,34 @@ struct LaunchExecute { free(ctx.shared_mem_ptr); ctx.shared_mem_ptr = nullptr; }); + + util::callPostLaunchPlugins(context); } template static resources::EventProxy exec(RAJA::resources::Resource res, LaunchParams const ¶ms, const char *, BODY const &body) { + + // + // Configure plugins + // + util::PluginContext context{util::make_context()}; + util::callPreCapturePlugins(context); + + using RAJA::util::trigger_updates_before; + auto p_body = trigger_updates_before(body); + + util::callPostCapturePlugins(context); + + util::callPreLaunchPlugins(context); + RAJA::region([&]() { LaunchContext ctx; using RAJA::internal::thread_privatize; - auto loop_body = thread_privatize(body); + auto loop_body = thread_privatize(p_body); ctx.shared_mem_ptr = (char*) malloc(params.shared_mem_size); @@ -67,6 +97,8 @@ struct LaunchExecute { ctx.shared_mem_ptr = nullptr; }); + util::callPostLaunchPlugins(context); + return resources::EventProxy(res); } From 6a1980f543b0da220a379073dd0ddcb46940ea6d Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Mon, 14 Nov 2022 14:54:09 -0800 Subject: [PATCH 49/94] remove scratch file --- include/RAJA/policy/cuda/#launch.hpp# | 1202 ------------------------- 1 file changed, 1202 deletions(-) delete mode 100644 include/RAJA/policy/cuda/#launch.hpp# diff --git a/include/RAJA/policy/cuda/#launch.hpp# b/include/RAJA/policy/cuda/#launch.hpp# deleted file mode 100644 index d98dd20c5b..0000000000 --- a/include/RAJA/policy/cuda/#launch.hpp# +++ /dev/null @@ -1,1202 +0,0 @@ -/*! - ****************************************************************************** - * - * \file - * - * \brief RAJA header file containing user interface for RAJA::launch::cuda - * - ****************************************************************************** - */ - -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// -// Copyright (c) 2016-22, Lawrence Livermore National Security, LLC -// and RAJA project contributors. See the RAJA/LICENSE file for details. -// -// SPDX-License-Identifier: (BSD-3-Clause) -//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - -#ifndef RAJA_pattern_launch_cuda_HPP -#define RAJA_pattern_launch_cuda_HPP - -#include "RAJA/pattern/launch/launch_core.hpp" -#include "RAJA/pattern/detail/privatizer.hpp" -#include "RAJA/policy/cuda/policy.hpp" -#include "RAJA/policy/cuda/MemUtils_CUDA.hpp" -#include "RAJA/policy/cuda/raja_cudaerrchk.hpp" -#include "RAJA/util/resource.hpp" - -namespace RAJA -{ - -template -__global__ void launch_global_fcn(BODY body_in) -{ - - LaunchContext ctx; - - using RAJA::internal::thread_privatize; - auto privatizer = thread_privatize(body_in); - auto& body = privatizer.get_priv(); - - //Set pointer to shared memory - extern __shared__ char raja_shmem_ptr[]; - ctx.shared_mem_ptr = raja_shmem_ptr; - - body(ctx); -} - -template -struct LaunchExecute> { -// cuda_launch_t num_threads set to 1, but not used in launch of kernel - - template - static void exec(const LaunchParams ¶ms, const char *kernel_name, BODY_IN &&body_in) - { - - using BODY = camp::decay; - - auto func = launch_global_fcn; - - resources::Cuda cuda_res = resources::Cuda::get_default(); - // - // Compute the number of blocks and threads - // - cuda_dim_t gridSize{ static_cast(params.teams.value[0]), - static_cast(params.teams.value[1]), - static_cast(params.teams.value[2]) }; - - cuda_dim_t blockSize{ static_cast(params.threads.value[0]), - static_cast(params.threads.value[1]), - static_cast(params.threads.value[2]) }; - - // Only launch kernel if we have something to iterate over - constexpr cuda_dim_member_t zero = 0; - if ( gridSize.x > zero && gridSize.y > zero && gridSize.z > zero && - blockSize.x > zero && blockSize.y > zero && blockSize.z > zero ) { - - - RAJA_FT_BEGIN; - { - // - // Privatize the loop_body, using make_launch_body to setup reductions - // - BODY body = RAJA::cuda::make_launch_body(gridSize, blockSize, params.shared_mem_size, cuda_res, std::forward(body_in)); - - // - // Configure plugins - // - util::PluginContext context{util::make_context>()}; - util::callPreCapturePlugins(context); - - using RAJA::util::trigger_updates_before; - auto p_body = trigger_updates_before(body); - - util::callPostCapturePlugins(context); - - util::callPreLaunchPlugins(context); - - // - // Launch the kernel - // - void *args[] = {(void*)&p_body}; - RAJA::cuda::launch((const void*)func, gridSize, blockSize, args, params.shared_mem_size, cuda_res, async, kernel_name); - - util::callPostLaunchPlugins(context); - } - - RAJA_FT_END; - - } - - }; - - template - static resources::EventProxy - exec(RAJA::resources::Resource res, const LaunchParams ¶ms, const char *kernel_name, BODY_IN &&body_in) - { - - using BODY = camp::decay; - - auto func = launch_global_fcn; - - // Get the concrete resource - resources::Cuda cuda_res = res.get(); - - // - // Compute the number of blocks and threads - // - - cuda_dim_t gridSize{ static_cast(params.teams.value[0]), - static_cast(params.teams.value[1]), - static_cast(params.teams.value[2]) }; - - cuda_dim_t blockSize{ static_cast(params.threads.value[0]), - static_cast(params.threads.value[1]), - static_cast(params.threads.value[2]) }; - - // Only launch kernel if we have something to iterate over - constexpr cuda_dim_member_t zero = 0; - if ( gridSize.x > zero && gridSize.y > zero && gridSize.z > zero && - blockSize.x > zero && blockSize.y > zero && blockSize.z > zero ) { - - RAJA_FT_BEGIN; - - { - // - // Privatize the loop_body, using make_launch_body to setup reductions - // - BODY body = RAJA::cuda::make_launch_body( - gridSize, blockSize, params.shared_mem_size, cuda_res, std::forward(body_in)); - - // - // Configure plugins - // - util::PluginContext context{util::make_context>()}; - util::callPreCapturePlugins(context); - - using RAJA::util::trigger_updates_before; - auto p_body = trigger_updates_before(body); - - util::callPostCapturePlugins(context); - - util::callPreLaunchPlugins(context); - - // - // Launch the kernel - // - void *args[] = {(void*)&p_body}; - RAJA::cuda::launch((const void*)func, gridSize, blockSize, args, params.shared_mem_size, cuda_res, async, kernel_name); - - util::callPostLaunchPlugins(context); - } - - RAJA_FT_END; - } - - return resources::EventProxy(res); - } - -}; - - -template -__launch_bounds__(num_threads, BLOCKS_PER_SM) __global__ - void launch_global_fcn_fixed(BODY body_in) -{ - - LaunchContext ctx; - - using RAJA::internal::thread_privatize; - auto privatizer = thread_privatize(body_in); - auto& body = privatizer.get_priv(); - - //Set pointer to shared memory - extern __shared__ char raja_shmem_ptr[]; - ctx.shared_mem_ptr = raja_shmem_ptr; - - body(ctx); -} - -template -struct LaunchExecute> { - - template - static void exec(const LaunchParams ¶ms, const char *kernel_name, BODY_IN &&body_in) - { - - using BODY = camp::decay; - - auto func = launch_global_fcn_fixed; - - resources::Cuda cuda_res = resources::Cuda::get_default(); - - // - // Compute the number of blocks and threads - // - - cuda_dim_t gridSize{ static_cast(params.teams.value[0]), - static_cast(params.teams.value[1]), - static_cast(params.teams.value[2]) }; - - cuda_dim_t blockSize{ static_cast(params.threads.value[0]), - static_cast(params.threads.value[1]), - static_cast(params.threads.value[2]) }; - - // Only launch kernel if we have something to iterate over - constexpr cuda_dim_member_t zero = 0; - if ( gridSize.x > zero && gridSize.y > zero && gridSize.z > zero && - blockSize.x > zero && blockSize.y > zero && blockSize.z > zero ) { - - RAJA_FT_BEGIN; - - { - // - // Privatize the loop_body, using make_launch_body to setup reductions - // - BODY body = RAJA::cuda::make_launch_body( - gridSize, blockSize, params.shared_mem_size, cuda_res, std::forward(body_in)); - - // - // Configure plugins - // - util::PluginContext context{util::make_context>()}; - util::callPreCapturePlugins(context); - - using RAJA::util::trigger_updates_before; - auto p_body = trigger_updates_before(body); - - util::callPostCapturePlugins(context); - - util::callPreLaunchPlugins(context); - - // - // Launch the kernel - // - void *args[] = {(void*)&p_body}; - RAJA::cuda::launch((const void*)func, gridSize, blockSize, args, params.shared_mem_size, cuda_res, async, kernel_name); - - util::callPostLaunchPlugins(context); - } - - RAJA_FT_END; - } - - } -} - - template - static resources::EventProxy - exec(RAJA::resources::Resource res, const LaunchParams ¶ms, const char *kernel_name, BODY_IN &&body_in) - { - - using BODY = camp::decay; - - auto func = launch_global_fcn_fixed; - - //Get the concrete resource - resources::Cuda cuda_res = res.get(); - - // - // Compute the number of blocks and threads - // - - cuda_dim_t gridSize{ static_cast(params.teams.value[0]), - static_cast(params.teams.value[1]), - static_cast(params.teams.value[2]) }; - - cuda_dim_t blockSize{ static_cast(params.threads.value[0]), - static_cast(params.threads.value[1]), - static_cast(params.threads.value[2]) }; - - // Only launch kernel if we have something to iterate over - constexpr cuda_dim_member_t zero = 0; - if ( gridSize.x > zero && gridSize.y > zero && gridSize.z > zero && - blockSize.x > zero && blockSize.y > zero && blockSize.z > zero ) { - - RAJA_FT_BEGIN; - - { - // - // Privatize the loop_body, using make_launch_body to setup reductions - // - BODY body = RAJA::cuda::make_launch_body( - gridSize, blockSize, params.shared_mem_size, cuda_res, std::forward(body_in)); - - - // - // Configure plugins - // - util::PluginContext context{util::make_context>()}; - util::callPreCapturePlugins(context); - - using RAJA::util::trigger_updates_before; - auto p_body = trigger_updates_before(body); - - util::callPostCapturePlugins(context); - - util::callPreLaunchPlugins(context); - - // - // Launch the kernel - // - void *args[] = {(void*)&p_body}; - RAJA::cuda::launch((const void*)func, gridSize, blockSize, args, params.shared_mem_size, cuda_res, async, kernel_name); - - util::callPostLaunchPlugins(context); - } - - RAJA_FT_END; - } - - return resources::EventProxy(res); - } - -}; - -/* - CUDA global thread mapping -*/ -template -struct cuda_global_thread; - -using cuda_global_thread_x = cuda_global_thread<0>; -using cuda_global_thread_y = cuda_global_thread<1>; -using cuda_global_thread_z = cuda_global_thread<2>; - -template -struct LoopExecute, SEGMENT> { - - template - static RAJA_INLINE RAJA_DEVICE void exec( - LaunchContext const RAJA_UNUSED_ARG(&ctx), - SEGMENT const &segment, - BODY const &body) - { - - const int len = segment.end() - segment.begin(); - { - const int tx = internal::get_cuda_dim(threadIdx) + - internal::get_cuda_dim(blockDim)*internal::get_cuda_dim(blockIdx); - - if (tx < len) body(*(segment.begin() + tx)); - } - } -}; - -using cuda_global_thread_xy = cuda_global_thread<0,1>; -using cuda_global_thread_xz = cuda_global_thread<0,2>; -using cuda_global_thread_yx = cuda_global_thread<1,0>; -using cuda_global_thread_yz = cuda_global_thread<1,2>; -using cuda_global_thread_zx = cuda_global_thread<2,0>; -using cuda_global_thread_zy = cuda_global_thread<2,1>; - -template -struct LoopExecute, SEGMENT> { - - template - static RAJA_INLINE RAJA_DEVICE void exec( - LaunchContext const RAJA_UNUSED_ARG(&ctx), - SEGMENT const &segment0, - SEGMENT const &segment1, - BODY const &body) - { - const int len1 = segment1.end() - segment1.begin(); - const int len0 = segment0.end() - segment0.begin(); - { - const int tx = internal::get_cuda_dim(threadIdx) + - internal::get_cuda_dim(blockDim)*internal::get_cuda_dim(blockIdx); - - const int ty = internal::get_cuda_dim(threadIdx) + - internal::get_cuda_dim(blockDim)*internal::get_cuda_dim(blockIdx); - - if (tx < len0 && ty < len1) - body(*(segment0.begin() + tx), *(segment1.begin() + ty)); - } - } -}; - -using cuda_global_thread_xyz = cuda_global_thread<0,1,2>; -using cuda_global_thread_xzy = cuda_global_thread<0,2,1>; -using cuda_global_thread_yxz = cuda_global_thread<1,0,2>; -using cuda_global_thread_yzx = cuda_global_thread<1,2,0>; -using cuda_global_thread_zxy = cuda_global_thread<2,0,1>; -using cuda_global_thread_zyx = cuda_global_thread<2,1,0>; - -template -struct LoopExecute, SEGMENT> { - - template - static RAJA_INLINE RAJA_DEVICE void exec( - LaunchContext const RAJA_UNUSED_ARG(&ctx), - SEGMENT const &segment0, - SEGMENT const &segment1, - SEGMENT const &segment2, - BODY const &body) - { - const int len2 = segment2.end() - segment2.begin(); - const int len1 = segment1.end() - segment1.begin(); - const int len0 = segment0.end() - segment0.begin(); - { - const int tx = internal::get_cuda_dim(threadIdx) + - internal::get_cuda_dim(blockDim)*internal::get_cuda_dim(blockIdx); - - const int ty = internal::get_cuda_dim(threadIdx) + - internal::get_cuda_dim(blockDim)*internal::get_cuda_dim(blockIdx); - - const int tz = internal::get_cuda_dim(threadIdx) + - internal::get_cuda_dim(blockDim)*internal::get_cuda_dim(blockIdx); - - if (tx < len0 && ty < len1 && tz < len2) - body(*(segment0.begin() + tx), - *(segment1.begin() + ty), - *(segment1.begin() + ty)); - } - } -}; - -/* -Reshape threads in a block into a 1D iteration space -*/ -template -struct cuda_flatten_block_threads_direct{}; - -using cuda_flatten_block_threads_xy_direct = cuda_flatten_block_threads_direct<0,1>; -using cuda_flatten_block_threads_xz_direct = cuda_flatten_block_threads_direct<0,2>; -using cuda_flatten_block_threads_yx_direct = cuda_flatten_block_threads_direct<1,0>; -using cuda_flatten_block_threads_yz_direct = cuda_flatten_block_threads_direct<1,2>; -using cuda_flatten_block_threads_zx_direct = cuda_flatten_block_threads_direct<2,0>; -using cuda_flatten_block_threads_zy_direct = cuda_flatten_block_threads_direct<2,1>; - -using cuda_flatten_block_threads_xyz_direct = cuda_flatten_block_threads_direct<0,1,2>; -using cuda_flatten_block_threads_xzy_direct = cuda_flatten_block_threads_direct<0,2,1>; -using cuda_flatten_block_threads_yxz_direct = cuda_flatten_block_threads_direct<1,0,2>; -using cuda_flatten_block_threads_yzx_direct = cuda_flatten_block_threads_direct<1,2,0>; -using cuda_flatten_block_threads_zxy_direct = cuda_flatten_block_threads_direct<2,0,1>; -using cuda_flatten_block_threads_zyx_direct = cuda_flatten_block_threads_direct<2,1,0>; - -template -struct cuda_flatten_block_threads_loop{}; - -using cuda_flatten_block_threads_xy_loop = cuda_flatten_block_threads_loop<0,1>; -using cuda_flatten_block_threads_xz_loop = cuda_flatten_block_threads_loop<0,2>; -using cuda_flatten_block_threads_yx_loop = cuda_flatten_block_threads_loop<1,0>; -using cuda_flatten_block_threads_yz_loop = cuda_flatten_block_threads_loop<1,2>; -using cuda_flatten_block_threads_zx_loop = cuda_flatten_block_threads_loop<2,0>; -using cuda_flatten_block_threads_zy_loop = cuda_flatten_block_threads_loop<2,1>; - -using cuda_flatten_block_threads_xyz_loop = cuda_flatten_block_threads_loop<0,1,2>; -using cuda_flatten_block_threads_xzy_loop = cuda_flatten_block_threads_loop<0,2,1>; -using cuda_flatten_block_threads_yxz_loop = cuda_flatten_block_threads_loop<1,0,2>; -using cuda_flatten_block_threads_yzx_loop = cuda_flatten_block_threads_loop<1,2,0>; -using cuda_flatten_block_threads_zxy_loop = cuda_flatten_block_threads_loop<2,0,1>; -using cuda_flatten_block_threads_zyx_loop = cuda_flatten_block_threads_loop<2,1,0>; - -template -struct LoopExecute, SEGMENT> -{ - template - static RAJA_INLINE RAJA_DEVICE void exec( - LaunchContext const RAJA_UNUSED_ARG(&ctx), - SEGMENT const &segment, - BODY const &body) - { - - const int len = segment.end() - segment.begin(); - { - const int tx = internal::get_cuda_dim(threadIdx); - const int ty = internal::get_cuda_dim(threadIdx); - const int bx = internal::get_cuda_dim(blockDim); - const int tid = tx + bx*ty; - - if (tid < len) body(*(segment.begin() + tid)); - } - } -}; - -template -struct LoopExecute, SEGMENT> -{ - template - static RAJA_INLINE RAJA_DEVICE void exec( - LaunchContext const RAJA_UNUSED_ARG(&ctx), - SEGMENT const &segment, - BODY const &body) - { - const int len = segment.end() - segment.begin(); - - const int tx = internal::get_cuda_dim(threadIdx); - const int ty = internal::get_cuda_dim(threadIdx); - - const int bx = internal::get_cuda_dim(blockDim); - const int by = internal::get_cuda_dim(blockDim); - - for(int tid = tx + bx*ty; tid < len; tid += bx*by) { - body(*(segment.begin() + tid)); - } - - } -}; - -template -struct LoopExecute, SEGMENT> -{ - template - static RAJA_INLINE RAJA_DEVICE void exec( - LaunchContext const RAJA_UNUSED_ARG(&ctx), - SEGMENT const &segment, - BODY const &body) - { - const int len = segment.end() - segment.begin(); - { - const int tx = internal::get_cuda_dim(threadIdx); - const int ty = internal::get_cuda_dim(threadIdx); - const int tz = internal::get_cuda_dim(threadIdx); - const int bx = internal::get_cuda_dim(blockDim); - const int by = internal::get_cuda_dim(blockDim); - const int tid = tx + bx*(ty + by*tz); - - if (tid < len) body(*(segment.begin() + tid)); - } - } -}; - -template -struct LoopExecute, SEGMENT> -{ - template - static RAJA_INLINE RAJA_DEVICE void exec( - LaunchContext const RAJA_UNUSED_ARG(&ctx), - SEGMENT const &segment, - BODY const &body) - { - const int len = segment.end() - segment.begin(); - - const int tx = internal::get_cuda_dim(threadIdx); - const int ty = internal::get_cuda_dim(threadIdx); - const int tz = internal::get_cuda_dim(threadIdx); - const int bx = internal::get_cuda_dim(blockDim); - const int by = internal::get_cuda_dim(blockDim); - const int bz = internal::get_cuda_dim(blockDim); - - for(int tid = tx + bx*(ty + by*tz); tid < len; tid += bx*by*bz) { - body(*(segment.begin() + tid)); - } - - } -}; - - -/* - CUDA thread loops with block strides -*/ -template -struct LoopExecute, SEGMENT> { - - template - static RAJA_INLINE RAJA_DEVICE void exec( - LaunchContext const RAJA_UNUSED_ARG(&ctx), - SEGMENT const &segment, - BODY const &body) - { - - const int len = segment.end() - segment.begin(); - - for (int tx = internal::get_cuda_dim(threadIdx); - tx < len; - tx += internal::get_cuda_dim(blockDim) ) - { - body(*(segment.begin() + tx)); - } - } -}; - -/* - CUDA thread direct mappings -*/ -template -struct LoopExecute, SEGMENT> { - - template - static RAJA_INLINE RAJA_DEVICE void exec( - LaunchContext const RAJA_UNUSED_ARG(&ctx), - SEGMENT const &segment, - BODY const &body) - { - - const int len = segment.end() - segment.begin(); - { - const int tx = internal::get_cuda_dim(threadIdx); - if (tx < len) body(*(segment.begin() + tx)); - } - } -}; - - -/* - CUDA block loops with grid strides -*/ -template -struct LoopExecute, SEGMENT> { - - template - static RAJA_INLINE RAJA_DEVICE void exec( - LaunchContext const RAJA_UNUSED_ARG(&ctx), - SEGMENT const &segment, - BODY const &body) - { - - const int len = segment.end() - segment.begin(); - - for (int bx = internal::get_cuda_dim(blockIdx); - bx < len; - bx += internal::get_cuda_dim(gridDim) ) { - body(*(segment.begin() + bx)); - } - } -}; - -/* - CUDA block direct mappings -*/ -template -struct LoopExecute, SEGMENT> { - - template - static RAJA_INLINE RAJA_DEVICE void exec( - LaunchContext const RAJA_UNUSED_ARG(&ctx), - SEGMENT const &segment, - BODY const &body) - { - - const int len = segment.end() - segment.begin(); - { - const int bx = internal::get_cuda_dim(blockIdx); - if (bx < len) body(*(segment.begin() + bx)); - } - } -}; - -/* - CUDA thread loops with block strides + Return Index -*/ -template -struct LoopICountExecute, SEGMENT> { - - template - static RAJA_INLINE RAJA_DEVICE void exec( - LaunchContext const RAJA_UNUSED_ARG(&ctx), - SEGMENT const &segment, - BODY const &body) - { - - const int len = segment.end() - segment.begin(); - - for (int tx = internal::get_cuda_dim(threadIdx); - tx < len; - tx += internal::get_cuda_dim(blockDim) ) - { - body(*(segment.begin() + tx), tx); - } - } -}; - -/* - CUDA thread direct mappings -*/ -template -struct LoopICountExecute, SEGMENT> { - - template - static RAJA_INLINE RAJA_DEVICE void exec( - LaunchContext const RAJA_UNUSED_ARG(&ctx), - SEGMENT const &segment, - BODY const &body) - { - - const int len = segment.end() - segment.begin(); - { - const int tx = internal::get_cuda_dim(threadIdx); - if (tx < len) body(*(segment.begin() + tx), tx); - } - } -}; - -/* - CUDA block loops with grid strides -*/ -template -struct LoopICountExecute, SEGMENT> { - - template - static RAJA_INLINE RAJA_DEVICE void exec( - LaunchContext const RAJA_UNUSED_ARG(&ctx), - SEGMENT const &segment, - BODY const &body) - { - - const int len = segment.end() - segment.begin(); - - for (int bx = internal::get_cuda_dim(blockIdx); - bx < len; - bx += internal::get_cuda_dim(gridDim) ) { - body(*(segment.begin() + bx), bx); - } - } -}; - -/* - CUDA block direct mappings -*/ -template -struct LoopICountExecute, SEGMENT> { - - template - static RAJA_INLINE RAJA_DEVICE void exec( - LaunchContext const RAJA_UNUSED_ARG(&ctx), - SEGMENT const &segment, - BODY const &body) - { - - const int len = segment.end() - segment.begin(); - { - const int bx = internal::get_cuda_dim(blockIdx); - if (bx < len) body(*(segment.begin() + bx), bx); - } - } -}; - -// perfectly nested cuda direct policies -using cuda_block_xy_nested_direct = cuda_block_xyz_direct<0,1>; -using cuda_block_xz_nested_direct = cuda_block_xyz_direct<0,2>; -using cuda_block_yx_nested_direct = cuda_block_xyz_direct<1,0>; -using cuda_block_yz_nested_direct = cuda_block_xyz_direct<1,2>; -using cuda_block_zx_nested_direct = cuda_block_xyz_direct<2,0>; -using cuda_block_zy_nested_direct = cuda_block_xyz_direct<2,1>; - -using cuda_block_xyz_nested_direct = cuda_block_xyz_direct<0,1,2>; -using cuda_block_xzy_nested_direct = cuda_block_xyz_direct<0,2,1>; -using cuda_block_yxz_nested_direct = cuda_block_xyz_direct<1,0,2>; -using cuda_block_yzx_nested_direct = cuda_block_xyz_direct<1,2,0>; -using cuda_block_zxy_nested_direct = cuda_block_xyz_direct<2,0,1>; -using cuda_block_zyx_nested_direct = cuda_block_xyz_direct<2,1,0>; - -template -struct LoopExecute, SEGMENT> { - - template - static RAJA_INLINE RAJA_DEVICE void exec( - LaunchContext const RAJA_UNUSED_ARG(&ctx), - SEGMENT const &segment0, - SEGMENT const &segment1, - BODY const &body) - { - const int len1 = segment1.end() - segment1.begin(); - const int len0 = segment0.end() - segment0.begin(); - { - const int tx = internal::get_cuda_dim(blockIdx); - const int ty = internal::get_cuda_dim(blockIdx); - if (tx < len0 && ty < len1) - body(*(segment0.begin() + tx), *(segment1.begin() + ty)); - } - } -}; - -template -struct LoopExecute, SEGMENT> { - - template - static RAJA_INLINE RAJA_DEVICE void exec( - LaunchContext const RAJA_UNUSED_ARG(&ctx), - SEGMENT const &segment0, - SEGMENT const &segment1, - SEGMENT const &segment2, - BODY const &body) - { - const int len2 = segment2.end() - segment2.begin(); - const int len1 = segment1.end() - segment1.begin(); - const int len0 = segment0.end() - segment0.begin(); - { - const int tx = internal::get_cuda_dim(blockIdx); - const int ty = internal::get_cuda_dim(blockIdx); - const int tz = internal::get_cuda_dim(blockIdx); - if (tx < len0 && ty < len1 && tz < len2) - body(*(segment0.begin() + tx), - *(segment1.begin() + ty), - *(segment2.begin() + tz)); - } - } -}; - -/* - Perfectly nested cuda direct policies - Return local index -*/ -template -struct LoopICountExecute, SEGMENT> { - - template - static RAJA_INLINE RAJA_DEVICE void exec( - LaunchContext const RAJA_UNUSED_ARG(&ctx), - SEGMENT const &segment0, - SEGMENT const &segment1, - BODY const &body) - { - const int len1 = segment1.end() - segment1.begin(); - const int len0 = segment0.end() - segment0.begin(); - { - const int tx = internal::get_cuda_dim(blockIdx); - const int ty = internal::get_cuda_dim(blockIdx); - if (tx < len0 && ty < len1) - body(*(segment0.begin() + tx), *(segment1.begin() + ty), - tx, ty); - } - } -}; - -template -struct LoopICountExecute, SEGMENT> { - - template - static RAJA_INLINE RAJA_DEVICE void exec( - LaunchContext const RAJA_UNUSED_ARG(&ctx), - SEGMENT const &segment0, - SEGMENT const &segment1, - SEGMENT const &segment2, - BODY const &body) - { - const int len2 = segment2.end() - segment2.begin(); - const int len1 = segment1.end() - segment1.begin(); - const int len0 = segment0.end() - segment0.begin(); - { - const int tx = internal::get_cuda_dim(blockIdx); - const int ty = internal::get_cuda_dim(blockIdx); - const int tz = internal::get_cuda_dim(blockIdx); - if (tx < len0 && ty < len1 && tz < len2) - body(*(segment0.begin() + tx), - *(segment1.begin() + ty), - *(segment2.begin() + tz), tx, ty, tz); - } - } -}; - -// perfectly nested cuda loop policies -using cuda_block_xy_nested_loop = cuda_block_xyz_loop<0,1>; -using cuda_block_xz_nested_loop = cuda_block_xyz_loop<0,2>; -using cuda_block_yx_nested_loop = cuda_block_xyz_loop<1,0>; -using cuda_block_yz_nested_loop = cuda_block_xyz_loop<1,2>; -using cuda_block_zx_nested_loop = cuda_block_xyz_loop<2,0>; -using cuda_block_zy_nested_loop = cuda_block_xyz_loop<2,1>; - -using cuda_block_xyz_nested_loop = cuda_block_xyz_loop<0,1,2>; -using cuda_block_xzy_nested_loop = cuda_block_xyz_loop<0,2,1>; -using cuda_block_yxz_nested_loop = cuda_block_xyz_loop<1,0,2>; -using cuda_block_yzx_nested_loop = cuda_block_xyz_loop<1,2,0>; -using cuda_block_zxy_nested_loop = cuda_block_xyz_loop<2,0,1>; -using cuda_block_zyx_nested_loop = cuda_block_xyz_loop<2,1,0>; - -template -struct LoopExecute, SEGMENT> { - - template - static RAJA_INLINE RAJA_DEVICE void exec( - LaunchContext const RAJA_UNUSED_ARG(&ctx), - SEGMENT const &segment0, - SEGMENT const &segment1, - BODY const &body) - { - const int len1 = segment1.end() - segment1.begin(); - const int len0 = segment0.end() - segment0.begin(); - { - - for (int bx = internal::get_cuda_dim(blockIdx); - bx < len0; - bx += internal::get_cuda_dim(gridDim)) - { - for (int by = internal::get_cuda_dim(blockIdx); - by < len1; - by += internal::get_cuda_dim(gridDim)) - { - - body(*(segment0.begin() + bx), *(segment1.begin() + by)); - } - } - } - } -}; - -template -struct LoopExecute, SEGMENT> { - - template - static RAJA_INLINE RAJA_DEVICE void exec( - LaunchContext const RAJA_UNUSED_ARG(&ctx), - SEGMENT const &segment0, - SEGMENT const &segment1, - SEGMENT const &segment2, - BODY const &body) - { - const int len2 = segment2.end() - segment2.begin(); - const int len1 = segment1.end() - segment1.begin(); - const int len0 = segment0.end() - segment0.begin(); - - for (int bx = internal::get_cuda_dim(blockIdx); - bx < len0; - bx += internal::get_cuda_dim(gridDim)) - { - - for (int by = internal::get_cuda_dim(blockIdx); - by < len1; - by += internal::get_cuda_dim(gridDim)) - { - - for (int bz = internal::get_cuda_dim(blockIdx); - bz < len2; - bz += internal::get_cuda_dim(gridDim)) - { - - body(*(segment0.begin() + bx), - *(segment1.begin() + by), - *(segment2.begin() + bz)); - } - } - } - } -}; - -/* - perfectly nested cuda loop policies + returns local index -*/ -template -struct LoopICountExecute, SEGMENT> { - - template - static RAJA_INLINE RAJA_DEVICE void exec( - LaunchContext const RAJA_UNUSED_ARG(&ctx), - SEGMENT const &segment0, - SEGMENT const &segment1, - BODY const &body) - { - const int len1 = segment1.end() - segment1.begin(); - const int len0 = segment0.end() - segment0.begin(); - { - - for (int bx = internal::get_cuda_dim(blockIdx); - bx < len0; - bx += internal::get_cuda_dim(gridDim)) - { - for (int by = internal::get_cuda_dim(blockIdx); - by < len1; - by += internal::get_cuda_dim(gridDim)) - { - - body(*(segment0.begin() + bx), *(segment1.begin() + by), bx, by); - } - } - } - } -}; - - -template -struct LoopICountExecute, SEGMENT> { - - template - static RAJA_INLINE RAJA_DEVICE void exec( - LaunchContext const RAJA_UNUSED_ARG(&ctx), - SEGMENT const &segment0, - SEGMENT const &segment1, - SEGMENT const &segment2, - BODY const &body) - { - const int len2 = segment2.end() - segment2.begin(); - const int len1 = segment1.end() - segment1.begin(); - const int len0 = segment0.end() - segment0.begin(); - - for (int bx = internal::get_cuda_dim(blockIdx); - bx < len0; - bx += internal::get_cuda_dim(gridDim)) - { - - for (int by = internal::get_cuda_dim(blockIdx); - by < len1; - by += internal::get_cuda_dim(gridDim)) - { - - for (int bz = internal::get_cuda_dim(blockIdx); - bz < len2; - bz += internal::get_cuda_dim(gridDim)) - { - - body(*(segment0.begin() + bx), - *(segment1.begin() + by), - *(segment2.begin() + bz), bx, by, bz); - } - } - } - } -}; - - -template -struct TileExecute, SEGMENT> { - - template - static RAJA_INLINE RAJA_DEVICE void exec( - LaunchContext const RAJA_UNUSED_ARG(&ctx), - TILE_T tile_size, - SEGMENT const &segment, - BODY const &body) - { - - const int len = segment.end() - segment.begin(); - - for (int tx = internal::get_cuda_dim(threadIdx) * tile_size; - tx < len; - tx += internal::get_cuda_dim(blockDim) * tile_size) - { - body(segment.slice(tx, tile_size)); - } - } -}; - - -template -struct TileExecute, SEGMENT> { - - template - static RAJA_INLINE RAJA_DEVICE void exec( - LaunchContext const RAJA_UNUSED_ARG(&ctx), - TILE_T tile_size, - SEGMENT const &segment, - BODY const &body) - { - - const int len = segment.end() - segment.begin(); - - int tx = internal::get_cuda_dim(threadIdx) * tile_size; - if(tx < len) - { - body(segment.slice(tx, tile_size)); - } - } -}; - - -template -struct TileExecute, SEGMENT> { - - template - static RAJA_INLINE RAJA_DEVICE void exec( - LaunchContext const RAJA_UNUSED_ARG(&ctx), - TILE_T tile_size, - SEGMENT const &segment, - BODY const &body) - { - - const int len = segment.end() - segment.begin(); - - for (int tx = internal::get_cuda_dim(blockIdx) * tile_size; - - tx < len; - - tx += internal::get_cuda_dim(gridDim) * tile_size) - { - body(segment.slice(tx, tile_size)); - } - } -}; - - -template -struct TileExecute, SEGMENT> { - - template - static RAJA_INLINE RAJA_DEVICE void exec( - LaunchContext const RAJA_UNUSED_ARG(&ctx), - TILE_T tile_size, - SEGMENT const &segment, - BODY const &body) - { - - const int len = segment.end() - segment.begin(); - - int tx = internal::get_cuda_dim(blockIdx) * tile_size; - if(tx < len){ - body(segment.slice(tx, tile_size)); - } - } -}; - -//Tile execute + return index -template -struct TileICountExecute, SEGMENT> { - - template - static RAJA_INLINE RAJA_DEVICE void exec( - LaunchContext const RAJA_UNUSED_ARG(&ctx), - TILE_T tile_size, - SEGMENT const &segment, - BODY const &body) - { - - const int len = segment.end() - segment.begin(); - - for (int tx = internal::get_cuda_dim(threadIdx) * tile_size; - tx < len; - tx += internal::get_cuda_dim(blockDim) * tile_size) - { - body(segment.slice(tx, tile_size), tx/tile_size); - } - } -}; - - -template -struct TileICountExecute, SEGMENT> { - - template - static RAJA_INLINE RAJA_DEVICE void exec( - LaunchContext const RAJA_UNUSED_ARG(&ctx), - TILE_T tile_size, - SEGMENT const &segment, - BODY const &body) - { - - const int len = segment.end() - segment.begin(); - - int tx = internal::get_cuda_dim(threadIdx) * tile_size; - if(tx < len) - { - body(segment.slice(tx, tile_size), tx/tile_size); - } - } -}; - - -template -struct TileICountExecute, SEGMENT> { - - template - static RAJA_INLINE RAJA_DEVICE void exec( - LaunchContext const RAJA_UNUSED_ARG(&ctx), - TILE_T tile_size, - SEGMENT const &segment, - BODY const &body) - { - - const int len = segment.end() - segment.begin(); - - for (int bx = internal::get_cuda_dim(blockIdx) * tile_size; - - bx < len; - - bx += internal::get_cuda_dim(gridDim) * tile_size) - { - body(segment.slice(bx, tile_size), bx/tile_size); - } - } -}; - - -template -struct TileICountExecute, SEGMENT> { - - template - static RAJA_INLINE RAJA_DEVICE void exec( - LaunchContext const RAJA_UNUSED_ARG(&ctx), - TILE_T tile_size, - SEGMENT const &segment, - BODY const &body) - { - - const int len = segment.end() - segment.begin(); - - int bx = internal::get_cuda_dim(blockIdx) * tile_size; - if(bx < len){ - body(segment.slice(bx, tile_size), bx/tile_size); - } - } -}; - -} // namespace RAJA -#endif From f4973ce0e0c3925da3a990d95cb125cce52c8239 Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Mon, 14 Nov 2022 14:55:57 -0800 Subject: [PATCH 50/94] remove extra lines --- include/RAJA/policy/cuda/launch.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/RAJA/policy/cuda/launch.hpp b/include/RAJA/policy/cuda/launch.hpp index 6c22ced4a9..0d84f01754 100644 --- a/include/RAJA/policy/cuda/launch.hpp +++ b/include/RAJA/policy/cuda/launch.hpp @@ -74,7 +74,6 @@ struct LaunchExecute> { if ( gridSize.x > zero && gridSize.y > zero && gridSize.z > zero && blockSize.x > zero && blockSize.y > zero && blockSize.z > zero ) { - RAJA_FT_BEGIN; { // @@ -105,7 +104,6 @@ struct LaunchExecute> { } RAJA_FT_END; - } }; From 77c80cb6edfae6ac01d7e4a52625e2a63026750c Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Tue, 15 Nov 2022 13:13:53 -0800 Subject: [PATCH 51/94] Update build scripts to work with newer cmake version --- scripts/lc-builds/blueos_clang_omptarget.sh | 3 ++- scripts/lc-builds/blueos_xl_omptarget.sh | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/lc-builds/blueos_clang_omptarget.sh b/scripts/lc-builds/blueos_clang_omptarget.sh index e0aa7104a7..67a477288b 100755 --- a/scripts/lc-builds/blueos_clang_omptarget.sh +++ b/scripts/lc-builds/blueos_clang_omptarget.sh @@ -40,7 +40,8 @@ cmake \ -DENABLE_OPENMP=On \ -DENABLE_CUDA=Off \ -DRAJA_ENABLE_TARGET_OPENMP=On \ - -DOpenMP_CXX_FLAGS="-fopenmp;-fopenmp-targets=nvptx64-nvidia-cuda" \ + -DBLT_OPENMP_COMPILE_FLAGS="-fopenmp;-fopenmp-targets=nvptx64-nvidia-cuda" \ + -DBLT_OPENMP_LINK_FLAGS="-fopenmp;-fopenmp-targets=nvptx64-nvidia-cuda" \ -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \ "$@" \ .. diff --git a/scripts/lc-builds/blueos_xl_omptarget.sh b/scripts/lc-builds/blueos_xl_omptarget.sh index 2b3fadcbb0..6eb7e162b2 100755 --- a/scripts/lc-builds/blueos_xl_omptarget.sh +++ b/scripts/lc-builds/blueos_xl_omptarget.sh @@ -37,7 +37,8 @@ cmake \ -C ../host-configs/lc-builds/blueos/xl_X.cmake \ -DENABLE_OPENMP=On \ -DRAJA_ENABLE_TARGET_OPENMP=On \ - -DOpenMP_CXX_FLAGS="-qoffload;-qsmp=omp;-qalias=noansi" \ + -DBLT_OPENMP_COMPILE_FLAGS="-qoffload;-qsmp=omp;-qalias=noansi" \ + -DBLT_OPENMP_LINK_FLAGS="-qoffload;-qsmp=omp;-qalias=noansi" \ -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \ "$@" \ .. From 289949c74bb0fe91e4bfb73cabc89cd4a682719c Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Tue, 15 Nov 2022 14:32:08 -0800 Subject: [PATCH 52/94] integration test for plugin and launch --- test/include/RAJA_test-plugin-launchpol.hpp | 35 ++++++++ test/integration/plugin/CMakeLists.txt | 11 +++ .../plugin/test-plugin-launch.cpp.in | 38 +++++++++ .../plugin/tests/test-plugin-launch.hpp | 85 +++++++++++++++++++ 4 files changed, 169 insertions(+) create mode 100644 test/include/RAJA_test-plugin-launchpol.hpp create mode 100644 test/integration/plugin/test-plugin-launch.cpp.in create mode 100644 test/integration/plugin/tests/test-plugin-launch.hpp diff --git a/test/include/RAJA_test-plugin-launchpol.hpp b/test/include/RAJA_test-plugin-launchpol.hpp new file mode 100644 index 0000000000..862f2018dd --- /dev/null +++ b/test/include/RAJA_test-plugin-launchpol.hpp @@ -0,0 +1,35 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2016-22, Lawrence Livermore National Security, LLC +// and RAJA project contributors. See the RAJA/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +// +// Kernel execution policy lists used throughout plugin tests +// + +#ifndef __RAJA_test_plugin_kernelpol_HPP__ +#define __RAJA_test_plugin_kernelpol_HPP__ + +#include "RAJA/RAJA.hpp" + +#include "camp/list.hpp" + +// Sequential execution policy types +using SequentialPluginLaunchExecPols = camp::list>; + +#if defined(RAJA_ENABLE_OPENMP) +using OpenMPPluginLaunchExecPols = camp::list>; +#endif + +#if defined(RAJA_ENABLE_CUDA) +using CudaPluginLaunchExecPols = camp::list>>; +#endif + +#if defined(RAJA_ENABLE_HIP) +using HipPluginLaunchExecPols = camp::list>>; + +#endif + +#endif // __RAJA_test_plugin_kernelpol_HPP__ diff --git a/test/integration/plugin/CMakeLists.txt b/test/integration/plugin/CMakeLists.txt index da41e2a0bd..0d8d56643b 100644 --- a/test/integration/plugin/CMakeLists.txt +++ b/test/integration/plugin/CMakeLists.txt @@ -32,6 +32,17 @@ foreach( BACKEND ${PLUGIN_BACKENDS} ) PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/tests) endforeach() +foreach( BACKEND ${PLUGIN_BACKENDS} ) + configure_file( test-plugin-launch.cpp.in + test-plugin-launch-${BACKEND}.cpp ) + raja_add_test( NAME test-plugin-launch-${BACKEND} + SOURCES ${CMAKE_CURRENT_BINARY_DIR}/test-plugin-launch-${BACKEND}.cpp + plugin_to_test.cpp ) + + target_include_directories(test-plugin-launch-${BACKEND}.exe + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/tests) +endforeach() + set(DISPATCHERS Direct IndirectFunction IndirectVirtual) foreach( BACKEND ${PLUGIN_BACKENDS} ) diff --git a/test/integration/plugin/test-plugin-launch.cpp.in b/test/integration/plugin/test-plugin-launch.cpp.in new file mode 100644 index 0000000000..7df20f0ba4 --- /dev/null +++ b/test/integration/plugin/test-plugin-launch.cpp.in @@ -0,0 +1,38 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2016-22, Lawrence Livermore National Security, LLC +// and RAJA project contributors. See the RAJA/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +// +// test/include headers +// +#include "RAJA_test-base.hpp" +#include "RAJA_test-camp.hpp" +#include "RAJA_test-platform.hpp" + +#include "RAJA_test-plugin-launchpol.hpp" + +// +// Header for tests in ./tests directory +// +// Note: CMake adds ./tests as an include dir for these tests. +// +#include "test-plugin-launch.hpp" + + +// +// Cartesian product of types used in parameterized tests +// +using @BACKEND@PluginLaunchTypes = + Test< camp::cartesian_product<@BACKEND@PluginLaunchExecPols, + @BACKEND@ResourceList, + @BACKEND@PlatformList > >::Types; + +// +// Instantiate parameterized test +// +INSTANTIATE_TYPED_TEST_SUITE_P(@BACKEND@, + PluginLaunchTest, + @BACKEND@PluginLaunchTypes); diff --git a/test/integration/plugin/tests/test-plugin-launch.hpp b/test/integration/plugin/tests/test-plugin-launch.hpp new file mode 100644 index 0000000000..68f3dfbbaf --- /dev/null +++ b/test/integration/plugin/tests/test-plugin-launch.hpp @@ -0,0 +1,85 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2016-22, Lawrence Livermore National Security, LLC +// and RAJA project contributors. See the RAJA/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +/// +/// Header file containing basic integration tests for plugins with launch. +/// + +#ifndef __TEST_PLUGIN_LAUNCH_HPP__ +#define __TEST_PLUGIN_LAUNCH_HPP__ + +#include "test-plugin.hpp" + + +// Check that the plugin is called with the right Platform. +// Check that the plugin is called the correct number of times, +// once before and after each launch capture for the capture counter, +// once before and after each launch invocation for the launch counter. + +// test with basic launch +template +void PluginLaunchTestImpl() +{ + SetupPluginVars spv(WORKING_RES::get_default()); + + CounterData* data = plugin_test_resource->allocate(10); + + for (int i = 0; i < 10; i++) { + + PluginTestCallable p_callable{data}; + + RAJA::launch + (RAJA::LaunchParams(RAJA::Teams(1), RAJA::Threads(1)), + [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) + { + p_callable(i); + }); + + CounterData loop_data; + plugin_test_resource->memcpy(&loop_data, &data[i], sizeof(CounterData)); + ASSERT_EQ(loop_data.capture_platform_active, PLATFORM); + ASSERT_EQ(loop_data.capture_counter_pre, i+1); + ASSERT_EQ(loop_data.capture_counter_post, i); + ASSERT_EQ(loop_data.launch_platform_active, PLATFORM); + ASSERT_EQ(loop_data.launch_counter_pre, i+1); + ASSERT_EQ(loop_data.launch_counter_post, i); + } + + CounterData plugin_data; + plugin_test_resource->memcpy(&plugin_data, plugin_test_data, sizeof(CounterData)); + ASSERT_EQ(plugin_data.capture_platform_active, RAJA::Platform::undefined); + ASSERT_EQ(plugin_data.capture_counter_pre, 10); + ASSERT_EQ(plugin_data.capture_counter_post, 10); + ASSERT_EQ(plugin_data.launch_platform_active, RAJA::Platform::undefined); + ASSERT_EQ(plugin_data.launch_counter_pre, 10); + ASSERT_EQ(plugin_data.launch_counter_post, 10); + + plugin_test_resource->deallocate(data); +} + + +TYPED_TEST_SUITE_P(PluginLaunchTest); +template +class PluginLaunchTest : public ::testing::Test +{ +}; + +TYPED_TEST_P(PluginLaunchTest, PluginLaunch) +{ + using LaunchPolicy = typename camp::at>::type; + using ResType = typename camp::at>::type; + using PlatformHolder = typename camp::at>::type; + + PluginLaunchTestImpl( ); +} + +REGISTER_TYPED_TEST_SUITE_P(PluginLaunchTest, + PluginLaunch); + +#endif //__TEST_PLUGIN_LAUNCH_HPP__ From 9623c68717d112a52a8140fc53cfd4b1ae4668d1 Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Tue, 15 Nov 2022 15:12:19 -0800 Subject: [PATCH 53/94] move plugin work to launch_core --- include/RAJA/pattern/launch/launch_core.hpp | 60 ++++++++++++++-- include/RAJA/policy/cuda/launch.hpp | 80 +++------------------ include/RAJA/policy/loop/launch.hpp | 37 ++-------- 3 files changed, 72 insertions(+), 105 deletions(-) diff --git a/include/RAJA/pattern/launch/launch_core.hpp b/include/RAJA/pattern/launch/launch_core.hpp index 12d6f16f6f..455ae733f8 100644 --- a/include/RAJA/pattern/launch/launch_core.hpp +++ b/include/RAJA/pattern/launch/launch_core.hpp @@ -227,8 +227,20 @@ void launch(LaunchParams const ¶ms, const char *kernel_name, BODY const &bod { //Take the first policy as we assume the second policy is not user defined. //We rely on the user to pair launch and loop policies correctly. + util::PluginContext context{util::make_context()}; + util::callPreCapturePlugins(context); + + using RAJA::util::trigger_updates_before; + auto p_body = trigger_updates_before(body); + + util::callPostCapturePlugins(context); + + util::callPreLaunchPlugins(context); + using launch_t = LaunchExecute; - launch_t::exec(params, kernel_name, body); + launch_t::exec(params, kernel_name, p_body); + + util::callPostLaunchPlugins(context); } @@ -242,22 +254,41 @@ void launch(ExecPlace place, LaunchParams const ¶ms, BODY const &body) template void launch(ExecPlace place, const LaunchParams ¶ms, const char *kernel_name, BODY const &body) { + + // + //Configure plugins + // + util::PluginContext context{place == ExecPlace::HOST ? + util::make_context() + : util::make_context()}; + + util::callPreCapturePlugins(context); + + using RAJA::util::trigger_updates_before; + auto p_body = trigger_updates_before(body); + + util::callPostCapturePlugins(context); + + util::callPreLaunchPlugins(context); + switch (place) { case ExecPlace::HOST: { using launch_t = LaunchExecute; - launch_t::exec(params, kernel_name, body); + launch_t::exec(params, kernel_name, p_body); break; } #ifdef RAJA_DEVICE_ACTIVE case ExecPlace::DEVICE: { using launch_t = LaunchExecute; - launch_t::exec(params, kernel_name, body); + launch_t::exec(params, kernel_name, p_body); break; } #endif default: RAJA_ABORT_OR_THROW("Unknown launch place or device is not enabled"); } + + util::callPostLaunchPlugins(context); } // Helper function to retrieve a resource based on the run-time policy - if a device is active @@ -297,21 +328,40 @@ launch(RAJA::resources::Resource res, LaunchParams const ¶ms, const char *ke place = RAJA::ExecPlace::DEVICE; } + // + //Configure plugins + // + util::PluginContext context{place == ExecPlace::HOST ? + util::make_context() + : util::make_context()}; + + util::callPreCapturePlugins(context); + + using RAJA::util::trigger_updates_before; + auto p_body = trigger_updates_before(body); + + util::callPostCapturePlugins(context); + + util::callPreLaunchPlugins(context); + switch (place) { case ExecPlace::HOST: { using launch_t = LaunchExecute; - return launch_t::exec(res, params, kernel_name, body); break; + return launch_t::exec(res, params, kernel_name, p_body); break; } #ifdef RAJA_DEVICE_ACTIVE case ExecPlace::DEVICE: { using launch_t = LaunchExecute; - return launch_t::exec(res, params, kernel_name, body); break; + return launch_t::exec(res, params, kernel_name, p_body); break; } #endif default: { RAJA_ABORT_OR_THROW("Unknown launch place or device is not enabled"); } } + + util::callPostLaunchPlugins(context); + //Should not get here; return resources::EventProxy(res); } diff --git a/include/RAJA/policy/cuda/launch.hpp b/include/RAJA/policy/cuda/launch.hpp index 0d84f01754..9221925176 100644 --- a/include/RAJA/policy/cuda/launch.hpp +++ b/include/RAJA/policy/cuda/launch.hpp @@ -81,26 +81,11 @@ struct LaunchExecute> { // BODY body = RAJA::cuda::make_launch_body(gridSize, blockSize, params.shared_mem_size, cuda_res, std::forward(body_in)); - // - // Configure plugins - // - util::PluginContext context{util::make_context>()}; - util::callPreCapturePlugins(context); - - using RAJA::util::trigger_updates_before; - auto p_body = trigger_updates_before(body); - - util::callPostCapturePlugins(context); - - util::callPreLaunchPlugins(context); - // // Launch the kernel // - void *args[] = {(void*)&p_body}; + void *args[] = {(void*)&body}; RAJA::cuda::launch((const void*)func, gridSize, blockSize, args, params.shared_mem_size, cuda_res, async, kernel_name); - - util::callPostLaunchPlugins(context); } RAJA_FT_END; @@ -146,26 +131,13 @@ struct LaunchExecute> { BODY body = RAJA::cuda::make_launch_body( gridSize, blockSize, params.shared_mem_size, cuda_res, std::forward(body_in)); - // - // Configure plugins - // - util::PluginContext context{util::make_context>()}; - util::callPreCapturePlugins(context); - - using RAJA::util::trigger_updates_before; - auto p_body = trigger_updates_before(body); - - util::callPostCapturePlugins(context); - - util::callPreLaunchPlugins(context); - // // Launch the kernel // - void *args[] = {(void*)&p_body}; - RAJA::cuda::launch((const void*)func, gridSize, blockSize, args, params.shared_mem_size, cuda_res, async, kernel_name); - - util::callPostLaunchPlugins(context); + void *args[] = {(void*)&body}; + { + RAJA::cuda::launch((const void*)func, gridSize, blockSize, args, params.shared_mem_size, cuda_res, async, kernel_name); + } } RAJA_FT_END; @@ -234,31 +206,15 @@ struct LaunchExecute(body_in)); - // - // Configure plugins - // - util::PluginContext context{util::make_context>()}; - util::callPreCapturePlugins(context); - - using RAJA::util::trigger_updates_before; - auto p_body = trigger_updates_before(body); - - util::callPostCapturePlugins(context); - - util::callPreLaunchPlugins(context); - // // Launch the kernel // - void *args[] = {(void*)&p_body}; + void *args[] = {(void*)&body}; RAJA::cuda::launch((const void*)func, gridSize, blockSize, args, params.shared_mem_size, cuda_res, async, kernel_name); - - util::callPostLaunchPlugins(context); } RAJA_FT_END; - - } + } } template @@ -299,27 +255,13 @@ struct LaunchExecute(body_in)); - - // - // Configure plugins - // - util::PluginContext context{util::make_context>()}; - util::callPreCapturePlugins(context); - - using RAJA::util::trigger_updates_before; - auto p_body = trigger_updates_before(body); - - util::callPostCapturePlugins(context); - - util::callPreLaunchPlugins(context); - // // Launch the kernel // - void *args[] = {(void*)&p_body}; - RAJA::cuda::launch((const void*)func, gridSize, blockSize, args, params.shared_mem_size, cuda_res, async, kernel_name); - - util::callPostLaunchPlugins(context); + void *args[] = {(void*)&body}; + { + RAJA::cuda::launch((const void*)func, gridSize, blockSize, args, params.shared_mem_size, cuda_res, async, kernel_name); + } } RAJA_FT_END; diff --git a/include/RAJA/policy/loop/launch.hpp b/include/RAJA/policy/loop/launch.hpp index 06b2d6332a..50d5ff1c0a 100644 --- a/include/RAJA/policy/loop/launch.hpp +++ b/include/RAJA/policy/loop/launch.hpp @@ -47,24 +47,10 @@ struct LaunchExecute { ctx.shared_mem_ptr = (char*) malloc(params.shared_mem_size); - // - // Configure plugins - // - util::PluginContext context{util::make_context()}; - util::callPreCapturePlugins(context); - - using RAJA::util::trigger_updates_before; - auto p_body = trigger_updates_before(body); - - util::callPostCapturePlugins(context); - - util::callPreLaunchPlugins(context); - - p_body(ctx); + body(ctx); free(ctx.shared_mem_ptr); ctx.shared_mem_ptr = nullptr; - util::callPostLaunchPlugins(context); } template @@ -73,26 +59,15 @@ struct LaunchExecute { { LaunchContext ctx; - ctx.shared_mem_ptr = (char*) malloc(params.shared_mem_size); - // - // Configure plugins - // - util::PluginContext context{util::make_context()}; - util::callPreCapturePlugins(context); + char *kernel_local_mem = new char[params.shared_mem_size]; + ctx.shared_mem_ptr = kernel_local_mem; - using RAJA::util::trigger_updates_before; - auto p_body = trigger_updates_before(body); + body(ctx); - util::callPostCapturePlugins(context); - - util::callPreLaunchPlugins(context); - - p_body(ctx); - - free(ctx.shared_mem_ptr); + delete[] kernel_local_mem; ctx.shared_mem_ptr = nullptr; - util::callPostLaunchPlugins(context); + return resources::EventProxy(res); } From 954eb6aa1dc50638f5d29a628830d4fa3476c045 Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Tue, 15 Nov 2022 15:15:06 -0800 Subject: [PATCH 54/94] Empty-Commit From e6635d00750e20ad07ee1306a5ad8f6a01820533 Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Tue, 15 Nov 2022 15:16:35 -0800 Subject: [PATCH 55/94] revert hip openmp launch plugin work --- include/RAJA/policy/hip/launch.hpp | 68 ++------------------------- include/RAJA/policy/openmp/launch.hpp | 36 +------------- 2 files changed, 6 insertions(+), 98 deletions(-) diff --git a/include/RAJA/policy/hip/launch.hpp b/include/RAJA/policy/hip/launch.hpp index d8d3164841..edeb6a91b6 100644 --- a/include/RAJA/policy/hip/launch.hpp +++ b/include/RAJA/policy/hip/launch.hpp @@ -82,26 +82,11 @@ struct LaunchExecute> { BODY body = RAJA::hip::make_launch_body( gridSize, blockSize, params.shared_mem_size, hip_res, std::forward(body_in)); - // - // Configure plugins - // - util::PluginContext context{util::make_context>()}; - util::callPreCapturePlugins(context); - - using RAJA::util::trigger_updates_before; - auto p_body = trigger_updates_before(body); - - util::callPostCapturePlugins(context); - - util::callPreLaunchPlugins(context); - // // Launch the kernel // - void *args[] = {(void*)&p_body}; + void *args[] = {(void*)&body}; RAJA::hip::launch((const void*)func, gridSize, blockSize, args, params.shared_mem_size, hip_res, async, kernel_name); - - util::callPostLaunchPlugins(context); } RAJA_FT_END; @@ -145,26 +130,11 @@ struct LaunchExecute> { BODY body = RAJA::hip::make_launch_body( gridSize, blockSize, params.shared_mem_size, hip_res, std::forward(body_in)); - // - // Configure plugins - // - util::PluginContext context{util::make_context>()}; - util::callPreCapturePlugins(context); - - using RAJA::util::trigger_updates_before; - auto p_body = trigger_updates_before(body); - - util::callPostCapturePlugins(context); - - util::callPreLaunchPlugins(context); - // // Launch the kernel // - void *args[] = {(void*)&p_body}; + void *args[] = {(void*)&body}; RAJA::hip::launch((const void*)func, gridSize, blockSize, args, params.shared_mem_size, hip_res, async, kernel_name); - - util::callPostLaunchPlugins(context); } RAJA_FT_END; @@ -230,26 +200,11 @@ struct LaunchExecute> { BODY body = RAJA::hip::make_launch_body( gridSize, blockSize, params.shared_mem_size, hip_res, std::forward(body_in)); - // - // Configure plugins - // - util::PluginContext context{util::make_context>()}; - util::callPreCapturePlugins(context); - - using RAJA::util::trigger_updates_before; - auto p_body = trigger_updates_before(body); - - util::callPostCapturePlugins(context); - - util::callPreLaunchPlugins(context); - // // Launch the kernel // - void *args[] = {(void*)&p_body}; + void *args[] = {(void*)&body}; RAJA::hip::launch((const void*)func, gridSize, blockSize, args, params.shared_mem_size, hip_res, async, kernel_name); - - util::callPostLaunchPlugins(context); } RAJA_FT_END; @@ -293,26 +248,11 @@ struct LaunchExecute> { BODY body = RAJA::hip::make_launch_body( gridSize, blockSize, params.shared_mem_size, hip_res, std::forward(body_in)); - // - // Configure plugins - // - util::PluginContext context{util::make_context>()}; - util::callPreCapturePlugins(context); - - using RAJA::util::trigger_updates_before; - auto p_body = trigger_updates_before(body); - - util::callPostCapturePlugins(context); - - util::callPreLaunchPlugins(context); - // // Launch the kernel // - void *args[] = {(void*)&p_body}; + void *args[] = {(void*)&body}; RAJA::hip::launch((const void*)func, gridSize, blockSize, args, params.shared_mem_size, hip_res, async, kernel_name); - - util::callPostLaunchPlugins(context); } RAJA_FT_END; diff --git a/include/RAJA/policy/openmp/launch.hpp b/include/RAJA/policy/openmp/launch.hpp index 191125c33d..7ec9a7c5ed 100644 --- a/include/RAJA/policy/openmp/launch.hpp +++ b/include/RAJA/policy/openmp/launch.hpp @@ -32,26 +32,12 @@ struct LaunchExecute { template static void exec(LaunchParams const ¶ms, const char *, BODY const &body) { - - // - // Configure plugins - // - util::PluginContext context{util::make_context()}; - util::callPreCapturePlugins(context); - - using RAJA::util::trigger_updates_before; - auto p_body = trigger_updates_before(body); - - util::callPostCapturePlugins(context); - - util::callPreLaunchPlugins(context); - RAJA::region([&]() { LaunchContext ctx; using RAJA::internal::thread_privatize; - auto loop_body = thread_privatize(p_body); + auto loop_body = thread_privatize(body); ctx.shared_mem_ptr = (char*) malloc(params.shared_mem_size); @@ -60,34 +46,18 @@ struct LaunchExecute { free(ctx.shared_mem_ptr); ctx.shared_mem_ptr = nullptr; }); - - util::callPostLaunchPlugins(context); } template static resources::EventProxy exec(RAJA::resources::Resource res, LaunchParams const ¶ms, const char *, BODY const &body) { - - // - // Configure plugins - // - util::PluginContext context{util::make_context()}; - util::callPreCapturePlugins(context); - - using RAJA::util::trigger_updates_before; - auto p_body = trigger_updates_before(body); - - util::callPostCapturePlugins(context); - - util::callPreLaunchPlugins(context); - RAJA::region([&]() { LaunchContext ctx; using RAJA::internal::thread_privatize; - auto loop_body = thread_privatize(p_body); + auto loop_body = thread_privatize(body); ctx.shared_mem_ptr = (char*) malloc(params.shared_mem_size); @@ -97,8 +67,6 @@ struct LaunchExecute { ctx.shared_mem_ptr = nullptr; }); - util::callPostLaunchPlugins(context); - return resources::EventProxy(res); } From 93dc420d9d0636fbcf0cb1966c6118552b8fa732 Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Tue, 15 Nov 2022 15:50:49 -0800 Subject: [PATCH 56/94] guards for device --- include/RAJA/pattern/launch/launch_core.hpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/RAJA/pattern/launch/launch_core.hpp b/include/RAJA/pattern/launch/launch_core.hpp index 455ae733f8..2a419293a3 100644 --- a/include/RAJA/pattern/launch/launch_core.hpp +++ b/include/RAJA/pattern/launch/launch_core.hpp @@ -258,9 +258,13 @@ void launch(ExecPlace place, const LaunchParams ¶ms, const char *kernel_name // //Configure plugins // +#ifdef RAJA_DEVICE_ACTIVE util::PluginContext context{place == ExecPlace::HOST ? util::make_context() : util::make_context()}; +#else + util::PluginContext context{util::make_context()}; +#endif util::callPreCapturePlugins(context); @@ -331,9 +335,13 @@ launch(RAJA::resources::Resource res, LaunchParams const ¶ms, const char *ke // //Configure plugins // +#ifdef RAJA_DEVICE_ACTIVE util::PluginContext context{place == ExecPlace::HOST ? util::make_context() : util::make_context()}; +#else + util::PluginContext context{util::make_context()}; +#endif util::callPreCapturePlugins(context); From 9297a85c6c245febee2988407d5d3dbd3242009e Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Wed, 16 Nov 2022 11:33:02 -0800 Subject: [PATCH 57/94] Export desul_atomics target in RAJA-config.cmake --- share/raja/cmake/RAJA-config.cmake.in | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/share/raja/cmake/RAJA-config.cmake.in b/share/raja/cmake/RAJA-config.cmake.in index 3baa8b68a2..5763554f6b 100644 --- a/share/raja/cmake/RAJA-config.cmake.in +++ b/share/raja/cmake/RAJA-config.cmake.in @@ -53,6 +53,19 @@ if (NOT TARGET camp) @PACKAGE_CMAKE_INSTALL_PREFIX@/lib/cmake/camp) endif () +if (NOT TARGET desul_atomics) + set(RAJA_DESUL_DIR "@desul_DIR@") + if(NOT desul_DIR) + set(desul_DIR ${RAJA_DESUL_DIR}) + endif() + + find_dependency(desul_atomics CONFIG NO_DEFAULT_PATH PATHS + ${desul_DIR} + ${desul_DIR}/lib/cmake/desul + @PACKAGE_CMAKE_INSTALL_PREFIX@ + @PACKAGE_CMAKE_INSTALL_PREFIX@/lib/cmake/desul) +endif () + set(BLT_TGTS "${CMAKE_CURRENT_LIST_DIR}/bltTargets.cmake") if(EXISTS "${BLT_TGTS}") include("${BLT_TGTS}") From 7c277ab6e31c89dab164a0adc80f0bb6a18bb11c Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Thu, 17 Nov 2022 09:32:56 -0800 Subject: [PATCH 58/94] clean up pass --- include/RAJA/pattern/launch/launch_core.hpp | 6 ++++-- .../plugin/tests/test-plugin-launch.hpp | 20 +++++++++++-------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/include/RAJA/pattern/launch/launch_core.hpp b/include/RAJA/pattern/launch/launch_core.hpp index 2a419293a3..12b850e370 100644 --- a/include/RAJA/pattern/launch/launch_core.hpp +++ b/include/RAJA/pattern/launch/launch_core.hpp @@ -326,7 +326,7 @@ launch(RAJA::resources::Resource res, LaunchParams const ¶ms, const char *ke { ExecPlace place; - if(res.get_platform() == camp::resources::v1::Platform::host) { + if(res.get_platform() == RAJA::Platform::host) { place = RAJA::ExecPlace::HOST; }else{ place = RAJA::ExecPlace::DEVICE; @@ -370,7 +370,9 @@ launch(RAJA::resources::Resource res, LaunchParams const ¶ms, const char *ke util::callPostLaunchPlugins(context); - //Should not get here; + RAJA_ABORT_OR_THROW("Unknown launch place"); + + //^^ RAJA will abort before getting here return resources::EventProxy(res); } diff --git a/test/integration/plugin/tests/test-plugin-launch.hpp b/test/integration/plugin/tests/test-plugin-launch.hpp index 68f3dfbbaf..44b3191265 100644 --- a/test/integration/plugin/tests/test-plugin-launch.hpp +++ b/test/integration/plugin/tests/test-plugin-launch.hpp @@ -32,14 +32,18 @@ void PluginLaunchTestImpl() for (int i = 0; i < 10; i++) { - PluginTestCallable p_callable{data}; - - RAJA::launch - (RAJA::LaunchParams(RAJA::Teams(1), RAJA::Threads(1)), - [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) - { - p_callable(i); - }); + //Keep PluginTestCallable within a scope to ensure + //destruction, consistent with other test + { + PluginTestCallable p_callable{data}; + + RAJA::launch + (RAJA::LaunchParams(RAJA::Teams(1), RAJA::Threads(1)), + [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) + { + p_callable(i); + }); + } CounterData loop_data; plugin_test_resource->memcpy(&loop_data, &data[i], sizeof(CounterData)); From daa0c23bb14b7c1ec69bfb65a2ff80d7f22e8254 Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Thu, 17 Nov 2022 10:04:08 -0800 Subject: [PATCH 59/94] rename DEVICE -> RAJA_DEVICE_BACKEND --- include/RAJA/pattern/params/reducer.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/RAJA/pattern/params/reducer.hpp b/include/RAJA/pattern/params/reducer.hpp index 5c4858a14a..a738e67af0 100644 --- a/include/RAJA/pattern/params/reducer.hpp +++ b/include/RAJA/pattern/params/reducer.hpp @@ -5,10 +5,10 @@ #include "RAJA/util/SoAPtr.hpp" #if defined(RAJA_CUDA_ACTIVE) -#define DEVICE cuda +#define RAJA_DEVICE_BACKEND cuda #include "RAJA/policy/cuda/MemUtils_CUDA.hpp" #elif defined(RAJA_HIP_ACTIVE) -#define DEVICE hip +#define RAJA_DEVICE_BACKEND hip #include "RAJA/policy/hip/MemUtils_HIP.hpp" #endif @@ -91,7 +91,7 @@ namespace detail #if defined(RAJA_CUDA_ACTIVE) || defined(RAJA_HIP_ACTIVE) // Device related attributes. value_type * devicetarget = nullptr; - RAJA::detail::SoAPtr device_mem; + RAJA::detail::SoAPtr device_mem; unsigned int * device_count = nullptr; #endif From c01f02cb73dc934438a488793b005b855dd50fdb Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Thu, 17 Nov 2022 10:58:25 -0800 Subject: [PATCH 60/94] use using instead of macro --- include/RAJA/pattern/params/reducer.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/RAJA/pattern/params/reducer.hpp b/include/RAJA/pattern/params/reducer.hpp index a738e67af0..5317420d5b 100644 --- a/include/RAJA/pattern/params/reducer.hpp +++ b/include/RAJA/pattern/params/reducer.hpp @@ -5,11 +5,11 @@ #include "RAJA/util/SoAPtr.hpp" #if defined(RAJA_CUDA_ACTIVE) -#define RAJA_DEVICE_BACKEND cuda #include "RAJA/policy/cuda/MemUtils_CUDA.hpp" +using device_mem_pool_t = RAJA::cuda::device_mempool_type; #elif defined(RAJA_HIP_ACTIVE) -#define RAJA_DEVICE_BACKEND hip #include "RAJA/policy/hip/MemUtils_HIP.hpp" +using device_mem_pool_t = RAJA::hip::device_mempool_type; #endif namespace RAJA @@ -91,11 +91,11 @@ namespace detail #if defined(RAJA_CUDA_ACTIVE) || defined(RAJA_HIP_ACTIVE) // Device related attributes. value_type * devicetarget = nullptr; - RAJA::detail::SoAPtr device_mem; + RAJA::detail::SoAPtr device_mem; unsigned int * device_count = nullptr; #endif - using ARG_TUP_T = camp::tuple; + using ARG_TUP_T = camp::tuple; RAJA_HOST_DEVICE ARG_TUP_T get_lambda_arg_tup() { return camp::make_tuple(&val); } using ARG_LIST_T = typename ARG_TUP_T::TList; From c9444e323100d50da9c25863009738528fd9d710 Mon Sep 17 00:00:00 2001 From: mdavis36 Date: Thu, 17 Nov 2022 15:29:14 -0800 Subject: [PATCH 61/94] Only export desul targets when enabled. --- share/raja/cmake/RAJA-config.cmake.in | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/share/raja/cmake/RAJA-config.cmake.in b/share/raja/cmake/RAJA-config.cmake.in index 5763554f6b..9560b784d7 100644 --- a/share/raja/cmake/RAJA-config.cmake.in +++ b/share/raja/cmake/RAJA-config.cmake.in @@ -53,18 +53,20 @@ if (NOT TARGET camp) @PACKAGE_CMAKE_INSTALL_PREFIX@/lib/cmake/camp) endif () -if (NOT TARGET desul_atomics) - set(RAJA_DESUL_DIR "@desul_DIR@") - if(NOT desul_DIR) - set(desul_DIR ${RAJA_DESUL_DIR}) - endif() +if (@RAJA_ENABLE_DESUL_ATOMICS@) + if (NOT TARGET desul_atomics) + set(RAJA_DESUL_DIR "@desul_DIR@") + if(NOT desul_DIR) + set(desul_DIR ${RAJA_DESUL_DIR}) + endif() - find_dependency(desul_atomics CONFIG NO_DEFAULT_PATH PATHS - ${desul_DIR} - ${desul_DIR}/lib/cmake/desul - @PACKAGE_CMAKE_INSTALL_PREFIX@ - @PACKAGE_CMAKE_INSTALL_PREFIX@/lib/cmake/desul) -endif () + find_dependency(desul_atomics CONFIG NO_DEFAULT_PATH PATHS + ${desul_DIR} + ${desul_DIR}/lib/cmake/desul + @PACKAGE_CMAKE_INSTALL_PREFIX@ + @PACKAGE_CMAKE_INSTALL_PREFIX@/lib/cmake/desul) + endif () +endif() set(BLT_TGTS "${CMAKE_CURRENT_LIST_DIR}/bltTargets.cmake") if(EXISTS "${BLT_TGTS}") From 9db00e6bb7748f7f18eb3491916093adddde8b9d Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Fri, 18 Nov 2022 12:08:01 -0800 Subject: [PATCH 62/94] Update user docs for new compiler flags. --- docs/sphinx/user_guide/getting_started.rst | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/docs/sphinx/user_guide/getting_started.rst b/docs/sphinx/user_guide/getting_started.rst index e4483d25c3..1c525d2dfa 100644 --- a/docs/sphinx/user_guide/getting_started.rst +++ b/docs/sphinx/user_guide/getting_started.rst @@ -323,16 +323,19 @@ OpenMP ^^^^^^^ To use OpenMP target offload GPU execution, additional options may need to be -passed to the compiler. The variable ``OpenMP_CXX_FLAGS`` is used for this. -Option syntax follows the CMake *list* pattern. For example, to specify OpenMP -target options for NVIDIA GPUs using a clang-based compiler, one may do -something like:: +passed to the compiler. BLT variables are used for this. Option syntax follows +the CMake *list* pattern. For example, to specify OpenMP target options for +NVIDIA GPUs using a clang-based compiler, one may do something like:: cmake \ ... \ - -DOpenMP_CXX_FLAGS="-fopenmp;-fopenmp-targets=nvptx64-nvidia-cuda" \ + -DBLT_OPENMP_COMPILE_FLAGS="-fopenmp;-fopenmp-targets=nvptx64-nvidia-cuda" \ + -DBLT_OPENMP_LINK_FLAGS="-fopenmp;-fopenmp-targets=nvptx64-nvidia-cuda" \ ... +Compiler flags are passed to other compilers similarly, using flags specific to +the compiler. Typically, the compile and link flags are the same as shown here. + ---------------------------------------- RAJA Example Build Configuration Files ---------------------------------------- From bdb72fed43b412dd67b3d373b80d85e526eda8b1 Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Tue, 22 Nov 2022 13:09:33 -0800 Subject: [PATCH 63/94] build blt target before camp --- share/raja/cmake/RAJA-config.cmake.in | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/share/raja/cmake/RAJA-config.cmake.in b/share/raja/cmake/RAJA-config.cmake.in index 3baa8b68a2..af522db812 100644 --- a/share/raja/cmake/RAJA-config.cmake.in +++ b/share/raja/cmake/RAJA-config.cmake.in @@ -40,6 +40,14 @@ if (@RAJA_NEEDS_BLT_TPLS@) endif () endif() +#Needs to come before camp +#Camp performs a check on the BLT target +set(BLT_TGTS "${CMAKE_CURRENT_LIST_DIR}/bltTargets.cmake") +if(EXISTS "${BLT_TGTS}") +include("${BLT_TGTS}") +endif() +unset(BLT_TGTS) + if (NOT TARGET camp) set(RAJA_CAMP_DIR "@camp_DIR@") if(NOT camp_DIR) @@ -53,10 +61,5 @@ if (NOT TARGET camp) @PACKAGE_CMAKE_INSTALL_PREFIX@/lib/cmake/camp) endif () -set(BLT_TGTS "${CMAKE_CURRENT_LIST_DIR}/bltTargets.cmake") -if(EXISTS "${BLT_TGTS}") -include("${BLT_TGTS}") -endif() -unset(BLT_TGTS) include("${CMAKE_CURRENT_LIST_DIR}/RAJATargets.cmake") check_required_components("@PROJECT_NAME@") From 75845dfe3cebc8ea9a3b1120d2f411a90a205c79 Mon Sep 17 00:00:00 2001 From: Michael Davis Date: Wed, 23 Nov 2022 07:40:18 -0800 Subject: [PATCH 64/94] build_and_test removing desul condition --- scripts/gitlab/build_and_test.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/gitlab/build_and_test.sh b/scripts/gitlab/build_and_test.sh index 2c0202ae9f..2c7041fefb 100755 --- a/scripts/gitlab/build_and_test.sh +++ b/scripts/gitlab/build_and_test.sh @@ -221,9 +221,9 @@ then echo "ERROR: failure(s) while running CTest" && exit 1 fi - if grep -q -i "ENABLE_HIP.*ON" ${hostconfig_path} || grep -q -i "RAJA_ENABLE_DESUL_ATOMICS.*ON" ${hostconfig_path} + if grep -q -i "ENABLE_HIP.*ON" ${hostconfig_path} then - echo "WARNING: not testing install with HIP or desul" + echo "WARNING: not testing install with HIP" else if [[ ! -d ${install_dir} ]] then From b5c872c6470a49b6d37516834c453b64eb3a3b03 Mon Sep 17 00:00:00 2001 From: Michael Davis Date: Wed, 23 Nov 2022 12:33:34 -0800 Subject: [PATCH 65/94] Update RAJA-config.cmake.in desul_atomics -> desul in find_dependency --- share/raja/cmake/RAJA-config.cmake.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/share/raja/cmake/RAJA-config.cmake.in b/share/raja/cmake/RAJA-config.cmake.in index 9560b784d7..2f4969104e 100644 --- a/share/raja/cmake/RAJA-config.cmake.in +++ b/share/raja/cmake/RAJA-config.cmake.in @@ -60,7 +60,7 @@ if (@RAJA_ENABLE_DESUL_ATOMICS@) set(desul_DIR ${RAJA_DESUL_DIR}) endif() - find_dependency(desul_atomics CONFIG NO_DEFAULT_PATH PATHS + find_dependency(desul CONFIG NO_DEFAULT_PATH PATHS ${desul_DIR} ${desul_DIR}/lib/cmake/desul @PACKAGE_CMAKE_INSTALL_PREFIX@ From b58bbc7f0cf69acea19dfddff880276c0c67aa64 Mon Sep 17 00:00:00 2001 From: Robert Chen Date: Mon, 28 Nov 2022 18:20:12 -0800 Subject: [PATCH 66/94] Remove OpenMPTarget tests for WorkGroup Dispatcher. --- test/unit/workgroup/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/unit/workgroup/CMakeLists.txt b/test/unit/workgroup/CMakeLists.txt index 0de12ba336..3cd18b7ef5 100644 --- a/test/unit/workgroup/CMakeLists.txt +++ b/test/unit/workgroup/CMakeLists.txt @@ -74,6 +74,10 @@ if(RAJA_TEST_EXHAUSTIVE OR NOT RAJA_COMPILER MATCHES "RAJA_COMPILER_Intel") endif() set(Dispatcher_SUBTESTS Single) +if(RAJA_ENABLE_TARGET_OPENMP) + # WorkGroup dispatcher for OpenMPTarget not implemented yet + list(REMOVE_ITEM BACKENDS OpenMPTarget) +endif() buildunitworkgrouptest(Dispatcher "${Dispatcher_SUBTESTS}" "${DISPATCHERS}" "${BACKENDS}") set(WorkStorage_SUBTESTS Constructor Iterator InsertCall Multiple) From 13f02551610989be34d421e1e283e018965e7ef8 Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Tue, 29 Nov 2022 11:55:32 -0800 Subject: [PATCH 67/94] Update release notes and version number to prepare for patch release. --- CMakeLists.txt | 2 +- RELEASE_NOTES.md | 27 +++++++++++++++++++++++++++ docs/conf.py | 2 +- 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b8ebfb77e1..ff6600cd13 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,7 +16,7 @@ include(CMakeDependentOption) # Set version number set(RAJA_VERSION_MAJOR 2022) set(RAJA_VERSION_MINOR 10) -set(RAJA_VERSION_PATCHLEVEL 2) +set(RAJA_VERSION_PATCHLEVEL 3) if (RAJA_LOADED AND (NOT RAJA_LOADED STREQUAL "${RAJA_VERSION_MAJOR}.${RAJA_VERSION_MINOR}.${RAJA_VERSION_PATCHLEVEL}")) message(FATAL_ERROR "You are mixing RAJA versions. Loaded is ${RAJA_LOADED}, expected ${RAJA_VERSION_MAJOR}.${RAJA_VERSION_MINOR}.${RAJA_VERSION_PATCHLEVEL}") diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 0f6b40cdf4..05bc1d6e35 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -19,6 +19,33 @@ Notable changes include: * Bug fixes/improvements: +Version 2022.10.3 -- Release date 2022-12-xx +============================================ + +This release fixes a few issues that were found after the v2022.10.2 release. + +Notable changes include: + + * Update camp submodule to .... + * Update BLT submodule to .... + + * Properly export 'roctx' target when CMake variable RAJA_ENABLE_ROCTX is on. + * Add missing template parameter pack argument in RAJA::statement::For + execution policy construct used in RAJA::kernel implementation for OpenMP + target back-end. + * Change to use compile-time GPU thread block size in RAJA::forall + implementation. This improves performance of GPU kernels, especially + those using the RAJA HIP back-end. + * Added RAJA plugin support, including CHAI support, for RAJA::launch. + * Renamed 'DEVICE' macro to 'RAJA_DEVICE_BACKEND' to prevent name conflicts + with other libraries. + * Updated User Guide documentation about CMake variable used to pass + compiler flags for OpenMP target back-end. This changed with CMake + minimum required version bump in v2022.10.0. + * Adjust ordering of BLT and camp target inclusion in RAJA CMake usage to + fix an issue with projects using external camp vs. RAJA submodule. + + Version 2022.10.2 -- Release date 2022-11-08 ============================================ diff --git a/docs/conf.py b/docs/conf.py index 20c5ef60e0..51e0336b1a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -88,7 +88,7 @@ # The short X.Y version. version = u'2022.10' # The full version, including alpha/beta/rc tags. -release = u'2022.10.2' +release = u'2022.10.3' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. From 07af608f9671ae0bd2521c42f840dc16c3485809 Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Tue, 29 Nov 2022 15:39:16 -0800 Subject: [PATCH 68/94] Cache PACKAGE_PREFIX_DIR --- share/raja/cmake/RAJA-config.cmake.in | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/share/raja/cmake/RAJA-config.cmake.in b/share/raja/cmake/RAJA-config.cmake.in index 507b882e2f..87864bc17f 100644 --- a/share/raja/cmake/RAJA-config.cmake.in +++ b/share/raja/cmake/RAJA-config.cmake.in @@ -32,6 +32,9 @@ # @PACKAGE_INIT@ +# caceh the prefix dir (could be overriden by find_dependency) +set(RAJA_PACKAGE_PREFIX_DIR ${PACKAGE_PREFIX_DIR}) + include(CMakeFindDependencyMacro) if (@RAJA_NEEDS_BLT_TPLS@) @@ -57,8 +60,8 @@ if (NOT TARGET camp) find_dependency(camp CONFIG NO_DEFAULT_PATH PATHS ${camp_DIR} ${camp_DIR}/lib/cmake/camp - @PACKAGE_CMAKE_INSTALL_PREFIX@ - @PACKAGE_CMAKE_INSTALL_PREFIX@/lib/cmake/camp) + ${RAJA_PACKAGE_PREFIX_DIR} + ${RAJA_PACKAGE_PREFIX_DIR}/lib/cmake/camp) endif () if (@RAJA_ENABLE_DESUL_ATOMICS@) @@ -71,8 +74,8 @@ if (@RAJA_ENABLE_DESUL_ATOMICS@) find_dependency(desul CONFIG NO_DEFAULT_PATH PATHS ${desul_DIR} ${desul_DIR}/lib/cmake/desul - @PACKAGE_CMAKE_INSTALL_PREFIX@ - @PACKAGE_CMAKE_INSTALL_PREFIX@/lib/cmake/desul) + ${RAJA_PACKAGE_PREFIX_DIR} + ${RAJA_PACKAGE_PREFIX_DIR}/lib/cmake/desul) endif () endif() From 1ab2134a4dc4bac1d45c6ada9f577c2bdb19f82d Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Tue, 29 Nov 2022 15:46:35 -0800 Subject: [PATCH 69/94] Update share/raja/cmake/RAJA-config.cmake.in Co-authored-by: Rich Hornung --- share/raja/cmake/RAJA-config.cmake.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/share/raja/cmake/RAJA-config.cmake.in b/share/raja/cmake/RAJA-config.cmake.in index 87864bc17f..629e404b56 100644 --- a/share/raja/cmake/RAJA-config.cmake.in +++ b/share/raja/cmake/RAJA-config.cmake.in @@ -32,7 +32,7 @@ # @PACKAGE_INIT@ -# caceh the prefix dir (could be overriden by find_dependency) +# cache the prefix dir (could be overriden by find_dependency) set(RAJA_PACKAGE_PREFIX_DIR ${PACKAGE_PREFIX_DIR}) include(CMakeFindDependencyMacro) From 889f0c131cbad9eae6b8c74fecea4cfaca6e6ee4 Mon Sep 17 00:00:00 2001 From: Jason Burmark Date: Wed, 30 Nov 2022 09:02:15 -0800 Subject: [PATCH 70/94] Use craycc instead of loading module --- scripts/lc-builds/toss4_cce_hip.sh | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/scripts/lc-builds/toss4_cce_hip.sh b/scripts/lc-builds/toss4_cce_hip.sh index 187e797a5f..503ec252c5 100755 --- a/scripts/lc-builds/toss4_cce_hip.sh +++ b/scripts/lc-builds/toss4_cce_hip.sh @@ -41,12 +41,10 @@ mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} module load cmake/3.24.2 -module load cce/${COMP_VER} - cmake \ -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_C_COMPILER=$(which cc) \ - -DCMAKE_CXX_COMPILER=$(which CC) \ + -DCMAKE_C_COMPILER="/usr/tce/packages/cce-tce/cce-${COMP_VER}/bin/craycc" \ + -DCMAKE_CXX_COMPILER="/usr/tce/packages/cce-tce/cce-${COMP_VER}/bin/crayCC" \ -DHIP_PATH=/opt/rocm-${HIP_VER}/hip \ -DCMAKE_HIP_ARCHITECTURES=${HIP_ARCH} \ -DGPU_TARGETS=${HIP_ARCH} \ @@ -69,7 +67,7 @@ echo " Please note that you have to have a consistent build environment" echo " when you make RAJA as cmake may reconfigure; load the appropriate" echo " cce module (${COMP_VER}) when building." echo -echo " module load cce/${COMP_VER}" +echo " module load cce-tce/${COMP_VER}" echo " srun -n1 make" echo echo "***********************************************************************" From fb6e0c45f66161cde8b69fc1fefc785cc7470f92 Mon Sep 17 00:00:00 2001 From: Jason Burmark Date: Wed, 30 Nov 2022 09:06:27 -0800 Subject: [PATCH 71/94] Add note about fp64 atomics with gfx90a --- scripts/lc-builds/toss4_amdclang.sh | 3 +++ scripts/lc-builds/toss4_amdclang_asan.sh | 3 +++ scripts/lc-builds/toss4_cce_hip.sh | 3 +++ 3 files changed, 9 insertions(+) diff --git a/scripts/lc-builds/toss4_amdclang.sh b/scripts/lc-builds/toss4_amdclang.sh index 615060490e..4aaa36b207 100755 --- a/scripts/lc-builds/toss4_amdclang.sh +++ b/scripts/lc-builds/toss4_amdclang.sh @@ -43,6 +43,9 @@ echo "Creating build directory build_${BUILD_SUFFIX} and generating configuratio echo "Configuration extra arguments:" echo " $@" echo +echo "To use fp64 HW atomics you must configure with these options when using gfx90a and hip >= 5.2" +echo " -DCMAKE_CXX_FLAGS=\"-munsafe-fp-atomics\"" +echo rm -rf build_${BUILD_SUFFIX} >/dev/null mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} diff --git a/scripts/lc-builds/toss4_amdclang_asan.sh b/scripts/lc-builds/toss4_amdclang_asan.sh index a886d1c0b6..02925093ff 100755 --- a/scripts/lc-builds/toss4_amdclang_asan.sh +++ b/scripts/lc-builds/toss4_amdclang_asan.sh @@ -43,6 +43,9 @@ echo "Creating build directory ${BUILD_SUFFIX} and generating configuration in i echo "Configuration extra arguments:" echo " $@" echo +echo "To use fp64 HW atomics you must configure with these options when using gfx90a and hip >= 5.2" +echo " -DCMAKE_CXX_FLAGS=\"-munsafe-fp-atomics\"" +echo rm -rf build_${BUILD_SUFFIX} >/dev/null mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} diff --git a/scripts/lc-builds/toss4_cce_hip.sh b/scripts/lc-builds/toss4_cce_hip.sh index 503ec252c5..fc325e0948 100755 --- a/scripts/lc-builds/toss4_cce_hip.sh +++ b/scripts/lc-builds/toss4_cce_hip.sh @@ -34,6 +34,9 @@ echo "Creating build directory build_${BUILD_SUFFIX} and generating configuratio echo "Configuration extra arguments:" echo " $@" echo +echo "To use fp64 HW atomics you must configure with these options when using gfx90a and hip >= 5.2" +echo " -DCMAKE_CXX_FLAGS=\"-munsafe-fp-atomics\"" +echo rm -rf build_${BUILD_SUFFIX} >/dev/null mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} From 9d593e3e28f3f15467ac0a3895b66490628438da Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Wed, 30 Nov 2022 09:44:33 -0800 Subject: [PATCH 72/94] Add `debug-find` to cmake invocation --- scripts/gitlab/build_and_test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/gitlab/build_and_test.sh b/scripts/gitlab/build_and_test.sh index 2c7041fefb..0d6fff8ef2 100755 --- a/scripts/gitlab/build_and_test.sh +++ b/scripts/gitlab/build_and_test.sh @@ -232,7 +232,7 @@ then cd ${install_dir}/examples/RAJA/using-with-cmake mkdir build && cd build - if ! $cmake_exe -C ../host-config.cmake ..; then + if ! $cmake_exe --debug-find -C ../host-config.cmake ..; then echo "ERROR: running $cmake_exe for using-with-cmake test" && exit 1 fi From f8b3bfa5b60366a56ac895d169c64c3ec921de63 Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Wed, 30 Nov 2022 10:23:11 -0800 Subject: [PATCH 73/94] Bump cmake min version req for install test to match raja build. --- test/install/using-with-cmake/CMakeLists.txt | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/test/install/using-with-cmake/CMakeLists.txt b/test/install/using-with-cmake/CMakeLists.txt index 32f8baa1da..27b397bbaf 100644 --- a/test/install/using-with-cmake/CMakeLists.txt +++ b/test/install/using-with-cmake/CMakeLists.txt @@ -5,7 +5,11 @@ # SPDX-License-Identifier: (BSD-3-Clause) ############################################################################### -cmake_minimum_required(VERSION 3.14) +if (ENABLE_HIP) + cmake_minimum_required(VERSION 3.23) +else() + cmake_minimum_required(VERSION 3.20) +endif() project(using_with_cmake) @@ -19,4 +23,4 @@ cmake_minimum_required(VERSION 3.14) add_executable(using-with-cmake using-with-cmake.cpp) target_link_libraries(using-with-cmake RAJA) - \ No newline at end of file + From f1f45a5a26fb3a9779181ec5ced1c331bf80086a Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Wed, 30 Nov 2022 10:26:46 -0800 Subject: [PATCH 74/94] Search for desul_atomics directly --- share/raja/cmake/RAJA-config.cmake.in | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/share/raja/cmake/RAJA-config.cmake.in b/share/raja/cmake/RAJA-config.cmake.in index 629e404b56..a9271781cd 100644 --- a/share/raja/cmake/RAJA-config.cmake.in +++ b/share/raja/cmake/RAJA-config.cmake.in @@ -71,7 +71,7 @@ if (@RAJA_ENABLE_DESUL_ATOMICS@) set(desul_DIR ${RAJA_DESUL_DIR}) endif() - find_dependency(desul CONFIG NO_DEFAULT_PATH PATHS + find_dependency(desul_atomics CONFIG NO_DEFAULT_PATH PATHS ${desul_DIR} ${desul_DIR}/lib/cmake/desul ${RAJA_PACKAGE_PREFIX_DIR} @@ -79,11 +79,5 @@ if (@RAJA_ENABLE_DESUL_ATOMICS@) endif () endif() -set(BLT_TGTS "${CMAKE_CURRENT_LIST_DIR}/bltTargets.cmake") -if(EXISTS "${BLT_TGTS}") -include("${BLT_TGTS}") -endif() -unset(BLT_TGTS) - include("${CMAKE_CURRENT_LIST_DIR}/RAJATargets.cmake") check_required_components("@PROJECT_NAME@") From 5810ac5ce4ec5ce2a9e2ae886474cd1d1752c34c Mon Sep 17 00:00:00 2001 From: David Beckingsale Date: Wed, 30 Nov 2022 11:13:34 -0800 Subject: [PATCH 75/94] Remove --debug-find flag --- scripts/gitlab/build_and_test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/gitlab/build_and_test.sh b/scripts/gitlab/build_and_test.sh index 0d6fff8ef2..2c7041fefb 100755 --- a/scripts/gitlab/build_and_test.sh +++ b/scripts/gitlab/build_and_test.sh @@ -232,7 +232,7 @@ then cd ${install_dir}/examples/RAJA/using-with-cmake mkdir build && cd build - if ! $cmake_exe --debug-find -C ../host-config.cmake ..; then + if ! $cmake_exe -C ../host-config.cmake ..; then echo "ERROR: running $cmake_exe for using-with-cmake test" && exit 1 fi From 9943ae45ac490d8ba78b317965b8637e31710e40 Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Wed, 30 Nov 2022 13:30:07 -0800 Subject: [PATCH 76/94] Bump camp version and BLT to match camp --- RELEASE_NOTES.md | 4 ++-- blt | 2 +- tpl/camp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 05bc1d6e35..107abdc759 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -26,8 +26,8 @@ This release fixes a few issues that were found after the v2022.10.2 release. Notable changes include: - * Update camp submodule to .... - * Update BLT submodule to .... + * Update camp submodule to v2022.10.1 + * Update BLT submodule to commit 8c229991 (includes fixes for crayftn + hip) * Properly export 'roctx' target when CMake variable RAJA_ENABLE_ROCTX is on. * Add missing template parameter pack argument in RAJA::statement::For diff --git a/blt b/blt index e35f490a8a..8c229991e6 160000 --- a/blt +++ b/blt @@ -1 +1 @@ -Subproject commit e35f490a8a8b1689e99b5f4308b5251f97eb36cf +Subproject commit 8c229991e65e7a9603c621b47cb3ba158bb7468c diff --git a/tpl/camp b/tpl/camp index 3a7486edb8..a1c74aade4 160000 --- a/tpl/camp +++ b/tpl/camp @@ -1 +1 @@ -Subproject commit 3a7486edb8b1c50ce36ecace56384d32a1009e4f +Subproject commit a1c74aade443c6332e953e1ca5ad5e9e5b5baf21 From f0f75fb3dae41ff0d31ec262bb6ac040b547ae95 Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Wed, 30 Nov 2022 13:55:09 -0800 Subject: [PATCH 77/94] Squash compiler warnings --- .../tests/test-dynamic-forall-resource-RangeSegment.hpp | 2 ++ test/integration/plugin/tests/test-plugin-launch.hpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/test/functional/dynamic_forall/resource-segment/tests/test-dynamic-forall-resource-RangeSegment.hpp b/test/functional/dynamic_forall/resource-segment/tests/test-dynamic-forall-resource-RangeSegment.hpp index 0b09079093..fccdf6880b 100644 --- a/test/functional/dynamic_forall/resource-segment/tests/test-dynamic-forall-resource-RangeSegment.hpp +++ b/test/functional/dynamic_forall/resource-segment/tests/test-dynamic-forall-resource-RangeSegment.hpp @@ -66,7 +66,9 @@ TYPED_TEST_P(DynamicForallResourceRangeSegmentTest, RangeSegmentForallResource) using POLICY_LIST = typename camp::at>::type; +#if defined(RAJA_DEVICE_ACTIVE) constexpr int N = camp::size::value; +#endif //If N == 2 host, no openmp is available //If N == 3 host, openmp is available diff --git a/test/integration/plugin/tests/test-plugin-launch.hpp b/test/integration/plugin/tests/test-plugin-launch.hpp index 44b3191265..6aa1744b6c 100644 --- a/test/integration/plugin/tests/test-plugin-launch.hpp +++ b/test/integration/plugin/tests/test-plugin-launch.hpp @@ -39,7 +39,7 @@ void PluginLaunchTestImpl() RAJA::launch (RAJA::LaunchParams(RAJA::Teams(1), RAJA::Threads(1)), - [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) + [=] RAJA_HOST_DEVICE(RAJA::LaunchContext RAJA_UNUSED_ARG(ctx)) { p_callable(i); }); From 172eb7f0988265700dfd4f1aff16e17b36fcbf8a Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Wed, 30 Nov 2022 15:48:19 -0800 Subject: [PATCH 78/94] Finalize release notes for patch release. --- RELEASE_NOTES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 107abdc759..71ca9d8b5e 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -30,6 +30,7 @@ Notable changes include: * Update BLT submodule to commit 8c229991 (includes fixes for crayftn + hip) * Properly export 'roctx' target when CMake variable RAJA_ENABLE_ROCTX is on. + * Fix CMake logic for exporting desul targets when desul atomics are enabled. * Add missing template parameter pack argument in RAJA::statement::For execution policy construct used in RAJA::kernel implementation for OpenMP target back-end. From f6c366c131e2d311c659e4cf19b0fa4d6f0bfdfa Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Wed, 30 Nov 2022 15:50:52 -0800 Subject: [PATCH 79/94] Update release date --- RELEASE_NOTES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 71ca9d8b5e..60c58ac4d7 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -19,7 +19,7 @@ Notable changes include: * Bug fixes/improvements: -Version 2022.10.3 -- Release date 2022-12-xx +Version 2022.10.3 -- Release date 2022-12-01 ============================================ This release fixes a few issues that were found after the v2022.10.2 release. From 837aae0668813c76b869b03bf3a86abb29fbb1a4 Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Thu, 1 Dec 2022 09:06:59 -0800 Subject: [PATCH 80/94] fix post launch call --- include/RAJA/pattern/launch/launch_core.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/RAJA/pattern/launch/launch_core.hpp b/include/RAJA/pattern/launch/launch_core.hpp index 12b850e370..3c40fce0d5 100644 --- a/include/RAJA/pattern/launch/launch_core.hpp +++ b/include/RAJA/pattern/launch/launch_core.hpp @@ -355,12 +355,12 @@ launch(RAJA::resources::Resource res, LaunchParams const ¶ms, const char *ke switch (place) { case ExecPlace::HOST: { using launch_t = LaunchExecute; - return launch_t::exec(res, params, kernel_name, p_body); break; + launch_t::exec(res, params, kernel_name, p_body); break; } #ifdef RAJA_DEVICE_ACTIVE case ExecPlace::DEVICE: { using launch_t = LaunchExecute; - return launch_t::exec(res, params, kernel_name, p_body); break; + launch_t::exec(res, params, kernel_name, p_body); break; } #endif default: { From bf26690fe602a390dca993bb92089ed7af8712b7 Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Thu, 1 Dec 2022 09:23:24 -0800 Subject: [PATCH 81/94] Modifications based on reviewer comments. --- RELEASE_NOTES.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 60c58ac4d7..60259de6ae 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -38,8 +38,8 @@ Notable changes include: implementation. This improves performance of GPU kernels, especially those using the RAJA HIP back-end. * Added RAJA plugin support, including CHAI support, for RAJA::launch. - * Renamed 'DEVICE' macro to 'RAJA_DEVICE_BACKEND' to prevent name conflicts - with other libraries. + * Replaced 'DEVICE' macro with alias to 'device_mem_pool_t' to prevent name + conflicts with other libraries. * Updated User Guide documentation about CMake variable used to pass compiler flags for OpenMP target back-end. This changed with CMake minimum required version bump in v2022.10.0. From c75a011d47a617119668dee1803f4cf2eea3d7bb Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Thu, 1 Dec 2022 09:31:23 -0800 Subject: [PATCH 82/94] avoid error --- include/RAJA/pattern/launch/launch_core.hpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/include/RAJA/pattern/launch/launch_core.hpp b/include/RAJA/pattern/launch/launch_core.hpp index 3c40fce0d5..233350aefc 100644 --- a/include/RAJA/pattern/launch/launch_core.hpp +++ b/include/RAJA/pattern/launch/launch_core.hpp @@ -355,12 +355,16 @@ launch(RAJA::resources::Resource res, LaunchParams const ¶ms, const char *ke switch (place) { case ExecPlace::HOST: { using launch_t = LaunchExecute; - launch_t::exec(res, params, kernel_name, p_body); break; + resources::EventProxy e_proxy = launch_t::exec(res, params, kernel_name, p_body); break; + util::callPostLaunchPlugins(context); + return e_proxy; } #ifdef RAJA_DEVICE_ACTIVE case ExecPlace::DEVICE: { using launch_t = LaunchExecute; - launch_t::exec(res, params, kernel_name, p_body); break; + resources::EventProxy e_proxy = launch_t::exec(res, params, kernel_name, p_body); break; + util::callPostLaunchPlugins(context); + return e_proxy; } #endif default: { @@ -368,8 +372,6 @@ launch(RAJA::resources::Resource res, LaunchParams const ¶ms, const char *ke } } - util::callPostLaunchPlugins(context); - RAJA_ABORT_OR_THROW("Unknown launch place"); //^^ RAJA will abort before getting here From b81e03dd78b44f5a3f97aa8dd167952376e32bd6 Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Thu, 1 Dec 2022 10:52:19 -0800 Subject: [PATCH 83/94] Update include/RAJA/pattern/launch/launch_core.hpp --- include/RAJA/pattern/launch/launch_core.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/RAJA/pattern/launch/launch_core.hpp b/include/RAJA/pattern/launch/launch_core.hpp index 233350aefc..a1ba1c3e38 100644 --- a/include/RAJA/pattern/launch/launch_core.hpp +++ b/include/RAJA/pattern/launch/launch_core.hpp @@ -355,7 +355,7 @@ launch(RAJA::resources::Resource res, LaunchParams const ¶ms, const char *ke switch (place) { case ExecPlace::HOST: { using launch_t = LaunchExecute; - resources::EventProxy e_proxy = launch_t::exec(res, params, kernel_name, p_body); break; + resources::EventProxy e_proxy = launch_t::exec(res, params, kernel_name, p_body); util::callPostLaunchPlugins(context); return e_proxy; } From 7d29fe9cbcc95722a5b8c3e1600c4de62790bac2 Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Thu, 1 Dec 2022 10:52:27 -0800 Subject: [PATCH 84/94] Update include/RAJA/pattern/launch/launch_core.hpp --- include/RAJA/pattern/launch/launch_core.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/RAJA/pattern/launch/launch_core.hpp b/include/RAJA/pattern/launch/launch_core.hpp index a1ba1c3e38..b450436923 100644 --- a/include/RAJA/pattern/launch/launch_core.hpp +++ b/include/RAJA/pattern/launch/launch_core.hpp @@ -362,7 +362,7 @@ launch(RAJA::resources::Resource res, LaunchParams const ¶ms, const char *ke #ifdef RAJA_DEVICE_ACTIVE case ExecPlace::DEVICE: { using launch_t = LaunchExecute; - resources::EventProxy e_proxy = launch_t::exec(res, params, kernel_name, p_body); break; + resources::EventProxy e_proxy = launch_t::exec(res, params, kernel_name, p_body); util::callPostLaunchPlugins(context); return e_proxy; } From 66947572fd567166c4a0d7e85a9e5a039f7fdde6 Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Thu, 1 Dec 2022 12:29:06 -0800 Subject: [PATCH 85/94] add resource + plugin test + reduce launch API complexity --- include/RAJA/pattern/launch/launch_core.hpp | 28 +----- test/include/RAJA_test-plugin-launchpol.hpp | 4 +- .../RAJA_test-plugin-resource-launchpol.hpp | 35 ++++++++ test/integration/plugin/CMakeLists.txt | 11 +++ .../plugin/test-plugin-resource-launch.cpp.in | 38 ++++++++ .../tests/test-plugin-resource-launch.hpp | 89 +++++++++++++++++++ 6 files changed, 178 insertions(+), 27 deletions(-) create mode 100644 test/include/RAJA_test-plugin-resource-launchpol.hpp create mode 100644 test/integration/plugin/test-plugin-resource-launch.cpp.in create mode 100644 test/integration/plugin/tests/test-plugin-resource-launch.hpp diff --git a/include/RAJA/pattern/launch/launch_core.hpp b/include/RAJA/pattern/launch/launch_core.hpp index b450436923..28dca62ff0 100644 --- a/include/RAJA/pattern/launch/launch_core.hpp +++ b/include/RAJA/pattern/launch/launch_core.hpp @@ -255,36 +255,15 @@ template void launch(ExecPlace place, const LaunchParams ¶ms, const char *kernel_name, BODY const &body) { - // - //Configure plugins - // -#ifdef RAJA_DEVICE_ACTIVE - util::PluginContext context{place == ExecPlace::HOST ? - util::make_context() - : util::make_context()}; -#else - util::PluginContext context{util::make_context()}; -#endif - - util::callPreCapturePlugins(context); - - using RAJA::util::trigger_updates_before; - auto p_body = trigger_updates_before(body); - - util::callPostCapturePlugins(context); - - util::callPreLaunchPlugins(context); - + //Forward to single policy launch API - simplifies testing of plugins switch (place) { case ExecPlace::HOST: { - using launch_t = LaunchExecute; - launch_t::exec(params, kernel_name, p_body); + launch>(params, kernel_name, body); break; } #ifdef RAJA_DEVICE_ACTIVE case ExecPlace::DEVICE: { - using launch_t = LaunchExecute; - launch_t::exec(params, kernel_name, p_body); + launch>(params, kernel_name, body); break; } #endif @@ -292,7 +271,6 @@ void launch(ExecPlace place, const LaunchParams ¶ms, const char *kernel_name RAJA_ABORT_OR_THROW("Unknown launch place or device is not enabled"); } - util::callPostLaunchPlugins(context); } // Helper function to retrieve a resource based on the run-time policy - if a device is active diff --git a/test/include/RAJA_test-plugin-launchpol.hpp b/test/include/RAJA_test-plugin-launchpol.hpp index 862f2018dd..b3677144dc 100644 --- a/test/include/RAJA_test-plugin-launchpol.hpp +++ b/test/include/RAJA_test-plugin-launchpol.hpp @@ -9,8 +9,8 @@ // Kernel execution policy lists used throughout plugin tests // -#ifndef __RAJA_test_plugin_kernelpol_HPP__ -#define __RAJA_test_plugin_kernelpol_HPP__ +#ifndef __RAJA_test_plugin_launchpol_HPP__ +#define __RAJA_test_plugin_launchpol_HPP__ #include "RAJA/RAJA.hpp" diff --git a/test/include/RAJA_test-plugin-resource-launchpol.hpp b/test/include/RAJA_test-plugin-resource-launchpol.hpp new file mode 100644 index 0000000000..c2c2dccb9a --- /dev/null +++ b/test/include/RAJA_test-plugin-resource-launchpol.hpp @@ -0,0 +1,35 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2016-22, Lawrence Livermore National Security, LLC +// and RAJA project contributors. See the RAJA/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +// +// Kernel execution policy lists used throughout plugin tests +// + +#ifndef __RAJA_test_plugin_resource_launchpol_HPP__ +#define __RAJA_test_plugin_resource_launchpol_HPP__ + +#include "RAJA/RAJA.hpp" + +#include "camp/list.hpp" + +// Sequential execution policy types +using SequentialPluginResourceLaunchExecPols = camp::list>; + +#if defined(RAJA_ENABLE_OPENMP) +using OpenMPPluginResourceLaunchExecPols = camp::list>; +#endif + +#if defined(RAJA_ENABLE_CUDA) +using CudaPluginResourceLaunchExecPols = camp::list>>; +#endif + +#if defined(RAJA_ENABLE_HIP) +using HipPluginResourceLaunchExecPols = camp::list>>; + +#endif + +#endif // __RAJA_test_plugin_kernelpol_HPP__ diff --git a/test/integration/plugin/CMakeLists.txt b/test/integration/plugin/CMakeLists.txt index 0d8d56643b..360d63164c 100644 --- a/test/integration/plugin/CMakeLists.txt +++ b/test/integration/plugin/CMakeLists.txt @@ -43,6 +43,17 @@ foreach( BACKEND ${PLUGIN_BACKENDS} ) PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/tests) endforeach() +foreach( BACKEND ${PLUGIN_BACKENDS} ) + configure_file( test-plugin-resource-launch.cpp.in + test-plugin-resource-launch-${BACKEND}.cpp ) + raja_add_test( NAME test-plugin-resource-launch-${BACKEND} + SOURCES ${CMAKE_CURRENT_BINARY_DIR}/test-plugin-resource-launch-${BACKEND}.cpp + plugin_to_test.cpp ) + + target_include_directories(test-plugin-resource-launch-${BACKEND}.exe + PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/tests) +endforeach() + set(DISPATCHERS Direct IndirectFunction IndirectVirtual) foreach( BACKEND ${PLUGIN_BACKENDS} ) diff --git a/test/integration/plugin/test-plugin-resource-launch.cpp.in b/test/integration/plugin/test-plugin-resource-launch.cpp.in new file mode 100644 index 0000000000..573982ba38 --- /dev/null +++ b/test/integration/plugin/test-plugin-resource-launch.cpp.in @@ -0,0 +1,38 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2016-22, Lawrence Livermore National Security, LLC +// and RAJA project contributors. See the RAJA/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +// +// test/include headers +// +#include "RAJA_test-base.hpp" +#include "RAJA_test-camp.hpp" +#include "RAJA_test-platform.hpp" + +#include "RAJA_test-plugin-resource-launchpol.hpp" + +// +// Header for tests in ./tests directory +// +// Note: CMake adds ./tests as an include dir for these tests. +// +#include "test-plugin-resource-launch.hpp" + + +// +// Cartesian product of types used in parameterized tests +// +using @BACKEND@PluginResourceLaunchTypes = + Test< camp::cartesian_product<@BACKEND@PluginResourceLaunchExecPols, + @BACKEND@ResourceList, + @BACKEND@PlatformList > >::Types; + +// +// Instantiate parameterized test +// +INSTANTIATE_TYPED_TEST_SUITE_P(@BACKEND@, + PluginResourceLaunchTest, + @BACKEND@PluginResourceLaunchTypes); diff --git a/test/integration/plugin/tests/test-plugin-resource-launch.hpp b/test/integration/plugin/tests/test-plugin-resource-launch.hpp new file mode 100644 index 0000000000..8895210f73 --- /dev/null +++ b/test/integration/plugin/tests/test-plugin-resource-launch.hpp @@ -0,0 +1,89 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2016-22, Lawrence Livermore National Security, LLC +// and RAJA project contributors. See the RAJA/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +/// +/// Header file containing basic integration tests for plugins with launch. +/// + +#ifndef __TEST_PLUGIN_RESOURCE_LAUNCH_HPP__ +#define __TEST_PLUGIN_RESOURCE_LAUNCH_HPP__ + +#include "test-plugin.hpp" + + +// Check that the plugin is called with the right Platform. +// Check that the plugin is called the correct number of times, +// once before and after each launch capture for the capture counter, +// once before and after each launch invocation for the launch counter. + +// test with basic launch +template +void PluginResourceLaunchTestImpl() +{ + SetupPluginVars spv(WORKING_RES::get_default()); + + CounterData* data = plugin_test_resource->allocate(10); + + for (int i = 0; i < 10; i++) { + + //Keep PluginTestCallable within a scope to ensure + //destruction, consistent with other test + { + PluginTestCallable p_callable{data}; + + RAJA::launch + (WORKING_RES::get_default(), RAJA::LaunchParams(RAJA::Teams(1), RAJA::Threads(1)), + [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) + { + p_callable(i); + }); + } + + CounterData loop_data; + plugin_test_resource->memcpy(&loop_data, &data[i], sizeof(CounterData)); + ASSERT_EQ(loop_data.capture_platform_active, PLATFORM); + ASSERT_EQ(loop_data.capture_counter_pre, i+1); + ASSERT_EQ(loop_data.capture_counter_post, i); + ASSERT_EQ(loop_data.launch_platform_active, PLATFORM); + ASSERT_EQ(loop_data.launch_counter_pre, i+1); + ASSERT_EQ(loop_data.launch_counter_post, i); + } + + CounterData plugin_data; + plugin_test_resource->memcpy(&plugin_data, plugin_test_data, sizeof(CounterData)); + ASSERT_EQ(plugin_data.capture_platform_active, RAJA::Platform::undefined); + ASSERT_EQ(plugin_data.capture_counter_pre, 10); + ASSERT_EQ(plugin_data.capture_counter_post, 10); + ASSERT_EQ(plugin_data.launch_platform_active, RAJA::Platform::undefined); + ASSERT_EQ(plugin_data.launch_counter_pre, 10); + ASSERT_EQ(plugin_data.launch_counter_post, 10); + + plugin_test_resource->deallocate(data); +} + + +TYPED_TEST_SUITE_P(PluginResourceLaunchTest); +template +class PluginResourceLaunchTest : public ::testing::Test +{ +}; + +TYPED_TEST_P(PluginResourceLaunchTest, PluginResourceLaunch) +{ + using LaunchPolicy = typename camp::at>::type; + using ResType = typename camp::at>::type; + using PlatformHolder = typename camp::at>::type; + + PluginResourceLaunchTestImpl( ); +} + +REGISTER_TYPED_TEST_SUITE_P(PluginResourceLaunchTest, + PluginResourceLaunch); + +#endif //__TEST_PLUGIN_LAUNCH_HPP__ From 2b0aced1e3279f96ae55bf5893058dd1dcbb8610 Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Thu, 1 Dec 2022 13:15:02 -0800 Subject: [PATCH 86/94] use non-default stream --- .../plugin/tests/test-plugin-resource-launch.hpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/integration/plugin/tests/test-plugin-resource-launch.hpp b/test/integration/plugin/tests/test-plugin-resource-launch.hpp index 8895210f73..76e10a7118 100644 --- a/test/integration/plugin/tests/test-plugin-resource-launch.hpp +++ b/test/integration/plugin/tests/test-plugin-resource-launch.hpp @@ -26,7 +26,9 @@ template void PluginResourceLaunchTestImpl() { - SetupPluginVars spv(WORKING_RES::get_default()); + WORKING_RES res; + + SetupPluginVars spv(res); CounterData* data = plugin_test_resource->allocate(10); @@ -38,7 +40,7 @@ void PluginResourceLaunchTestImpl() PluginTestCallable p_callable{data}; RAJA::launch - (WORKING_RES::get_default(), RAJA::LaunchParams(RAJA::Teams(1), RAJA::Threads(1)), + (res, RAJA::LaunchParams(RAJA::Teams(1), RAJA::Threads(1)), [=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) { p_callable(i); From 057e90faee2ef90abda03a2c665603852445cfa8 Mon Sep 17 00:00:00 2001 From: Arturo Vargas Date: Thu, 1 Dec 2022 14:59:02 -0800 Subject: [PATCH 87/94] move device_mem_pool_t to raja detail namespace --- include/RAJA/pattern/params/reducer.hpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/include/RAJA/pattern/params/reducer.hpp b/include/RAJA/pattern/params/reducer.hpp index 5317420d5b..72a4417697 100644 --- a/include/RAJA/pattern/params/reducer.hpp +++ b/include/RAJA/pattern/params/reducer.hpp @@ -6,10 +6,8 @@ #if defined(RAJA_CUDA_ACTIVE) #include "RAJA/policy/cuda/MemUtils_CUDA.hpp" -using device_mem_pool_t = RAJA::cuda::device_mempool_type; #elif defined(RAJA_HIP_ACTIVE) #include "RAJA/policy/hip/MemUtils_HIP.hpp" -using device_mem_pool_t = RAJA::hip::device_mempool_type; #endif namespace RAJA @@ -66,6 +64,14 @@ struct limits> { namespace RAJA { +namespace detail +{ +#if defined(RAJA_CUDA_ACTIVE) +using device_mem_pool_t = RAJA::cuda::device_mempool_type; +#elif defined(RAJA_HIP_ACTIVE) +using device_mem_pool_t = RAJA::hip::device_mempool_type; +#endif +} //namespace detail namespace expt { @@ -91,7 +97,7 @@ namespace detail #if defined(RAJA_CUDA_ACTIVE) || defined(RAJA_HIP_ACTIVE) // Device related attributes. value_type * devicetarget = nullptr; - RAJA::detail::SoAPtr device_mem; + RAJA::detail::SoAPtr device_mem; unsigned int * device_count = nullptr; #endif From 4597a788bae9fad7b3c597a0611351bbed91e8ab Mon Sep 17 00:00:00 2001 From: Jason Burmark Date: Thu, 1 Dec 2022 16:44:22 -0800 Subject: [PATCH 88/94] Move device_mem_pool_t into expt::Device namespace --- include/RAJA/pattern/params/reducer.hpp | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/include/RAJA/pattern/params/reducer.hpp b/include/RAJA/pattern/params/reducer.hpp index 72a4417697..d094a729c1 100644 --- a/include/RAJA/pattern/params/reducer.hpp +++ b/include/RAJA/pattern/params/reducer.hpp @@ -64,20 +64,18 @@ struct limits> { namespace RAJA { -namespace detail -{ -#if defined(RAJA_CUDA_ACTIVE) -using device_mem_pool_t = RAJA::cuda::device_mempool_type; -#elif defined(RAJA_HIP_ACTIVE) -using device_mem_pool_t = RAJA::hip::device_mempool_type; -#endif -} //namespace detail namespace expt { namespace detail { +#if defined(RAJA_CUDA_ACTIVE) + using device_mem_pool_t = RAJA::cuda::device_mempool_type; +#elif defined(RAJA_HIP_ACTIVE) + using device_mem_pool_t = RAJA::hip::device_mempool_type; +#endif + // // // Basic Reducer @@ -97,7 +95,7 @@ namespace detail #if defined(RAJA_CUDA_ACTIVE) || defined(RAJA_HIP_ACTIVE) // Device related attributes. value_type * devicetarget = nullptr; - RAJA::detail::SoAPtr device_mem; + RAJA::detail::SoAPtr device_mem; unsigned int * device_count = nullptr; #endif From 655ab91538848c2ec53bc5c6c002aebb38c7426f Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Fri, 2 Dec 2022 10:16:10 -0800 Subject: [PATCH 89/94] Fix CMake warning for inconsistent naming of rocPRIM in find module --- cmake/SetupPackages.cmake | 2 +- cmake/thirdparty/{FindRocPRIM.cmake => FindROCPRIM.cmake} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename cmake/thirdparty/{FindRocPRIM.cmake => FindROCPRIM.cmake} (100%) diff --git a/cmake/SetupPackages.cmake b/cmake/SetupPackages.cmake index 9e057f3a50..451fc62ae2 100644 --- a/cmake/SetupPackages.cmake +++ b/cmake/SetupPackages.cmake @@ -82,7 +82,7 @@ if (RAJA_ENABLE_HIP) endif() if (RAJA_ENABLE_EXTERNAL_ROCPRIM) - include(cmake/thirdparty/FindRocPRIM.cmake) + include(cmake/thirdparty/FindROCPRIM.cmake) if (ROCPRIM_FOUND) blt_import_library( NAME rocPRIM diff --git a/cmake/thirdparty/FindRocPRIM.cmake b/cmake/thirdparty/FindROCPRIM.cmake similarity index 100% rename from cmake/thirdparty/FindRocPRIM.cmake rename to cmake/thirdparty/FindROCPRIM.cmake From 08f0cdd0c8cc6cc6fe23b97d151dc79e25f12ba0 Mon Sep 17 00:00:00 2001 From: Alan Dayton Date: Fri, 2 Dec 2022 13:03:14 -0800 Subject: [PATCH 90/94] Attempt to use best practices for rocPRIM find module --- cmake/SetupPackages.cmake | 6 ++-- cmake/thirdparty/FindROCPRIM.cmake | 29 ---------------- cmake/thirdparty/FindrocPRIM.cmake | 54 ++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 32 deletions(-) delete mode 100644 cmake/thirdparty/FindROCPRIM.cmake create mode 100644 cmake/thirdparty/FindrocPRIM.cmake diff --git a/cmake/SetupPackages.cmake b/cmake/SetupPackages.cmake index 451fc62ae2..dc107e1fe3 100644 --- a/cmake/SetupPackages.cmake +++ b/cmake/SetupPackages.cmake @@ -82,11 +82,11 @@ if (RAJA_ENABLE_HIP) endif() if (RAJA_ENABLE_EXTERNAL_ROCPRIM) - include(cmake/thirdparty/FindROCPRIM.cmake) - if (ROCPRIM_FOUND) + find_package(rocPRIM) + if (rocPRIM_FOUND) blt_import_library( NAME rocPRIM - INCLUDES ${ROCPRIM_INCLUDE_DIRS} + INCLUDES ${rocPRIM_INCLUDE_DIRS} TREAT_INCLUDES_AS_SYSTEM ON EXPORTABLE ON) else() diff --git a/cmake/thirdparty/FindROCPRIM.cmake b/cmake/thirdparty/FindROCPRIM.cmake deleted file mode 100644 index 4279f48d1b..0000000000 --- a/cmake/thirdparty/FindROCPRIM.cmake +++ /dev/null @@ -1,29 +0,0 @@ -############################################################################### -# Copyright (c) 2016-22, Lawrence Livermore National Security, LLC -# and other RAJA project contributors. See the RAJA/LICENSE file for details. -# -# SPDX-License-Identifier: (BSD-3-Clause) -############################################################################### - -include (FindPackageHandleStandardArgs) - -find_path(ROCPRIM_INCLUDE_DIRS - NAMES rocprim/rocprim.hpp - HINTS - ${ROCPRIM_DIR}/ - ${ROCPRIM_DIR}/include - ${ROCPRIM_DIR}/rocprim/include - ${HIP_ROOT_DIR}/../rocprim - ${HIP_ROOT_DIR}/../rocprim/include - ${HIP_ROOT_DIR}/../include) - -find_package_handle_standard_args( - ROCPRIM - DEFAULT_MSG - ROCPRIM_INCLUDE_DIRS) - -if (ROCPRIM_INCLUDE_DIRS) - set(ROCPRIM_FOUND True) -else () - set(ROCPRIM_FOUND False) -endif() diff --git a/cmake/thirdparty/FindrocPRIM.cmake b/cmake/thirdparty/FindrocPRIM.cmake new file mode 100644 index 0000000000..702ef52013 --- /dev/null +++ b/cmake/thirdparty/FindrocPRIM.cmake @@ -0,0 +1,54 @@ +############################################################################### +# Copyright (c) 2016-22, Lawrence Livermore National Security, LLC +# and other RAJA project contributors. See the RAJA/LICENSE file for details. +# +# SPDX-License-Identifier: (BSD-3-Clause) +############################################################################### + +#[=======================================================================[.rst: + +FindrocPRIM +------- + +Finds the rocPRIM package. + +Result Variables +^^^^^^^^^^^^^^^^ + +This will define the following variables: + +``rocPRIM_FOUND`` +True if the system has the rocPRIM library. +``rocPRIM_INCLUDE_DIRS`` +Include directories needed to use rocPRIM. + +Cache Variables +^^^^^^^^^^^^^^^ + +The following cache variables may also be set: + +``rocPRIM_INCLUDE_DIR`` +The directory containing ``rocprim.hpp``. + +#]=======================================================================] + +include (FindPackageHandleStandardArgs) + +find_path(rocPRIM_INCLUDE_DIR + NAMES rocprim/rocprim.hpp + HINTS + ${ROCPRIM_DIR}/ + ${HIP_ROOT_DIR}/../ + PATH_SUFFIXES + include + rocprim + rocprim/include) + +find_package_handle_standard_args( + rocPRIM + DEFAULT_MSG + rocPRIM_INCLUDE_DIR) + +if (rocPRIM_FOUND) + set(rocPRIM_INCLUDE_DIRS ${rocPRIM_INCLUDE_DIR}) +endif() From c80f1a5f6cc7a789cee23e4b87535d67efcbe788 Mon Sep 17 00:00:00 2001 From: Rich Hornung Date: Mon, 5 Dec 2022 13:38:24 -0800 Subject: [PATCH 91/94] Final update of release notes. --- RELEASE_NOTES.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 60259de6ae..f34f191608 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -19,6 +19,7 @@ Notable changes include: * Bug fixes/improvements: + Version 2022.10.3 -- Release date 2022-12-01 ============================================ @@ -31,6 +32,8 @@ Notable changes include: * Properly export 'roctx' target when CMake variable RAJA_ENABLE_ROCTX is on. * Fix CMake logic for exporting desul targets when desul atomics are enabled. + * Fix the way we use CMake to find the rocPRIM module to follow CMake + best practices. * Add missing template parameter pack argument in RAJA::statement::For execution policy construct used in RAJA::kernel implementation for OpenMP target back-end. From 828388e0935ab1d9cb051c61d0fef1d99d31a4cd Mon Sep 17 00:00:00 2001 From: Corbin Robeck <13821049+CRobeck@users.noreply.github.com> Date: Mon, 5 Dec 2022 18:05:59 -0500 Subject: [PATCH 92/94] adding building script for hipcc --- scripts/lc-builds/toss4_hipcc.sh | 92 ++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 scripts/lc-builds/toss4_hipcc.sh diff --git a/scripts/lc-builds/toss4_hipcc.sh b/scripts/lc-builds/toss4_hipcc.sh new file mode 100644 index 0000000000..2ec9b9be46 --- /dev/null +++ b/scripts/lc-builds/toss4_hipcc.sh @@ -0,0 +1,92 @@ +#!/usr/bin/env bash + +############################################################################### +# Copyright (c) 2016-22, Lawrence Livermore National Security, LLC +# and RAJA project contributors. See the RAJA/LICENSE file for details. +# +# SPDX-License-Identifier: (BSD-3-Clause) +############################################################################### + +if [[ $# -lt 2 ]]; then + echo + echo "You must pass 2 or more arguments to the script (in this order): " + echo " 1) compiler version number" + echo " 2) HIP compute architecture" + echo " 3...) optional arguments to cmake" + echo + echo "For example: " + echo " toss4_amdclang.sh 4.1.0 gfx906" + exit +fi + +COMP_VER=$1 +COMP_ARCH=$2 +shift 2 + +HOSTCONFIG="hip_3_X" + +if [[ ${COMP_VER} == 4.* ]] +then +##HIP_CLANG_FLAGS="-mllvm -amdgpu-fixed-function-abi=1" + HOSTCONFIG="hip_4_link_X" +elif [[ ${COMP_VER} == 3.* ]] +then + HOSTCONFIG="hip_3_X" +else + echo "Unknown hip version, using ${HOSTCONFIG} host-config" +fi + +BUILD_SUFFIX=lc_toss4-hipcc-${COMP_VER}-${COMP_ARCH} + +echo +echo "Creating build directory build_${BUILD_SUFFIX} and generating configuration in it" +echo "Configuration extra arguments:" +echo " $@" +echo +echo "To use fp64 HW atomics you must configure with these options when using gfx90a and hip >= 5.2" +echo " -DCMAKE_CXX_FLAGS=\"-munsafe-fp-atomics\"" +echo + +rm -rf build_${BUILD_SUFFIX} >/dev/null +mkdir build_${BUILD_SUFFIX} && cd build_${BUILD_SUFFIX} + + +module load cmake/3.23.1 + +# unload rocm to avoid configuration problems where the loaded rocm and COMP_VER +# are inconsistent causing the rocprim from the module to be used unexpectedly +module unload rocm + + +cmake \ + -DCMAKE_BUILD_TYPE=Release \ + -DROCM_ROOT_DIR="/opt/rocm-${COMP_VER}" \ + -DHIP_ROOT_DIR="/opt/rocm-${COMP_VER}/hip" \ + -DHIP_PATH=/opt/rocm-${COMP_VER}/bin \ + -DCMAKE_C_COMPILER=/opt/rocm-${COMP_VER}/bin/hipcc \ + -DCMAKE_CXX_COMPILER=/opt/rocm-${COMP_VER}/bin/hipcc \ + -DCMAKE_HIP_ARCHITECTURES="${COMP_ARCH}" \ + -DGPU_TARGETS="${COMP_ARCH}" \ + -DAMDGPU_TARGETS="${COMP_ARCH}" \ + -DBLT_CXX_STD=c++14 \ + -C "../host-configs/lc-builds/toss4/${HOSTCONFIG}.cmake" \ + -DENABLE_HIP=ON \ + -DENABLE_OPENMP=ON \ + -DENABLE_CUDA=OFF \ + -DCMAKE_INSTALL_PREFIX=../install_${BUILD_SUFFIX} \ + "$@" \ + .. + +echo +echo "***********************************************************************" +echo +echo "cd into directory build_${BUILD_SUFFIX} and run make to build RAJA" +echo +echo " Please note that you have to have a consistent build environment" +echo " when you make RAJA as cmake may reconfigure; unload the rocm module" +echo " or load the appropriate rocm module (${COMP_VER}) when building." +echo +echo " module unload rocm" +echo " srun -n1 make" +echo +echo "***********************************************************************" From 3ca7535ccbf0299f8921ebddfd0fd90ed2cfd9ab Mon Sep 17 00:00:00 2001 From: Corbin Robeck <13821049+CRobeck@users.noreply.github.com> Date: Mon, 5 Dec 2022 23:08:07 -0500 Subject: [PATCH 93/94] make comments consistent Co-authored-by: Jason Burmark --- scripts/lc-builds/toss4_hipcc.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/lc-builds/toss4_hipcc.sh b/scripts/lc-builds/toss4_hipcc.sh index 2ec9b9be46..0e3d8bdd6d 100644 --- a/scripts/lc-builds/toss4_hipcc.sh +++ b/scripts/lc-builds/toss4_hipcc.sh @@ -15,7 +15,7 @@ if [[ $# -lt 2 ]]; then echo " 3...) optional arguments to cmake" echo echo "For example: " - echo " toss4_amdclang.sh 4.1.0 gfx906" + echo " toss4_hipcc.sh 4.1.0 gfx906" exit fi From 589685d756f40a78b5409641295bbd6882478c54 Mon Sep 17 00:00:00 2001 From: Jason Burmark Date: Tue, 6 Dec 2022 10:30:29 -0800 Subject: [PATCH 94/94] Fix exec permissions on hipcc script --- scripts/lc-builds/toss4_hipcc.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 scripts/lc-builds/toss4_hipcc.sh diff --git a/scripts/lc-builds/toss4_hipcc.sh b/scripts/lc-builds/toss4_hipcc.sh old mode 100644 new mode 100755