diff --git a/.bazelrc b/.bazelrc
index 02dec0349c4741..c17ae4494dc99c 100644
--- a/.bazelrc
+++ b/.bazelrc
@@ -226,12 +226,13 @@ build:cuda --repo_env TF_NEED_CUDA=1
 build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
 build:cuda --@local_config_cuda//:enable_cuda
 
+build:no_cuda_libs --@local_config_cuda//cuda:include_hermetic_cuda_libs=false
+
 # CUDA: This config refers to building CUDA op kernels with clang.
 build:cuda_clang --config=cuda
-# Enable TensorRT optimizations https://developer.nvidia.com/tensorrt
-build:cuda_clang --config=tensorrt
 build:cuda_clang --action_env=TF_CUDA_CLANG="1"
 build:cuda_clang --@local_config_cuda//:cuda_compiler=clang
+build:cuda_clang --copt=-Qunused-arguments
 # Select supported compute capabilities (supported graphics cards).
 # This is the same as the official TensorFlow builds.
 # See https://developer.nvidia.com/cuda-gpus#compute
@@ -244,12 +245,10 @@ build:cuda_clang --repo_env=TF_CUDA_COMPUTE_CAPABILITIES="sm_60,sm_70,sm_80,sm_8
 
 # Set up compilation CUDA version and paths and use the CUDA Clang toolchain.
 build:cuda_clang_official --config=cuda_clang
-build:cuda_clang_official --action_env=TF_CUDA_VERSION="12"
-build:cuda_clang_official --action_env=TF_CUDNN_VERSION="8"
-build:cuda_clang_official --action_env=CUDA_TOOLKIT_PATH="/usr/local/cuda-12.3"
+build:cuda_clang_official --action_env=TF_CUDA_VERSION="12.3"
+build:cuda_clang_official --action_env=TF_CUDNN_VERSION="8.9"
 build:cuda_clang_official --action_env=GCC_HOST_COMPILER_PATH="/dt9/usr/bin/gcc"
 build:cuda_clang_official --action_env=CLANG_CUDA_COMPILER_PATH="/usr/lib/llvm-17/bin/clang"
-build:cuda_clang_official --action_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
 build:cuda_clang_official --crosstool_top="@sigbuild-r2.17-clang_config_cuda//crosstool:toolchain"
 
 # Build with nvcc for CUDA and clang for host
@@ -545,10 +544,6 @@ build:rbe_linux_cuda --config=cuda_clang_official
 build:rbe_linux_cuda --config=rbe_linux_cpu
 # For Remote build execution -- GPU configuration
 build:rbe_linux_cuda --repo_env=REMOTE_GPU_TESTING=1
-build:rbe_linux_cuda --repo_env=TF_CUDA_CONFIG_REPO="@sigbuild-r2.17-clang_config_cuda"
-build:rbe_linux_cuda --repo_env=TF_TENSORRT_CONFIG_REPO="@sigbuild-r2.17-clang_config_tensorrt"
-build:rbe_linux_cuda --repo_env=TF_NCCL_CONFIG_REPO="@sigbuild-r2.17-clang_config_nccl"
-test:rbe_linux_cuda --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
 
 build:rbe_linux_cuda_nvcc --config=rbe_linux_cuda
 build:rbe_linux_cuda_nvcc --config=nvcc_clang
@@ -633,7 +628,6 @@ build:release_cpu_linux_base --repo_env=BAZEL_COMPILER="/usr/lib/llvm-17/bin/cla
 # Test-related settings below this point.
 test:release_linux_base --build_tests_only --keep_going --test_output=errors --verbose_failures=true
 test:release_linux_base --local_test_jobs=HOST_CPUS
-test:release_linux_base --test_env=LD_LIBRARY_PATH
 # Give only the list of failed tests at the end of the log
 test:release_linux_base --test_summary=short
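The `no_cuda_libs` config added above is the single switch that keeps NVIDIA's shared libraries out of a build's outputs while leaving CUDA compilation itself enabled. A minimal sketch of how a user-side .bazelrc could compose it with the existing configs (the `gpu_wheel` config name is hypothetical):

    # Compile against hermetic CUDA but do not bundle the NVIDIA .so files,
    # mirroring what ci/official/wheel.sh does below.
    build:gpu_wheel --config=cuda_clang_official
    build:gpu_wheel --config=no_cuda_libs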
@@ -645,7 +639,6 @@ build:release_gpu_linux --config=release_cpu_linux
 
 # Set up compilation CUDA version and paths and use the CUDA Clang toolchain.
 # Note that linux cpu and cuda builds share the same toolchain now.
 build:release_gpu_linux --config=cuda_clang_official
-test:release_gpu_linux --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
 # Local test jobs has to be 4 because parallel_gpu_execute is fragile, I think
 test:release_gpu_linux --test_timeout=300,450,1200,3600 --local_test_jobs=4 --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute
@@ -674,11 +667,8 @@ test:unsupported_cpu_linux --config=release_base
 
 build:unsupported_gpu_linux --config=cuda
 build:unsupported_gpu_linux --config=unsupported_cpu_linux
 build:unsupported_gpu_linux --action_env=TF_CUDA_VERSION="11"
-build:unsupported_gpu_linux --action_env=TF_CUDNN_VERSION="8"
+build:unsupported_gpu_linux --action_env=TF_CUDNN_VERSION="8.6"
 build:unsupported_gpu_linux --repo_env=TF_CUDA_COMPUTE_CAPABILITIES="sm_35,sm_50,sm_60,sm_70,sm_75,compute_80"
-build:unsupported_gpu_linux --config=tensorrt
-build:unsupported_gpu_linux --action_env=CUDA_TOOLKIT_PATH="/usr/local/cuda-11.2"
-build:unsupported_gpu_linux --action_env=LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda-11.1/lib64:/usr/local/tensorrt/lib"
 build:unsupported_gpu_linux --action_env=GCC_HOST_COMPILER_PATH="/dt9/usr/bin/gcc"
 build:unsupported_gpu_linux --crosstool_top=@ubuntu20.04-gcc9_manylinux2014-cuda11.2-cudnn8.1-tensorrt7.2_config_cuda//crosstool:toolchain
diff --git a/ci/official/wheel.sh b/ci/official/wheel.sh
index e2a828bfacce35..5ae0feca48f646 100755
--- a/ci/official/wheel.sh
+++ b/ci/official/wheel.sh
@@ -27,7 +27,8 @@ if [[ "$TFCI_NIGHTLY_UPDATE_VERSION_ENABLE" == 1 ]]; then
   export TFCI_BUILD_PIP_PACKAGE_ARGS="$(echo $TFCI_BUILD_PIP_PACKAGE_ARGS | sed 's/tensorflow/tf_nightly/')"
 fi
 
-tfrun bazel build $TFCI_BAZEL_COMMON_ARGS //tensorflow/tools/pip_package:wheel $TFCI_BUILD_PIP_PACKAGE_ARGS
+tfrun bazel build $TFCI_BAZEL_COMMON_ARGS --config=no_cuda_libs \
+//tensorflow/tools/pip_package:wheel $TFCI_BUILD_PIP_PACKAGE_ARGS
 
 tfrun find ./bazel-bin/tensorflow/tools/pip_package -iname "*.whl" -exec cp {} $TFCI_OUTPUT_DIR \;
 tfrun ./ci/official/utilities/rename_and_verify_wheels.sh
diff --git a/configure.py b/configure.py
index 0081eeabf66bcc..01e7f343872797 100644
--- a/configure.py
+++ b/configure.py
@@ -16,7 +16,6 @@
 
 import argparse
 import errno
-import glob
 import json
 import os
 import platform
@@ -239,7 +238,7 @@ def setup_python(environ_cp):
   write_to_bazelrc('build --python_path=\"{}"'.format(python_bin_path))
   environ_cp['PYTHON_BIN_PATH'] = python_bin_path
 
-  # If choosen python_lib_path is from a path specified in the PYTHONPATH
+  # If chosen python_lib_path is from a path specified in the PYTHONPATH
   # variable, need to tell bazel to include PYTHONPATH
   if environ_cp.get('PYTHONPATH'):
     python_paths = environ_cp.get('PYTHONPATH').split(':')
@@ -775,11 +774,6 @@ def get_ndk_api_level(environ_cp, android_ndk_home_path):
 def set_gcc_host_compiler_path(environ_cp):
   """Set GCC_HOST_COMPILER_PATH."""
   default_gcc_host_compiler_path = which('gcc') or ''
-  cuda_bin_symlink = '%s/bin/gcc' % environ_cp.get('CUDA_TOOLKIT_PATH')
-
-  if os.path.islink(cuda_bin_symlink):
-    # os.readlink is only available in linux
-    default_gcc_host_compiler_path = os.path.realpath(cuda_bin_symlink)
 
   gcc_host_compiler_path = prompt_loop_or_load_from_env(
       environ_cp,
@@ -937,17 +931,6 @@ def disable_clang_offsetof_extension(clang_version):
     write_to_bazelrc('build --copt=-Wno-gnu-offsetof-extensions')
 
 
-def set_tf_cuda_paths(environ_cp):
-  """Set TF_CUDA_PATHS."""
-  ask_cuda_paths = (
-      'Please specify the comma-separated list of base paths to look for CUDA '
-      'libraries and headers. [Leave empty to use the default]: ')
-  tf_cuda_paths = get_from_env_or_user_or_default(environ_cp, 'TF_CUDA_PATHS',
-                                                  ask_cuda_paths, '')
-  if tf_cuda_paths:
-    environ_cp['TF_CUDA_PATHS'] = tf_cuda_paths
-
-
 def set_tf_cuda_version(environ_cp):
   """Set TF_CUDA_VERSION."""
   ask_cuda_version = (
@@ -972,73 +955,10 @@ def set_tf_cudnn_version(environ_cp):
     environ_cp['TF_CUDNN_VERSION'] = tf_cudnn_version
 
 
-def set_tf_tensorrt_version(environ_cp):
-  """Set TF_TENSORRT_VERSION."""
-  if not (is_linux() or is_windows()):
-    raise ValueError('Currently TensorRT is only supported on Linux platform.')
-
-  if not int(environ_cp.get('TF_NEED_TENSORRT', False)):
-    return
-
-  ask_tensorrt_version = (
-      'Please specify the TensorRT version you want to use. '
-      '[Leave empty to default to TensorRT %s]: ') % _DEFAULT_TENSORRT_VERSION
-  tf_tensorrt_version = get_from_env_or_user_or_default(
-      environ_cp, 'TF_TENSORRT_VERSION', ask_tensorrt_version,
-      _DEFAULT_TENSORRT_VERSION)
-  environ_cp['TF_TENSORRT_VERSION'] = tf_tensorrt_version
-
-
-def set_tf_nccl_version(environ_cp):
-  """Set TF_NCCL_VERSION."""
-  if not is_linux():
-    raise ValueError('Currently NCCL is only supported on Linux platform.')
-
-  if 'TF_NCCL_VERSION' in environ_cp:
-    return
-
-  ask_nccl_version = (
-      'Please specify the locally installed NCCL version you want to use. '
-      '[Leave empty to use http://github.com/nvidia/nccl]: ')
-  tf_nccl_version = get_from_env_or_user_or_default(environ_cp,
-                                                    'TF_NCCL_VERSION',
-                                                    ask_nccl_version, '')
-  environ_cp['TF_NCCL_VERSION'] = tf_nccl_version
-
-
-def get_native_cuda_compute_capabilities(environ_cp):
-  """Get native cuda compute capabilities.
-
-  Args:
-    environ_cp: copy of the os.environ.
-
-  Returns:
-    string of native cuda compute capabilities, separated by comma.
- """ - device_query_bin = os.path.join( - environ_cp.get('CUDA_TOOLKIT_PATH'), 'extras/demo_suite/deviceQuery') - if os.path.isfile(device_query_bin) and os.access(device_query_bin, os.X_OK): - try: - output = run_shell(device_query_bin).split('\n') - pattern = re.compile('[0-9]*\\.[0-9]*') - output = [pattern.search(x) for x in output if 'Capability' in x] - output = ','.join(x.group() for x in output if x is not None) - except subprocess.CalledProcessError: - output = '' - else: - output = '' - return output - - def set_tf_cuda_compute_capabilities(environ_cp): """Set TF_CUDA_COMPUTE_CAPABILITIES.""" while True: - native_cuda_compute_capabilities = get_native_cuda_compute_capabilities( - environ_cp) - if not native_cuda_compute_capabilities: - default_cuda_compute_capabilities = _DEFAULT_CUDA_COMPUTE_CAPABILITIES - else: - default_cuda_compute_capabilities = native_cuda_compute_capabilities + default_cuda_compute_capabilities = _DEFAULT_CUDA_COMPUTE_CAPABILITIES ask_cuda_compute_capabilities = ( 'Please specify a list of comma-separated CUDA compute capabilities ' @@ -1217,73 +1137,6 @@ def configure_ios(environ_cp): symlink_force(filepath, new_filepath) -def validate_cuda_config(environ_cp): - """Run find_cuda_config.py and return cuda_toolkit_path, or None.""" - - def maybe_encode_env(env): - """Encodes unicode in env to str on Windows python 2.x.""" - if not is_windows() or sys.version_info[0] != 2: - return env - for k, v in env.items(): - if isinstance(k, unicode): - k = k.encode('ascii') - if isinstance(v, unicode): - v = v.encode('ascii') - env[k] = v - return env - - cuda_libraries = ['cuda', 'cudnn'] - if is_linux(): - if int(environ_cp.get('TF_NEED_TENSORRT', False)): - cuda_libraries.append('tensorrt') - if environ_cp.get('TF_NCCL_VERSION', None): - cuda_libraries.append('nccl') - if is_windows(): - if int(environ_cp.get('TF_NEED_TENSORRT', False)): - cuda_libraries.append('tensorrt') - print('WARNING: TensorRT support on Windows is experimental\n') - - paths = glob.glob('**/third_party/gpus/find_cuda_config.py', recursive=True) - if not paths: - raise FileNotFoundError( - "Can't find 'find_cuda_config.py' script inside working directory") - proc = subprocess.Popen( - [environ_cp['PYTHON_BIN_PATH'], paths[0]] + cuda_libraries, - stdout=subprocess.PIPE, - env=maybe_encode_env(environ_cp)) - - if proc.wait(): - # Errors from find_cuda_config.py were sent to stderr. 
- print('Asking for detailed CUDA configuration...\n') - return False - - config = dict( - tuple(line.decode('ascii').rstrip().split(': ')) for line in proc.stdout) - - print('Found CUDA %s in:' % config['cuda_version']) - print(' %s' % config['cuda_library_dir']) - print(' %s' % config['cuda_include_dir']) - - print('Found cuDNN %s in:' % config['cudnn_version']) - print(' %s' % config['cudnn_library_dir']) - print(' %s' % config['cudnn_include_dir']) - - if 'tensorrt_version' in config: - print('Found TensorRT %s in:' % config['tensorrt_version']) - print(' %s' % config['tensorrt_library_dir']) - print(' %s' % config['tensorrt_include_dir']) - - if config.get('nccl_version', None): - print('Found NCCL %s in:' % config['nccl_version']) - print(' %s' % config['nccl_library_dir']) - print(' %s' % config['nccl_include_dir']) - - print('\n') - - environ_cp['CUDA_TOOLKIT_PATH'] = config['cuda_toolkit_path'] - return True - - def get_gcc_compiler(environ_cp): gcc_env = environ_cp.get('CXX') or environ_cp.get('CC') or which('gcc') if gcc_env is not None: @@ -1388,57 +1241,20 @@ def main(): if (environ_cp.get('TF_NEED_CUDA') == '1' and 'TF_CUDA_CONFIG_REPO' not in environ_cp): - set_action_env_var( - environ_cp, - 'TF_NEED_TENSORRT', - 'TensorRT', - False, - bazel_config_name='tensorrt') - - environ_save = dict(environ_cp) for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS): - if validate_cuda_config(environ_cp): - cuda_env_names = [ - 'TF_CUDA_VERSION', - 'TF_CUBLAS_VERSION', - 'TF_CUDNN_VERSION', - 'TF_TENSORRT_VERSION', - 'TF_NCCL_VERSION', - 'TF_CUDA_PATHS', - # Items below are for backwards compatibility when not using - # TF_CUDA_PATHS. - 'CUDA_TOOLKIT_PATH', - 'CUDNN_INSTALL_PATH', - 'NCCL_INSTALL_PATH', - 'NCCL_HDR_PATH', - 'TENSORRT_INSTALL_PATH' - ] - # Note: set_action_env_var above already writes to bazelrc. - for name in cuda_env_names: - if name in environ_cp: - write_action_env_to_bazelrc(name, environ_cp[name]) - break - - # Restore settings changed below if CUDA config could not be validated. - environ_cp = dict(environ_save) - - set_tf_cuda_version(environ_cp) - set_tf_cudnn_version(environ_cp) - if is_windows(): - set_tf_tensorrt_version(environ_cp) - if is_linux(): - set_tf_tensorrt_version(environ_cp) - set_tf_nccl_version(environ_cp) - - set_tf_cuda_paths(environ_cp) + cuda_env_names = [ + 'TF_CUDA_VERSION', + 'TF_CUDNN_VERSION', + ] + # Note: set_action_env_var above already writes to bazelrc. + for name in cuda_env_names: + if name in environ_cp: + write_action_env_to_bazelrc(name, environ_cp[name]) + break - else: - raise UserInputError( - 'Invalid CUDA setting were provided %d ' - 'times in a row. Assuming to be a scripting mistake.' 
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tests/hlo_to_kernel/tanh.mlir b/tensorflow/compiler/mlir/tools/kernel_gen/tests/hlo_to_kernel/tanh.mlir
index 2d3c8e6f5b9ef7..67f5a036fccc32 100644
--- a/tensorflow/compiler/mlir/tools/kernel_gen/tests/hlo_to_kernel/tanh.mlir
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/tests/hlo_to_kernel/tanh.mlir
@@ -1,3 +1,4 @@
+// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../../../../../cuda_nvcc"
 // RUN: hlo_to_kernel --input=%s --output=%t --unroll_factors=4 --tile_sizes=256 --arch=sm_70
 
 func.func @tanh(%arg0: tensor<*xf32>) -> tensor<*xf32> attributes {tf_entry} {
diff --git a/tensorflow/core/common_runtime/gpu/BUILD b/tensorflow/core/common_runtime/gpu/BUILD
index 84ec94ba673ff8..6de9910ca2233f 100644
--- a/tensorflow/core/common_runtime/gpu/BUILD
+++ b/tensorflow/core/common_runtime/gpu/BUILD
@@ -158,6 +158,7 @@ tf_cuda_library(
         "@local_config_cuda//cuda:cudnn_header",
         "@local_xla//xla/stream_executor/cuda:cuda_platform",
         "@local_xla//xla/stream_executor/gpu:gpu_stream",
+        "@local_xla//xla/tsl:gpu_runtime_hermetic_cuda_deps",
     ],
     defines = if_linux_x86_64(["TF_PLATFORM_LINUX_X86_64"]),
     features = ["-layering_check"],
diff --git a/tensorflow/opensource_only.files b/tensorflow/opensource_only.files
index b2645a331739e3..7c3947fa49e861 100644
--- a/tensorflow/opensource_only.files
+++ b/tensorflow/opensource_only.files
@@ -207,6 +207,8 @@ tf_staging/third_party/compute_library/BUILD:
 tf_staging/third_party/compute_library/build_defs.bzl:
 tf_staging/third_party/coremltools.BUILD:
 tf_staging/third_party/cub.BUILD:
+tf_staging/third_party/cuda_redist_json_repo.bzl:
+tf_staging/third_party/cuda_repo.bzl:
 tf_staging/third_party/curl.BUILD:
 tf_staging/third_party/cython.BUILD:
 tf_staging/third_party/ducc/BUILD:
@@ -233,6 +235,7 @@ tf_staging/third_party/googleapis/build_rules.bzl:
 tf_staging/third_party/googleapis/googleapis.BUILD:
 tf_staging/third_party/googleapis/repository_rules.bzl:
 tf_staging/third_party/gpus/BUILD:
+tf_staging/third_party/gpus/compiler_common_tools.bzl:
 tf_staging/third_party/gpus/crosstool/BUILD.rocm.tpl:
 tf_staging/third_party/gpus/crosstool/BUILD.sycl.tpl:
 tf_staging/third_party/gpus/crosstool/BUILD.tpl:
@@ -243,15 +246,32 @@ tf_staging/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tp
 tf_staging/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_sycl.tpl:
 tf_staging/third_party/gpus/crosstool/sycl_cc_toolchain_config.bzl.tpl:
 tf_staging/third_party/gpus/crosstool/windows/msvc_wrapper_for_nvcc.py.tpl:
+tf_staging/third_party/gpus/cuda/BUILD.hermetic.tpl:
 tf_staging/third_party/gpus/cuda/BUILD.tpl:
 tf_staging/third_party/gpus/cuda/BUILD.windows.tpl:
 tf_staging/third_party/gpus/cuda/BUILD:
 tf_staging/third_party/gpus/cuda/LICENSE:
 tf_staging/third_party/gpus/cuda/build_defs.bzl.tpl:
+tf_staging/third_party/gpus/cuda/cuda_cccl.BUILD:
 tf_staging/third_party/gpus/cuda/cuda_config.h.tpl:
 tf_staging/third_party/gpus/cuda/cuda_config.py.tpl:
+tf_staging/third_party/gpus/cuda/cuda_cublas.BUILD.tpl:
+tf_staging/third_party/gpus/cuda/cuda_cudart.BUILD.tpl:
+tf_staging/third_party/gpus/cuda/cuda_cudnn.BUILD.tpl:
+tf_staging/third_party/gpus/cuda/cuda_cufft.BUILD.tpl:
+tf_staging/third_party/gpus/cuda/cuda_cupti.BUILD.tpl:
+tf_staging/third_party/gpus/cuda/cuda_curand.BUILD.tpl:
+tf_staging/third_party/gpus/cuda/cuda_cusolver.BUILD.tpl:
+tf_staging/third_party/gpus/cuda/cuda_cusparse.BUILD.tpl:
+tf_staging/third_party/gpus/cuda/cuda_nccl.BUILD:
+tf_staging/third_party/gpus/cuda/cuda_nvcc.BUILD:
+tf_staging/third_party/gpus/cuda/cuda_nvjitlink.BUILD.tpl:
+tf_staging/third_party/gpus/cuda/cuda_nvml.BUILD:
+tf_staging/third_party/gpus/cuda/cuda_nvprune.BUILD:
+tf_staging/third_party/gpus/cuda/cuda_nvtx.BUILD:
 tf_staging/third_party/gpus/cuda_configure.bzl:
 tf_staging/third_party/gpus/find_cuda_config:.py
+tf_staging/third_party/gpus/hermetic_cuda_configure.bzl:
 tf_staging/third_party/gpus/rocm/BUILD.tpl:
 tf_staging/third_party/gpus/rocm/BUILD:
 tf_staging/third_party/gpus/rocm/build_defs.bzl.tpl:
@@ -283,6 +303,7 @@ tf_staging/third_party/nccl/archive.BUILD:
 tf_staging/third_party/nccl/archive.patch:
 tf_staging/third_party/nccl/build_defs.bzl.tpl:
 tf_staging/third_party/nccl/generated_names.bzl.tpl:
+tf_staging/third_party/nccl/hermetic_nccl_configure.bzl:
 tf_staging/third_party/nccl/nccl_configure.bzl:
 tf_staging/third_party/nccl/system.BUILD.tpl:
 tf_staging/third_party/nlohmann_json.BUILD:
diff --git a/tensorflow/python/framework/BUILD b/tensorflow/python/framework/BUILD
index bac9403d63dc27..eff65990c58c38 100644
--- a/tensorflow/python/framework/BUILD
+++ b/tensorflow/python/framework/BUILD
@@ -6,6 +6,7 @@ load("//tensorflow:strict.default.bzl", "py_strict_library", "py_strict_test")
 # Placeholder: load py_proto_library
 load(
     "//tensorflow:tensorflow.bzl",
+    "if_hermetic_cuda_tools",
     "if_not_windows",
     "if_oss",
     "if_xla_available",
@@ -1045,6 +1046,13 @@ tf_python_pybind_extension(
         "python_api_dispatcher.h",
         "//tensorflow/python/lib/core:safe_pyobject_ptr_required_hdrs",
     ],
+    # This data is needed to add hermetic CUDA tools in python runfiles.
+    data = if_hermetic_cuda_tools(
+        [
+            "@cuda_nvcc//:ptxas",
+            "@cuda_nvcc//:nvvm",
+        ],
+    ),
     enable_stub_generation = True,
     pytype_srcs = [
         "_pywrap_python_api_dispatcher.pyi",
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index 6e656b861bedaf..0bf6a91f995007 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -73,6 +73,7 @@ load(
     "tsl_gpu_library",
     _cc_header_only_library = "cc_header_only_library",
     _if_cuda_or_rocm = "if_cuda_or_rocm",
+    _if_hermetic_cuda_tools = "if_hermetic_cuda_tools",
     _if_nccl = "if_nccl",
     _transitive_hdrs = "transitive_hdrs",
 )
@@ -803,7 +804,7 @@ def tf_cc_shared_object(
     testonly = kwargs.pop("testonly", False)
 
     for name_os, name_os_major, name_os_full in names:
-        # Windows DLLs cant be versioned
+        # Windows DLLs can't be versioned
        if name_os.endswith(".dll"):
             name_os_major = name_os
             name_os_full = name_os
@@ -3578,3 +3579,6 @@ def replace_with_portable_tf_lib_when_required(non_portable_tf_deps, use_lib_wit
 
 def tf_python_framework_friends():
     return ["//tensorflow:__subpackages__"]
+
+def if_hermetic_cuda_tools(if_true, if_false = []):
+    return _if_hermetic_cuda_tools(if_true, if_false)
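The new `if_hermetic_cuda_tools` macro gives BUILD files the same select-style ergonomics as `if_cuda`, as the python/framework/BUILD change above shows. A sketch of using it from another BUILD file (target and file names hypothetical):

    load("//tensorflow:tensorflow.bzl", "if_hermetic_cuda_tools")

    py_binary(
        name = "my_gpu_tool",
        srcs = ["my_gpu_tool.py"],
        # Ship ptxas and libdevice in runfiles only when hermetic tools are on.
        data = if_hermetic_cuda_tools(
            ["@cuda_nvcc//:ptxas", "@cuda_nvcc//:nvvm"],
            [],
        ),
    )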
""" path_to_exclude = [ + "cuda_cccl/_virtual_includes", + "cuda_cublas/_virtual_includes", + "cuda_cudart/_virtual_includes", + "cuda_cudnn/_virtual_includes", + "cuda_cufft/_virtual_includes", + "cuda_cupti/_virtual_includes", + "cuda_curand/_virtual_includes", + "cuda_cusolver/_virtual_includes", + "cuda_cusparse/_virtual_includes", + "cuda_nccl/_virtual_includes", + "cuda_nvcc/_virtual_includes", + "cuda_nvjitlink/_virtual_includes", + "cuda_nvml/_virtual_includes", + "cuda_nvtx/_virtual_includes", + "external/cuda_cccl", + "external/cuda_cublas", + "external/cuda_cudart", + "external/cuda_cudnn", + "external/cuda_cufft", + "external/cuda_cupti", + "external/cuda_curand", + "external/cuda_cusolver", + "external/cuda_cusparse", + "external/cuda_nccl", + "external/cuda_nvcc", + "external/cuda_nvjitlink", + "external/cuda_nvml", + "external/cuda_nvtx", "external/pypi", "external/jsoncpp_git/src", "local_config_cuda/cuda/_virtual_includes", diff --git a/tensorflow/tools/toolchains/remote_config/configs.bzl b/tensorflow/tools/toolchains/remote_config/configs.bzl index 41fe389aa09cad..48473d49e84ce2 100644 --- a/tensorflow/tools/toolchains/remote_config/configs.bzl +++ b/tensorflow/tools/toolchains/remote_config/configs.bzl @@ -703,7 +703,7 @@ def initialize_rbe_configs(): "TF_CUDNN_VERSION": "8.9", "TF_ENABLE_XLA": "1", "TF_NEED_CUDA": "1", - "TF_NEED_TENSORRT": "1", + "TF_NEED_TENSORRT": "0", "TF_SYSROOT": "/dt9", "TF_TENSORRT_VERSION": "8.6", }, @@ -742,7 +742,7 @@ def initialize_rbe_configs(): "TF_CUDNN_VERSION": "8.9", "TF_ENABLE_XLA": "1", "TF_NEED_CUDA": "1", - "TF_NEED_TENSORRT": "1", + "TF_NEED_TENSORRT": "0", "TF_SYSROOT": "/dt9", "TF_TENSORRT_VERSION": "8.6", }, @@ -782,7 +782,7 @@ def initialize_rbe_configs(): "TF_ENABLE_XLA": "1", "TF_NEED_CUDA": "1", "TF_SYSROOT": "/dt9", - "TF_NEED_TENSORRT": "1", + "TF_NEED_TENSORRT": "0", "TF_TENSORRT_VERSION": "8.6", }, ) @@ -820,7 +820,7 @@ def initialize_rbe_configs(): "TF_ENABLE_XLA": "1", "TF_NEED_CUDA": "1", "TF_SYSROOT": "/dt9", - "TF_NEED_TENSORRT": "1", + "TF_NEED_TENSORRT": "0", "TF_TENSORRT_VERSION": "8.6", }, ) diff --git a/tensorflow/tools/toolchains/remote_config/rbe_config.bzl b/tensorflow/tools/toolchains/remote_config/rbe_config.bzl index ae776c2a2fd388..317e7139e5f96b 100644 --- a/tensorflow/tools/toolchains/remote_config/rbe_config.bzl +++ b/tensorflow/tools/toolchains/remote_config/rbe_config.bzl @@ -1,9 +1,15 @@ """Macro that creates external repositories for remote config.""" load("//tensorflow/tools/toolchains/remote_config:containers.bzl", "containers") -load("//third_party/gpus:cuda_configure.bzl", "remote_cuda_configure") + +# If you need to use non-hermetic CUDA, replace the line below with +# load("//third_party/gpus:cuda_configure.bzl", "remote_cuda_configure") +load("//third_party/gpus:hermetic_cuda_configure.bzl", "hermetic_cuda_configure") load("//third_party/gpus:rocm_configure.bzl", "remote_rocm_configure") -load("//third_party/nccl:nccl_configure.bzl", "remote_nccl_configure") + +# If you need to use non-hermetic CUDA, replace the line below with +# load("//third_party/nccl:nccl_configure.bzl", "remote_nccl_configure") +load("//third_party/nccl:hermetic_nccl_configure.bzl", "hermetic_nccl_configure") load("//third_party/py:python_configure.bzl", "local_python_configure", "remote_python_configure") load("//third_party/remote_config:remote_platform_configure.bzl", "remote_platform_configure") load("//third_party/tensorrt:tensorrt_configure.bzl", "remote_tensorrt_configure") @@ -42,7 +48,7 @@ def 
_tensorflow_rbe_config(name, compiler, python_versions, os, rocm_version = N "TF_CUDNN_VERSION": cudnn_version, "TF_CUDA_VERSION": cuda_version, "CUDNN_INSTALL_PATH": cudnn_install_path if cudnn_install_path != None else "/usr/lib/x86_64-linux-gnu", - "TF_NEED_TENSORRT": "1", + "TF_NEED_TENSORRT": "0", "TF_TENSORRT_VERSION": tensorrt_version if tensorrt_version != None else "", "TENSORRT_INSTALL_PATH": tensorrt_install_path if tensorrt_install_path != None else "/usr/lib/x86_64-linux-gnu", "GCC_HOST_COMPILER_PATH": compiler if not compiler.endswith("clang") else "", @@ -58,13 +64,17 @@ def _tensorflow_rbe_config(name, compiler, python_versions, os, rocm_version = N "Pool": "default", } - remote_cuda_configure( + # If you need to use non-hermetic CUDA, replace the call below with + # remote_cuda_configure. + hermetic_cuda_configure( name = "%s_config_cuda" % name, environ = env, exec_properties = exec_properties, ) - remote_nccl_configure( + # If you need to use non-hermetic CUDA, replace the call below with + # remote_nccl_configure. + hermetic_nccl_configure( name = "%s_config_nccl" % name, environ = env, exec_properties = exec_properties, @@ -175,13 +185,15 @@ def sigbuild_tf_configs(name_container_map, env): "Pool": "default", } - remote_cuda_configure( + # TODO (ybaturina): DEPRECATED: replace with remote_cuda_configure for non-hermetic CUDA. + hermetic_cuda_configure( name = "%s_config_cuda" % name, environ = env, exec_properties = exec_properties, ) - remote_nccl_configure( + # TODO (ybaturina): DEPRECATED: replace with remote_nccl_configure for non-hermetic NCCL. + hermetic_nccl_configure( name = "%s_config_nccl" % name, environ = env, exec_properties = exec_properties, diff --git a/tensorflow/workspace2.bzl b/tensorflow/workspace2.bzl index 77eea2ac869167..c041ad5d1abbbc 100644 --- a/tensorflow/workspace2.bzl +++ b/tensorflow/workspace2.bzl @@ -15,6 +15,7 @@ load("//tensorflow/tools/toolchains/clang6:repo.bzl", "clang6_configure") load("//tensorflow/tools/toolchains/embedded/arm-linux:arm_linux_toolchain_configure.bzl", "arm_linux_toolchain_configure") load("//tensorflow/tools/toolchains/remote:configure.bzl", "remote_execution_configure") load("//tensorflow/tools/toolchains/remote_config:configs.bzl", "initialize_rbe_configs") +load("//third_party:cuda_repo.bzl", "cuda_distributives") load("//third_party:repo.bzl", "tf_http_archive", "tf_mirror_urls") load("//third_party/absl:workspace.bzl", absl = "repo") load("//third_party/benchmark:workspace.bzl", benchmark = "repo") @@ -29,7 +30,10 @@ load("//third_party/flatbuffers:workspace.bzl", flatbuffers = "repo") load("//third_party/FP16:workspace.bzl", FP16 = "repo") load("//third_party/gemmlowp:workspace.bzl", gemmlowp = "repo") load("//third_party/git:git_configure.bzl", "git_configure") -load("//third_party/gpus:cuda_configure.bzl", "cuda_configure") + +# If you need to use non-hermetic CUDA, replace the line below with +# load("//third_party/gpus:cuda_configure.bzl", "cuda_configure") +load("//third_party/gpus:hermetic_cuda_configure.bzl", "hermetic_cuda_configure") load("//third_party/gpus:rocm_configure.bzl", "rocm_configure") load("//third_party/hexagon:workspace.bzl", hexagon_nn = "repo") load("//third_party/highwayhash:workspace.bzl", highwayhash = "repo") @@ -41,7 +45,10 @@ load("//third_party/kissfft:workspace.bzl", kissfft = "repo") load("//third_party/libprotobuf_mutator:workspace.bzl", libprotobuf_mutator = "repo") load("//third_party/llvm:setup.bzl", "llvm_setup") load("//third_party/nasm:workspace.bzl", nasm = "repo") 
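As the comments in rbe_config.bzl spell out, the non-hermetic path stays one edit away. For reference, a sketch of the rollback inside `_tensorflow_rbe_config` (same repository names, loads swapped back):

    load("//third_party/gpus:cuda_configure.bzl", "remote_cuda_configure")
    load("//third_party/nccl:nccl_configure.bzl", "remote_nccl_configure")

    remote_cuda_configure(
        name = "%s_config_cuda" % name,
        environ = env,
        exec_properties = exec_properties,
    )
    remote_nccl_configure(
        name = "%s_config_nccl" % name,
        environ = env,
        exec_properties = exec_properties,
    )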
-load("//third_party/nccl:nccl_configure.bzl", "nccl_configure") + +# If you need to use non-hermetic CUDA, replace the line below with +# load("//third_party/nccl:nccl_configure.bzl", "nccl_configure") +load("//third_party/nccl:hermetic_nccl_configure.bzl", "hermetic_nccl_configure") load("//third_party/opencl_headers:workspace.bzl", opencl_headers = "repo") load("//third_party/pasta:workspace.bzl", pasta = "repo") load("//third_party/py:python_configure.bzl", "python_configure") @@ -103,9 +110,15 @@ def _tf_toolchains(): # Note that we check the minimum bazel version in WORKSPACE. clang6_configure(name = "local_config_clang6") cc_download_clang_toolchain(name = "local_config_download_clang") - cuda_configure(name = "local_config_cuda") + + # If you need to use non-hermetic CUDA, replace the line below with + # cuda_configure(name = "local_config_cuda") + hermetic_cuda_configure(name = "local_config_cuda") tensorrt_configure(name = "local_config_tensorrt") - nccl_configure(name = "local_config_nccl") + + # If you need to use non-hermetic CUDA, replace the line below with + # nccl_configure(name = "local_config_nccl") + hermetic_nccl_configure(name = "local_config_nccl") git_configure(name = "local_config_git") syslibs_configure(name = "local_config_syslibs") python_configure(name = "local_config_python") @@ -919,6 +932,28 @@ def _tf_repositories(): version_conflict_policy = "pinned", ) +_CUDA_12_3_NCCL_WHEEL_DICT = { + "x86_64-unknown-linux-gnu": { + "url": "https://files.pythonhosted.org/packages/38/00/d0d4e48aef772ad5aebcf70b73028f88db6e5640b36c38e90445b7a57c45/nvidia_nccl_cu12-2.19.3-py3-none-manylinux1_x86_64.whl", + "sha256": "a9734707a2c96443331c1e48c717024aa6678a0e2a4cb66b2c364d18cee6b48d", + }, + "aarch64-unknown-linux-gnu": { + "url": "https://files.pythonhosted.org/packages/c1/bb/d09dda47c881f9ff504afd6f9ca4f502ded6d8fc2f572cacc5e39da91c28/nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_aarch64.whl", + "sha256": "1fc150d5c3250b170b29410ba682384b14581db722b2531b0d8d33c595f33d01", + }, +} + +_CUDA_12_1_NCCL_WHEEL_DICT = { + "x86_64-unknown-linux-gnu": { + "url": "https://files.pythonhosted.org/packages/38/00/d0d4e48aef772ad5aebcf70b73028f88db6e5640b36c38e90445b7a57c45/nvidia_nccl_cu12-2.19.3-py3-none-manylinux1_x86_64.whl", + "sha256": "a9734707a2c96443331c1e48c717024aa6678a0e2a4cb66b2c364d18cee6b48d", + }, + "aarch64-unknown-linux-gnu": { + "url": "https://files.pythonhosted.org/packages/c1/bb/d09dda47c881f9ff504afd6f9ca4f502ded6d8fc2f572cacc5e39da91c28/nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_aarch64.whl", + "sha256": "1fc150d5c3250b170b29410ba682384b14581db722b2531b0d8d33c595f33d01", + }, +} + def workspace(): # Check the bazel version before executing any repository rules, in case # those rules rely on the version we require here. @@ -936,6 +971,10 @@ def workspace(): # don't already exist (at least if the external repository macros were # written according to common practice to query native.existing_rule()). _tf_repositories() + cuda_distributives(cuda_nccl_wheel_dict = { + "12.3.2": _CUDA_12_3_NCCL_WHEEL_DICT, + "12.1.1": _CUDA_12_1_NCCL_WHEEL_DICT, + }) tfrt_dependencies() diff --git a/tensorflow/workspace3.bzl b/tensorflow/workspace3.bzl index d7b32f01c7144d..3632d49ade844f 100644 --- a/tensorflow/workspace3.bzl +++ b/tensorflow/workspace3.bzl @@ -1,10 +1,33 @@ """TensorFlow workspace initialization. 
Consult the WORKSPACE on how to use it.""" load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") +load("//third_party:cuda_redist_json_repo.bzl", "cuda_redist_json") load("//third_party:repo.bzl", "tf_vendored") load("//third_party/llvm:workspace.bzl", llvm = "repo") load("//third_party/tf_runtime:workspace.bzl", tf_runtime = "repo") +_CUDA_REDIST_JSON_DICT = { + "12.1.1": [ + "https://developer.download.nvidia.com/compute/cuda/redist/redistrib_12.1.1.json", + "bafea3cb83a4cf5c764eeedcaac0040d0d3c5db3f9a74550da0e7b6ac24d378c", + ], + "12.3.2": [ + "https://developer.download.nvidia.com/compute/cuda/redist/redistrib_12.3.2.json", + "1b6eacf335dd49803633fed53ef261d62c193e5a56eee5019e7d2f634e39e7ef", + ], +} + +_CUDNN_REDIST_JSON_DICT = { + "8.6": [ + "https://developer.download.nvidia.com/compute/cudnn/redist/redistrib_8.6.0.json", + "7f6f50bed4fd8216dc10d6ef505771dc0ecc99cce813993ab405cb507a21d51d", + ], + "8.9.7.29": [ + "https://developer.download.nvidia.com/compute/cudnn/redist/redistrib_8.9.7.29.json", + "a0734f26f068522464fa09b2f2c186dfbe6ad7407a88ea0c50dd331f0c3389ec", + ], +} + def workspace(): tf_vendored(name = "local_xla", relpath = "third_party/xla") tf_vendored(name = "local_tsl", relpath = "third_party/xla/third_party/tsl") @@ -62,6 +85,13 @@ def workspace(): # but provides a script for setting up build rules via overlays. llvm("llvm-raw") + # Load JSON files for CUDA and cuDNN distribution versions. + cuda_redist_json( + name = "cuda_redist_json", + cuda_json_dict = _CUDA_REDIST_JSON_DICT, + cudnn_json_dict = _CUDNN_REDIST_JSON_DICT, + ) + # Alias so it can be loaded without assigning to a different symbol to prevent # shadowing previous loads and trigger a buildifier warning. tf_workspace3 = workspace diff --git a/third_party/cuda_redist_json_repo.bzl b/third_party/cuda_redist_json_repo.bzl new file mode 100644 index 00000000000000..76941dd74c9488 --- /dev/null +++ b/third_party/cuda_redist_json_repo.bzl @@ -0,0 +1,110 @@ +# Copyright 2024 The TensorFlow Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utilities for defining CUDA and cuDNN JSON files with distributives versions.""" + +load("//third_party:repo.bzl", "tf_mirror_urls") + +_DEFAULT_CUDA_VERSION = "12.3.2" +_DEFAULT_CUDNN_VERSION = "8.9.7.29" + +def _get_env_var(ctx, name): + if name in ctx.os.environ: + return ctx.os.environ[name] + else: + return None + +def _cuda_redist_json_impl(repository_ctx): + cuda_version = _get_env_var(repository_ctx, "TF_CUDA_VERSION") + cudnn_version = _get_env_var(repository_ctx, "TF_CUDNN_VERSION") + supported_cuda_versions = repository_ctx.attr.cuda_json_dict.keys() + if cuda_version and (cuda_version not in supported_cuda_versions): + if cuda_version in ["12", "12.3"]: + cuda_version = _DEFAULT_CUDA_VERSION + else: + fail( + ("The supported CUDA versions are {supported_versions}." 
+ + " Please provide a supported version in TF_CUDA_VERSION" + + " environment variable or add JSON URL for" + + " CUDA version={version}.") + .format( + supported_versions = supported_cuda_versions, + version = cuda_version, + ), + ) + supported_cudnn_versions = repository_ctx.attr.cudnn_json_dict.keys() + if cudnn_version and (cudnn_version not in supported_cudnn_versions): + if cudnn_version in ["8", "8.9"]: + cudnn_version = _DEFAULT_CUDNN_VERSION + else: + fail( + ("The supported CUDNN versions are {supported_versions}." + + " Please provide a supported version in TF_CUDNN_VERSION" + + " environment variable or add JSON URL for" + + " CUDNN version={version}.") + .format( + supported_versions = supported_cudnn_versions, + version = cudnn_version, + ), + ) + cuda_distributives = "{}" + cudnn_distributives = "{}" + if cuda_version: + (url, sha256) = repository_ctx.attr.cuda_json_dict[cuda_version] + json_file_name = "redistrib_cuda_%s.json" % cuda_version + repository_ctx.download( + url = tf_mirror_urls(url), + sha256 = sha256, + output = json_file_name, + ) + cuda_distributives = repository_ctx.read(repository_ctx.path(json_file_name)) + if cudnn_version: + (url, sha256) = repository_ctx.attr.cudnn_json_dict[cudnn_version] + json_file_name = "redistrib_cudnn_%s.json" % cudnn_version + repository_ctx.download( + url = tf_mirror_urls(url), + sha256 = sha256, + output = json_file_name, + ) + cudnn_distributives = repository_ctx.read(repository_ctx.path(json_file_name)) + + repository_ctx.file( + "build_defs.bzl", + """def get_cuda_distributives(): + return {cuda_distributives} + +def get_cudnn_distributives(): + return {cudnn_distributives} +""".format(cuda_distributives = cuda_distributives, cudnn_distributives = cudnn_distributives), + ) + repository_ctx.file( + "BUILD", + "", + ) + +_cuda_redist_json = repository_rule( + implementation = _cuda_redist_json_impl, + attrs = { + "cuda_json_dict": attr.string_list_dict(mandatory = True), + "cudnn_json_dict": attr.string_list_dict(mandatory = True), + }, + environ = ["TF_CUDA_VERSION", "TF_CUDNN_VERSION"], +) + +def cuda_redist_json(name, cuda_json_dict, cudnn_json_dict): + _cuda_redist_json( + name = name, + cuda_json_dict = cuda_json_dict, + cudnn_json_dict = cudnn_json_dict, + ) diff --git a/third_party/cuda_repo.bzl b/third_party/cuda_repo.bzl new file mode 100644 index 00000000000000..0dcda26cba3a23 --- /dev/null +++ b/third_party/cuda_repo.bzl @@ -0,0 +1,327 @@ +# Copyright 2024 The TensorFlow Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Utilities for defining Google ML CUDA dependencies.""" + +load("@cuda_redist_json//:build_defs.bzl", "get_cuda_distributives", "get_cudnn_distributives") +load("//third_party:repo.bzl", "tf_mirror_urls") + +_DEFAULT_CUDA_VERSION = "12.3.2" +_OS_ARCH_DICT = { + "amd64": "x86_64-unknown-linux-gnu", + "aarch64": "aarch64-unknown-linux-gnu", +} +_REDIST_ARCH_DICT = { + "linux-x86_64": "x86_64-unknown-linux-gnu", + "linux-sbsa": "aarch64-unknown-linux-gnu", +} + +_CUDA_DIST_PATH_PREFIX = "https://developer.download.nvidia.com/compute/cuda/redist/" +_CUDNN_DIST_PATH_PREFIX = "https://developer.download.nvidia.com/compute/cudnn/redist/" + +def _get_env_var(ctx, name): + if name in ctx.os.environ: + return ctx.os.environ[name] + else: + return None + +def _get_archive_name(url, archive_suffix = ".tar.xz"): + last_slash_index = url.rfind("/") + return url[last_slash_index + 1:-len(archive_suffix)] + +def _cuda_http_archive_impl(repository_ctx): + cuda_or_cudnn_version = None + dist_version = "" + cuda_version = _get_env_var(repository_ctx, "TF_CUDA_VERSION") + cudnn_version = _get_env_var(repository_ctx, "TF_CUDNN_VERSION") + if repository_ctx.attr.is_cudnn_dist: + cuda_or_cudnn_version = cudnn_version + else: + cuda_or_cudnn_version = cuda_version + if cuda_or_cudnn_version: + # Download archive only when GPU config is used. + dist_version = repository_ctx.attr.dist_version + arch = _OS_ARCH_DICT[repository_ctx.os.arch] + if arch not in repository_ctx.attr.relative_url_dict.keys(): + (relative_url, sha256) = repository_ctx.attr.relative_url_dict["cuda{version}_{arch}" \ + .format( + version = cuda_version.split(".")[0], + arch = arch, + )] + else: + (relative_url, sha256) = repository_ctx.attr.relative_url_dict[arch] + url = (repository_ctx.attr.cudnn_dist_path_prefix if repository_ctx.attr.is_cudnn_dist else repository_ctx.attr.cuda_dist_path_prefix) + relative_url + + archive_name = _get_archive_name(url) + + repository_ctx.download( + url = tf_mirror_urls(url), + output = archive_name + repository_ctx.attr.extension, + sha256 = sha256, + ) + repository_ctx.extract( + archive = archive_name + repository_ctx.attr.extension, + stripPrefix = repository_ctx.attr.override_strip_prefix if repository_ctx.attr.override_strip_prefix else archive_name, + ) + if repository_ctx.attr.build_template: + version = dist_version.split(".")[0] if dist_version else "" + repository_ctx.file("version.txt", version) + repository_ctx.template( + "BUILD", + repository_ctx.attr.build_template, + {"%{version}": version}, + ) + else: + repository_ctx.file( + "BUILD", + repository_ctx.read(repository_ctx.attr.build_file), + ) + +_cuda_http_archive = repository_rule( + implementation = _cuda_http_archive_impl, + attrs = { + "dist_version": attr.string(mandatory = True), + "relative_url_dict": attr.string_list_dict(mandatory = True), + "build_template": attr.label(), + "build_file": attr.label(), + "is_cudnn_dist": attr.bool(), + "override_strip_prefix": attr.string(), + "cudnn_dist_path_prefix": attr.string(default = _CUDNN_DIST_PATH_PREFIX), + "cuda_dist_path_prefix": attr.string(default = _CUDA_DIST_PATH_PREFIX), + "extension": attr.string(default = ".tar.xz"), + }, + environ = ["TF_CUDA_VERSION", "TF_CUDNN_VERSION"], +) + +def cuda_http_archive(name, dist_version, relative_url_dict, **kwargs): + _cuda_http_archive( + name = name, + dist_version = dist_version, + relative_url_dict = relative_url_dict, + **kwargs + ) + +def _cuda_wheel_impl(repository_ctx): + cuda_version = _get_env_var(repository_ctx, 
"TF_CUDA_VERSION") + if cuda_version in ["12", "12.3"]: + cuda_version = _DEFAULT_CUDA_VERSION + if cuda_version: + # Download archive only when GPU config is used. + arch = _OS_ARCH_DICT[repository_ctx.os.arch] + dict_key = "{cuda_version}-{arch}".format( + cuda_version = cuda_version, + arch = arch, + ) + supported_versions = repository_ctx.attr.url_dict.keys() + if dict_key not in supported_versions: + fail( + ("The supported NCCL versions are {supported_versions}." + + " Please provide a supported CUDA version in TF_CUDA_VERSION" + + " environment variable or add NCCL distributive for" + + " CUDA version={version}, OS={arch}.") + .format( + supported_versions = supported_versions, + version = cuda_version, + arch = arch, + ), + ) + sha256 = repository_ctx.attr.sha256_dict[dict_key] + url = repository_ctx.attr.url_dict[dict_key] + + archive_name = _get_archive_name(url, archive_suffix = ".whl") + + repository_ctx.download( + url = tf_mirror_urls(url), + output = archive_name + repository_ctx.attr.extension, + sha256 = sha256, + ) + repository_ctx.extract( + archive = archive_name + repository_ctx.attr.extension, + stripPrefix = repository_ctx.attr.strip_prefix, + ) + + repository_ctx.file( + "BUILD", + repository_ctx.read(repository_ctx.attr.build_file), + ) + +_cuda_wheel = repository_rule( + implementation = _cuda_wheel_impl, + attrs = { + "sha256_dict": attr.string_dict(mandatory = True), + "url_dict": attr.string_dict(mandatory = True), + "build_file": attr.label(), + "strip_prefix": attr.string(), + "extension": attr.string(default = ".zip"), + }, + environ = ["TF_CUDA_VERSION"], +) + +def cuda_wheel(name, sha256_dict, url_dict, **kwargs): + _cuda_wheel( + name = name, + sha256_dict = sha256_dict, + url_dict = url_dict, + **kwargs + ) + +def _get_relative_url_dict(dist_info): + relative_url_dict = {} + for arch in _REDIST_ARCH_DICT.keys(): + # CUDNN JSON might contain paths for each CUDA version. 
+ if "relative_path" not in dist_info[arch]: + for cuda_version, data in dist_info[arch].items(): + relative_url_dict["{cuda_version}_{arch}" \ + .format( + cuda_version = cuda_version, + arch = _REDIST_ARCH_DICT[arch], + )] = [data["relative_path"], data["sha256"]] + else: + relative_url_dict[_REDIST_ARCH_DICT[arch]] = [ + dist_info[arch]["relative_path"], + dist_info[arch]["sha256"], + ] + return relative_url_dict + +def _get_cuda_archive( + repo_name, + dist_dict, + dist_name, + build_file = None, + build_template = None, + is_cudnn_dist = False): + if dist_name in dist_dict.keys(): + return cuda_http_archive( + name = repo_name, + dist_version = dist_dict[dist_name]["version"], + build_file = Label(build_file) if build_file else None, + build_template = Label(build_template) if build_template else None, + relative_url_dict = _get_relative_url_dict(dist_dict[dist_name]), + is_cudnn_dist = is_cudnn_dist, + ) + else: + return cuda_http_archive( + name = repo_name, + dist_version = "", + build_file = Label(build_file) if build_file else None, + build_template = Label(build_template) if build_template else None, + relative_url_dict = {"": []}, + is_cudnn_dist = is_cudnn_dist, + ) + +def cuda_distributives(cuda_nccl_wheel_dict): + nccl_artifacts_dict = {"sha256_dict": {}, "url_dict": {}} + for cuda_version, nccl_wheel_info in cuda_nccl_wheel_dict.items(): + for arch in _OS_ARCH_DICT.values(): + if arch in nccl_wheel_info.keys(): + cuda_version_to_arch_key = "%s-%s" % (cuda_version, arch) + nccl_artifacts_dict["sha256_dict"][cuda_version_to_arch_key] = nccl_wheel_info[arch]["sha256"] + nccl_artifacts_dict["url_dict"][cuda_version_to_arch_key] = nccl_wheel_info[arch]["url"] + + cuda_wheel( + name = "cuda_nccl", + sha256_dict = nccl_artifacts_dict["sha256_dict"], + url_dict = nccl_artifacts_dict["url_dict"], + build_file = Label("//third_party/gpus/cuda:cuda_nccl.BUILD"), + strip_prefix = "nvidia/nccl", + ) + + cuda_distributives = get_cuda_distributives() + cudnn_distributives = get_cudnn_distributives() + + _get_cuda_archive( + repo_name = "cuda_cccl", + dist_dict = cuda_distributives, + dist_name = "cuda_cccl", + build_file = "//third_party/gpus/cuda:cuda_cccl.BUILD", + ) + _get_cuda_archive( + repo_name = "cuda_cublas", + dist_dict = cuda_distributives, + dist_name = "libcublas", + build_template = "//third_party/gpus/cuda:cuda_cublas.BUILD.tpl", + ) + _get_cuda_archive( + repo_name = "cuda_cudart", + dist_dict = cuda_distributives, + dist_name = "cuda_cudart", + build_template = "//third_party/gpus/cuda:cuda_cudart.BUILD.tpl", + ) + _get_cuda_archive( + repo_name = "cuda_cudnn", + dist_dict = cudnn_distributives, + dist_name = "cudnn", + build_template = "//third_party/gpus/cuda:cuda_cudnn.BUILD.tpl", + is_cudnn_dist = True, + ) + _get_cuda_archive( + repo_name = "cuda_cufft", + dist_dict = cuda_distributives, + dist_name = "libcufft", + build_template = "//third_party/gpus/cuda:cuda_cufft.BUILD.tpl", + ) + _get_cuda_archive( + repo_name = "cuda_cupti", + dist_dict = cuda_distributives, + dist_name = "cuda_cupti", + build_template = "//third_party/gpus/cuda:cuda_cupti.BUILD.tpl", + ) + _get_cuda_archive( + repo_name = "cuda_curand", + dist_dict = cuda_distributives, + dist_name = "libcurand", + build_template = "//third_party/gpus/cuda:cuda_curand.BUILD.tpl", + ) + _get_cuda_archive( + repo_name = "cuda_cusolver", + dist_dict = cuda_distributives, + dist_name = "libcusolver", + build_template = "//third_party/gpus/cuda:cuda_cusolver.BUILD.tpl", + ) + _get_cuda_archive( + repo_name = 
"cuda_cusparse", + dist_dict = cuda_distributives, + dist_name = "libcusparse", + build_template = "//third_party/gpus/cuda:cuda_cusparse.BUILD.tpl", + ) + _get_cuda_archive( + repo_name = "cuda_nvcc", + dist_dict = cuda_distributives, + dist_name = "cuda_nvcc", + build_file = "//third_party/gpus/cuda:cuda_nvcc.BUILD", + ) + _get_cuda_archive( + repo_name = "cuda_nvjitlink", + dist_dict = cuda_distributives, + dist_name = "libnvjitlink", + build_template = "//third_party/gpus/cuda:cuda_nvjitlink.BUILD.tpl", + ) + _get_cuda_archive( + repo_name = "cuda_nvml", + dist_dict = cuda_distributives, + dist_name = "cuda_nvml_dev", + build_file = "//third_party/gpus/cuda:cuda_nvml.BUILD", + ) + _get_cuda_archive( + repo_name = "cuda_nvprune", + dist_dict = cuda_distributives, + dist_name = "cuda_nvprune", + build_file = "//third_party/gpus/cuda:cuda_nvprune.BUILD", + ) + _get_cuda_archive( + repo_name = "cuda_nvtx", + dist_dict = cuda_distributives, + dist_name = "cuda_nvtx", + build_file = "//third_party/gpus/cuda:cuda_nvtx.BUILD", + ) diff --git a/third_party/gpus/compiler_common_tools.bzl b/third_party/gpus/compiler_common_tools.bzl new file mode 100644 index 00000000000000..bd07f49ec457bb --- /dev/null +++ b/third_party/gpus/compiler_common_tools.bzl @@ -0,0 +1,174 @@ +"""Common compiler functions. """ + +load( + "//third_party/remote_config:common.bzl", + "err_out", + "raw_exec", + "realpath", +) + +def to_list_of_strings(elements): + """Convert the list of ["a", "b", "c"] into '"a", "b", "c"'. + + This is to be used to put a list of strings into the bzl file templates + so it gets interpreted as list of strings in Starlark. + + Args: + elements: list of string elements + + Returns: + single string of elements wrapped in quotes separated by a comma.""" + quoted_strings = ["\"" + element + "\"" for element in elements] + return ", ".join(quoted_strings) + +_INC_DIR_MARKER_BEGIN = "#include <...>" + +# OSX add " (framework directory)" at the end of line, strip it. +_OSX_FRAMEWORK_SUFFIX = " (framework directory)" +_OSX_FRAMEWORK_SUFFIX_LEN = len(_OSX_FRAMEWORK_SUFFIX) + +# TODO(dzc): Once these functions have been factored out of Bazel's +# cc_configure.bzl, load them from @bazel_tools instead. +def _cxx_inc_convert(path): + """Convert path returned by cc -E xc++ in a complete path.""" + path = path.strip() + if path.endswith(_OSX_FRAMEWORK_SUFFIX): + path = path[:-_OSX_FRAMEWORK_SUFFIX_LEN].strip() + return path + +def _normalize_include_path(repository_ctx, path): + """Normalizes include paths before writing them to the crosstool. + + If path points inside the 'crosstool' folder of the repository, a relative + path is returned. + If path points outside the 'crosstool' folder, an absolute path is returned. + """ + path = str(repository_ctx.path(path)) + crosstool_folder = str(repository_ctx.path(".").get_child("crosstool")) + + if path.startswith(crosstool_folder): + # We drop the path to "$REPO/crosstool" and a trailing path separator. + return path[len(crosstool_folder) + 1:] + return path + +def _is_compiler_option_supported(repository_ctx, cc, option): + """Checks that `option` is supported by the C compiler. 
Doesn't %-escape the option.""" + result = repository_ctx.execute([ + cc, + option, + "-o", + "/dev/null", + "-c", + str(repository_ctx.path("tools/cpp/empty.cc")), + ]) + return result.stderr.find(option) == -1 + +def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp, tf_sys_root): + """Compute the list of default C or C++ include directories.""" + if lang_is_cpp: + lang = "c++" + else: + lang = "c" + sysroot = [] + if tf_sys_root: + sysroot += ["--sysroot", tf_sys_root] + result = raw_exec(repository_ctx, [cc, "-E", "-x" + lang, "-", "-v"] + + sysroot) + stderr = err_out(result) + index1 = stderr.find(_INC_DIR_MARKER_BEGIN) + if index1 == -1: + return [] + index1 = stderr.find("\n", index1) + if index1 == -1: + return [] + index2 = stderr.rfind("\n ") + if index2 == -1 or index2 < index1: + return [] + index2 = stderr.find("\n", index2 + 1) + if index2 == -1: + inc_dirs = stderr[index1 + 1:] + else: + inc_dirs = stderr[index1 + 1:index2].strip() + + print_resource_dir_supported = _is_compiler_option_supported( + repository_ctx, + cc, + "-print-resource-dir", + ) + + if print_resource_dir_supported: + resource_dir = repository_ctx.execute( + [cc, "-print-resource-dir"], + ).stdout.strip() + "/share" + inc_dirs += "\n" + resource_dir + + compiler_includes = [ + _normalize_include_path(repository_ctx, _cxx_inc_convert(p)) + for p in inc_dirs.split("\n") + ] + + # The compiler might be on a symlink, e.g. /symlink -> /opt/gcc + # The above keeps only the resolved paths to the default includes (e.g. /opt/gcc/include/c++/11) + # but Bazel might encounter either (usually reported by the compiler) + # especially when a compiler wrapper (e.g. ccache) is used. + # So we need to also include paths where symlinks are not resolved. + + # Try to find real path to CC installation to "see through" compiler wrappers + # GCC has the path to g++ + index1 = result.stderr.find("COLLECT_GCC=") + if index1 != -1: + index1 = result.stderr.find("=", index1) + index2 = result.stderr.find("\n", index1) + cc_topdir = repository_ctx.path(result.stderr[index1 + 1:index2]).dirname.dirname + else: + # Clang has the directory + index1 = result.stderr.find("InstalledDir: ") + if index1 != -1: + index1 = result.stderr.find(" ", index1) + index2 = result.stderr.find("\n", index1) + cc_topdir = repository_ctx.path(result.stderr[index1 + 1:index2]).dirname + else: + # Fallback to the CC path + cc_topdir = repository_ctx.path(cc).dirname.dirname + + # We now have the compiler installation prefix, e.g. /symlink/gcc + # And the resolved installation prefix, e.g. /opt/gcc + cc_topdir_resolved = str(realpath(repository_ctx, cc_topdir)).strip() + cc_topdir = str(cc_topdir).strip() + + # If there is (any!) symlink involved we add paths where the unresolved installation prefix is kept. + # e.g. [/opt/gcc/include/c++/11, /opt/gcc/lib/gcc/x86_64-linux-gnu/11/include, /other/path] + # adds [/symlink/include/c++/11, /symlink/lib/gcc/x86_64-linux-gnu/11/include] + if cc_topdir_resolved != cc_topdir: + unresolved_compiler_includes = [ + cc_topdir + inc[len(cc_topdir_resolved):] + for inc in compiler_includes + if inc.startswith(cc_topdir_resolved) + ] + compiler_includes = compiler_includes + unresolved_compiler_includes + return compiler_includes + +def get_cxx_inc_directories(repository_ctx, cc, tf_sys_root): + """Compute the list of default C and C++ include directories.""" + + # For some reason `clang -xc` sometimes returns include paths that are + # different from the ones from `clang -xc++`. 
(Symlink and a dir) + # So we run the compiler with both `-xc` and `-xc++` and merge resulting lists + includes_cpp = _get_cxx_inc_directories_impl( + repository_ctx, + cc, + True, + tf_sys_root, + ) + includes_c = _get_cxx_inc_directories_impl( + repository_ctx, + cc, + False, + tf_sys_root, + ) + + return includes_cpp + [ + inc + for inc in includes_c + if inc not in includes_cpp + ] diff --git a/third_party/gpus/crosstool/BUILD.tpl b/third_party/gpus/crosstool/BUILD.tpl index 8eda7a1cf6ac2b..b9553d9b99ecfe 100644 --- a/third_party/gpus/crosstool/BUILD.tpl +++ b/third_party/gpus/crosstool/BUILD.tpl @@ -2,6 +2,7 @@ # Update cuda_configure.bzl#verify_build_defines when adding new variables. load(":cc_toolchain_config.bzl", "cc_toolchain_config") +load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") licenses(["restricted"]) @@ -133,9 +134,17 @@ filegroup( srcs = [], ) +filegroup( + name = "cuda_nvcc_files", + srcs = %{cuda_nvcc_files}, +) + filegroup( name = "crosstool_wrapper_driver_is_not_gcc", - srcs = ["clang/bin/crosstool_wrapper_driver_is_not_gcc"], + srcs = [ + ":cuda_nvcc_files", + ":clang/bin/crosstool_wrapper_driver_is_not_gcc" + ], ) filegroup( diff --git a/third_party/gpus/cuda/BUILD.hermetic.tpl b/third_party/gpus/cuda/BUILD.hermetic.tpl new file mode 100644 index 00000000000000..1c00f1c5e32916 --- /dev/null +++ b/third_party/gpus/cuda/BUILD.hermetic.tpl @@ -0,0 +1,291 @@ +load("@bazel_skylib//:bzl_library.bzl", "bzl_library") +load("@bazel_skylib//lib:selects.bzl", "selects") +load("@bazel_skylib//rules:common_settings.bzl", "bool_flag") + +licenses(["restricted"]) # MPL2, portions GPL v3, LGPL v3, BSD-like + +package(default_visibility = ["//visibility:public"]) + +# Config setting whether TensorFlow is built with CUDA support using clang. +# +# TODO(b/174244321), DEPRECATED: this target will be removed when all users +# have been converted to :is_cuda_enabled (most) or :is_cuda_compiler_clang. +selects.config_setting_group( + name = "using_clang", + match_all = [ + "@local_config_cuda//:is_cuda_enabled", + "@local_config_cuda//:is_cuda_compiler_clang", + ], +) + +# Config setting whether TensorFlow is built with CUDA support using nvcc. +# +# TODO(b/174244321), DEPRECATED: this target will be removed when all users +# have been converted to :is_cuda_enabled (most) or :is_cuda_compiler_nvcc. +selects.config_setting_group( + name = "using_nvcc", + match_all = [ + "@local_config_cuda//:is_cuda_enabled", + "@local_config_cuda//:is_cuda_compiler_nvcc", + ], +) + +# Equivalent to using_clang && -c opt. +selects.config_setting_group( + name = "using_clang_opt", + match_all = [ + ":using_clang", + ":_opt", + ], +) + +config_setting( + name = "_opt", + values = {"compilation_mode": "opt"}, +) + +# Provides CUDA headers for '#include "third_party/gpus/cuda/include/cuda.h"' +# All clients including TensorFlow should use these directives. 
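The %{cuda_nvcc_files} placeholder above is substituted by the configure rule, and folding it into `crosstool_wrapper_driver_is_not_gcc` puts the hermetic compiler binaries into the wrapper's runfiles. A plausible hermetic expansion, assuming the same @cuda_nvcc targets used elsewhere in this patch (the exact label list is not shown in this diff):

    filegroup(
        name = "cuda_nvcc_files",
        # Assumed expansion when hermetic CUDA is enabled; [] otherwise.
        srcs = [
            "@cuda_nvcc//:ptxas",
            "@cuda_nvcc//:nvvm",
        ],
    )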
diff --git a/third_party/gpus/cuda/BUILD.hermetic.tpl b/third_party/gpus/cuda/BUILD.hermetic.tpl
new file mode 100644
index 00000000000000..1c00f1c5e32916
--- /dev/null
+++ b/third_party/gpus/cuda/BUILD.hermetic.tpl
@@ -0,0 +1,291 @@
+load("@bazel_skylib//:bzl_library.bzl", "bzl_library")
+load("@bazel_skylib//lib:selects.bzl", "selects")
+load("@bazel_skylib//rules:common_settings.bzl", "bool_flag")
+
+licenses(["restricted"])  # MPL2, portions GPL v3, LGPL v3, BSD-like
+
+package(default_visibility = ["//visibility:public"])
+
+# Config setting whether TensorFlow is built with CUDA support using clang.
+#
+# TODO(b/174244321), DEPRECATED: this target will be removed when all users
+# have been converted to :is_cuda_enabled (most) or :is_cuda_compiler_clang.
+selects.config_setting_group(
+    name = "using_clang",
+    match_all = [
+        "@local_config_cuda//:is_cuda_enabled",
+        "@local_config_cuda//:is_cuda_compiler_clang",
+    ],
+)
+
+# Config setting whether TensorFlow is built with CUDA support using nvcc.
+#
+# TODO(b/174244321), DEPRECATED: this target will be removed when all users
+# have been converted to :is_cuda_enabled (most) or :is_cuda_compiler_nvcc.
+selects.config_setting_group(
+    name = "using_nvcc",
+    match_all = [
+        "@local_config_cuda//:is_cuda_enabled",
+        "@local_config_cuda//:is_cuda_compiler_nvcc",
+    ],
+)
+
+# Equivalent to using_clang && -c opt.
+selects.config_setting_group(
+    name = "using_clang_opt",
+    match_all = [
+        ":using_clang",
+        ":_opt",
+    ],
+)
+
+config_setting(
+    name = "_opt",
+    values = {"compilation_mode": "opt"},
+)
+
+# Provides CUDA headers for '#include "third_party/gpus/cuda/include/cuda.h"'
+# All clients including TensorFlow should use these directives.
+cc_library(
+    name = "cuda_headers",
+    hdrs = [
+        "cuda/cuda_config.h",
+    ],
+    include_prefix = "third_party/gpus",
+    includes = [
+        ".",  # required to include cuda/cuda/cuda_config.h as cuda/config.h
+    ],
+    deps = [":cudart_headers",
+            ":cublas_headers",
+            ":cccl_headers",
+            ":nvtx_headers",
+            ":nvcc_headers",
+            ":nvjitlink_headers",
+            ":cusolver_headers",
+            ":cufft_headers",
+            ":cusparse_headers",
+            ":curand_headers",
+            ":cupti_headers",
+            ":nvml_headers"],
+)
+
+cc_library(
+    name = "cudart_static",
+    srcs = ["@cuda_cudart//:static"],
+    linkopts = [
+        "-ldl",
+        "-lpthread",
+        %{cudart_static_linkopt}
+    ],
+)
+
+alias(
+    name = "cuda_driver",
+    actual = "@cuda_cudart//:cuda_driver",
+)
+
+alias(
+    name = "cudart_headers",
+    actual = "@cuda_cudart//:headers",
+)
+
+alias(
+    name = "cudart",
+    actual = "@cuda_cudart//:cudart",
+)
+
+alias(
+    name = "nvjitlink_headers",
+    actual = "@cuda_nvjitlink//:headers",
+)
+
+alias(
+    name = "nvjitlink",
+    actual = "@cuda_nvjitlink//:nvjitlink",
+)
+
+alias(
+    name = "nvtx_headers",
+    actual = "@cuda_nvtx//:headers",
+)
+
+alias(
+    name = "nvml_headers",
+    actual = "@cuda_nvml//:headers",
+)
+
+alias(
+    name = "nvcc_headers",
+    actual = "@cuda_nvcc//:headers",
+)
+
+alias(
+    name = "cccl_headers",
+    actual = "@cuda_cccl//:headers",
+)
+
+alias(
+    name = "cublas_headers",
+    actual = "@cuda_cublas//:headers",
+)
+
+alias(
+    name = "cusolver_headers",
+    actual = "@cuda_cusolver//:headers",
+)
+
+alias(
+    name = "cufft_headers",
+    actual = "@cuda_cufft//:headers",
+)
+
+alias(
+    name = "cusparse_headers",
+    actual = "@cuda_cusparse//:headers",
+)
+
+alias(
+    name = "curand_headers",
+    actual = "@cuda_curand//:headers",
+)
+
+alias(
+    name = "cublas",
+    actual = "@cuda_cublas//:cublas",
+)
+
+alias(
+    name = "cublasLt",
+    actual = "@cuda_cublas//:cublasLt",
+)
+
+alias(
+    name = "cusolver",
+    actual = "@cuda_cusolver//:cusolver",
+)
+
+alias(
+    name = "cudnn",
+    actual = "@cuda_cudnn//:cudnn",
+)
+
+alias(
+    name = "cudnn_ops_infer",
+    actual = "@cuda_cudnn//:cudnn_ops_infer",
+)
+
+alias(
+    name = "cudnn_cnn_infer",
+    actual = "@cuda_cudnn//:cudnn_cnn_infer",
+)
+
+alias(
+    name = "cudnn_ops_train",
+    actual = "@cuda_cudnn//:cudnn_ops_train",
+)
+
+alias(
+    name = "cudnn_cnn_train",
+    actual = "@cuda_cudnn//:cudnn_cnn_train",
+)
+
+alias(
+    name = "cudnn_adv_infer",
+    actual = "@cuda_cudnn//:cudnn_adv_infer",
+)
+
+alias(
+    name = "cudnn_adv_train",
+    actual = "@cuda_cudnn//:cudnn_adv_train",
+)
+alias(
+    name = "cudnn_header",
+    actual = "@cuda_cudnn//:headers",
+)
+
+alias(
+    name = "cufft",
+    actual = "@cuda_cufft//:cufft",
+)
+
+alias(
+    name = "curand",
+    actual = "@cuda_curand//:curand",
+)
+
+cc_library(
+    name = "cuda",
+    deps = [
+        ":cublas",
+        ":cublasLt",
+        ":cuda_headers",
+        ":cudart",
+        ":cudnn",
+        ":cufft",
+        ":curand",
+    ],
+)
+
+alias(
+    name = "cub_headers",
+    actual = "%{cub_actual}",
+)
+
+alias(
+    name = "cupti_headers",
+    actual = "@cuda_cupti//:headers",
+)
+
+alias(
+    name = "cupti_dsos",
+    actual = "@cuda_cupti//:cupti",
+)
+
+alias(
+    name = "cusparse",
+    actual = "@cuda_cusparse//:cusparse",
+)
+
+cc_library(
+    name = "libdevice_root",
+    data = ["@cuda_nvcc//:nvvm"],
+)
+
+bzl_library(
+    name = "build_defs_bzl",
+    srcs = ["build_defs.bzl"],
+    deps = [
+        "@bazel_skylib//lib:selects",
+    ],
+)
+
+py_library(
+    name = "cuda_config_py",
+    srcs = ["cuda/cuda_config.py"],
+)
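The settings that follow are what `--config=no_cuda_libs` flips from the command line (note the default is True in this hermetic template and False in the non-hermetic BUILD.tpl below). A sketch of gating runtime dependencies on them in a downstream BUILD file (target name hypothetical):

    cc_library(
        name = "gpu_runtime_deps",
        deps = select({
            # Bundle NVIDIA shared libraries only when both hermetic tools
            # and hermetic libs are enabled.
            "@local_config_cuda//cuda:hermetic_cuda_tools_and_libs": [
                "@cuda_cudart//:cudart",
                "@cuda_cublas//:cublas",
            ],
            "//conditions:default": [],
        }),
    )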
+alias( + name = "hermetic_cuda_tools", + actual = "@local_config_cuda//:is_cuda_enabled", +) + +# Flag indicating if we should include hermetic CUDA libs. +bool_flag( + name = "include_hermetic_cuda_libs", + build_setting_default = True, +) + +config_setting( + name = "hermetic_cuda_libs", + flag_values = {":include_hermetic_cuda_libs": "True"}, +) + +selects.config_setting_group( + name = "hermetic_cuda_tools_and_libs", + match_all = [ + ":hermetic_cuda_libs", + ":hermetic_cuda_tools" + ], +) + diff --git a/third_party/gpus/cuda/BUILD.tpl b/third_party/gpus/cuda/BUILD.tpl index 90a18b90de048c..a4264cc14890e5 100644 --- a/third_party/gpus/cuda/BUILD.tpl +++ b/third_party/gpus/cuda/BUILD.tpl @@ -1,6 +1,7 @@ load(":build_defs.bzl", "cuda_header_library") load("@bazel_skylib//:bzl_library.bzl", "bzl_library") load("@bazel_skylib//lib:selects.bzl", "selects") +load("@bazel_skylib//rules:common_settings.bzl", "bool_flag") licenses(["restricted"]) # MPL2, portions GPL v3, LGPL v3, BSD-like @@ -144,7 +145,6 @@ cc_library( name = "cusolver", srcs = ["cuda/lib/%{cusolver_lib}"], data = ["cuda/lib/%{cusolver_lib}"], - linkopts = ["-lgomp"], linkstatic = 1, ) @@ -220,7 +220,6 @@ cc_library( name = "cusparse", srcs = ["cuda/lib/%{cusparse_lib}"], data = ["cuda/lib/%{cusparse_lib}"], - linkopts = ["-lgomp"], linkstatic = 1, ) @@ -242,4 +241,29 @@ py_library( srcs = ["cuda/cuda_config.py"], ) +# Config setting whether TensorFlow is built with hermetic CUDA. +alias( + name = "hermetic_cuda_tools", + actual = "@local_config_cuda//:is_cuda_enabled", +) + +# Flag indicating if we should include hermetic CUDA libs. +bool_flag( + name = "include_hermetic_cuda_libs", + build_setting_default = False, +) + +config_setting( + name = "hermetic_cuda_libs", + flag_values = {":include_hermetic_cuda_libs": "True"}, +) + +selects.config_setting_group( + name = "hermetic_cuda_tools_and_libs", + match_all = [ + ":hermetic_cuda_libs", + ":hermetic_cuda_tools" + ], +) + %{copy_rules} diff --git a/third_party/gpus/cuda/cuda_cccl.BUILD b/third_party/gpus/cuda/cuda_cccl.BUILD new file mode 100644 index 00000000000000..9823f1d871ed53 --- /dev/null +++ b/third_party/gpus/cuda/cuda_cccl.BUILD @@ -0,0 +1,12 @@ +licenses(["restricted"]) # NVIDIA proprietary license + +cc_library( + name = "headers", + hdrs = glob([ + "include/**", + ]), + include_prefix = "third_party/gpus/cuda/include", + includes = ["include"], + strip_include_prefix = "include", + visibility = ["@local_config_cuda//cuda:__pkg__"], +) diff --git a/third_party/gpus/cuda/cuda_cublas.BUILD.tpl b/third_party/gpus/cuda/cuda_cublas.BUILD.tpl new file mode 100644 index 00000000000000..d5766c971a50ff --- /dev/null +++ b/third_party/gpus/cuda/cuda_cublas.BUILD.tpl @@ -0,0 +1,33 @@ +licenses(["restricted"]) # NVIDIA proprietary license + +exports_files([ + "version.txt", +]) + +cc_import( + name = "cublas", + hdrs = [":headers"], + shared_library = "lib/libcublas.so.%{version}", + visibility = ["//visibility:public"], +) + +cc_import( + name = "cublasLt", + hdrs = [":headers"], + shared_library = "lib/libcublasLt.so.%{version}", + visibility = ["//visibility:public"], +) + +cc_library( + name = "headers", + hdrs = [ + "include/cublas.h", + "include/cublas_v2.h", + "include/cublas_api.h", + "include/cublasLt.h" + ], + include_prefix = "third_party/gpus/cuda/include", + includes = ["include"], + strip_include_prefix = "include", + visibility = ["@local_config_cuda//cuda:__pkg__"], +) diff --git a/third_party/gpus/cuda/cuda_cudart.BUILD.tpl 
b/third_party/gpus/cuda/cuda_cudart.BUILD.tpl new file mode 100644 index 00000000000000..08655e7819156c --- /dev/null +++ b/third_party/gpus/cuda/cuda_cudart.BUILD.tpl @@ -0,0 +1,34 @@ +licenses(["restricted"]) # NVIDIA proprietary license + +exports_files([ + "version.txt", +]) + +filegroup( + name = "static", + srcs = ["lib/libcudart_static.a"], + visibility = ["@local_config_cuda//cuda:__pkg__"], +) + +cc_import( + name = "cuda_driver", + shared_library = "lib/stubs/libcuda.so", +) + +cc_import( + name = "cudart", + hdrs = [":headers"], + shared_library = "lib/libcudart.so.%{version}", + visibility = ["//visibility:public"], +) + +cc_library( + name = "headers", + hdrs = glob([ + "include/**", + ]), + include_prefix = "third_party/gpus/cuda/include", + includes = ["include"], + strip_include_prefix = "include", + visibility = ["@local_config_cuda//cuda:__pkg__"], +) diff --git a/third_party/gpus/cuda/cuda_cudnn.BUILD.tpl b/third_party/gpus/cuda/cuda_cudnn.BUILD.tpl new file mode 100644 index 00000000000000..98da6e69cbe644 --- /dev/null +++ b/third_party/gpus/cuda/cuda_cudnn.BUILD.tpl @@ -0,0 +1,65 @@ +licenses(["restricted"]) # NVIDIA proprietary license + +exports_files([ + "version.txt", +]) + +cc_import( + name = "cudnn", + hdrs = [":headers"], + shared_library = "lib/libcudnn.so.%{version}", + visibility = ["//visibility:public"], +) + +cc_import( + name = "cudnn_ops_infer", + hdrs = [":headers"], + shared_library = "lib/libcudnn_ops_infer.so.%{version}", + visibility = ["//visibility:public"], +) + +cc_import( + name = "cudnn_cnn_infer", + hdrs = [":headers"], + shared_library = "lib/libcudnn_cnn_infer.so.%{version}", + visibility = ["//visibility:public"], +) + +cc_import( + name = "cudnn_ops_train", + hdrs = [":headers"], + shared_library = "lib/libcudnn_ops_train.so.%{version}", + visibility = ["//visibility:public"], +) + +cc_import( + name = "cudnn_cnn_train", + hdrs = [":headers"], + shared_library = "lib/libcudnn_cnn_train.so.%{version}", + visibility = ["//visibility:public"], +) + +cc_import( + name = "cudnn_adv_infer", + hdrs = [":headers"], + shared_library = "lib/libcudnn_adv_infer.so.%{version}", + visibility = ["//visibility:public"], +) + +cc_import( + name = "cudnn_adv_train", + hdrs = [":headers"], + shared_library = "lib/libcudnn_adv_train.so.%{version}", + visibility = ["//visibility:public"], +) + +cc_library( + name = "headers", + hdrs = glob([ + "include/**", + ]), + include_prefix = "third_party/gpus/cudnn", + includes = ["include"], + strip_include_prefix = "include", + visibility = ["@local_config_cuda//cuda:__pkg__"], +) diff --git a/third_party/gpus/cuda/cuda_cufft.BUILD.tpl b/third_party/gpus/cuda/cuda_cufft.BUILD.tpl new file mode 100644 index 00000000000000..6836814dc9b622 --- /dev/null +++ b/third_party/gpus/cuda/cuda_cufft.BUILD.tpl @@ -0,0 +1,23 @@ +licenses(["restricted"]) # NVIDIA proprietary license + +exports_files([ + "version.txt", +]) + +cc_import( + name = "cufft", + hdrs = [":headers"], + shared_library = "lib/libcufft.so.%{version}", + visibility = ["//visibility:public"], +) + +cc_library( + name = "headers", + hdrs = glob([ + "include/**", + ]), + include_prefix = "third_party/gpus/cuda/include", + includes = ["include"], + strip_include_prefix = "include", + visibility = ["@local_config_cuda//cuda:__pkg__"], +) diff --git a/third_party/gpus/cuda/cuda_cupti.BUILD.tpl b/third_party/gpus/cuda/cuda_cupti.BUILD.tpl new file mode 100644 index 00000000000000..772386d723649f --- /dev/null +++ b/third_party/gpus/cuda/cuda_cupti.BUILD.tpl @@ -0,0 
+1,23 @@ +licenses(["restricted"]) # NVIDIA proprietary license + +exports_files([ + "version.txt", +]) + +cc_import( + name = "cupti", + hdrs = [":headers"], + shared_library = "lib/libcupti.so.%{version}", + visibility = ["//visibility:public"], +) + +cc_library( + name = "headers", + hdrs = glob([ + "include/**", + ]), + include_prefix = "third_party/gpus/cuda/extras/CUPTI/include", + includes = ["include/"], + strip_include_prefix = "include", + visibility = ["@local_config_cuda//cuda:__pkg__"], +) diff --git a/third_party/gpus/cuda/cuda_curand.BUILD.tpl b/third_party/gpus/cuda/cuda_curand.BUILD.tpl new file mode 100644 index 00000000000000..c98ded26f4b907 --- /dev/null +++ b/third_party/gpus/cuda/cuda_curand.BUILD.tpl @@ -0,0 +1,23 @@ +licenses(["restricted"]) # NVIDIA proprietary license + +exports_files([ + "version.txt", +]) + +cc_import( + name = "curand", + hdrs = [":headers"], + shared_library = "lib/libcurand.so.%{version}", + visibility = ["//visibility:public"], +) + +cc_library( + name = "headers", + hdrs = glob([ + "include/**", + ]), + include_prefix = "third_party/gpus/cuda/include", + includes = ["include"], + strip_include_prefix = "include", + visibility = ["@local_config_cuda//cuda:__pkg__"], +) diff --git a/third_party/gpus/cuda/cuda_cusolver.BUILD.tpl b/third_party/gpus/cuda/cuda_cusolver.BUILD.tpl new file mode 100644 index 00000000000000..6a5f9d9737cfe2 --- /dev/null +++ b/third_party/gpus/cuda/cuda_cusolver.BUILD.tpl @@ -0,0 +1,25 @@ +licenses(["restricted"]) # NVIDIA proprietary license + +exports_files([ + "version.txt", +]) + +cc_import( + name = "cusolver", + hdrs = [":headers"], + shared_library = "lib/libcusolver.so.%{version}", + visibility = ["//visibility:public"], +) + +cc_library( + name = "headers", + hdrs = [ + "include/cusolver_common.h", + "include/cusolverDn.h", + "include/cusolverSp.h" + ], + include_prefix = "third_party/gpus/cuda/include", + includes = ["include"], + strip_include_prefix = "include", + visibility = ["@local_config_cuda//cuda:__pkg__"], +) diff --git a/third_party/gpus/cuda/cuda_cusparse.BUILD.tpl b/third_party/gpus/cuda/cuda_cusparse.BUILD.tpl new file mode 100644 index 00000000000000..ad5c2b5f0c45c1 --- /dev/null +++ b/third_party/gpus/cuda/cuda_cusparse.BUILD.tpl @@ -0,0 +1,23 @@ +licenses(["restricted"]) # NVIDIA proprietary license + +exports_files([ + "version.txt", +]) + +cc_import( + name = "cusparse", + hdrs = [":headers"], + shared_library = "lib/libcusparse.so.%{version}", + visibility = ["//visibility:public"], +) + +cc_library( + name = "headers", + hdrs = glob([ + "include/**", + ]), + include_prefix = "third_party/gpus/cuda/include", + includes = ["include"], + strip_include_prefix = "include", + visibility = ["@local_config_cuda//cuda:__pkg__"], +) diff --git a/third_party/gpus/cuda/cuda_nccl.BUILD b/third_party/gpus/cuda/cuda_nccl.BUILD new file mode 100644 index 00000000000000..440b31c5cb616e --- /dev/null +++ b/third_party/gpus/cuda/cuda_nccl.BUILD @@ -0,0 +1,7 @@ +licenses(["restricted"]) # NVIDIA proprietary license + +cc_import( + name = "nccl", + shared_library = "lib/libnccl.so.2", + visibility = ["//visibility:public"], +) diff --git a/third_party/gpus/cuda/cuda_nvcc.BUILD b/third_party/gpus/cuda/cuda_nvcc.BUILD new file mode 100644 index 00000000000000..6cdaca5cc902a0 --- /dev/null +++ b/third_party/gpus/cuda/cuda_nvcc.BUILD @@ -0,0 +1,73 @@ +licenses(["restricted"]) # NVIDIA proprietary license + +exports_files([ + "bin/nvcc", +]) + +filegroup( + name = "nvvm", + srcs = [ + 
"nvvm/libdevice/libdevice.10.bc", + ], + visibility = ["//visibility:public"], +) + +filegroup( + name = "nvlink", + srcs = [ + "bin/nvlink", + ], + visibility = ["//visibility:public"], +) + +filegroup( + name = "fatbinary", + srcs = [ + "bin/fatbinary", + ], + visibility = ["//visibility:public"], +) + +filegroup( + name = "bin2c", + srcs = [ + "bin/bin2c", + ], + visibility = ["//visibility:public"], +) + +filegroup( + name = "ptxas", + srcs = [ + "bin/ptxas", + ], + visibility = ["//visibility:public"], +) + +filegroup( + name = "bin", + srcs = glob([ + "bin/**", + "nvvm/bin/**", + ]), + visibility = ["//visibility:public"], +) + +filegroup( + name = "link_stub", + srcs = [ + "bin/crt/link.stub", + ], + visibility = ["//visibility:public"], +) + +cc_library( + name = "headers", + hdrs = glob([ + "include/**", + ]), + include_prefix = "third_party/gpus/cuda/include", + includes = ["include"], + strip_include_prefix = "include", + visibility = ["@local_config_cuda//cuda:__pkg__"], +) diff --git a/third_party/gpus/cuda/cuda_nvjitlink.BUILD.tpl b/third_party/gpus/cuda/cuda_nvjitlink.BUILD.tpl new file mode 100644 index 00000000000000..6729b7cd1df9c4 --- /dev/null +++ b/third_party/gpus/cuda/cuda_nvjitlink.BUILD.tpl @@ -0,0 +1,23 @@ +licenses(["restricted"]) # NVIDIA proprietary license + +exports_files([ + "version.txt", +]) + +cc_import( + name = "nvjitlink", + hdrs = [":headers"], + shared_library = "lib/libnvJitLink.so.%{version}", + visibility = ["//visibility:public"], +) + +cc_library( + name = "headers", + hdrs = glob([ + "include/**", + ]), + include_prefix = "third_party/gpus/cuda/include", + includes = ["include"], + strip_include_prefix = "include", + visibility = ["@local_config_cuda//cuda:__pkg__"], +) diff --git a/third_party/gpus/cuda/cuda_nvml.BUILD b/third_party/gpus/cuda/cuda_nvml.BUILD new file mode 100644 index 00000000000000..40b97e671cf7de --- /dev/null +++ b/third_party/gpus/cuda/cuda_nvml.BUILD @@ -0,0 +1,12 @@ +licenses(["restricted"]) # NVIDIA proprietary license + +cc_library( + name = "headers", + hdrs = glob([ + "include/**", + ]), + include_prefix = "third_party/gpus/cuda/nvml/include", + includes = ["include"], + strip_include_prefix = "include", + visibility = ["@local_config_cuda//cuda:__pkg__"], +) diff --git a/third_party/gpus/cuda/cuda_nvprune.BUILD b/third_party/gpus/cuda/cuda_nvprune.BUILD new file mode 100644 index 00000000000000..986ef0c8f76166 --- /dev/null +++ b/third_party/gpus/cuda/cuda_nvprune.BUILD @@ -0,0 +1,9 @@ +licenses(["restricted"]) # NVIDIA proprietary license + +filegroup( + name = "nvprune", + srcs = [ + "bin/nvprune", + ], + visibility = ["//visibility:public"], +) diff --git a/third_party/gpus/cuda/cuda_nvtx.BUILD b/third_party/gpus/cuda/cuda_nvtx.BUILD new file mode 100644 index 00000000000000..9823f1d871ed53 --- /dev/null +++ b/third_party/gpus/cuda/cuda_nvtx.BUILD @@ -0,0 +1,12 @@ +licenses(["restricted"]) # NVIDIA proprietary license + +cc_library( + name = "headers", + hdrs = glob([ + "include/**", + ]), + include_prefix = "third_party/gpus/cuda/include", + includes = ["include"], + strip_include_prefix = "include", + visibility = ["@local_config_cuda//cuda:__pkg__"], +) diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl index fefbf081c87e1c..b8aad7ed4994ee 100644 --- a/third_party/gpus/cuda_configure.bzl +++ b/third_party/gpus/cuda_configure.bzl @@ -53,6 +53,11 @@ load( "realpath", "which", ) +load( + ":compiler_common_tools.bzl", + "get_cxx_inc_directories", + "to_list_of_strings", +) 
_GCC_HOST_COMPILER_PATH = "GCC_HOST_COMPILER_PATH" _GCC_HOST_COMPILER_PREFIX = "GCC_HOST_COMPILER_PREFIX" @@ -67,20 +72,6 @@ _TF_CUDA_CONFIG_REPO = "TF_CUDA_CONFIG_REPO" _TF_DOWNLOAD_CLANG = "TF_DOWNLOAD_CLANG" _PYTHON_BIN_PATH = "PYTHON_BIN_PATH" -def to_list_of_strings(elements): - """Convert the list of ["a", "b", "c"] into '"a", "b", "c"'. - - This is to be used to put a list of strings into the bzl file templates - so it gets interpreted as list of strings in Starlark. - - Args: - elements: list of string elements - - Returns: - single string of elements wrapped in quotes separated by a comma.""" - quoted_strings = ["\"" + element + "\"" for element in elements] - return ", ".join(quoted_strings) - def verify_build_defines(params): """Verify all variables that crosstool/BUILD.tpl expects are substituted. @@ -238,156 +229,6 @@ def find_cc(repository_ctx, use_cuda_clang): " environment variable").format(target_cc_name, cc_path_envvar)) return cc -_INC_DIR_MARKER_BEGIN = "#include <...>" - -# OSX add " (framework directory)" at the end of line, strip it. -_OSX_FRAMEWORK_SUFFIX = " (framework directory)" -_OSX_FRAMEWORK_SUFFIX_LEN = len(_OSX_FRAMEWORK_SUFFIX) - -def _cxx_inc_convert(path): - """Convert path returned by cc -E xc++ in a complete path.""" - path = path.strip() - if path.endswith(_OSX_FRAMEWORK_SUFFIX): - path = path[:-_OSX_FRAMEWORK_SUFFIX_LEN].strip() - return path - -def _normalize_include_path(repository_ctx, path): - """Normalizes include paths before writing them to the crosstool. - - If path points inside the 'crosstool' folder of the repository, a relative - path is returned. - If path points outside the 'crosstool' folder, an absolute path is returned. - """ - path = str(repository_ctx.path(path)) - crosstool_folder = str(repository_ctx.path(".").get_child("crosstool")) - - if path.startswith(crosstool_folder): - # We drop the path to "$REPO/crosstool" and a trailing path separator. - return path[len(crosstool_folder) + 1:] - return path - -def _is_compiler_option_supported(repository_ctx, cc, option): - """Checks that `option` is supported by the C compiler. Doesn't %-escape the option.""" - result = repository_ctx.execute([ - cc, - option, - "-o", - "/dev/null", - "-c", - str(repository_ctx.path("tools/cpp/empty.cc")), - ]) - return result.stderr.find(option) == -1 - -def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp, tf_sysroot): - """Compute the list of default C or C++ include directories.""" - if lang_is_cpp: - lang = "c++" - else: - lang = "c" - sysroot = [] - if tf_sysroot: - sysroot += ["--sysroot", tf_sysroot] - result = raw_exec(repository_ctx, [cc, "-E", "-x" + lang, "-", "-v"] + - sysroot) - stderr = err_out(result) - index1 = stderr.find(_INC_DIR_MARKER_BEGIN) - if index1 == -1: - return [] - index1 = stderr.find("\n", index1) - if index1 == -1: - return [] - index2 = stderr.rfind("\n ") - if index2 == -1 or index2 < index1: - return [] - index2 = stderr.find("\n", index2 + 1) - if index2 == -1: - inc_dirs = stderr[index1 + 1:] - else: - inc_dirs = stderr[index1 + 1:index2].strip() - - print_resource_dir_supported = _is_compiler_option_supported( - repository_ctx, - cc, - "-print-resource-dir", - ) - - if print_resource_dir_supported: - resource_dir = repository_ctx.execute( - [cc, "-print-resource-dir"], - ).stdout.strip() + "/share" - inc_dirs += "\n" + resource_dir - - compiler_includes = [ - _normalize_include_path(repository_ctx, _cxx_inc_convert(p)) - for p in inc_dirs.split("\n") - ] - - # The compiler might be on a symlink, e.g. 
/symlink -> /opt/gcc - # The above keeps only the resolved paths to the default includes (e.g. /opt/gcc/include/c++/11) - # but Bazel might encounter either (usually reported by the compiler) - # especially when a compiler wrapper (e.g. ccache) is used. - # So we need to also include paths where symlinks are not resolved. - - # Try to find real path to CC installation to "see through" compiler wrappers - # GCC has the path to g++ - index1 = result.stderr.find("COLLECT_GCC=") - if index1 != -1: - index1 = result.stderr.find("=", index1) - index2 = result.stderr.find("\n", index1) - cc_topdir = repository_ctx.path(result.stderr[index1 + 1:index2]).dirname.dirname - else: - # Clang has the directory - index1 = result.stderr.find("InstalledDir: ") - if index1 != -1: - index1 = result.stderr.find(" ", index1) - index2 = result.stderr.find("\n", index1) - cc_topdir = repository_ctx.path(result.stderr[index1 + 1:index2]).dirname - else: - # Fallback to the CC path - cc_topdir = repository_ctx.path(cc).dirname.dirname - - # We now have the compiler installation prefix, e.g. /symlink/gcc - # And the resolved installation prefix, e.g. /opt/gcc - cc_topdir_resolved = str(realpath(repository_ctx, cc_topdir)).strip() - cc_topdir = str(cc_topdir).strip() - - # If there is (any!) symlink involved we add paths where the unresolved installation prefix is kept. - # e.g. [/opt/gcc/include/c++/11, /opt/gcc/lib/gcc/x86_64-linux-gnu/11/include, /other/path] - # adds [/symlink/include/c++/11, /symlink/lib/gcc/x86_64-linux-gnu/11/include] - if cc_topdir_resolved != cc_topdir: - unresolved_compiler_includes = [ - cc_topdir + inc[len(cc_topdir_resolved):] - for inc in compiler_includes - if inc.startswith(cc_topdir_resolved) - ] - compiler_includes = compiler_includes + unresolved_compiler_includes - return compiler_includes - -def get_cxx_inc_directories(repository_ctx, cc, tf_sysroot): - """Compute the list of default C and C++ include directories.""" - - # For some reason `clang -xc` sometimes returns include paths that are - # different from the ones from `clang -xc++`. (Symlink and a dir) - # So we run the compiler with both `-xc` and `-xc++` and merge resulting lists - includes_cpp = _get_cxx_inc_directories_impl( - repository_ctx, - cc, - True, - tf_sysroot, - ) - includes_c = _get_cxx_inc_directories_impl( - repository_ctx, - cc, - False, - tf_sysroot, - ) - - return includes_cpp + [ - inc - for inc in includes_c - if inc not in includes_cpp - ] - def auto_configure_fail(msg): """Output failure message when cuda configuration fails.""" red = "\033[0;31m" @@ -1293,6 +1134,7 @@ def _create_local_cuda_repository(repository_ctx): cuda_defines["%{extra_no_canonical_prefixes_flags}"] = "" cuda_defines["%{unfiltered_compile_flags}"] = "" + cuda_defines["%{cuda_nvcc_files}"] = "[]" if is_cuda_clang and not is_nvcc_and_clang: cuda_defines["%{host_compiler_path}"] = str(cc) cuda_defines["%{host_compiler_warnings}"] = """ diff --git a/third_party/gpus/hermetic_cuda_configure.bzl b/third_party/gpus/hermetic_cuda_configure.bzl new file mode 100644 index 00000000000000..5d16aa6f76a1f4 --- /dev/null +++ b/third_party/gpus/hermetic_cuda_configure.bzl @@ -0,0 +1,570 @@ +"""Repository rule for hermetic CUDA autoconfiguration. + +`hermetic_cuda_configure` depends on the following environment variables: + + * `TF_NEED_CUDA`: Whether to enable building with CUDA. + * `TF_NVCC_CLANG`: Whether to use clang for C++ and NVCC for Cuda compilation. 
+  * `CLANG_CUDA_COMPILER_PATH`: The clang compiler path that will be used for
+    both host and device code compilation.
+  * `TF_SYSROOT`: The sysroot to use when compiling.
+  * `TF_CUDA_VERSION`: The version of the CUDA toolkit (mandatory).
+  * `TF_CUDA_COMPUTE_CAPABILITIES`: The CUDA compute capabilities. Default is
+    `compute_35,compute_52`.
+  * `PYTHON_BIN_PATH`: The python binary path.
+"""

+load(
+    "//third_party/remote_config:common.bzl",
+    "get_cpu_value",
+    "get_host_environ",
+    "which",
+)
+load(
+    ":compiler_common_tools.bzl",
+    "get_cxx_inc_directories",
+    "to_list_of_strings",
+)
+
+def _find_cc(repository_ctx):
+    """Find the C++ compiler."""
+    cc_path_envvar = _CLANG_CUDA_COMPILER_PATH
+    cc_name = "clang"
+
+    cc_name_from_env = get_host_environ(repository_ctx, cc_path_envvar)
+    if cc_name_from_env:
+        cc_name = cc_name_from_env
+    if cc_name.startswith("/"):
+        # Return the absolute path.
+        return cc_name
+    cc = which(repository_ctx, cc_name)
+    if cc == None:
+        fail(("Cannot find {}, either correct your path or set the {}" +
+              " environment variable").format(cc_name, cc_path_envvar))
+    return cc
+
+def _auto_configure_fail(msg):
+    """Output failure message when cuda configuration fails."""
+    red = "\033[0;31m"
+    no_color = "\033[0m"
+    fail("\n%sCuda Configuration Error:%s %s\n" % (red, no_color, msg))
+
+def _lib_name(base_name, cpu_value, version = None, static = False):
+    """Constructs the platform-specific name of a library.
+
+    Args:
+      base_name: The name of the library, such as "cudart".
+      cpu_value: The name of the host operating system.
+      version: The version of the library.
+      static: True if the library is static, False if it is a shared object.
+
+    Returns:
+      The platform-specific name of the library.
+    """
+    version = "" if not version else "." + version
+    if cpu_value == "Linux":
+        if static:
+            return "lib%s.a" % base_name
+        return "lib%s.so%s" % (base_name, version)
+    elif cpu_value == "Windows":
+        return "%s.lib" % base_name
+    elif cpu_value == "Darwin":
+        if static:
+            return "lib%s.a" % base_name
+        return "lib%s%s.dylib" % (base_name, version)
+    else:
+        _auto_configure_fail("Invalid cpu_value: %s" % cpu_value)
+
+def _verify_build_defines(params):
+    """Verify all variables that crosstool/BUILD.tpl expects are substituted.
+
+    Args:
+      params: dict of variables that will be passed to the BUILD.tpl template.
+    """
+    missing = []
+    for param in [
+        "cxx_builtin_include_directories",
+        "extra_no_canonical_prefixes_flags",
+        "host_compiler_path",
+        "host_compiler_prefix",
+        "host_compiler_warnings",
+        "linker_bin_path",
+        "compiler_deps",
+        "msvc_cl_path",
+        "msvc_env_include",
+        "msvc_env_lib",
+        "msvc_env_path",
+        "msvc_env_tmp",
+        "msvc_lib_path",
+        "msvc_link_path",
+        "msvc_ml_path",
+        "unfiltered_compile_flags",
+        "win_compiler_deps",
+    ]:
+        if ("%{" + param + "}") not in params:
+            missing.append(param)
+
+    if missing:
+        _auto_configure_fail(
+            "BUILD.tpl template is missing these variables: " +
+            str(missing) +
+            ".\nWe only got: " +
+            str(params) +
+            ".",
+        )
+
+def get_cuda_version(repository_ctx):
+    """Returns the CUDA toolkit version from TF_CUDA_VERSION."""
+    return get_host_environ(repository_ctx, _TF_CUDA_VERSION)
+
+def enable_cuda(repository_ctx):
+    """Returns whether to build with CUDA support."""
+    return int(get_host_environ(repository_ctx, TF_NEED_CUDA, False))
+
+def _flag_enabled(repository_ctx, flag_name):
+    return get_host_environ(repository_ctx, flag_name) == "1"
+
+def _use_nvcc_and_clang(repository_ctx):
+    # Returns True if clang should compile C++ and NVCC should compile CUDA.
+    return _flag_enabled(repository_ctx, _TF_NVCC_CLANG)
+
+def _tf_sysroot(repository_ctx):
+    return get_host_environ(repository_ctx, _TF_SYSROOT, "")
+
+def _py_tmpl_dict(d):
+    return {"%{cuda_config}": str(d)}
+
+def _cudart_static_linkopt(cpu_value):
+    """Returns the additional platform-specific linkopt for cudart_static."""
+    return "\"\"," if cpu_value == "Darwin" else "\"-lrt\","
+
+def _compute_capabilities(repository_ctx):
+    """Returns a list of strings representing CUDA compute capabilities.
+
+    Args:
+      repository_ctx: the repo rule's context.
+
+    Returns:
+      list of CUDA architectures to compile for. 'compute_xy' refers to
+      both PTX and SASS, 'sm_xy' refers to SASS only.
+    """
+    capabilities = get_host_environ(
+        repository_ctx,
+        _TF_CUDA_COMPUTE_CAPABILITIES,
+        "compute_35,compute_52",
+    ).split(",")
+
+    # Map old 'x.y' capabilities to 'compute_xy'.
+    if len(capabilities) > 0 and all([len(x.split(".")) == 2 for x in capabilities]):
+        # If all capabilities are in 'x.y' format, only include PTX for the
+        # highest capability.
+        cc_list = sorted([x.replace(".", "") for x in capabilities])
+        capabilities = ["sm_%s" % x for x in cc_list[:-1]] + ["compute_%s" % cc_list[-1]]
+    for i, capability in enumerate(capabilities):
+        parts = capability.split(".")
+        if len(parts) != 2:
+            continue
+        capabilities[i] = "compute_%s%s" % (parts[0], parts[1])
+
+    # Make the list unique.
+    capabilities = dict(zip(capabilities, capabilities)).keys()
+
+    # Validate capabilities.
+    for capability in capabilities:
+        if not capability.startswith(("compute_", "sm_")):
+            _auto_configure_fail("Invalid compute capability: %s" % capability)
+        for prefix in ["compute_", "sm_"]:
+            if not capability.startswith(prefix):
+                continue
+            if len(capability) == len(prefix) + 2 and capability[-2:].isdigit():
+                continue
+            if len(capability) == len(prefix) + 3 and capability.endswith("90a"):
+                continue
+            _auto_configure_fail("Invalid compute capability: %s" % capability)
+
+    return capabilities
+
+def _compute_cuda_extra_copts(compute_capabilities):
+    copts = ["--no-cuda-include-ptx=all"]
+    for capability in compute_capabilities:
+        if capability.startswith("compute_"):
+            capability = capability.replace("compute_", "sm_")
+            copts.append("--cuda-include-ptx=%s" % capability)
+        copts.append("--cuda-gpu-arch=%s" % capability)
+
+    return str(copts)
+
+def _get_cuda_config(repository_ctx):
+    """Returns information about the hermetic CUDA configuration.
+
+    Unlike the non-hermetic rule, the library versions are read from the
+    version.txt files shipped with the downloaded redistributions rather
+    than detected on the local system.
+
+    Args:
+      repository_ctx: The repository context.
+
+    Returns:
+      A struct containing the following fields:
+        cuda_version: The CUDA toolkit version.
+        cupti_version: The CUPTI version.
+        cudart_version: The CUDA runtime version.
+        cublas_version: The cuBLAS version.
+        cusolver_version: The cuSOLVER version.
+        curand_version: The cuRAND version.
+        cufft_version: The cuFFT version.
+        cusparse_version: The cuSPARSE version.
+        cudnn_version: The cuDNN version.
+        compute_capabilities: A list of the CUDA compute capabilities.
+        cpu_value: The name of the host operating system.
+ """ + + return struct( + cuda_version = get_cuda_version(repository_ctx), + cupti_version = repository_ctx.read(repository_ctx.attr.cupti_version), + cudart_version = repository_ctx.read(repository_ctx.attr.cudart_version), + cublas_version = repository_ctx.read(repository_ctx.attr.cublas_version), + cusolver_version = repository_ctx.read(repository_ctx.attr.cusolver_version), + curand_version = repository_ctx.read(repository_ctx.attr.curand_version), + cufft_version = repository_ctx.read(repository_ctx.attr.cufft_version), + cusparse_version = repository_ctx.read(repository_ctx.attr.cusparse_version), + cudnn_version = repository_ctx.read(repository_ctx.attr.cudnn_version), + compute_capabilities = _compute_capabilities(repository_ctx), + cpu_value = get_cpu_value(repository_ctx), + ) + +_DUMMY_CROSSTOOL_BZL_FILE = """ +def error_gpu_disabled(): + fail("ERROR: Building with --config=cuda but TensorFlow is not configured " + + "to build with GPU support. Please re-run ./configure and enter 'Y' " + + "at the prompt to build with GPU support.") + + native.genrule( + name = "error_gen_crosstool", + outs = ["CROSSTOOL"], + cmd = "echo 'Should not be run.' && exit 1", + ) + + native.filegroup( + name = "crosstool", + srcs = [":CROSSTOOL"], + output_licenses = ["unencumbered"], + ) +""" + +_DUMMY_CROSSTOOL_BUILD_FILE = """ +load("//crosstool:error_gpu_disabled.bzl", "error_gpu_disabled") + +error_gpu_disabled() +""" + +def _create_dummy_repository(repository_ctx): + cpu_value = get_cpu_value(repository_ctx) + + # Set up BUILD file for cuda/. + repository_ctx.template( + "cuda/build_defs.bzl", + repository_ctx.attr.build_defs_tpl, + { + "%{cuda_is_configured}": "False", + "%{cuda_extra_copts}": "[]", + "%{cuda_gpu_architectures}": "[]", + "%{cuda_version}": "0.0", + }, + ) + + repository_ctx.template( + "cuda/BUILD", + repository_ctx.attr.dummy_cuda_build_tpl, + { + "%{cuda_driver_lib}": _lib_name("cuda", cpu_value), + "%{cudart_static_lib}": _lib_name( + "cudart_static", + cpu_value, + static = True, + ), + "%{cudart_static_linkopt}": _cudart_static_linkopt(cpu_value), + "%{cudart_lib}": _lib_name("cudart", cpu_value), + "%{cublas_lib}": _lib_name("cublas", cpu_value), + "%{cublasLt_lib}": _lib_name("cublasLt", cpu_value), + "%{cusolver_lib}": _lib_name("cusolver", cpu_value), + "%{cudnn_lib}": _lib_name("cudnn", cpu_value), + "%{cufft_lib}": _lib_name("cufft", cpu_value), + "%{curand_lib}": _lib_name("curand", cpu_value), + "%{cupti_lib}": _lib_name("cupti", cpu_value), + "%{cusparse_lib}": _lib_name("cusparse", cpu_value), + "%{cub_actual}": ":cuda_headers", + "%{copy_rules}": """ +filegroup(name="cuda-include") +filegroup(name="cublas-include") +filegroup(name="cusolver-include") +filegroup(name="cufft-include") +filegroup(name="cusparse-include") +filegroup(name="curand-include") +filegroup(name="cudnn-include") +""", + }, + ) + + # Create dummy files for the CUDA toolkit since they are still required by + # tensorflow/tsl/platform/default/build_config:cuda. 
+ repository_ctx.file("cuda/cuda/include/cuda.h") + repository_ctx.file("cuda/cuda/include/cublas.h") + repository_ctx.file("cuda/cuda/include/cudnn.h") + repository_ctx.file("cuda/cuda/extras/CUPTI/include/cupti.h") + repository_ctx.file("cuda/cuda/nvml/include/nvml.h") + repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cuda", cpu_value)) + repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cudart", cpu_value)) + repository_ctx.file( + "cuda/cuda/lib/%s" % _lib_name("cudart_static", cpu_value), + ) + repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cublas", cpu_value)) + repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cublasLt", cpu_value)) + repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cusolver", cpu_value)) + repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cudnn", cpu_value)) + repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("curand", cpu_value)) + repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cufft", cpu_value)) + repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cupti", cpu_value)) + repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cusparse", cpu_value)) + + # Set up cuda_config.h, which is used by + # tensorflow/compiler/xla/stream_executor/dso_loader.cc. + repository_ctx.template( + "cuda/cuda/cuda_config.h", + repository_ctx.attr.cuda_config_tpl, + { + "%{cuda_version}": "", + "%{cudart_version}": "", + "%{cupti_version}": "", + "%{cublas_version}": "", + "%{cusolver_version}": "", + "%{curand_version}": "", + "%{cufft_version}": "", + "%{cusparse_version}": "", + "%{cudnn_version}": "", + "%{cuda_toolkit_path}": "", + "%{cuda_compute_capabilities}": "", + }, + ) + + # Set up cuda_config.py, which is used by gen_build_info to provide + # static build environment info to the API + repository_ctx.template( + "cuda/cuda/cuda_config.py", + repository_ctx.attr.cuda_config_py_tpl, + _py_tmpl_dict({}), + ) + + # If cuda_configure is not configured to build with GPU support, and the user + # attempts to build with --config=cuda, add a dummy build rule to intercept + # this and fail with an actionable error message. + repository_ctx.file( + "crosstool/error_gpu_disabled.bzl", + _DUMMY_CROSSTOOL_BZL_FILE, + ) + repository_ctx.file("crosstool/BUILD", _DUMMY_CROSSTOOL_BUILD_FILE) + +def _create_local_cuda_repository(repository_ctx): + """Creates the repository containing files set up to build with CUDA.""" + cuda_config = _get_cuda_config(repository_ctx) + + # Set up BUILD file for cuda/ + repository_ctx.template( + "cuda/build_defs.bzl", + repository_ctx.attr.build_defs_tpl, + { + "%{cuda_is_configured}": "True", + "%{cuda_extra_copts}": _compute_cuda_extra_copts( + cuda_config.compute_capabilities, + ), + "%{cuda_gpu_architectures}": str(cuda_config.compute_capabilities), + "%{cuda_version}": cuda_config.cuda_version, + }, + ) + + repository_ctx.template( + "cuda/BUILD", + repository_ctx.attr.cuda_build_tpl, + { + "%{cudart_static_linkopt}": _cudart_static_linkopt(cuda_config.cpu_value), + "%{cub_actual}": ":cuda_headers", + }, + ) + + is_nvcc_and_clang = _use_nvcc_and_clang(repository_ctx) + tf_sysroot = _tf_sysroot(repository_ctx) + + # Set up crosstool/ + cc = _find_cc(repository_ctx) + host_compiler_includes = get_cxx_inc_directories( + repository_ctx, + cc, + tf_sysroot, + ) + + cuda_defines = {} + + # We do not support hermetic CUDA on Windows. + # This ensures the CROSSTOOL file parser is happy. 
+ cuda_defines.update({ + "%{msvc_env_tmp}": "msvc_not_used", + "%{msvc_env_path}": "msvc_not_used", + "%{msvc_env_include}": "msvc_not_used", + "%{msvc_env_lib}": "msvc_not_used", + "%{msvc_cl_path}": "msvc_not_used", + "%{msvc_ml_path}": "msvc_not_used", + "%{msvc_link_path}": "msvc_not_used", + "%{msvc_lib_path}": "msvc_not_used", + "%{win_compiler_deps}": ":empty", + }) + + cuda_defines["%{builtin_sysroot}"] = tf_sysroot + cuda_defines["%{cuda_toolkit_path}"] = repository_ctx.attr.nvcc_binary.workspace_root + cuda_defines["%{compiler}"] = "clang" + cuda_defines["%{host_compiler_prefix}"] = "/usr/bin" + cuda_defines["%{linker_bin_path}"] = "" + cuda_defines["%{extra_no_canonical_prefixes_flags}"] = "" + cuda_defines["%{unfiltered_compile_flags}"] = "" + cuda_defines["%{cxx_builtin_include_directories}"] = to_list_of_strings(host_compiler_includes) + cuda_defines["%{cuda_nvcc_files}"] = "if_cuda([\"@{nvcc_archive}//:bin\", \"@{nvcc_archive}//:nvvm\"])".format(nvcc_archive = repository_ctx.attr.nvcc_binary.repo_name) + + if not is_nvcc_and_clang: + cuda_defines["%{host_compiler_path}"] = str(cc) + cuda_defines["%{host_compiler_warnings}"] = """ + # Some parts of the codebase set -Werror and hit this warning, so + # switch it off for now. + "-Wno-invalid-partial-specialization" + """ + cuda_defines["%{compiler_deps}"] = ":cuda_nvcc_files" + repository_ctx.file( + "crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc", + "", + ) + else: + cuda_defines["%{host_compiler_path}"] = "clang/bin/crosstool_wrapper_driver_is_not_gcc" + cuda_defines["%{host_compiler_warnings}"] = "" + + nvcc_relative_path = "%s/%s" % (repository_ctx.attr.nvcc_binary.workspace_root, repository_ctx.attr.nvcc_binary.name) + cuda_defines["%{compiler_deps}"] = ":crosstool_wrapper_driver_is_not_gcc" + + wrapper_defines = { + "%{cpu_compiler}": str(cc), + "%{cuda_version}": cuda_config.cuda_version, + "%{nvcc_path}": nvcc_relative_path, + "%{host_compiler_path}": str(cc), + "%{use_clang_compiler}": "True", + } + repository_ctx.template( + "crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc", + repository_ctx.attr.crosstool_wrapper_driver_is_not_gcc_tpl, + wrapper_defines, + ) + + _verify_build_defines(cuda_defines) + + # Only expand template variables in the BUILD file + repository_ctx.template( + "crosstool/BUILD", + repository_ctx.attr.crosstool_build_tpl, + cuda_defines, + ) + + # No templating of cc_toolchain_config - use attributes and templatize the + # BUILD file. + repository_ctx.template( + "crosstool/cc_toolchain_config.bzl", + repository_ctx.attr.cc_toolchain_config_tpl, + {}, + ) + + # Set up cuda_config.h, which is used by + # tensorflow/compiler/xla/stream_executor/dso_loader.cc. 
+ repository_ctx.template( + "cuda/cuda/cuda_config.h", + repository_ctx.attr.cuda_config_tpl, + { + "%{cuda_version}": cuda_config.cuda_version, + "%{cudart_version}": cuda_config.cudart_version, + "%{cupti_version}": cuda_config.cupti_version, + "%{cublas_version}": cuda_config.cublas_version, + "%{cusolver_version}": cuda_config.cusolver_version, + "%{curand_version}": cuda_config.curand_version, + "%{cufft_version}": cuda_config.cufft_version, + "%{cusparse_version}": cuda_config.cusparse_version, + "%{cudnn_version}": cuda_config.cudnn_version, + "%{cuda_toolkit_path}": "", + "%{cuda_compute_capabilities}": ", ".join([ + cc.split("_")[1] + for cc in cuda_config.compute_capabilities + ]), + }, + ) + + # Set up cuda_config.py, which is used by gen_build_info to provide + # static build environment info to the API + repository_ctx.template( + "cuda/cuda/cuda_config.py", + repository_ctx.attr.cuda_config_py_tpl, + _py_tmpl_dict({ + "cuda_version": cuda_config.cuda_version, + "cudnn_version": cuda_config.cudnn_version, + "cuda_compute_capabilities": cuda_config.compute_capabilities, + "cpu_compiler": str(cc), + }), + ) + +def _cuda_autoconf_impl(repository_ctx): + """Implementation of the cuda_autoconf repository rule.""" + build_file = repository_ctx.attr.local_config_cuda_build_file + + if not enable_cuda(repository_ctx): + _create_dummy_repository(repository_ctx) + else: + _create_local_cuda_repository(repository_ctx) + + repository_ctx.symlink(build_file, "BUILD") + +_CLANG_CUDA_COMPILER_PATH = "CLANG_CUDA_COMPILER_PATH" +_PYTHON_BIN_PATH = "PYTHON_BIN_PATH" +_TF_CUDA_COMPUTE_CAPABILITIES = "TF_CUDA_COMPUTE_CAPABILITIES" +_TF_CUDA_VERSION = "TF_CUDA_VERSION" +TF_NEED_CUDA = "TF_NEED_CUDA" +_TF_NVCC_CLANG = "TF_NVCC_CLANG" +_TF_SYSROOT = "TF_SYSROOT" + +_ENVIRONS = [ + _CLANG_CUDA_COMPILER_PATH, + TF_NEED_CUDA, + _TF_NVCC_CLANG, + _TF_CUDA_VERSION, + _TF_CUDA_COMPUTE_CAPABILITIES, + _TF_SYSROOT, + _PYTHON_BIN_PATH, + "TMP", + "TMPDIR", +] + +hermetic_cuda_configure = repository_rule( + implementation = _cuda_autoconf_impl, + environ = _ENVIRONS, + attrs = { + "environ": attr.string_dict(), + "cublas_version": attr.label(default = Label("@cuda_cublas//:version.txt")), + "cudart_version": attr.label(default = Label("@cuda_cudart//:version.txt")), + "cudnn_version": attr.label(default = Label("@cuda_cudnn//:version.txt")), + "cufft_version": attr.label(default = Label("@cuda_cufft//:version.txt")), + "cupti_version": attr.label(default = Label("@cuda_cupti//:version.txt")), + "curand_version": attr.label(default = Label("@cuda_curand//:version.txt")), + "cusolver_version": attr.label(default = Label("@cuda_cusolver//:version.txt")), + "cusparse_version": attr.label(default = Label("@cuda_cusparse//:version.txt")), + "nvcc_binary": attr.label(default = Label("@cuda_nvcc//:bin/nvcc")), + "local_config_cuda_build_file": attr.label(default = Label("//third_party/gpus:local_config_cuda.BUILD")), + "build_defs_tpl": attr.label(default = Label("//third_party/gpus/cuda:build_defs.bzl.tpl")), + "cuda_build_tpl": attr.label(default = Label("//third_party/gpus/cuda:BUILD.hermetic.tpl")), + "dummy_cuda_build_tpl": attr.label(default = Label("//third_party/gpus/cuda:BUILD.tpl")), + "cuda_config_tpl": attr.label(default = Label("//third_party/gpus/cuda:cuda_config.h.tpl")), + "cuda_config_py_tpl": attr.label(default = Label("//third_party/gpus/cuda:cuda_config.py.tpl")), + "crosstool_wrapper_driver_is_not_gcc_tpl": attr.label(default = 
Label("//third_party/gpus/crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl")), + "crosstool_build_tpl": attr.label(default = Label("//third_party/gpus/crosstool:BUILD.tpl")), + "cc_toolchain_config_tpl": attr.label(default = Label("//third_party/gpus/crosstool:cc_toolchain_config.bzl.tpl")), + }, +) +"""Detects and configures the hermetic CUDA toolchain. + +Add the following to your WORKSPACE FILE: + +```python +hermetic cuda_configure(name = "local_config_cuda") +``` + +Args: + name: A unique name for this workspace rule. +""" diff --git a/third_party/gpus/rocm_configure.bzl b/third_party/gpus/rocm_configure.bzl index 0fd4019fc5bb75..cf756b452e3950 100644 --- a/third_party/gpus/rocm_configure.bzl +++ b/third_party/gpus/rocm_configure.bzl @@ -22,12 +22,15 @@ load( "realpath", "which", ) +load( + ":compiler_common_tools.bzl", + "to_list_of_strings", +) load( ":cuda_configure.bzl", "enable_cuda", "make_copy_dir_rule", "make_copy_files_rule", - "to_list_of_strings", ) load( ":sycl_configure.bzl", diff --git a/third_party/gpus/sycl_configure.bzl b/third_party/gpus/sycl_configure.bzl index 05330b2fe53195..dd80694e7274f5 100644 --- a/third_party/gpus/sycl_configure.bzl +++ b/third_party/gpus/sycl_configure.bzl @@ -16,11 +16,14 @@ load( "realpath", "which", ) +load( + ":compiler_common_tools.bzl", + "to_list_of_strings", +) load( ":cuda_configure.bzl", "make_copy_dir_rule", "make_copy_files_rule", - "to_list_of_strings", ) _GCC_HOST_COMPILER_PATH = "GCC_HOST_COMPILER_PATH" diff --git a/third_party/nccl/build_defs.bzl.tpl b/third_party/nccl/build_defs.bzl.tpl index 53a6d4e1e41890..a0930df34ecec8 100644 --- a/third_party/nccl/build_defs.bzl.tpl +++ b/third_party/nccl/build_defs.bzl.tpl @@ -5,7 +5,6 @@ load("@bazel_tools//tools/cpp:toolchain_utils.bzl", "find_cpp_toolchain") # CUDA toolkit version as tuple (e.g. '(11, 1)'). _cuda_version = %{cuda_version} -_cuda_clang = %{cuda_clang} def _rdc_copts(): """Returns copts for compiling relocatable device code.""" @@ -121,25 +120,25 @@ _device_link = rule( "gpu_archs": attr.string_list(), "nvlink_args": attr.string_list(), "_nvlink": attr.label( - default = Label("@local_config_cuda//cuda:cuda/bin/nvlink"), + default = Label("%{nvlink_label}"), allow_single_file = True, executable = True, cfg = "host", ), "_fatbinary": attr.label( - default = Label("@local_config_cuda//cuda:cuda/bin/fatbinary"), + default = Label("%{fatbinary_label}"), allow_single_file = True, executable = True, cfg = "host", ), "_bin2c": attr.label( - default = Label("@local_config_cuda//cuda:cuda/bin/bin2c"), + default = Label("%{bin2c_label}"), allow_single_file = True, executable = True, cfg = "host", ), "_link_stub": attr.label( - default = Label("@local_config_cuda//cuda:cuda/bin/crt/link.stub"), + default = Label("%{link_stub_label}"), allow_single_file = True, ), }, @@ -189,7 +188,7 @@ _prune_relocatable_code = rule( "input": attr.label(mandatory = True, allow_files = True), "gpu_archs": attr.string_list(), "_nvprune": attr.label( - default = Label("@local_config_cuda//cuda:cuda/bin/nvprune"), + default = Label("%{nvprune_label}"), allow_single_file = True, executable = True, cfg = "host", diff --git a/third_party/nccl/hermetic_nccl_configure.bzl b/third_party/nccl/hermetic_nccl_configure.bzl new file mode 100644 index 00000000000000..b99cbcb08db58a --- /dev/null +++ b/third_party/nccl/hermetic_nccl_configure.bzl @@ -0,0 +1,153 @@ +"""Repository rule for hermetic NCCL configuration. 
+ +`hermetic_nccl_configure` depends on the following environment variables: + + * `TF_NCCL_USE_STUB`: "1" if a NCCL stub that loads NCCL dynamically should + be used, "0" if NCCL should be linked in statically. + +""" + +load( + "//third_party/gpus:hermetic_cuda_configure.bzl", + "TF_NEED_CUDA", + "enable_cuda", + "get_cuda_version", +) +load( + "//third_party/remote_config:common.bzl", + "get_cpu_value", + "get_host_environ", +) + +_TF_NCCL_USE_STUB = "TF_NCCL_USE_STUB" + +_NCCL_DUMMY_BUILD_CONTENT = """ +filegroup( + name = "LICENSE", + visibility = ["//visibility:public"], +) + +cc_library( + name = "nccl", + visibility = ["//visibility:public"], +) + +cc_library( + name = "nccl_config", + hdrs = ["nccl_config.h"], + include_prefix = "third_party/nccl", + visibility = ["//visibility:public"], +) +""" + +_NCCL_ARCHIVE_BUILD_CONTENT = """ +filegroup( + name = "LICENSE", + data = ["@nccl_archive//:LICENSE.txt"], + visibility = ["//visibility:public"], +) + +alias( + name = "nccl", + actual = "@nccl_archive//:nccl", + visibility = ["//visibility:public"], +) + +alias( + name = "nccl_config", + actual = "@nccl_archive//:nccl_config", + visibility = ["//visibility:public"], +) +""" + +_NCCL_ARCHIVE_STUB_BUILD_CONTENT = """ +alias( + name = "nccl_lib", + actual = "@cuda_nccl//:nccl_lib", +) + +filegroup( + name = "LICENSE", + data = ["@nccl_archive//:LICENSE.txt"], + visibility = ["//visibility:public"], +) + +alias( + name = "nccl", + actual = "@nccl_archive//:nccl_via_stub", + visibility = ["//visibility:public"], +) + +alias( + name = "nccl_headers", + actual = "@nccl_archive//:nccl_headers", + visibility = ["//visibility:public"], +) + +alias( + name = "nccl_config", + actual = "@nccl_archive//:nccl_config", + visibility = ["//visibility:public"], +) +""" + +def _create_local_nccl_repository(repository_ctx): + cuda_version = get_cuda_version(repository_ctx) + if cuda_version == "12": + cuda_version = "12.3" + cuda_version = cuda_version.split(".") + + # Alias to open source build from @nccl_archive. + if get_host_environ(repository_ctx, _TF_NCCL_USE_STUB, "0") == "0": + repository_ctx.file("BUILD", _NCCL_ARCHIVE_BUILD_CONTENT) + else: + repository_ctx.file("BUILD", _NCCL_ARCHIVE_STUB_BUILD_CONTENT) + + repository_ctx.template("generated_names.bzl", repository_ctx.attr.generated_names_tpl, {}) + repository_ctx.template( + "build_defs.bzl", + repository_ctx.attr.build_defs_tpl, + { + "%{cuda_version}": "(%s, %s)" % tuple(cuda_version), + "%{nvlink_label}": "@cuda_nvcc//:nvlink", + "%{fatbinary_label}": "@cuda_nvcc//:fatbinary", + "%{bin2c_label}": "@cuda_nvcc//:bin2c", + "%{link_stub_label}": "@cuda_nvcc//:link_stub", + "%{nvprune_label}": "@cuda_nvprune//:nvprune", + }, + ) + +def _nccl_autoconf_impl(repository_ctx): + if (not enable_cuda(repository_ctx) or + get_cpu_value(repository_ctx) not in ("Linux", "FreeBSD")): + # Add a dummy build file to make bazel query happy. 
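+        # The dummy targets keep //third_party/nccl dependencies resolvable
+        # when CUDA is disabled or the platform is unsupported; nccl_config.h
+        # then reports an empty TF_NCCL_VERSION.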
+ repository_ctx.file("BUILD", _NCCL_DUMMY_BUILD_CONTENT) + repository_ctx.file("nccl_config.h", "#define TF_NCCL_VERSION \"\"") + else: + _create_local_nccl_repository(repository_ctx) + +_ENVIRONS = [ + TF_NEED_CUDA, +] + +hermetic_nccl_configure = repository_rule( + environ = _ENVIRONS, + implementation = _nccl_autoconf_impl, + attrs = { + "environ": attr.string_dict(), + "generated_names_tpl": attr.label(default = Label("//third_party/nccl:generated_names.bzl.tpl")), + "build_defs_tpl": attr.label(default = Label("//third_party/nccl:build_defs.bzl.tpl")), + "system_build_tpl": attr.label(default = Label("//third_party/nccl:system.BUILD.tpl")), + }, +) +"""Downloads and configures the hermetic NCCL configuration. + +Add the following to your WORKSPACE FILE: + +```python +hermetic_nccl_configure(name = "local_config_nccl") +``` + +Args: + name: A unique name for this workspace rule. +""" diff --git a/third_party/nccl/nccl_configure.bzl b/third_party/nccl/nccl_configure.bzl index 22cf64d4771062..4da2513e03eb44 100644 --- a/third_party/nccl/nccl_configure.bzl +++ b/third_party/nccl/nccl_configure.bzl @@ -8,7 +8,6 @@ files. * `TF_CUDA_PATHS`: The base paths to look for CUDA and cuDNN. Default is `/usr/local/cuda,usr/`. - * `TF_CUDA_CLANG`: "1" if using Clang, "0" if using NVCC. * `TF_NCCL_USE_STUB`: "1" if a NCCL stub that loads NCCL dynamically should be used, "0" if NCCL should be linked in statically. @@ -33,7 +32,6 @@ _TF_CUDA_COMPUTE_CAPABILITIES = "TF_CUDA_COMPUTE_CAPABILITIES" _TF_NCCL_VERSION = "TF_NCCL_VERSION" _TF_NEED_CUDA = "TF_NEED_CUDA" _TF_CUDA_PATHS = "TF_CUDA_PATHS" -_TF_CUDA_CLANG = "TF_CUDA_CLANG" _TF_NCCL_USE_STUB = "TF_NCCL_USE_STUB" _DEFINE_NCCL_MAJOR = "#define NCCL_MAJOR" @@ -129,7 +127,11 @@ def _create_local_nccl_repository(repository_ctx): _label("build_defs.bzl.tpl"), { "%{cuda_version}": "(%s, %s)" % tuple(cuda_version), - "%{cuda_clang}": repr(get_host_environ(repository_ctx, _TF_CUDA_CLANG)), + "%{nvlink_label}": "@local_config_cuda//cuda:cuda/bin/nvlink", + "%{fatbinary_label}": "@local_config_cuda//cuda:cuda/bin/fatbinary", + "%{bin2c_label}": "@local_config_cuda//cuda:cuda/bin/bin2c", + "%{link_stub_label}": "@local_config_cuda//cuda:cuda/bin/crt/link.stub", + "%{nvprune_label}": "@local_config_cuda//cuda:cuda/bin/nvprune", }, ) else: @@ -181,7 +183,6 @@ _ENVIRONS = [ _TF_CUDA_COMPUTE_CAPABILITIES, _TF_NEED_CUDA, _TF_CUDA_PATHS, - _TF_CUDA_CLANG, ] remote_nccl_configure = repository_rule( diff --git a/third_party/xla/.bazelrc b/third_party/xla/.bazelrc index 02dec0349c4741..c17ae4494dc99c 100644 --- a/third_party/xla/.bazelrc +++ b/third_party/xla/.bazelrc @@ -226,12 +226,13 @@ build:cuda --repo_env TF_NEED_CUDA=1 build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain build:cuda --@local_config_cuda//:enable_cuda +build:no_cuda_libs --@local_config_cuda//cuda:include_hermetic_cuda_libs=false + # CUDA: This config refers to building CUDA op kernels with clang. build:cuda_clang --config=cuda -# Enable TensorRT optimizations https://developer.nvidia.com/tensorrt -build:cuda_clang --config=tensorrt build:cuda_clang --action_env=TF_CUDA_CLANG="1" build:cuda_clang --@local_config_cuda//:cuda_compiler=clang +build:cuda_clang --copt=-Qunused-arguments # Select supported compute capabilities (supported graphics cards). # This is the same as the official TensorFlow builds. 
# See https://developer.nvidia.com/cuda-gpus#compute @@ -244,12 +245,10 @@ build:cuda_clang --repo_env=TF_CUDA_COMPUTE_CAPABILITIES="sm_60,sm_70,sm_80,sm_8 # Set up compilation CUDA version and paths and use the CUDA Clang toolchain. build:cuda_clang_official --config=cuda_clang -build:cuda_clang_official --action_env=TF_CUDA_VERSION="12" -build:cuda_clang_official --action_env=TF_CUDNN_VERSION="8" -build:cuda_clang_official --action_env=CUDA_TOOLKIT_PATH="/usr/local/cuda-12.3" +build:cuda_clang_official --action_env=TF_CUDA_VERSION="12.3" +build:cuda_clang_official --action_env=TF_CUDNN_VERSION="8.9" build:cuda_clang_official --action_env=GCC_HOST_COMPILER_PATH="/dt9/usr/bin/gcc" build:cuda_clang_official --action_env=CLANG_CUDA_COMPILER_PATH="/usr/lib/llvm-17/bin/clang" -build:cuda_clang_official --action_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64" build:cuda_clang_official --crosstool_top="@sigbuild-r2.17-clang_config_cuda//crosstool:toolchain" # Build with nvcc for CUDA and clang for host @@ -545,10 +544,6 @@ build:rbe_linux_cuda --config=cuda_clang_official build:rbe_linux_cuda --config=rbe_linux_cpu # For Remote build execution -- GPU configuration build:rbe_linux_cuda --repo_env=REMOTE_GPU_TESTING=1 -build:rbe_linux_cuda --repo_env=TF_CUDA_CONFIG_REPO="@sigbuild-r2.17-clang_config_cuda" -build:rbe_linux_cuda --repo_env=TF_TENSORRT_CONFIG_REPO="@sigbuild-r2.17-clang_config_tensorrt" -build:rbe_linux_cuda --repo_env=TF_NCCL_CONFIG_REPO="@sigbuild-r2.17-clang_config_nccl" -test:rbe_linux_cuda --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64" build:rbe_linux_cuda_nvcc --config=rbe_linux_cuda build:rbe_linux_cuda_nvcc --config=nvcc_clang @@ -633,7 +628,6 @@ build:release_cpu_linux_base --repo_env=BAZEL_COMPILER="/usr/lib/llvm-17/bin/cla # Test-related settings below this point. test:release_linux_base --build_tests_only --keep_going --test_output=errors --verbose_failures=true test:release_linux_base --local_test_jobs=HOST_CPUS -test:release_linux_base --test_env=LD_LIBRARY_PATH # Give only the list of failed tests at the end of the log test:release_linux_base --test_summary=short @@ -645,7 +639,6 @@ build:release_gpu_linux --config=release_cpu_linux # Set up compilation CUDA version and paths and use the CUDA Clang toolchain. # Note that linux cpu and cuda builds share the same toolchain now. 
build:release_gpu_linux --config=cuda_clang_official -test:release_gpu_linux --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64" # Local test jobs has to be 4 because parallel_gpu_execute is fragile, I think test:release_gpu_linux --test_timeout=300,450,1200,3600 --local_test_jobs=4 --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute @@ -674,11 +667,8 @@ test:unsupported_cpu_linux --config=release_base build:unsupported_gpu_linux --config=cuda build:unsupported_gpu_linux --config=unsupported_cpu_linux build:unsupported_gpu_linux --action_env=TF_CUDA_VERSION="11" -build:unsupported_gpu_linux --action_env=TF_CUDNN_VERSION="8" +build:unsupported_gpu_linux --action_env=TF_CUDNN_VERSION="8.6" build:unsupported_gpu_linux --repo_env=TF_CUDA_COMPUTE_CAPABILITIES="sm_35,sm_50,sm_60,sm_70,sm_75,compute_80" -build:unsupported_gpu_linux --config=tensorrt -build:unsupported_gpu_linux --action_env=CUDA_TOOLKIT_PATH="/usr/local/cuda-11.2" -build:unsupported_gpu_linux --action_env=LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda-11.1/lib64:/usr/local/tensorrt/lib" build:unsupported_gpu_linux --action_env=GCC_HOST_COMPILER_PATH="/dt9/usr/bin/gcc" build:unsupported_gpu_linux --crosstool_top=@ubuntu20.04-gcc9_manylinux2014-cuda11.2-cudnn8.1-tensorrt7.2_config_cuda//crosstool:toolchain diff --git a/third_party/xla/build_tools/configure/configure.py b/third_party/xla/build_tools/configure/configure.py index 663e4b8724280d..74d571909a2c57 100755 --- a/third_party/xla/build_tools/configure/configure.py +++ b/third_party/xla/build_tools/configure/configure.py @@ -27,11 +27,6 @@ the clang in your path. If that isn't the correct clang, you can override like `./configure.py --backend=cpu --clang_path=`. -NOTE(ddunleavy): Lots of these things should probably be outside of configure.py -but are here because of complexity in `cuda_configure.bzl` and the TF bazelrc. -Once XLA has it's own bazelrc, and cuda_configure.bzl is replaced or refactored, -we can probably make this file smaller. - TODO(ddunleavy): add more thorough validation. """ import argparse @@ -45,18 +40,10 @@ import sys from typing import Optional -_REQUIRED_CUDA_LIBRARIES = ["cublas", "cuda", "cudnn"] +_REQUIRED_CUDA_LIBRARIES = ["cuda", "cudnn"] _DEFAULT_BUILD_AND_TEST_TAG_FILTERS = ("-no_oss",) # Assume we are being invoked from the symlink at the root of the repo _XLA_SRC_ROOT = pathlib.Path(__file__).absolute().parent -_FIND_CUDA_CONFIG = str( - _XLA_SRC_ROOT - / "third_party" - / "tsl" - / "third_party" - / "gpus" - / "find_cuda_config.py" -) _XLA_BAZELRC_NAME = "xla_configure.bazelrc" _KW_ONLY_IF_PYTHON310 = {"kw_only": True} if sys.version_info >= (3, 10) else {} @@ -218,11 +205,9 @@ class DiscoverablePathsAndVersions: ld_library_path: Optional[str] = None # CUDA specific - cublas_version: Optional[str] = None - cuda_toolkit_path: Optional[str] = None + cuda_version: Optional[str] = None cuda_compute_capabilities: Optional[list[str]] = None cudnn_version: Optional[str] = None - nccl_version: Optional[str] = None def get_relevant_paths_and_versions(self, config: "XLAConfigOptions"): """Gets paths and versions as needed by the config. @@ -241,7 +226,7 @@ def get_relevant_paths_and_versions(self, config: "XLAConfigOptions"): ) # Notably, we don't use `_find_executable_or_die` for lld, as it changes - # which commands it accepts based on it's name! ld.lld is symlinked to a + # which commands it accepts based on its name! 
ld.lld is symlinked to a # different executable just called lld, which should not be invoked # directly. self.lld_path = self.lld_path or shutil.which("ld.lld") @@ -255,64 +240,6 @@ def get_relevant_paths_and_versions(self, config: "XLAConfigOptions"): if not self.cuda_compute_capabilities: self.cuda_compute_capabilities = _get_cuda_compute_capabilities_or_die() - self._get_cuda_libraries_paths_and_versions_if_needed(config) - - def _get_cuda_libraries_paths_and_versions_if_needed( - self, config: "XLAConfigOptions" - ): - """Gets cuda paths and versions if user left any unspecified. - - This uses `find_cuda_config.py` to find versions for all libraries in - `_REQUIRED_CUDA_LIBRARIES`. - - Args: - config: config that determines which libraries should be found. - """ - should_find_nccl = config.using_nccl and self.nccl_version is None - any_cuda_config_unset = any([ - self.cublas_version is None, - self.cuda_toolkit_path is None, - self.cudnn_version is None, - should_find_nccl, - ]) - - maybe_nccl = ["nccl"] if should_find_nccl else [] - - if any_cuda_config_unset: - logging.info( - "Some CUDA config versions and paths were not provided, " - "so trying to find them using find_cuda_config.py" - ) - try: - find_cuda_config_proc = subprocess.run( - [ - sys.executable, - _FIND_CUDA_CONFIG, - *_REQUIRED_CUDA_LIBRARIES, - *maybe_nccl, - ], - capture_output=True, - check=True, - text=True, - ) - except subprocess.CalledProcessError as e: - logging.info("Command %s failed. Is CUDA installed?", e.cmd) - logging.info("Dumping %s ouptut:\n %s", e.cmd, e.output) - raise e - - cuda_config = dict( - tuple(line.split(": ")) - for line in find_cuda_config_proc.stdout.strip().split("\n") - ) - - self.cublas_version = self.cublas_version or cuda_config["cublas_version"] - self.cuda_toolkit_path = ( - self.cuda_toolkit_path or cuda_config["cuda_toolkit_path"] - ) - self.cudnn_version = self.cudnn_version or cuda_config["cudnn_version"] - if should_find_nccl: - self.nccl_version = self.nccl_version or cuda_config["nccl_version"] - @dataclasses.dataclass(frozen=True, **_KW_ONLY_IF_PYTHON310) class XLAConfigOptions: @@ -327,7 +254,6 @@ class XLAConfigOptions: # CUDA specific cuda_compiler: CudaCompiler using_nccl: bool - using_tensorrt: bool def to_bazelrc_lines( self, @@ -386,19 +312,13 @@ def to_bazelrc_lines( ) # Lines needed for CUDA backend regardless of CUDA/host compiler - rc.append( - f"build --action_env CUDA_TOOLKIT_PATH={dpav.cuda_toolkit_path}" - ) - rc.append(f"build --action_env TF_CUBLAS_VERSION={dpav.cublas_version}") + rc.append(f"build --action_env TF_CUDA_VERSION={dpav.cuda_version}") rc.append( "build --action_env" f" TF_CUDA_COMPUTE_CAPABILITIES={','.join(dpav.cuda_compute_capabilities)}" ) rc.append(f"build --action_env TF_CUDNN_VERSION={dpav.cudnn_version}") - rc.append(f"build --repo_env TF_NEED_TENSORRT={int(self.using_tensorrt)}") - if self.using_nccl: - rc.append(f"build --action_env TF_NCCL_VERSION={dpav.nccl_version}") - else: + if not self.using_nccl: rc.append("build --config nonccl") elif self.backend == Backend.ROCM: pass @@ -468,7 +388,6 @@ def _parse_args(): default="-Wno-sign-compare", ) parser.add_argument("--nccl", action="store_true") - parser.add_argument("--tensorrt", action="store_true") # Path and version overrides path_help = "Optional: will be found on PATH if possible." 
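+# CUDA and cuDNN versions are now handed to Bazel via TF_CUDA_VERSION /
+# TF_CUDNN_VERSION, and Bazel downloads the matching hermetic
+# redistributions instead of probing a local install with
+# find_cuda_config.py.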
@@ -484,13 +403,16 @@ def _parse_args(): parser.add_argument("--lld_path", help=path_help) # CUDA specific - find_cuda_config_help = ( - "Optional: will be found using `find_cuda_config.py` if flag is not set." + parser.add_argument( + "--cuda_version", + help="Optional: CUDA will be downloaded by Bazel if the flag is provided", + ) + parser.add_argument( + "--cudnn_version", + help=( + "Optional: CUDNN will be downloaded by Bazel if the flag is provided" + ), ) - parser.add_argument("--cublas_version", help=find_cuda_config_help) - parser.add_argument("--cuda_toolkit_path", help=find_cuda_config_help) - parser.add_argument("--cudnn_version", help=find_cuda_config_help) - parser.add_argument("--nccl_version", help=find_cuda_config_help) return parser.parse_args() @@ -510,7 +432,6 @@ def main(): python_bin_path=args.python_bin_path, compiler_options=args.compiler_options, using_nccl=args.nccl, - using_tensorrt=args.tensorrt, ) bazelrc_lines = config.to_bazelrc_lines( @@ -519,11 +440,9 @@ def main(): gcc_path=args.gcc_path, lld_path=args.lld_path, ld_library_path=args.ld_library_path, - cublas_version=args.cublas_version, - cuda_compute_capabilities=args.cuda_compute_capabilities, - cuda_toolkit_path=args.cuda_toolkit_path, + cuda_version=args.cuda_version, cudnn_version=args.cudnn_version, - nccl_version=args.nccl_version, + cuda_compute_capabilities=args.cuda_compute_capabilities, ) ) diff --git a/third_party/xla/build_tools/configure/configure_test.py b/third_party/xla/build_tools/configure/configure_test.py index c952c8f9241f4f..8a1ca1ab3c699f 100644 --- a/third_party/xla/build_tools/configure/configure_test.py +++ b/third_party/xla/build_tools/configure/configure_test.py @@ -32,12 +32,10 @@ # CUDA specific paths and versions _CUDA_SPECIFIC_PATHS_AND_VERSIONS = { - "cublas_version": "12.3", - "cuda_toolkit_path": "/usr/local/cuda-12.2", + "cuda_version": "12.3", "cuda_compute_capabilities": ["7.5"], "cudnn_version": "8", "ld_library_path": "/usr/local/nvidia/lib:/usr/local/nvidia/lib64", - "nccl_version": "2", } @@ -75,7 +73,6 @@ def test_clang_bazelrc(self): compiler_options=list(_COMPILER_OPTIONS), cuda_compiler=CudaCompiler.NVCC, using_nccl=False, - using_tensorrt=False, ) bazelrc_lines = config.to_bazelrc_lines( @@ -97,7 +94,6 @@ def test_gcc_bazelrc(self): compiler_options=list(_COMPILER_OPTIONS), cuda_compiler=CudaCompiler.NVCC, using_nccl=False, - using_tensorrt=False, ) bazelrc_lines = config.to_bazelrc_lines( @@ -118,7 +114,6 @@ def test_cuda_clang_bazelrc(self): compiler_options=list(_COMPILER_OPTIONS), cuda_compiler=CudaCompiler.CLANG, using_nccl=False, - using_tensorrt=False, ) bazelrc_lines = config.to_bazelrc_lines( @@ -140,7 +135,6 @@ def test_nvcc_clang_bazelrc(self): compiler_options=list(_COMPILER_OPTIONS), cuda_compiler=CudaCompiler.NVCC, using_nccl=False, - using_tensorrt=False, ) bazelrc_lines = config.to_bazelrc_lines( @@ -162,7 +156,6 @@ def test_nvcc_gcc_bazelrc(self): compiler_options=list(_COMPILER_OPTIONS), cuda_compiler=CudaCompiler.NVCC, using_nccl=False, - using_tensorrt=False, ) bazelrc_lines = config.to_bazelrc_lines( diff --git a/third_party/xla/build_tools/configure/testdata/cuda_clang.bazelrc b/third_party/xla/build_tools/configure/testdata/cuda_clang.bazelrc index b998cf06935f33..62c5224a98dd19 100644 --- a/third_party/xla/build_tools/configure/testdata/cuda_clang.bazelrc +++ b/third_party/xla/build_tools/configure/testdata/cuda_clang.bazelrc @@ -3,11 +3,9 @@ build --repo_env CC=/usr/lib/llvm-17/bin/clang build --repo_env 
BAZEL_COMPILER=/usr/lib/llvm-17/bin/clang build --config cuda_clang build --action_env CLANG_CUDA_COMPILER_PATH=/usr/lib/llvm-17/bin/clang -build --action_env CUDA_TOOLKIT_PATH=/usr/local/cuda-12.2 -build --action_env TF_CUBLAS_VERSION=12.3 +build --action_env TF_CUDA_VERSION=12.3 build --action_env TF_CUDA_COMPUTE_CAPABILITIES=7.5 build --action_env TF_CUDNN_VERSION=8 -build --repo_env TF_NEED_TENSORRT=0 build --config nonccl build --action_env LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64 build --action_env PYTHON_BIN_PATH=/usr/bin/python3 diff --git a/third_party/xla/build_tools/configure/testdata/nvcc_clang.bazelrc b/third_party/xla/build_tools/configure/testdata/nvcc_clang.bazelrc index 912dc50faff4c1..ab3d220cdea804 100644 --- a/third_party/xla/build_tools/configure/testdata/nvcc_clang.bazelrc +++ b/third_party/xla/build_tools/configure/testdata/nvcc_clang.bazelrc @@ -3,11 +3,9 @@ build --repo_env CC=/usr/lib/llvm-17/bin/clang build --repo_env BAZEL_COMPILER=/usr/lib/llvm-17/bin/clang build --config nvcc_clang build --action_env CLANG_CUDA_COMPILER_PATH=/usr/lib/llvm-17/bin/clang -build --action_env CUDA_TOOLKIT_PATH=/usr/local/cuda-12.2 -build --action_env TF_CUBLAS_VERSION=12.3 +build --action_env TF_CUDA_VERSION=12.3 build --action_env TF_CUDA_COMPUTE_CAPABILITIES=7.5 build --action_env TF_CUDNN_VERSION=8 -build --repo_env TF_NEED_TENSORRT=0 build --config nonccl build --action_env LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64 build --action_env PYTHON_BIN_PATH=/usr/bin/python3 diff --git a/third_party/xla/build_tools/configure/testdata/nvcc_gcc.bazelrc b/third_party/xla/build_tools/configure/testdata/nvcc_gcc.bazelrc index 863209697362de..e27b41ffc01a99 100644 --- a/third_party/xla/build_tools/configure/testdata/nvcc_gcc.bazelrc +++ b/third_party/xla/build_tools/configure/testdata/nvcc_gcc.bazelrc @@ -1,10 +1,8 @@ build --action_env GCC_HOST_COMPILER_PATH=/usr/bin/gcc build --config cuda -build --action_env CUDA_TOOLKIT_PATH=/usr/local/cuda-12.2 -build --action_env TF_CUBLAS_VERSION=12.3 +build --action_env TF_CUDA_VERSION=12.3 build --action_env TF_CUDA_COMPUTE_CAPABILITIES=7.5 build --action_env TF_CUDNN_VERSION=8 -build --repo_env TF_NEED_TENSORRT=0 build --config nonccl build --action_env LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64 build --action_env PYTHON_BIN_PATH=/usr/bin/python3 diff --git a/third_party/xla/opensource_only.files b/third_party/xla/opensource_only.files index 7655cabdafeb6b..3a7d2c1499c8b2 100644 --- a/third_party/xla/opensource_only.files +++ b/third_party/xla/opensource_only.files @@ -9,6 +9,8 @@ third_party/BUILD: third_party/__init__:.py third_party/compute_library/BUILD: third_party/compute_library/build_defs.bzl: +third_party/cuda_redist_json_repo.bzl: +third_party/cuda_repo.bzl: third_party/implib_so/BUILD: third_party/implib_so/get_symbols.py: third_party/implib_so/make_stub.py: diff --git a/third_party/xla/third_party/cuda_redist_json_repo.bzl b/third_party/xla/third_party/cuda_redist_json_repo.bzl new file mode 100644 index 00000000000000..76941dd74c9488 --- /dev/null +++ b/third_party/xla/third_party/cuda_redist_json_repo.bzl @@ -0,0 +1,110 @@ +# Copyright 2024 The TensorFlow Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utilities for defining CUDA and cuDNN JSON files with distributives versions.""" + +load("//third_party:repo.bzl", "tf_mirror_urls") + +_DEFAULT_CUDA_VERSION = "12.3.2" +_DEFAULT_CUDNN_VERSION = "8.9.7.29" + +def _get_env_var(ctx, name): + if name in ctx.os.environ: + return ctx.os.environ[name] + else: + return None + +def _cuda_redist_json_impl(repository_ctx): + cuda_version = _get_env_var(repository_ctx, "TF_CUDA_VERSION") + cudnn_version = _get_env_var(repository_ctx, "TF_CUDNN_VERSION") + supported_cuda_versions = repository_ctx.attr.cuda_json_dict.keys() + if cuda_version and (cuda_version not in supported_cuda_versions): + if cuda_version in ["12", "12.3"]: + cuda_version = _DEFAULT_CUDA_VERSION + else: + fail( + ("The supported CUDA versions are {supported_versions}." + + " Please provide a supported version in TF_CUDA_VERSION" + + " environment variable or add JSON URL for" + + " CUDA version={version}.") + .format( + supported_versions = supported_cuda_versions, + version = cuda_version, + ), + ) + supported_cudnn_versions = repository_ctx.attr.cudnn_json_dict.keys() + if cudnn_version and (cudnn_version not in supported_cudnn_versions): + if cudnn_version in ["8", "8.9"]: + cudnn_version = _DEFAULT_CUDNN_VERSION + else: + fail( + ("The supported CUDNN versions are {supported_versions}." + + " Please provide a supported version in TF_CUDNN_VERSION" + + " environment variable or add JSON URL for" + + " CUDNN version={version}.") + .format( + supported_versions = supported_cudnn_versions, + version = cudnn_version, + ), + ) + cuda_distributives = "{}" + cudnn_distributives = "{}" + if cuda_version: + (url, sha256) = repository_ctx.attr.cuda_json_dict[cuda_version] + json_file_name = "redistrib_cuda_%s.json" % cuda_version + repository_ctx.download( + url = tf_mirror_urls(url), + sha256 = sha256, + output = json_file_name, + ) + cuda_distributives = repository_ctx.read(repository_ctx.path(json_file_name)) + if cudnn_version: + (url, sha256) = repository_ctx.attr.cudnn_json_dict[cudnn_version] + json_file_name = "redistrib_cudnn_%s.json" % cudnn_version + repository_ctx.download( + url = tf_mirror_urls(url), + sha256 = sha256, + output = json_file_name, + ) + cudnn_distributives = repository_ctx.read(repository_ctx.path(json_file_name)) + + repository_ctx.file( + "build_defs.bzl", + """def get_cuda_distributives(): + return {cuda_distributives} + +def get_cudnn_distributives(): + return {cudnn_distributives} +""".format(cuda_distributives = cuda_distributives, cudnn_distributives = cudnn_distributives), + ) + repository_ctx.file( + "BUILD", + "", + ) + +_cuda_redist_json = repository_rule( + implementation = _cuda_redist_json_impl, + attrs = { + "cuda_json_dict": attr.string_list_dict(mandatory = True), + "cudnn_json_dict": attr.string_list_dict(mandatory = True), + }, + environ = ["TF_CUDA_VERSION", "TF_CUDNN_VERSION"], +) + +def cuda_redist_json(name, cuda_json_dict, cudnn_json_dict): + _cuda_redist_json( + name = name, + cuda_json_dict = cuda_json_dict, + cudnn_json_dict = cudnn_json_dict, + ) diff --git a/third_party/xla/third_party/cuda_repo.bzl 
b/third_party/xla/third_party/cuda_repo.bzl new file mode 100644 index 00000000000000..0dcda26cba3a23 --- /dev/null +++ b/third_party/xla/third_party/cuda_repo.bzl @@ -0,0 +1,327 @@ +# Copyright 2024 The TensorFlow Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utilities for defining Google ML CUDA dependencies.""" + +load("@cuda_redist_json//:build_defs.bzl", "get_cuda_distributives", "get_cudnn_distributives") +load("//third_party:repo.bzl", "tf_mirror_urls") + +_DEFAULT_CUDA_VERSION = "12.3.2" +_OS_ARCH_DICT = { + "amd64": "x86_64-unknown-linux-gnu", + "aarch64": "aarch64-unknown-linux-gnu", +} +_REDIST_ARCH_DICT = { + "linux-x86_64": "x86_64-unknown-linux-gnu", + "linux-sbsa": "aarch64-unknown-linux-gnu", +} + +_CUDA_DIST_PATH_PREFIX = "https://developer.download.nvidia.com/compute/cuda/redist/" +_CUDNN_DIST_PATH_PREFIX = "https://developer.download.nvidia.com/compute/cudnn/redist/" + +def _get_env_var(ctx, name): + if name in ctx.os.environ: + return ctx.os.environ[name] + else: + return None + +def _get_archive_name(url, archive_suffix = ".tar.xz"): + last_slash_index = url.rfind("/") + return url[last_slash_index + 1:-len(archive_suffix)] + +def _cuda_http_archive_impl(repository_ctx): + cuda_or_cudnn_version = None + dist_version = "" + cuda_version = _get_env_var(repository_ctx, "TF_CUDA_VERSION") + cudnn_version = _get_env_var(repository_ctx, "TF_CUDNN_VERSION") + if repository_ctx.attr.is_cudnn_dist: + cuda_or_cudnn_version = cudnn_version + else: + cuda_or_cudnn_version = cuda_version + if cuda_or_cudnn_version: + # Download archive only when GPU config is used. 
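+        # relative_url_dict is keyed either by bare architecture (e.g.
+        # "x86_64-unknown-linux-gnu") or, when NVIDIA ships one archive per
+        # CUDA major release (as for cuDNN), by "cuda<major>_<arch>"; the
+        # lookup below handles both layouts.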
+ dist_version = repository_ctx.attr.dist_version + arch = _OS_ARCH_DICT[repository_ctx.os.arch] + if arch not in repository_ctx.attr.relative_url_dict.keys(): + (relative_url, sha256) = repository_ctx.attr.relative_url_dict["cuda{version}_{arch}" \ + .format( + version = cuda_version.split(".")[0], + arch = arch, + )] + else: + (relative_url, sha256) = repository_ctx.attr.relative_url_dict[arch] + url = (repository_ctx.attr.cudnn_dist_path_prefix if repository_ctx.attr.is_cudnn_dist else repository_ctx.attr.cuda_dist_path_prefix) + relative_url + + archive_name = _get_archive_name(url) + + repository_ctx.download( + url = tf_mirror_urls(url), + output = archive_name + repository_ctx.attr.extension, + sha256 = sha256, + ) + repository_ctx.extract( + archive = archive_name + repository_ctx.attr.extension, + stripPrefix = repository_ctx.attr.override_strip_prefix if repository_ctx.attr.override_strip_prefix else archive_name, + ) + if repository_ctx.attr.build_template: + version = dist_version.split(".")[0] if dist_version else "" + repository_ctx.file("version.txt", version) + repository_ctx.template( + "BUILD", + repository_ctx.attr.build_template, + {"%{version}": version}, + ) + else: + repository_ctx.file( + "BUILD", + repository_ctx.read(repository_ctx.attr.build_file), + ) + +_cuda_http_archive = repository_rule( + implementation = _cuda_http_archive_impl, + attrs = { + "dist_version": attr.string(mandatory = True), + "relative_url_dict": attr.string_list_dict(mandatory = True), + "build_template": attr.label(), + "build_file": attr.label(), + "is_cudnn_dist": attr.bool(), + "override_strip_prefix": attr.string(), + "cudnn_dist_path_prefix": attr.string(default = _CUDNN_DIST_PATH_PREFIX), + "cuda_dist_path_prefix": attr.string(default = _CUDA_DIST_PATH_PREFIX), + "extension": attr.string(default = ".tar.xz"), + }, + environ = ["TF_CUDA_VERSION", "TF_CUDNN_VERSION"], +) + +def cuda_http_archive(name, dist_version, relative_url_dict, **kwargs): + _cuda_http_archive( + name = name, + dist_version = dist_version, + relative_url_dict = relative_url_dict, + **kwargs + ) + +def _cuda_wheel_impl(repository_ctx): + cuda_version = _get_env_var(repository_ctx, "TF_CUDA_VERSION") + if cuda_version in ["12", "12.3"]: + cuda_version = _DEFAULT_CUDA_VERSION + if cuda_version: + # Download archive only when GPU config is used. + arch = _OS_ARCH_DICT[repository_ctx.os.arch] + dict_key = "{cuda_version}-{arch}".format( + cuda_version = cuda_version, + arch = arch, + ) + supported_versions = repository_ctx.attr.url_dict.keys() + if dict_key not in supported_versions: + fail( + ("The supported NCCL versions are {supported_versions}." 
+ + " Please provide a supported CUDA version in TF_CUDA_VERSION" + + " environment variable or add NCCL distributive for" + + " CUDA version={version}, OS={arch}.") + .format( + supported_versions = supported_versions, + version = cuda_version, + arch = arch, + ), + ) + sha256 = repository_ctx.attr.sha256_dict[dict_key] + url = repository_ctx.attr.url_dict[dict_key] + + archive_name = _get_archive_name(url, archive_suffix = ".whl") + + repository_ctx.download( + url = tf_mirror_urls(url), + output = archive_name + repository_ctx.attr.extension, + sha256 = sha256, + ) + repository_ctx.extract( + archive = archive_name + repository_ctx.attr.extension, + stripPrefix = repository_ctx.attr.strip_prefix, + ) + + repository_ctx.file( + "BUILD", + repository_ctx.read(repository_ctx.attr.build_file), + ) + +_cuda_wheel = repository_rule( + implementation = _cuda_wheel_impl, + attrs = { + "sha256_dict": attr.string_dict(mandatory = True), + "url_dict": attr.string_dict(mandatory = True), + "build_file": attr.label(), + "strip_prefix": attr.string(), + "extension": attr.string(default = ".zip"), + }, + environ = ["TF_CUDA_VERSION"], +) + +def cuda_wheel(name, sha256_dict, url_dict, **kwargs): + _cuda_wheel( + name = name, + sha256_dict = sha256_dict, + url_dict = url_dict, + **kwargs + ) + +def _get_relative_url_dict(dist_info): + relative_url_dict = {} + for arch in _REDIST_ARCH_DICT.keys(): + # CUDNN JSON might contain paths for each CUDA version. + if "relative_path" not in dist_info[arch]: + for cuda_version, data in dist_info[arch].items(): + relative_url_dict["{cuda_version}_{arch}" \ + .format( + cuda_version = cuda_version, + arch = _REDIST_ARCH_DICT[arch], + )] = [data["relative_path"], data["sha256"]] + else: + relative_url_dict[_REDIST_ARCH_DICT[arch]] = [ + dist_info[arch]["relative_path"], + dist_info[arch]["sha256"], + ] + return relative_url_dict + +def _get_cuda_archive( + repo_name, + dist_dict, + dist_name, + build_file = None, + build_template = None, + is_cudnn_dist = False): + if dist_name in dist_dict.keys(): + return cuda_http_archive( + name = repo_name, + dist_version = dist_dict[dist_name]["version"], + build_file = Label(build_file) if build_file else None, + build_template = Label(build_template) if build_template else None, + relative_url_dict = _get_relative_url_dict(dist_dict[dist_name]), + is_cudnn_dist = is_cudnn_dist, + ) + else: + return cuda_http_archive( + name = repo_name, + dist_version = "", + build_file = Label(build_file) if build_file else None, + build_template = Label(build_template) if build_template else None, + relative_url_dict = {"": []}, + is_cudnn_dist = is_cudnn_dist, + ) + +def cuda_distributives(cuda_nccl_wheel_dict): + nccl_artifacts_dict = {"sha256_dict": {}, "url_dict": {}} + for cuda_version, nccl_wheel_info in cuda_nccl_wheel_dict.items(): + for arch in _OS_ARCH_DICT.values(): + if arch in nccl_wheel_info.keys(): + cuda_version_to_arch_key = "%s-%s" % (cuda_version, arch) + nccl_artifacts_dict["sha256_dict"][cuda_version_to_arch_key] = nccl_wheel_info[arch]["sha256"] + nccl_artifacts_dict["url_dict"][cuda_version_to_arch_key] = nccl_wheel_info[arch]["url"] + + cuda_wheel( + name = "cuda_nccl", + sha256_dict = nccl_artifacts_dict["sha256_dict"], + url_dict = nccl_artifacts_dict["url_dict"], + build_file = Label("//third_party/gpus/cuda:cuda_nccl.BUILD"), + strip_prefix = "nvidia/nccl", + ) + + cuda_distributives = get_cuda_distributives() + cudnn_distributives = get_cudnn_distributives() + + _get_cuda_archive( + repo_name = "cuda_cccl", + 
dist_dict = cuda_distributives, + dist_name = "cuda_cccl", + build_file = "//third_party/gpus/cuda:cuda_cccl.BUILD", + ) + _get_cuda_archive( + repo_name = "cuda_cublas", + dist_dict = cuda_distributives, + dist_name = "libcublas", + build_template = "//third_party/gpus/cuda:cuda_cublas.BUILD.tpl", + ) + _get_cuda_archive( + repo_name = "cuda_cudart", + dist_dict = cuda_distributives, + dist_name = "cuda_cudart", + build_template = "//third_party/gpus/cuda:cuda_cudart.BUILD.tpl", + ) + _get_cuda_archive( + repo_name = "cuda_cudnn", + dist_dict = cudnn_distributives, + dist_name = "cudnn", + build_template = "//third_party/gpus/cuda:cuda_cudnn.BUILD.tpl", + is_cudnn_dist = True, + ) + _get_cuda_archive( + repo_name = "cuda_cufft", + dist_dict = cuda_distributives, + dist_name = "libcufft", + build_template = "//third_party/gpus/cuda:cuda_cufft.BUILD.tpl", + ) + _get_cuda_archive( + repo_name = "cuda_cupti", + dist_dict = cuda_distributives, + dist_name = "cuda_cupti", + build_template = "//third_party/gpus/cuda:cuda_cupti.BUILD.tpl", + ) + _get_cuda_archive( + repo_name = "cuda_curand", + dist_dict = cuda_distributives, + dist_name = "libcurand", + build_template = "//third_party/gpus/cuda:cuda_curand.BUILD.tpl", + ) + _get_cuda_archive( + repo_name = "cuda_cusolver", + dist_dict = cuda_distributives, + dist_name = "libcusolver", + build_template = "//third_party/gpus/cuda:cuda_cusolver.BUILD.tpl", + ) + _get_cuda_archive( + repo_name = "cuda_cusparse", + dist_dict = cuda_distributives, + dist_name = "libcusparse", + build_template = "//third_party/gpus/cuda:cuda_cusparse.BUILD.tpl", + ) + _get_cuda_archive( + repo_name = "cuda_nvcc", + dist_dict = cuda_distributives, + dist_name = "cuda_nvcc", + build_file = "//third_party/gpus/cuda:cuda_nvcc.BUILD", + ) + _get_cuda_archive( + repo_name = "cuda_nvjitlink", + dist_dict = cuda_distributives, + dist_name = "libnvjitlink", + build_template = "//third_party/gpus/cuda:cuda_nvjitlink.BUILD.tpl", + ) + _get_cuda_archive( + repo_name = "cuda_nvml", + dist_dict = cuda_distributives, + dist_name = "cuda_nvml_dev", + build_file = "//third_party/gpus/cuda:cuda_nvml.BUILD", + ) + _get_cuda_archive( + repo_name = "cuda_nvprune", + dist_dict = cuda_distributives, + dist_name = "cuda_nvprune", + build_file = "//third_party/gpus/cuda:cuda_nvprune.BUILD", + ) + _get_cuda_archive( + repo_name = "cuda_nvtx", + dist_dict = cuda_distributives, + dist_name = "cuda_nvtx", + build_file = "//third_party/gpus/cuda:cuda_nvtx.BUILD", + ) diff --git a/third_party/xla/third_party/tsl/.bazelrc b/third_party/xla/third_party/tsl/.bazelrc index 02dec0349c4741..c17ae4494dc99c 100644 --- a/third_party/xla/third_party/tsl/.bazelrc +++ b/third_party/xla/third_party/tsl/.bazelrc @@ -226,12 +226,13 @@ build:cuda --repo_env TF_NEED_CUDA=1 build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain build:cuda --@local_config_cuda//:enable_cuda +build:no_cuda_libs --@local_config_cuda//cuda:include_hermetic_cuda_libs=false + # CUDA: This config refers to building CUDA op kernels with clang. build:cuda_clang --config=cuda -# Enable TensorRT optimizations https://developer.nvidia.com/tensorrt -build:cuda_clang --config=tensorrt build:cuda_clang --action_env=TF_CUDA_CLANG="1" build:cuda_clang --@local_config_cuda//:cuda_compiler=clang +build:cuda_clang --copt=-Qunused-arguments # Select supported compute capabilities (supported graphics cards). # This is the same as the official TensorFlow builds. 
# See https://developer.nvidia.com/cuda-gpus#compute @@ -244,12 +245,10 @@ build:cuda_clang --repo_env=TF_CUDA_COMPUTE_CAPABILITIES="sm_60,sm_70,sm_80,sm_8 # Set up compilation CUDA version and paths and use the CUDA Clang toolchain. build:cuda_clang_official --config=cuda_clang -build:cuda_clang_official --action_env=TF_CUDA_VERSION="12" -build:cuda_clang_official --action_env=TF_CUDNN_VERSION="8" -build:cuda_clang_official --action_env=CUDA_TOOLKIT_PATH="/usr/local/cuda-12.3" +build:cuda_clang_official --action_env=TF_CUDA_VERSION="12.3" +build:cuda_clang_official --action_env=TF_CUDNN_VERSION="8.9" build:cuda_clang_official --action_env=GCC_HOST_COMPILER_PATH="/dt9/usr/bin/gcc" build:cuda_clang_official --action_env=CLANG_CUDA_COMPILER_PATH="/usr/lib/llvm-17/bin/clang" -build:cuda_clang_official --action_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64" build:cuda_clang_official --crosstool_top="@sigbuild-r2.17-clang_config_cuda//crosstool:toolchain" # Build with nvcc for CUDA and clang for host @@ -545,10 +544,6 @@ build:rbe_linux_cuda --config=cuda_clang_official build:rbe_linux_cuda --config=rbe_linux_cpu # For Remote build execution -- GPU configuration build:rbe_linux_cuda --repo_env=REMOTE_GPU_TESTING=1 -build:rbe_linux_cuda --repo_env=TF_CUDA_CONFIG_REPO="@sigbuild-r2.17-clang_config_cuda" -build:rbe_linux_cuda --repo_env=TF_TENSORRT_CONFIG_REPO="@sigbuild-r2.17-clang_config_tensorrt" -build:rbe_linux_cuda --repo_env=TF_NCCL_CONFIG_REPO="@sigbuild-r2.17-clang_config_nccl" -test:rbe_linux_cuda --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64" build:rbe_linux_cuda_nvcc --config=rbe_linux_cuda build:rbe_linux_cuda_nvcc --config=nvcc_clang @@ -633,7 +628,6 @@ build:release_cpu_linux_base --repo_env=BAZEL_COMPILER="/usr/lib/llvm-17/bin/cla # Test-related settings below this point. test:release_linux_base --build_tests_only --keep_going --test_output=errors --verbose_failures=true test:release_linux_base --local_test_jobs=HOST_CPUS -test:release_linux_base --test_env=LD_LIBRARY_PATH # Give only the list of failed tests at the end of the log test:release_linux_base --test_summary=short @@ -645,7 +639,6 @@ build:release_gpu_linux --config=release_cpu_linux # Set up compilation CUDA version and paths and use the CUDA Clang toolchain. # Note that linux cpu and cuda builds share the same toolchain now. 
build:release_gpu_linux --config=cuda_clang_official -test:release_gpu_linux --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64" # Local test jobs has to be 4 because parallel_gpu_execute is fragile, I think test:release_gpu_linux --test_timeout=300,450,1200,3600 --local_test_jobs=4 --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute @@ -674,11 +667,8 @@ test:unsupported_cpu_linux --config=release_base build:unsupported_gpu_linux --config=cuda build:unsupported_gpu_linux --config=unsupported_cpu_linux build:unsupported_gpu_linux --action_env=TF_CUDA_VERSION="11" -build:unsupported_gpu_linux --action_env=TF_CUDNN_VERSION="8" +build:unsupported_gpu_linux --action_env=TF_CUDNN_VERSION="8.6" build:unsupported_gpu_linux --repo_env=TF_CUDA_COMPUTE_CAPABILITIES="sm_35,sm_50,sm_60,sm_70,sm_75,compute_80" -build:unsupported_gpu_linux --config=tensorrt -build:unsupported_gpu_linux --action_env=CUDA_TOOLKIT_PATH="/usr/local/cuda-11.2" -build:unsupported_gpu_linux --action_env=LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda-11.1/lib64:/usr/local/tensorrt/lib" build:unsupported_gpu_linux --action_env=GCC_HOST_COMPILER_PATH="/dt9/usr/bin/gcc" build:unsupported_gpu_linux --crosstool_top=@ubuntu20.04-gcc9_manylinux2014-cuda11.2-cudnn8.1-tensorrt7.2_config_cuda//crosstool:toolchain diff --git a/third_party/xla/third_party/tsl/opensource_only.files b/third_party/xla/third_party/tsl/opensource_only.files index 3f2bcf8431edc0..a789348e6f50c7 100644 --- a/third_party/xla/third_party/tsl/opensource_only.files +++ b/third_party/xla/third_party/tsl/opensource_only.files @@ -6,6 +6,8 @@ third_party/clang_toolchain/cc_configure_clang.bzl: third_party/clang_toolchain/download_clang.bzl: third_party/compute_library/BUILD: third_party/compute_library/build_defs.bzl: +third_party/cuda_redist_json_repo.bzl: +third_party/cuda_repo.bzl: third_party/curl.BUILD: third_party/cython.BUILD: third_party/ducc/BUILD: @@ -21,6 +23,7 @@ third_party/git/BUILD.tpl: third_party/git/BUILD: third_party/git/git_configure.bzl: third_party/gpus/BUILD: +third_party/gpus/compiler_common_tools.bzl: third_party/gpus/crosstool/BUILD.rocm.tpl: third_party/gpus/crosstool/BUILD.sycl.tpl: third_party/gpus/crosstool/BUILD.tpl: @@ -31,15 +34,32 @@ third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_rocm.tpl: third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_sycl.tpl: third_party/gpus/crosstool/sycl_cc_toolchain_config.bzl.tpl: third_party/gpus/crosstool/windows/msvc_wrapper_for_nvcc.py.tpl: +third_party/gpus/cuda/BUILD.hermetic.tpl: third_party/gpus/cuda/BUILD.tpl: third_party/gpus/cuda/BUILD.windows.tpl: third_party/gpus/cuda/BUILD: third_party/gpus/cuda/LICENSE: third_party/gpus/cuda/build_defs.bzl.tpl: +third_party/gpus/cuda/cuda_cccl.BUILD: third_party/gpus/cuda/cuda_config.h.tpl: third_party/gpus/cuda/cuda_config.py.tpl: +third_party/gpus/cuda/cuda_cublas.BUILD.tpl: +third_party/gpus/cuda/cuda_cudart.BUILD.tpl: +third_party/gpus/cuda/cuda_cudnn.BUILD.tpl: +third_party/gpus/cuda/cuda_cufft.BUILD.tpl: +third_party/gpus/cuda/cuda_cupti.BUILD.tpl: +third_party/gpus/cuda/cuda_curand.BUILD.tpl: +third_party/gpus/cuda/cuda_cusolver.BUILD.tpl: +third_party/gpus/cuda/cuda_cusparse.BUILD.tpl: +third_party/gpus/cuda/cuda_nccl.BUILD: +third_party/gpus/cuda/cuda_nvcc.BUILD: +third_party/gpus/cuda/cuda_nvjitlink.BUILD.tpl: +third_party/gpus/cuda/cuda_nvml.BUILD: +third_party/gpus/cuda/cuda_nvprune.BUILD: 
+third_party/gpus/cuda/cuda_nvtx.BUILD: third_party/gpus/cuda_configure.bzl: third_party/gpus/find_cuda_config:.py +third_party/gpus/hermetic_cuda_configure.bzl: third_party/gpus/rocm/BUILD.tpl: third_party/gpus/rocm/BUILD: third_party/gpus/rocm/build_defs.bzl.tpl: @@ -68,6 +88,7 @@ third_party/nccl/archive.BUILD: third_party/nccl/archive.patch: third_party/nccl/build_defs.bzl.tpl: third_party/nccl/generated_names.bzl.tpl: +third_party/nccl/hermetic_nccl_configure.bzl: third_party/nccl/nccl_configure.bzl: third_party/nccl/system.BUILD.tpl: third_party/nvtx/BUILD: diff --git a/third_party/xla/third_party/tsl/third_party/cuda_redist_json_repo.bzl b/third_party/xla/third_party/tsl/third_party/cuda_redist_json_repo.bzl new file mode 100644 index 00000000000000..76941dd74c9488 --- /dev/null +++ b/third_party/xla/third_party/tsl/third_party/cuda_redist_json_repo.bzl @@ -0,0 +1,110 @@ +# Copyright 2024 The TensorFlow Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utilities for defining CUDA and cuDNN JSON files with distributives versions.""" + +load("//third_party:repo.bzl", "tf_mirror_urls") + +_DEFAULT_CUDA_VERSION = "12.3.2" +_DEFAULT_CUDNN_VERSION = "8.9.7.29" + +def _get_env_var(ctx, name): + if name in ctx.os.environ: + return ctx.os.environ[name] + else: + return None + +def _cuda_redist_json_impl(repository_ctx): + cuda_version = _get_env_var(repository_ctx, "TF_CUDA_VERSION") + cudnn_version = _get_env_var(repository_ctx, "TF_CUDNN_VERSION") + supported_cuda_versions = repository_ctx.attr.cuda_json_dict.keys() + if cuda_version and (cuda_version not in supported_cuda_versions): + if cuda_version in ["12", "12.3"]: + cuda_version = _DEFAULT_CUDA_VERSION + else: + fail( + ("The supported CUDA versions are {supported_versions}." + + " Please provide a supported version in TF_CUDA_VERSION" + + " environment variable or add JSON URL for" + + " CUDA version={version}.") + .format( + supported_versions = supported_cuda_versions, + version = cuda_version, + ), + ) + supported_cudnn_versions = repository_ctx.attr.cudnn_json_dict.keys() + if cudnn_version and (cudnn_version not in supported_cudnn_versions): + if cudnn_version in ["8", "8.9"]: + cudnn_version = _DEFAULT_CUDNN_VERSION + else: + fail( + ("The supported CUDNN versions are {supported_versions}." 
+ + " Please provide a supported version in TF_CUDNN_VERSION" + + " environment variable or add JSON URL for" + + " CUDNN version={version}.") + .format( + supported_versions = supported_cudnn_versions, + version = cudnn_version, + ), + ) + cuda_distributives = "{}" + cudnn_distributives = "{}" + if cuda_version: + (url, sha256) = repository_ctx.attr.cuda_json_dict[cuda_version] + json_file_name = "redistrib_cuda_%s.json" % cuda_version + repository_ctx.download( + url = tf_mirror_urls(url), + sha256 = sha256, + output = json_file_name, + ) + cuda_distributives = repository_ctx.read(repository_ctx.path(json_file_name)) + if cudnn_version: + (url, sha256) = repository_ctx.attr.cudnn_json_dict[cudnn_version] + json_file_name = "redistrib_cudnn_%s.json" % cudnn_version + repository_ctx.download( + url = tf_mirror_urls(url), + sha256 = sha256, + output = json_file_name, + ) + cudnn_distributives = repository_ctx.read(repository_ctx.path(json_file_name)) + + repository_ctx.file( + "build_defs.bzl", + """def get_cuda_distributives(): + return {cuda_distributives} + +def get_cudnn_distributives(): + return {cudnn_distributives} +""".format(cuda_distributives = cuda_distributives, cudnn_distributives = cudnn_distributives), + ) + repository_ctx.file( + "BUILD", + "", + ) + +_cuda_redist_json = repository_rule( + implementation = _cuda_redist_json_impl, + attrs = { + "cuda_json_dict": attr.string_list_dict(mandatory = True), + "cudnn_json_dict": attr.string_list_dict(mandatory = True), + }, + environ = ["TF_CUDA_VERSION", "TF_CUDNN_VERSION"], +) + +def cuda_redist_json(name, cuda_json_dict, cudnn_json_dict): + _cuda_redist_json( + name = name, + cuda_json_dict = cuda_json_dict, + cudnn_json_dict = cudnn_json_dict, + ) diff --git a/third_party/xla/third_party/tsl/third_party/cuda_repo.bzl b/third_party/xla/third_party/tsl/third_party/cuda_repo.bzl new file mode 100644 index 00000000000000..0dcda26cba3a23 --- /dev/null +++ b/third_party/xla/third_party/tsl/third_party/cuda_repo.bzl @@ -0,0 +1,327 @@ +# Copyright 2024 The TensorFlow Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Utilities for defining Google ML CUDA dependencies.""" + +load("@cuda_redist_json//:build_defs.bzl", "get_cuda_distributives", "get_cudnn_distributives") +load("//third_party:repo.bzl", "tf_mirror_urls") + +_DEFAULT_CUDA_VERSION = "12.3.2" +_OS_ARCH_DICT = { + "amd64": "x86_64-unknown-linux-gnu", + "aarch64": "aarch64-unknown-linux-gnu", +} +_REDIST_ARCH_DICT = { + "linux-x86_64": "x86_64-unknown-linux-gnu", + "linux-sbsa": "aarch64-unknown-linux-gnu", +} + +_CUDA_DIST_PATH_PREFIX = "https://developer.download.nvidia.com/compute/cuda/redist/" +_CUDNN_DIST_PATH_PREFIX = "https://developer.download.nvidia.com/compute/cudnn/redist/" + +def _get_env_var(ctx, name): + if name in ctx.os.environ: + return ctx.os.environ[name] + else: + return None + +def _get_archive_name(url, archive_suffix = ".tar.xz"): + last_slash_index = url.rfind("/") + return url[last_slash_index + 1:-len(archive_suffix)] + +def _cuda_http_archive_impl(repository_ctx): + cuda_or_cudnn_version = None + dist_version = "" + cuda_version = _get_env_var(repository_ctx, "TF_CUDA_VERSION") + cudnn_version = _get_env_var(repository_ctx, "TF_CUDNN_VERSION") + if repository_ctx.attr.is_cudnn_dist: + cuda_or_cudnn_version = cudnn_version + else: + cuda_or_cudnn_version = cuda_version + if cuda_or_cudnn_version: + # Download archive only when GPU config is used. + dist_version = repository_ctx.attr.dist_version + arch = _OS_ARCH_DICT[repository_ctx.os.arch] + if arch not in repository_ctx.attr.relative_url_dict.keys(): + (relative_url, sha256) = repository_ctx.attr.relative_url_dict["cuda{version}_{arch}" \ + .format( + version = cuda_version.split(".")[0], + arch = arch, + )] + else: + (relative_url, sha256) = repository_ctx.attr.relative_url_dict[arch] + url = (repository_ctx.attr.cudnn_dist_path_prefix if repository_ctx.attr.is_cudnn_dist else repository_ctx.attr.cuda_dist_path_prefix) + relative_url + + archive_name = _get_archive_name(url) + + repository_ctx.download( + url = tf_mirror_urls(url), + output = archive_name + repository_ctx.attr.extension, + sha256 = sha256, + ) + repository_ctx.extract( + archive = archive_name + repository_ctx.attr.extension, + stripPrefix = repository_ctx.attr.override_strip_prefix if repository_ctx.attr.override_strip_prefix else archive_name, + ) + if repository_ctx.attr.build_template: + version = dist_version.split(".")[0] if dist_version else "" + repository_ctx.file("version.txt", version) + repository_ctx.template( + "BUILD", + repository_ctx.attr.build_template, + {"%{version}": version}, + ) + else: + repository_ctx.file( + "BUILD", + repository_ctx.read(repository_ctx.attr.build_file), + ) + +_cuda_http_archive = repository_rule( + implementation = _cuda_http_archive_impl, + attrs = { + "dist_version": attr.string(mandatory = True), + "relative_url_dict": attr.string_list_dict(mandatory = True), + "build_template": attr.label(), + "build_file": attr.label(), + "is_cudnn_dist": attr.bool(), + "override_strip_prefix": attr.string(), + "cudnn_dist_path_prefix": attr.string(default = _CUDNN_DIST_PATH_PREFIX), + "cuda_dist_path_prefix": attr.string(default = _CUDA_DIST_PATH_PREFIX), + "extension": attr.string(default = ".tar.xz"), + }, + environ = ["TF_CUDA_VERSION", "TF_CUDNN_VERSION"], +) + +def cuda_http_archive(name, dist_version, relative_url_dict, **kwargs): + _cuda_http_archive( + name = name, + dist_version = dist_version, + relative_url_dict = relative_url_dict, + **kwargs + ) + +def _cuda_wheel_impl(repository_ctx): + cuda_version = _get_env_var(repository_ctx, 
"TF_CUDA_VERSION") + if cuda_version in ["12", "12.3"]: + cuda_version = _DEFAULT_CUDA_VERSION + if cuda_version: + # Download archive only when GPU config is used. + arch = _OS_ARCH_DICT[repository_ctx.os.arch] + dict_key = "{cuda_version}-{arch}".format( + cuda_version = cuda_version, + arch = arch, + ) + supported_versions = repository_ctx.attr.url_dict.keys() + if dict_key not in supported_versions: + fail( + ("The supported NCCL versions are {supported_versions}." + + " Please provide a supported CUDA version in TF_CUDA_VERSION" + + " environment variable or add NCCL distributive for" + + " CUDA version={version}, OS={arch}.") + .format( + supported_versions = supported_versions, + version = cuda_version, + arch = arch, + ), + ) + sha256 = repository_ctx.attr.sha256_dict[dict_key] + url = repository_ctx.attr.url_dict[dict_key] + + archive_name = _get_archive_name(url, archive_suffix = ".whl") + + repository_ctx.download( + url = tf_mirror_urls(url), + output = archive_name + repository_ctx.attr.extension, + sha256 = sha256, + ) + repository_ctx.extract( + archive = archive_name + repository_ctx.attr.extension, + stripPrefix = repository_ctx.attr.strip_prefix, + ) + + repository_ctx.file( + "BUILD", + repository_ctx.read(repository_ctx.attr.build_file), + ) + +_cuda_wheel = repository_rule( + implementation = _cuda_wheel_impl, + attrs = { + "sha256_dict": attr.string_dict(mandatory = True), + "url_dict": attr.string_dict(mandatory = True), + "build_file": attr.label(), + "strip_prefix": attr.string(), + "extension": attr.string(default = ".zip"), + }, + environ = ["TF_CUDA_VERSION"], +) + +def cuda_wheel(name, sha256_dict, url_dict, **kwargs): + _cuda_wheel( + name = name, + sha256_dict = sha256_dict, + url_dict = url_dict, + **kwargs + ) + +def _get_relative_url_dict(dist_info): + relative_url_dict = {} + for arch in _REDIST_ARCH_DICT.keys(): + # CUDNN JSON might contain paths for each CUDA version. 
+ if "relative_path" not in dist_info[arch]: + for cuda_version, data in dist_info[arch].items(): + relative_url_dict["{cuda_version}_{arch}" \ + .format( + cuda_version = cuda_version, + arch = _REDIST_ARCH_DICT[arch], + )] = [data["relative_path"], data["sha256"]] + else: + relative_url_dict[_REDIST_ARCH_DICT[arch]] = [ + dist_info[arch]["relative_path"], + dist_info[arch]["sha256"], + ] + return relative_url_dict + +def _get_cuda_archive( + repo_name, + dist_dict, + dist_name, + build_file = None, + build_template = None, + is_cudnn_dist = False): + if dist_name in dist_dict.keys(): + return cuda_http_archive( + name = repo_name, + dist_version = dist_dict[dist_name]["version"], + build_file = Label(build_file) if build_file else None, + build_template = Label(build_template) if build_template else None, + relative_url_dict = _get_relative_url_dict(dist_dict[dist_name]), + is_cudnn_dist = is_cudnn_dist, + ) + else: + return cuda_http_archive( + name = repo_name, + dist_version = "", + build_file = Label(build_file) if build_file else None, + build_template = Label(build_template) if build_template else None, + relative_url_dict = {"": []}, + is_cudnn_dist = is_cudnn_dist, + ) + +def cuda_distributives(cuda_nccl_wheel_dict): + nccl_artifacts_dict = {"sha256_dict": {}, "url_dict": {}} + for cuda_version, nccl_wheel_info in cuda_nccl_wheel_dict.items(): + for arch in _OS_ARCH_DICT.values(): + if arch in nccl_wheel_info.keys(): + cuda_version_to_arch_key = "%s-%s" % (cuda_version, arch) + nccl_artifacts_dict["sha256_dict"][cuda_version_to_arch_key] = nccl_wheel_info[arch]["sha256"] + nccl_artifacts_dict["url_dict"][cuda_version_to_arch_key] = nccl_wheel_info[arch]["url"] + + cuda_wheel( + name = "cuda_nccl", + sha256_dict = nccl_artifacts_dict["sha256_dict"], + url_dict = nccl_artifacts_dict["url_dict"], + build_file = Label("//third_party/gpus/cuda:cuda_nccl.BUILD"), + strip_prefix = "nvidia/nccl", + ) + + cuda_distributives = get_cuda_distributives() + cudnn_distributives = get_cudnn_distributives() + + _get_cuda_archive( + repo_name = "cuda_cccl", + dist_dict = cuda_distributives, + dist_name = "cuda_cccl", + build_file = "//third_party/gpus/cuda:cuda_cccl.BUILD", + ) + _get_cuda_archive( + repo_name = "cuda_cublas", + dist_dict = cuda_distributives, + dist_name = "libcublas", + build_template = "//third_party/gpus/cuda:cuda_cublas.BUILD.tpl", + ) + _get_cuda_archive( + repo_name = "cuda_cudart", + dist_dict = cuda_distributives, + dist_name = "cuda_cudart", + build_template = "//third_party/gpus/cuda:cuda_cudart.BUILD.tpl", + ) + _get_cuda_archive( + repo_name = "cuda_cudnn", + dist_dict = cudnn_distributives, + dist_name = "cudnn", + build_template = "//third_party/gpus/cuda:cuda_cudnn.BUILD.tpl", + is_cudnn_dist = True, + ) + _get_cuda_archive( + repo_name = "cuda_cufft", + dist_dict = cuda_distributives, + dist_name = "libcufft", + build_template = "//third_party/gpus/cuda:cuda_cufft.BUILD.tpl", + ) + _get_cuda_archive( + repo_name = "cuda_cupti", + dist_dict = cuda_distributives, + dist_name = "cuda_cupti", + build_template = "//third_party/gpus/cuda:cuda_cupti.BUILD.tpl", + ) + _get_cuda_archive( + repo_name = "cuda_curand", + dist_dict = cuda_distributives, + dist_name = "libcurand", + build_template = "//third_party/gpus/cuda:cuda_curand.BUILD.tpl", + ) + _get_cuda_archive( + repo_name = "cuda_cusolver", + dist_dict = cuda_distributives, + dist_name = "libcusolver", + build_template = "//third_party/gpus/cuda:cuda_cusolver.BUILD.tpl", + ) + _get_cuda_archive( + repo_name = 
"cuda_cusparse", + dist_dict = cuda_distributives, + dist_name = "libcusparse", + build_template = "//third_party/gpus/cuda:cuda_cusparse.BUILD.tpl", + ) + _get_cuda_archive( + repo_name = "cuda_nvcc", + dist_dict = cuda_distributives, + dist_name = "cuda_nvcc", + build_file = "//third_party/gpus/cuda:cuda_nvcc.BUILD", + ) + _get_cuda_archive( + repo_name = "cuda_nvjitlink", + dist_dict = cuda_distributives, + dist_name = "libnvjitlink", + build_template = "//third_party/gpus/cuda:cuda_nvjitlink.BUILD.tpl", + ) + _get_cuda_archive( + repo_name = "cuda_nvml", + dist_dict = cuda_distributives, + dist_name = "cuda_nvml_dev", + build_file = "//third_party/gpus/cuda:cuda_nvml.BUILD", + ) + _get_cuda_archive( + repo_name = "cuda_nvprune", + dist_dict = cuda_distributives, + dist_name = "cuda_nvprune", + build_file = "//third_party/gpus/cuda:cuda_nvprune.BUILD", + ) + _get_cuda_archive( + repo_name = "cuda_nvtx", + dist_dict = cuda_distributives, + dist_name = "cuda_nvtx", + build_file = "//third_party/gpus/cuda:cuda_nvtx.BUILD", + ) diff --git a/third_party/xla/third_party/tsl/third_party/gpus/compiler_common_tools.bzl b/third_party/xla/third_party/tsl/third_party/gpus/compiler_common_tools.bzl new file mode 100644 index 00000000000000..bd07f49ec457bb --- /dev/null +++ b/third_party/xla/third_party/tsl/third_party/gpus/compiler_common_tools.bzl @@ -0,0 +1,174 @@ +"""Common compiler functions. """ + +load( + "//third_party/remote_config:common.bzl", + "err_out", + "raw_exec", + "realpath", +) + +def to_list_of_strings(elements): + """Convert the list of ["a", "b", "c"] into '"a", "b", "c"'. + + This is to be used to put a list of strings into the bzl file templates + so it gets interpreted as list of strings in Starlark. + + Args: + elements: list of string elements + + Returns: + single string of elements wrapped in quotes separated by a comma.""" + quoted_strings = ["\"" + element + "\"" for element in elements] + return ", ".join(quoted_strings) + +_INC_DIR_MARKER_BEGIN = "#include <...>" + +# OSX add " (framework directory)" at the end of line, strip it. +_OSX_FRAMEWORK_SUFFIX = " (framework directory)" +_OSX_FRAMEWORK_SUFFIX_LEN = len(_OSX_FRAMEWORK_SUFFIX) + +# TODO(dzc): Once these functions have been factored out of Bazel's +# cc_configure.bzl, load them from @bazel_tools instead. +def _cxx_inc_convert(path): + """Convert path returned by cc -E xc++ in a complete path.""" + path = path.strip() + if path.endswith(_OSX_FRAMEWORK_SUFFIX): + path = path[:-_OSX_FRAMEWORK_SUFFIX_LEN].strip() + return path + +def _normalize_include_path(repository_ctx, path): + """Normalizes include paths before writing them to the crosstool. + + If path points inside the 'crosstool' folder of the repository, a relative + path is returned. + If path points outside the 'crosstool' folder, an absolute path is returned. + """ + path = str(repository_ctx.path(path)) + crosstool_folder = str(repository_ctx.path(".").get_child("crosstool")) + + if path.startswith(crosstool_folder): + # We drop the path to "$REPO/crosstool" and a trailing path separator. + return path[len(crosstool_folder) + 1:] + return path + +def _is_compiler_option_supported(repository_ctx, cc, option): + """Checks that `option` is supported by the C compiler. 
Doesn't %-escape the option.""" + result = repository_ctx.execute([ + cc, + option, + "-o", + "/dev/null", + "-c", + str(repository_ctx.path("tools/cpp/empty.cc")), + ]) + return result.stderr.find(option) == -1 + +def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp, tf_sys_root): + """Compute the list of default C or C++ include directories.""" + if lang_is_cpp: + lang = "c++" + else: + lang = "c" + sysroot = [] + if tf_sys_root: + sysroot += ["--sysroot", tf_sys_root] + result = raw_exec(repository_ctx, [cc, "-E", "-x" + lang, "-", "-v"] + + sysroot) + stderr = err_out(result) + index1 = stderr.find(_INC_DIR_MARKER_BEGIN) + if index1 == -1: + return [] + index1 = stderr.find("\n", index1) + if index1 == -1: + return [] + index2 = stderr.rfind("\n ") + if index2 == -1 or index2 < index1: + return [] + index2 = stderr.find("\n", index2 + 1) + if index2 == -1: + inc_dirs = stderr[index1 + 1:] + else: + inc_dirs = stderr[index1 + 1:index2].strip() + + print_resource_dir_supported = _is_compiler_option_supported( + repository_ctx, + cc, + "-print-resource-dir", + ) + + if print_resource_dir_supported: + resource_dir = repository_ctx.execute( + [cc, "-print-resource-dir"], + ).stdout.strip() + "/share" + inc_dirs += "\n" + resource_dir + + compiler_includes = [ + _normalize_include_path(repository_ctx, _cxx_inc_convert(p)) + for p in inc_dirs.split("\n") + ] + + # The compiler might be on a symlink, e.g. /symlink -> /opt/gcc + # The above keeps only the resolved paths to the default includes (e.g. /opt/gcc/include/c++/11) + # but Bazel might encounter either (usually reported by the compiler) + # especially when a compiler wrapper (e.g. ccache) is used. + # So we need to also include paths where symlinks are not resolved. + + # Try to find real path to CC installation to "see through" compiler wrappers + # GCC has the path to g++ + index1 = result.stderr.find("COLLECT_GCC=") + if index1 != -1: + index1 = result.stderr.find("=", index1) + index2 = result.stderr.find("\n", index1) + cc_topdir = repository_ctx.path(result.stderr[index1 + 1:index2]).dirname.dirname + else: + # Clang has the directory + index1 = result.stderr.find("InstalledDir: ") + if index1 != -1: + index1 = result.stderr.find(" ", index1) + index2 = result.stderr.find("\n", index1) + cc_topdir = repository_ctx.path(result.stderr[index1 + 1:index2]).dirname + else: + # Fallback to the CC path + cc_topdir = repository_ctx.path(cc).dirname.dirname + + # We now have the compiler installation prefix, e.g. /symlink/gcc + # And the resolved installation prefix, e.g. /opt/gcc + cc_topdir_resolved = str(realpath(repository_ctx, cc_topdir)).strip() + cc_topdir = str(cc_topdir).strip() + + # If there is (any!) symlink involved we add paths where the unresolved installation prefix is kept. + # e.g. [/opt/gcc/include/c++/11, /opt/gcc/lib/gcc/x86_64-linux-gnu/11/include, /other/path] + # adds [/symlink/include/c++/11, /symlink/lib/gcc/x86_64-linux-gnu/11/include] + if cc_topdir_resolved != cc_topdir: + unresolved_compiler_includes = [ + cc_topdir + inc[len(cc_topdir_resolved):] + for inc in compiler_includes + if inc.startswith(cc_topdir_resolved) + ] + compiler_includes = compiler_includes + unresolved_compiler_includes + return compiler_includes + +def get_cxx_inc_directories(repository_ctx, cc, tf_sys_root): + """Compute the list of default C and C++ include directories.""" + + # For some reason `clang -xc` sometimes returns include paths that are + # different from the ones from `clang -xc++`. 
(Symlink and a dir) + # So we run the compiler with both `-xc` and `-xc++` and merge resulting lists + includes_cpp = _get_cxx_inc_directories_impl( + repository_ctx, + cc, + True, + tf_sys_root, + ) + includes_c = _get_cxx_inc_directories_impl( + repository_ctx, + cc, + False, + tf_sys_root, + ) + + return includes_cpp + [ + inc + for inc in includes_c + if inc not in includes_cpp + ] diff --git a/third_party/xla/third_party/tsl/third_party/gpus/crosstool/BUILD.tpl b/third_party/xla/third_party/tsl/third_party/gpus/crosstool/BUILD.tpl index 8eda7a1cf6ac2b..b9553d9b99ecfe 100644 --- a/third_party/xla/third_party/tsl/third_party/gpus/crosstool/BUILD.tpl +++ b/third_party/xla/third_party/tsl/third_party/gpus/crosstool/BUILD.tpl @@ -2,6 +2,7 @@ # Update cuda_configure.bzl#verify_build_defines when adding new variables. load(":cc_toolchain_config.bzl", "cc_toolchain_config") +load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") licenses(["restricted"]) @@ -133,9 +134,17 @@ filegroup( srcs = [], ) +filegroup( + name = "cuda_nvcc_files", + srcs = %{cuda_nvcc_files}, +) + filegroup( name = "crosstool_wrapper_driver_is_not_gcc", - srcs = ["clang/bin/crosstool_wrapper_driver_is_not_gcc"], + srcs = [ + ":cuda_nvcc_files", + ":clang/bin/crosstool_wrapper_driver_is_not_gcc" + ], ) filegroup( diff --git a/third_party/xla/third_party/tsl/third_party/gpus/cuda/BUILD.hermetic.tpl b/third_party/xla/third_party/tsl/third_party/gpus/cuda/BUILD.hermetic.tpl new file mode 100644 index 00000000000000..1c00f1c5e32916 --- /dev/null +++ b/third_party/xla/third_party/tsl/third_party/gpus/cuda/BUILD.hermetic.tpl @@ -0,0 +1,291 @@ +load("@bazel_skylib//:bzl_library.bzl", "bzl_library") +load("@bazel_skylib//lib:selects.bzl", "selects") +load("@bazel_skylib//rules:common_settings.bzl", "bool_flag") + +licenses(["restricted"]) # MPL2, portions GPL v3, LGPL v3, BSD-like + +package(default_visibility = ["//visibility:public"]) + +# Config setting whether TensorFlow is built with CUDA support using clang. +# +# TODO(b/174244321), DEPRECATED: this target will be removed when all users +# have been converted to :is_cuda_enabled (most) or :is_cuda_compiler_clang. +selects.config_setting_group( + name = "using_clang", + match_all = [ + "@local_config_cuda//:is_cuda_enabled", + "@local_config_cuda//:is_cuda_compiler_clang", + ], +) + +# Config setting whether TensorFlow is built with CUDA support using nvcc. +# +# TODO(b/174244321), DEPRECATED: this target will be removed when all users +# have been converted to :is_cuda_enabled (most) or :is_cuda_compiler_nvcc. +selects.config_setting_group( + name = "using_nvcc", + match_all = [ + "@local_config_cuda//:is_cuda_enabled", + "@local_config_cuda//:is_cuda_compiler_nvcc", + ], +) + +# Equivalent to using_clang && -c opt. +selects.config_setting_group( + name = "using_clang_opt", + match_all = [ + ":using_clang", + ":_opt", + ], +) + +config_setting( + name = "_opt", + values = {"compilation_mode": "opt"}, +) + +# Provides CUDA headers for '#include "third_party/gpus/cuda/include/cuda.h"' +# All clients including TensorFlow should use these directives. 
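+# In this hermetic template each component's headers come from its own
+# downloaded repository via the aliases further down, so :cuda_headers is
+# a thin aggregation target rather than a copy of a local toolkit.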
+cc_library( + name = "cuda_headers", + hdrs = [ + "cuda/cuda_config.h", + ], + include_prefix = "third_party/gpus", + includes = [ + ".", # required to include cuda/cuda/cuda_config.h as cuda/config.h + ], + deps = [":cudart_headers", + ":cublas_headers", + ":cccl_headers", + ":nvtx_headers", + ":nvcc_headers", + ":nvjitlink_headers", + ":cusolver_headers", + ":cufft_headers", + ":cusparse_headers", + ":curand_headers", + ":cupti_headers", + ":nvml_headers"], +) + +cc_library( + name = "cudart_static", + srcs = ["@cuda_cudart//:static"], + linkopts = [ + "-ldl", + "-lpthread", + %{cudart_static_linkopt} + ], +) + +alias( + name = "cuda_driver", + actual = "@cuda_cudart//:cuda_driver", +) + +alias( + name = "cudart_headers", + actual = "@cuda_cudart//:headers", +) + +alias( + name = "cudart", + actual = "@cuda_cudart//:cudart", +) + +alias( + name = "nvjitlink_headers", + actual = "@cuda_nvjitlink//:headers", +) + +alias( + name = "nvjitlink", + actual = "@cuda_nvjitlink//:nvjitlink", +) + +alias( + name = "nvtx_headers", + actual = "@cuda_nvtx//:headers", +) + +alias( + name = "nvml_headers", + actual = "@cuda_nvml//:headers", +) + +alias( + name = "nvcc_headers", + actual = "@cuda_nvcc//:headers", +) + +alias( + name = "cccl_headers", + actual = "@cuda_cccl//:headers", +) + +alias( + name = "cublas_headers", + actual = "@cuda_cublas//:headers", +) + +alias( + name = "cusolver_headers", + actual = "@cuda_cusolver//:headers", +) + +alias( + name = "cufft_headers", + actual = "@cuda_cufft//:headers", +) + +alias( + name = "cusparse_headers", + actual = "@cuda_cusparse//:headers", +) + +alias( + name = "curand_headers", + actual = "@cuda_curand//:headers", +) + +alias( + name = "cublas", + actual = "@cuda_cublas//:cublas", +) + +alias( + name = "cublasLt", + actual = "@cuda_cublas//:cublasLt", +) + +alias( + name = "cusolver", + actual = "@cuda_cusolver//:cusolver", +) + +alias( + name = "cudnn", + actual = "@cuda_cudnn//:cudnn", +) + +alias( + name = "cudnn_ops_infer", + actual = "@cuda_cudnn//:cudnn_ops_infer", +) + +alias( + name = "cudnn_cnn_infer", + actual = "@cuda_cudnn//:cudnn_cnn_infer", +) + +alias( + name = "cudnn_ops_train", + actual = "@cuda_cudnn//:cudnn_ops_train", +) + +alias( + name = "cudnn_cnn_train", + actual = "@cuda_cudnn//:cudnn_cnn_train", +) + +alias( + name = "cudnn_adv_infer", + actual = "@cuda_cudnn//:cudnn_adv_infer", +) + +alias( + name = "cudnn_adv_train", + actual = "@cuda_cudnn//:cudnn_adv_train", +) +alias( + name = "cudnn_header", + actual = "@cuda_cudnn//:headers", +) + +alias( + name = "cufft", + actual = "@cuda_cufft//:cufft", +) + +alias( + name = "curand", + actual = "@cuda_curand//:curand", +) + +cc_library( + name = "cuda", + deps = [ + ":cublas", + ":cublasLt", + ":cuda_headers", + ":cudart", + ":cudnn", + ":cufft", + ":curand", + ], +) + +alias( + name = "cub_headers", + actual = "%{cub_actual}", +) + +alias( + name = "cupti_headers", + actual = "@cuda_cupti//:headers", +) + +alias( + name = "cupti_dsos", + actual = "@cuda_cupti//:cupti", +) + +alias( + name = "cusparse", + actual = "@cuda_cusparse//:cusparse", +) + +cc_library( + name = "libdevice_root", + data = ["@cuda_nvcc//:nvvm"], +) + +bzl_library( + name = "build_defs_bzl", + srcs = ["build_defs.bzl"], + deps = [ + "@bazel_skylib//lib:selects", + ], +) + +py_library( + name = "cuda_config_py", + srcs = ["cuda/cuda_config.py"], +) + +# Config setting whether TensorFlow is built with hermetic CUDA. 
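+# ":hermetic_cuda_tools" tracks @local_config_cuda//:is_cuda_enabled, while
+# the bool_flag below (default True here, default False in BUILD.tpl) lets a
+# build drop the downloaded NVIDIA shared libraries again via
+# --@local_config_cuda//cuda:include_hermetic_cuda_libs=false, i.e. the
+# no_cuda_libs config defined in the .bazelrc changes above.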
+alias( + name = "hermetic_cuda_tools", + actual = "@local_config_cuda//:is_cuda_enabled", +) + +# Flag indicating if we should include hermetic CUDA libs. +bool_flag( + name = "include_hermetic_cuda_libs", + build_setting_default = True, +) + +config_setting( + name = "hermetic_cuda_libs", + flag_values = {":include_hermetic_cuda_libs": "True"}, +) + +selects.config_setting_group( + name = "hermetic_cuda_tools_and_libs", + match_all = [ + ":hermetic_cuda_libs", + ":hermetic_cuda_tools" + ], +) + diff --git a/third_party/xla/third_party/tsl/third_party/gpus/cuda/BUILD.tpl b/third_party/xla/third_party/tsl/third_party/gpus/cuda/BUILD.tpl index 90a18b90de048c..a4264cc14890e5 100644 --- a/third_party/xla/third_party/tsl/third_party/gpus/cuda/BUILD.tpl +++ b/third_party/xla/third_party/tsl/third_party/gpus/cuda/BUILD.tpl @@ -1,6 +1,7 @@ load(":build_defs.bzl", "cuda_header_library") load("@bazel_skylib//:bzl_library.bzl", "bzl_library") load("@bazel_skylib//lib:selects.bzl", "selects") +load("@bazel_skylib//rules:common_settings.bzl", "bool_flag") licenses(["restricted"]) # MPL2, portions GPL v3, LGPL v3, BSD-like @@ -144,7 +145,6 @@ cc_library( name = "cusolver", srcs = ["cuda/lib/%{cusolver_lib}"], data = ["cuda/lib/%{cusolver_lib}"], - linkopts = ["-lgomp"], linkstatic = 1, ) @@ -220,7 +220,6 @@ cc_library( name = "cusparse", srcs = ["cuda/lib/%{cusparse_lib}"], data = ["cuda/lib/%{cusparse_lib}"], - linkopts = ["-lgomp"], linkstatic = 1, ) @@ -242,4 +241,29 @@ py_library( srcs = ["cuda/cuda_config.py"], ) +# Config setting whether TensorFlow is built with hermetic CUDA. +alias( + name = "hermetic_cuda_tools", + actual = "@local_config_cuda//:is_cuda_enabled", +) + +# Flag indicating if we should include hermetic CUDA libs. +bool_flag( + name = "include_hermetic_cuda_libs", + build_setting_default = False, +) + +config_setting( + name = "hermetic_cuda_libs", + flag_values = {":include_hermetic_cuda_libs": "True"}, +) + +selects.config_setting_group( + name = "hermetic_cuda_tools_and_libs", + match_all = [ + ":hermetic_cuda_libs", + ":hermetic_cuda_tools" + ], +) + %{copy_rules} diff --git a/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_cccl.BUILD b/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_cccl.BUILD new file mode 100644 index 00000000000000..9823f1d871ed53 --- /dev/null +++ b/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_cccl.BUILD @@ -0,0 +1,12 @@ +licenses(["restricted"]) # NVIDIA proprietary license + +cc_library( + name = "headers", + hdrs = glob([ + "include/**", + ]), + include_prefix = "third_party/gpus/cuda/include", + includes = ["include"], + strip_include_prefix = "include", + visibility = ["@local_config_cuda//cuda:__pkg__"], +) diff --git a/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_cublas.BUILD.tpl b/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_cublas.BUILD.tpl new file mode 100644 index 00000000000000..d5766c971a50ff --- /dev/null +++ b/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_cublas.BUILD.tpl @@ -0,0 +1,33 @@ +licenses(["restricted"]) # NVIDIA proprietary license + +exports_files([ + "version.txt", +]) + +cc_import( + name = "cublas", + hdrs = [":headers"], + shared_library = "lib/libcublas.so.%{version}", + visibility = ["//visibility:public"], +) + +cc_import( + name = "cublasLt", + hdrs = [":headers"], + shared_library = "lib/libcublasLt.so.%{version}", + visibility = ["//visibility:public"], +) + +cc_library( + name = "headers", + hdrs = [ + "include/cublas.h", + 
"include/cublas_v2.h", + "include/cublas_api.h", + "include/cublasLt.h" + ], + include_prefix = "third_party/gpus/cuda/include", + includes = ["include"], + strip_include_prefix = "include", + visibility = ["@local_config_cuda//cuda:__pkg__"], +) diff --git a/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_cudart.BUILD.tpl b/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_cudart.BUILD.tpl new file mode 100644 index 00000000000000..08655e7819156c --- /dev/null +++ b/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_cudart.BUILD.tpl @@ -0,0 +1,34 @@ +licenses(["restricted"]) # NVIDIA proprietary license + +exports_files([ + "version.txt", +]) + +filegroup( + name = "static", + srcs = ["lib/libcudart_static.a"], + visibility = ["@local_config_cuda//cuda:__pkg__"], +) + +cc_import( + name = "cuda_driver", + shared_library = "lib/stubs/libcuda.so", +) + +cc_import( + name = "cudart", + hdrs = [":headers"], + shared_library = "lib/libcudart.so.%{version}", + visibility = ["//visibility:public"], +) + +cc_library( + name = "headers", + hdrs = glob([ + "include/**", + ]), + include_prefix = "third_party/gpus/cuda/include", + includes = ["include"], + strip_include_prefix = "include", + visibility = ["@local_config_cuda//cuda:__pkg__"], +) diff --git a/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_cudnn.BUILD.tpl b/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_cudnn.BUILD.tpl new file mode 100644 index 00000000000000..98da6e69cbe644 --- /dev/null +++ b/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_cudnn.BUILD.tpl @@ -0,0 +1,65 @@ +licenses(["restricted"]) # NVIDIA proprietary license + +exports_files([ + "version.txt", +]) + +cc_import( + name = "cudnn", + hdrs = [":headers"], + shared_library = "lib/libcudnn.so.%{version}", + visibility = ["//visibility:public"], +) + +cc_import( + name = "cudnn_ops_infer", + hdrs = [":headers"], + shared_library = "lib/libcudnn_ops_infer.so.%{version}", + visibility = ["//visibility:public"], +) + +cc_import( + name = "cudnn_cnn_infer", + hdrs = [":headers"], + shared_library = "lib/libcudnn_cnn_infer.so.%{version}", + visibility = ["//visibility:public"], +) + +cc_import( + name = "cudnn_ops_train", + hdrs = [":headers"], + shared_library = "lib/libcudnn_ops_train.so.%{version}", + visibility = ["//visibility:public"], +) + +cc_import( + name = "cudnn_cnn_train", + hdrs = [":headers"], + shared_library = "lib/libcudnn_cnn_train.so.%{version}", + visibility = ["//visibility:public"], +) + +cc_import( + name = "cudnn_adv_infer", + hdrs = [":headers"], + shared_library = "lib/libcudnn_adv_infer.so.%{version}", + visibility = ["//visibility:public"], +) + +cc_import( + name = "cudnn_adv_train", + hdrs = [":headers"], + shared_library = "lib/libcudnn_adv_train.so.%{version}", + visibility = ["//visibility:public"], +) + +cc_library( + name = "headers", + hdrs = glob([ + "include/**", + ]), + include_prefix = "third_party/gpus/cudnn", + includes = ["include"], + strip_include_prefix = "include", + visibility = ["@local_config_cuda//cuda:__pkg__"], +) diff --git a/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_cufft.BUILD.tpl b/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_cufft.BUILD.tpl new file mode 100644 index 00000000000000..6836814dc9b622 --- /dev/null +++ b/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_cufft.BUILD.tpl @@ -0,0 +1,23 @@ +licenses(["restricted"]) # NVIDIA proprietary license + +exports_files([ + "version.txt", +]) + +cc_import( + name = 
"cufft", + hdrs = [":headers"], + shared_library = "lib/libcufft.so.%{version}", + visibility = ["//visibility:public"], +) + +cc_library( + name = "headers", + hdrs = glob([ + "include/**", + ]), + include_prefix = "third_party/gpus/cuda/include", + includes = ["include"], + strip_include_prefix = "include", + visibility = ["@local_config_cuda//cuda:__pkg__"], +) diff --git a/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_cupti.BUILD.tpl b/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_cupti.BUILD.tpl new file mode 100644 index 00000000000000..772386d723649f --- /dev/null +++ b/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_cupti.BUILD.tpl @@ -0,0 +1,23 @@ +licenses(["restricted"]) # NVIDIA proprietary license + +exports_files([ + "version.txt", +]) + +cc_import( + name = "cupti", + hdrs = [":headers"], + shared_library = "lib/libcupti.so.%{version}", + visibility = ["//visibility:public"], +) + +cc_library( + name = "headers", + hdrs = glob([ + "include/**", + ]), + include_prefix = "third_party/gpus/cuda/extras/CUPTI/include", + includes = ["include/"], + strip_include_prefix = "include", + visibility = ["@local_config_cuda//cuda:__pkg__"], +) diff --git a/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_curand.BUILD.tpl b/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_curand.BUILD.tpl new file mode 100644 index 00000000000000..c98ded26f4b907 --- /dev/null +++ b/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_curand.BUILD.tpl @@ -0,0 +1,23 @@ +licenses(["restricted"]) # NVIDIA proprietary license + +exports_files([ + "version.txt", +]) + +cc_import( + name = "curand", + hdrs = [":headers"], + shared_library = "lib/libcurand.so.%{version}", + visibility = ["//visibility:public"], +) + +cc_library( + name = "headers", + hdrs = glob([ + "include/**", + ]), + include_prefix = "third_party/gpus/cuda/include", + includes = ["include"], + strip_include_prefix = "include", + visibility = ["@local_config_cuda//cuda:__pkg__"], +) diff --git a/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_cusolver.BUILD.tpl b/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_cusolver.BUILD.tpl new file mode 100644 index 00000000000000..6a5f9d9737cfe2 --- /dev/null +++ b/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_cusolver.BUILD.tpl @@ -0,0 +1,25 @@ +licenses(["restricted"]) # NVIDIA proprietary license + +exports_files([ + "version.txt", +]) + +cc_import( + name = "cusolver", + hdrs = [":headers"], + shared_library = "lib/libcusolver.so.%{version}", + visibility = ["//visibility:public"], +) + +cc_library( + name = "headers", + hdrs = [ + "include/cusolver_common.h", + "include/cusolverDn.h", + "include/cusolverSp.h" + ], + include_prefix = "third_party/gpus/cuda/include", + includes = ["include"], + strip_include_prefix = "include", + visibility = ["@local_config_cuda//cuda:__pkg__"], +) diff --git a/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_cusparse.BUILD.tpl b/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_cusparse.BUILD.tpl new file mode 100644 index 00000000000000..ad5c2b5f0c45c1 --- /dev/null +++ b/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_cusparse.BUILD.tpl @@ -0,0 +1,23 @@ +licenses(["restricted"]) # NVIDIA proprietary license + +exports_files([ + "version.txt", +]) + +cc_import( + name = "cusparse", + hdrs = [":headers"], + shared_library = "lib/libcusparse.so.%{version}", + visibility = ["//visibility:public"], +) + +cc_library( + name = "headers", + hdrs 
= glob([ + "include/**", + ]), + include_prefix = "third_party/gpus/cuda/include", + includes = ["include"], + strip_include_prefix = "include", + visibility = ["@local_config_cuda//cuda:__pkg__"], +) diff --git a/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_nccl.BUILD b/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_nccl.BUILD new file mode 100644 index 00000000000000..440b31c5cb616e --- /dev/null +++ b/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_nccl.BUILD @@ -0,0 +1,7 @@ +licenses(["restricted"]) # NVIDIA proprietary license + +cc_import( + name = "nccl", + shared_library = "lib/libnccl.so.2", + visibility = ["//visibility:public"], +) diff --git a/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_nvcc.BUILD b/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_nvcc.BUILD new file mode 100644 index 00000000000000..6cdaca5cc902a0 --- /dev/null +++ b/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_nvcc.BUILD @@ -0,0 +1,73 @@ +licenses(["restricted"]) # NVIDIA proprietary license + +exports_files([ + "bin/nvcc", +]) + +filegroup( + name = "nvvm", + srcs = [ + "nvvm/libdevice/libdevice.10.bc", + ], + visibility = ["//visibility:public"], +) + +filegroup( + name = "nvlink", + srcs = [ + "bin/nvlink", + ], + visibility = ["//visibility:public"], +) + +filegroup( + name = "fatbinary", + srcs = [ + "bin/fatbinary", + ], + visibility = ["//visibility:public"], +) + +filegroup( + name = "bin2c", + srcs = [ + "bin/bin2c", + ], + visibility = ["//visibility:public"], +) + +filegroup( + name = "ptxas", + srcs = [ + "bin/ptxas", + ], + visibility = ["//visibility:public"], +) + +filegroup( + name = "bin", + srcs = glob([ + "bin/**", + "nvvm/bin/**", + ]), + visibility = ["//visibility:public"], +) + +filegroup( + name = "link_stub", + srcs = [ + "bin/crt/link.stub", + ], + visibility = ["//visibility:public"], +) + +cc_library( + name = "headers", + hdrs = glob([ + "include/**", + ]), + include_prefix = "third_party/gpus/cuda/include", + includes = ["include"], + strip_include_prefix = "include", + visibility = ["@local_config_cuda//cuda:__pkg__"], +) diff --git a/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_nvjitlink.BUILD.tpl b/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_nvjitlink.BUILD.tpl new file mode 100644 index 00000000000000..6729b7cd1df9c4 --- /dev/null +++ b/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_nvjitlink.BUILD.tpl @@ -0,0 +1,23 @@ +licenses(["restricted"]) # NVIDIA proprietary license + +exports_files([ + "version.txt", +]) + +cc_import( + name = "nvjitlink", + hdrs = [":headers"], + shared_library = "lib/libnvJitLink.so.%{version}", + visibility = ["//visibility:public"], +) + +cc_library( + name = "headers", + hdrs = glob([ + "include/**", + ]), + include_prefix = "third_party/gpus/cuda/include", + includes = ["include"], + strip_include_prefix = "include", + visibility = ["@local_config_cuda//cuda:__pkg__"], +) diff --git a/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_nvml.BUILD b/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_nvml.BUILD new file mode 100644 index 00000000000000..40b97e671cf7de --- /dev/null +++ b/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_nvml.BUILD @@ -0,0 +1,12 @@ +licenses(["restricted"]) # NVIDIA proprietary license + +cc_library( + name = "headers", + hdrs = glob([ + "include/**", + ]), + include_prefix = "third_party/gpus/cuda/nvml/include", + includes = ["include"], + strip_include_prefix = "include", + 
visibility = ["@local_config_cuda//cuda:__pkg__"], +) diff --git a/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_nvprune.BUILD b/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_nvprune.BUILD new file mode 100644 index 00000000000000..986ef0c8f76166 --- /dev/null +++ b/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_nvprune.BUILD @@ -0,0 +1,9 @@ +licenses(["restricted"]) # NVIDIA proprietary license + +filegroup( + name = "nvprune", + srcs = [ + "bin/nvprune", + ], + visibility = ["//visibility:public"], +) diff --git a/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_nvtx.BUILD b/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_nvtx.BUILD new file mode 100644 index 00000000000000..9823f1d871ed53 --- /dev/null +++ b/third_party/xla/third_party/tsl/third_party/gpus/cuda/cuda_nvtx.BUILD @@ -0,0 +1,12 @@ +licenses(["restricted"]) # NVIDIA proprietary license + +cc_library( + name = "headers", + hdrs = glob([ + "include/**", + ]), + include_prefix = "third_party/gpus/cuda/include", + includes = ["include"], + strip_include_prefix = "include", + visibility = ["@local_config_cuda//cuda:__pkg__"], +) diff --git a/third_party/xla/third_party/tsl/third_party/gpus/cuda_configure.bzl b/third_party/xla/third_party/tsl/third_party/gpus/cuda_configure.bzl index fefbf081c87e1c..b8aad7ed4994ee 100644 --- a/third_party/xla/third_party/tsl/third_party/gpus/cuda_configure.bzl +++ b/third_party/xla/third_party/tsl/third_party/gpus/cuda_configure.bzl @@ -53,6 +53,11 @@ load( "realpath", "which", ) +load( + ":compiler_common_tools.bzl", + "get_cxx_inc_directories", + "to_list_of_strings", +) _GCC_HOST_COMPILER_PATH = "GCC_HOST_COMPILER_PATH" _GCC_HOST_COMPILER_PREFIX = "GCC_HOST_COMPILER_PREFIX" @@ -67,20 +72,6 @@ _TF_CUDA_CONFIG_REPO = "TF_CUDA_CONFIG_REPO" _TF_DOWNLOAD_CLANG = "TF_DOWNLOAD_CLANG" _PYTHON_BIN_PATH = "PYTHON_BIN_PATH" -def to_list_of_strings(elements): - """Convert the list of ["a", "b", "c"] into '"a", "b", "c"'. - - This is to be used to put a list of strings into the bzl file templates - so it gets interpreted as list of strings in Starlark. - - Args: - elements: list of string elements - - Returns: - single string of elements wrapped in quotes separated by a comma.""" - quoted_strings = ["\"" + element + "\"" for element in elements] - return ", ".join(quoted_strings) - def verify_build_defines(params): """Verify all variables that crosstool/BUILD.tpl expects are substituted. @@ -238,156 +229,6 @@ def find_cc(repository_ctx, use_cuda_clang): " environment variable").format(target_cc_name, cc_path_envvar)) return cc -_INC_DIR_MARKER_BEGIN = "#include <...>" - -# OSX add " (framework directory)" at the end of line, strip it. -_OSX_FRAMEWORK_SUFFIX = " (framework directory)" -_OSX_FRAMEWORK_SUFFIX_LEN = len(_OSX_FRAMEWORK_SUFFIX) - -def _cxx_inc_convert(path): - """Convert path returned by cc -E xc++ in a complete path.""" - path = path.strip() - if path.endswith(_OSX_FRAMEWORK_SUFFIX): - path = path[:-_OSX_FRAMEWORK_SUFFIX_LEN].strip() - return path - -def _normalize_include_path(repository_ctx, path): - """Normalizes include paths before writing them to the crosstool. - - If path points inside the 'crosstool' folder of the repository, a relative - path is returned. - If path points outside the 'crosstool' folder, an absolute path is returned. 
- """ - path = str(repository_ctx.path(path)) - crosstool_folder = str(repository_ctx.path(".").get_child("crosstool")) - - if path.startswith(crosstool_folder): - # We drop the path to "$REPO/crosstool" and a trailing path separator. - return path[len(crosstool_folder) + 1:] - return path - -def _is_compiler_option_supported(repository_ctx, cc, option): - """Checks that `option` is supported by the C compiler. Doesn't %-escape the option.""" - result = repository_ctx.execute([ - cc, - option, - "-o", - "/dev/null", - "-c", - str(repository_ctx.path("tools/cpp/empty.cc")), - ]) - return result.stderr.find(option) == -1 - -def _get_cxx_inc_directories_impl(repository_ctx, cc, lang_is_cpp, tf_sysroot): - """Compute the list of default C or C++ include directories.""" - if lang_is_cpp: - lang = "c++" - else: - lang = "c" - sysroot = [] - if tf_sysroot: - sysroot += ["--sysroot", tf_sysroot] - result = raw_exec(repository_ctx, [cc, "-E", "-x" + lang, "-", "-v"] + - sysroot) - stderr = err_out(result) - index1 = stderr.find(_INC_DIR_MARKER_BEGIN) - if index1 == -1: - return [] - index1 = stderr.find("\n", index1) - if index1 == -1: - return [] - index2 = stderr.rfind("\n ") - if index2 == -1 or index2 < index1: - return [] - index2 = stderr.find("\n", index2 + 1) - if index2 == -1: - inc_dirs = stderr[index1 + 1:] - else: - inc_dirs = stderr[index1 + 1:index2].strip() - - print_resource_dir_supported = _is_compiler_option_supported( - repository_ctx, - cc, - "-print-resource-dir", - ) - - if print_resource_dir_supported: - resource_dir = repository_ctx.execute( - [cc, "-print-resource-dir"], - ).stdout.strip() + "/share" - inc_dirs += "\n" + resource_dir - - compiler_includes = [ - _normalize_include_path(repository_ctx, _cxx_inc_convert(p)) - for p in inc_dirs.split("\n") - ] - - # The compiler might be on a symlink, e.g. /symlink -> /opt/gcc - # The above keeps only the resolved paths to the default includes (e.g. /opt/gcc/include/c++/11) - # but Bazel might encounter either (usually reported by the compiler) - # especially when a compiler wrapper (e.g. ccache) is used. - # So we need to also include paths where symlinks are not resolved. - - # Try to find real path to CC installation to "see through" compiler wrappers - # GCC has the path to g++ - index1 = result.stderr.find("COLLECT_GCC=") - if index1 != -1: - index1 = result.stderr.find("=", index1) - index2 = result.stderr.find("\n", index1) - cc_topdir = repository_ctx.path(result.stderr[index1 + 1:index2]).dirname.dirname - else: - # Clang has the directory - index1 = result.stderr.find("InstalledDir: ") - if index1 != -1: - index1 = result.stderr.find(" ", index1) - index2 = result.stderr.find("\n", index1) - cc_topdir = repository_ctx.path(result.stderr[index1 + 1:index2]).dirname - else: - # Fallback to the CC path - cc_topdir = repository_ctx.path(cc).dirname.dirname - - # We now have the compiler installation prefix, e.g. /symlink/gcc - # And the resolved installation prefix, e.g. /opt/gcc - cc_topdir_resolved = str(realpath(repository_ctx, cc_topdir)).strip() - cc_topdir = str(cc_topdir).strip() - - # If there is (any!) symlink involved we add paths where the unresolved installation prefix is kept. - # e.g. 
[/opt/gcc/include/c++/11, /opt/gcc/lib/gcc/x86_64-linux-gnu/11/include, /other/path] - # adds [/symlink/include/c++/11, /symlink/lib/gcc/x86_64-linux-gnu/11/include] - if cc_topdir_resolved != cc_topdir: - unresolved_compiler_includes = [ - cc_topdir + inc[len(cc_topdir_resolved):] - for inc in compiler_includes - if inc.startswith(cc_topdir_resolved) - ] - compiler_includes = compiler_includes + unresolved_compiler_includes - return compiler_includes - -def get_cxx_inc_directories(repository_ctx, cc, tf_sysroot): - """Compute the list of default C and C++ include directories.""" - - # For some reason `clang -xc` sometimes returns include paths that are - # different from the ones from `clang -xc++`. (Symlink and a dir) - # So we run the compiler with both `-xc` and `-xc++` and merge resulting lists - includes_cpp = _get_cxx_inc_directories_impl( - repository_ctx, - cc, - True, - tf_sysroot, - ) - includes_c = _get_cxx_inc_directories_impl( - repository_ctx, - cc, - False, - tf_sysroot, - ) - - return includes_cpp + [ - inc - for inc in includes_c - if inc not in includes_cpp - ] - def auto_configure_fail(msg): """Output failure message when cuda configuration fails.""" red = "\033[0;31m" @@ -1293,6 +1134,7 @@ def _create_local_cuda_repository(repository_ctx): cuda_defines["%{extra_no_canonical_prefixes_flags}"] = "" cuda_defines["%{unfiltered_compile_flags}"] = "" + cuda_defines["%{cuda_nvcc_files}"] = "[]" if is_cuda_clang and not is_nvcc_and_clang: cuda_defines["%{host_compiler_path}"] = str(cc) cuda_defines["%{host_compiler_warnings}"] = """ diff --git a/third_party/xla/third_party/tsl/third_party/gpus/hermetic_cuda_configure.bzl b/third_party/xla/third_party/tsl/third_party/gpus/hermetic_cuda_configure.bzl new file mode 100644 index 00000000000000..5d16aa6f76a1f4 --- /dev/null +++ b/third_party/xla/third_party/tsl/third_party/gpus/hermetic_cuda_configure.bzl @@ -0,0 +1,570 @@ +"""Repository rule for hermetic CUDA autoconfiguration. + +`hermetic_cuda_configure` depends on the following environment variables: + + * `TF_NEED_CUDA`: Whether to enable building with CUDA. + * `TF_NVCC_CLANG`: Whether to use clang for C++ and NVCC for Cuda compilation. + * `CLANG_CUDA_COMPILER_PATH`: The clang compiler path that will be used for + both host and device code compilation. + * `TF_SYSROOT`: The sysroot to use when compiling. + * `TF_CUDA_VERSION`: The version of the CUDA toolkit (mandatory). + * `TF_CUDA_COMPUTE_CAPABILITIES`: The CUDA compute capabilities. Default is + `3.5,5.2`. + * `PYTHON_BIN_PATH`: The python binary path +""" + +load( + "//third_party/remote_config:common.bzl", + "get_cpu_value", + "get_host_environ", + "which", +) +load( + ":compiler_common_tools.bzl", + "get_cxx_inc_directories", + "to_list_of_strings", +) + +def _find_cc(repository_ctx): + """Find the C++ compiler.""" + cc_path_envvar = _CLANG_CUDA_COMPILER_PATH + cc_name = "clang" + + cc_name_from_env = get_host_environ(repository_ctx, cc_path_envvar) + if cc_name_from_env: + cc_name = cc_name_from_env + if cc_name.startswith("/"): + # Return the absolute path. 
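+ # (i.e. CLANG_CUDA_COMPILER_PATH pointed at an absolute compiler path, + # so no PATH lookup is needed).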
+ return cc_name + cc = which(repository_ctx, cc_name) + if cc == None: + fail(("Cannot find {}, either correct your path or set the {}" + + " environment variable").format(cc_name, cc_path_envvar)) + return cc + +def _auto_configure_fail(msg): + """Output failure message when cuda configuration fails.""" + red = "\033[0;31m" + no_color = "\033[0m" + fail("\n%sCuda Configuration Error:%s %s\n" % (red, no_color, msg)) + +def _lib_name(base_name, cpu_value, version = None, static = False): + """Constructs the platform-specific name of a library. + + Args: + base_name: The name of the library, such as "cudart". + cpu_value: The name of the host operating system. + version: The version of the library. + static: True if the library is static, False if it is a shared object. + + Returns: + The platform-specific name of the library. + """ + version = "" if not version else "." + version + if cpu_value == "Linux": + if static: + return "lib%s.a" % base_name + return "lib%s.so%s" % (base_name, version) + elif cpu_value == "Windows": + return "%s.lib" % base_name + elif cpu_value == "Darwin": + if static: + return "lib%s.a" % base_name + return "lib%s%s.dylib" % (base_name, version) + else: + _auto_configure_fail("Invalid cpu_value: %s" % cpu_value) + +def _verify_build_defines(params): + """Verify all variables that crosstool/BUILD.tpl expects are substituted. + + Args: + params: dict of variables that will be passed to the BUILD.tpl template. + """ + missing = [] + for param in [ + "cxx_builtin_include_directories", + "extra_no_canonical_prefixes_flags", + "host_compiler_path", + "host_compiler_prefix", + "host_compiler_warnings", + "linker_bin_path", + "compiler_deps", + "msvc_cl_path", + "msvc_env_include", + "msvc_env_lib", + "msvc_env_path", + "msvc_env_tmp", + "msvc_lib_path", + "msvc_link_path", + "msvc_ml_path", + "unfiltered_compile_flags", + "win_compiler_deps", + ]: + if ("%{" + param + "}") not in params: + missing.append(param) + + if missing: + _auto_configure_fail( + "BUILD.tpl template is missing these variables: " + + str(missing) + + ".\nWe only got: " + + str(params) + + ".", + ) + +def get_cuda_version(repository_ctx): + return get_host_environ(repository_ctx, _TF_CUDA_VERSION) + +def enable_cuda(repository_ctx): + """Returns whether to build with CUDA support.""" + return int(get_host_environ(repository_ctx, TF_NEED_CUDA, False)) + +def _flag_enabled(repository_ctx, flag_name): + return get_host_environ(repository_ctx, flag_name) == "1" + +def _use_nvcc_and_clang(repository_ctx): + # Returns True if we should use clang for C++ and NVCC for CUDA. + return _flag_enabled(repository_ctx, _TF_NVCC_CLANG) + +def _tf_sysroot(repository_ctx): + return get_host_environ(repository_ctx, _TF_SYSROOT, "") + +def _py_tmpl_dict(d): + return {"%{cuda_config}": str(d)} + +def _cudart_static_linkopt(cpu_value): + """Returns additional platform-specific linkopts for cudart.""" + return "\"\"," if cpu_value == "Darwin" else "\"-lrt\"," + +def _compute_capabilities(repository_ctx): + """Returns a list of strings representing cuda compute capabilities. + + Args: + repository_ctx: the repo rule's context. + + Returns: + list of cuda architectures to compile for. 'compute_xy' refers to + both PTX and SASS, 'sm_xy' refers to SASS only. + """ + capabilities = get_host_environ( + repository_ctx, + _TF_CUDA_COMPUTE_CAPABILITIES, + "compute_35,compute_52", + ).split(",") + + # Map old 'x.y' capabilities to 'compute_xy'. 
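+ # For example, ["6.0", "7.0", "8.0"] becomes ["sm_60", "sm_70", "compute_80"]: + # SASS is emitted for every requested architecture, PTX only for the highest one.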
+ if len(capabilities) > 0 and all([len(x.split(".")) == 2 for x in capabilities]): + # If all capabilities are in 'x.y' format, only include PTX for the + # highest capability. + cc_list = sorted([x.replace(".", "") for x in capabilities]) + capabilities = ["sm_%s" % x for x in cc_list[:-1]] + ["compute_%s" % cc_list[-1]] + for i, capability in enumerate(capabilities): + parts = capability.split(".") + if len(parts) != 2: + continue + capabilities[i] = "compute_%s%s" % (parts[0], parts[1]) + + # Make list unique + capabilities = dict(zip(capabilities, capabilities)).keys() + + # Validate capabilities. + for capability in capabilities: + if not capability.startswith(("compute_", "sm_")): + _auto_configure_fail("Invalid compute capability: %s" % capability) + for prefix in ["compute_", "sm_"]: + if not capability.startswith(prefix): + continue + if len(capability) == len(prefix) + 2 and capability[-2:].isdigit(): + continue + if len(capability) == len(prefix) + 3 and capability.endswith("90a"): + continue + _auto_configure_fail("Invalid compute capability: %s" % capability) + + return capabilities + +def _compute_cuda_extra_copts(compute_capabilities): + copts = ["--no-cuda-include-ptx=all"] + for capability in compute_capabilities: + if capability.startswith("compute_"): + capability = capability.replace("compute_", "sm_") + copts.append("--cuda-include-ptx=%s" % capability) + copts.append("--cuda-gpu-arch=%s" % capability) + + return str(copts) + +def _get_cuda_config(repository_ctx): + """Detects and returns information about the CUDA installation on the system. + + Args: + repository_ctx: The repository context. + + Returns: + A struct containing the following fields: + cuda_version: The version of CUDA on the system. + cudart_version: The CUDA runtime version on the system. + cudnn_version: The version of cuDNN on the system. + compute_capabilities: A list of the system's CUDA compute capabilities. + cpu_value: The name of the host operating system. + """ + + return struct( + cuda_version = get_cuda_version(repository_ctx), + cupti_version = repository_ctx.read(repository_ctx.attr.cupti_version), + cudart_version = repository_ctx.read(repository_ctx.attr.cudart_version), + cublas_version = repository_ctx.read(repository_ctx.attr.cublas_version), + cusolver_version = repository_ctx.read(repository_ctx.attr.cusolver_version), + curand_version = repository_ctx.read(repository_ctx.attr.curand_version), + cufft_version = repository_ctx.read(repository_ctx.attr.cufft_version), + cusparse_version = repository_ctx.read(repository_ctx.attr.cusparse_version), + cudnn_version = repository_ctx.read(repository_ctx.attr.cudnn_version), + compute_capabilities = _compute_capabilities(repository_ctx), + cpu_value = get_cpu_value(repository_ctx), + ) + +_DUMMY_CROSSTOOL_BZL_FILE = """ +def error_gpu_disabled(): + fail("ERROR: Building with --config=cuda but TensorFlow is not configured " + + "to build with GPU support. Please re-run ./configure and enter 'Y' " + + "at the prompt to build with GPU support.") + + native.genrule( + name = "error_gen_crosstool", + outs = ["CROSSTOOL"], + cmd = "echo 'Should not be run.' && exit 1", + ) + + native.filegroup( + name = "crosstool", + srcs = [":CROSSTOOL"], + output_licenses = ["unencumbered"], + ) +""" + +_DUMMY_CROSSTOOL_BUILD_FILE = """ +load("//crosstool:error_gpu_disabled.bzl", "error_gpu_disabled") + +error_gpu_disabled() +""" + +def _create_dummy_repository(repository_ctx): + cpu_value = get_cpu_value(repository_ctx) + + # Set up BUILD file for cuda/. 
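+ # Every template placeholder is substituted with an empty or dummy value so + # that targets referencing @local_config_cuda still load when CUDA is disabled.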
+ repository_ctx.template( + "cuda/build_defs.bzl", + repository_ctx.attr.build_defs_tpl, + { + "%{cuda_is_configured}": "False", + "%{cuda_extra_copts}": "[]", + "%{cuda_gpu_architectures}": "[]", + "%{cuda_version}": "0.0", + }, + ) + + repository_ctx.template( + "cuda/BUILD", + repository_ctx.attr.dummy_cuda_build_tpl, + { + "%{cuda_driver_lib}": _lib_name("cuda", cpu_value), + "%{cudart_static_lib}": _lib_name( + "cudart_static", + cpu_value, + static = True, + ), + "%{cudart_static_linkopt}": _cudart_static_linkopt(cpu_value), + "%{cudart_lib}": _lib_name("cudart", cpu_value), + "%{cublas_lib}": _lib_name("cublas", cpu_value), + "%{cublasLt_lib}": _lib_name("cublasLt", cpu_value), + "%{cusolver_lib}": _lib_name("cusolver", cpu_value), + "%{cudnn_lib}": _lib_name("cudnn", cpu_value), + "%{cufft_lib}": _lib_name("cufft", cpu_value), + "%{curand_lib}": _lib_name("curand", cpu_value), + "%{cupti_lib}": _lib_name("cupti", cpu_value), + "%{cusparse_lib}": _lib_name("cusparse", cpu_value), + "%{cub_actual}": ":cuda_headers", + "%{copy_rules}": """ +filegroup(name="cuda-include") +filegroup(name="cublas-include") +filegroup(name="cusolver-include") +filegroup(name="cufft-include") +filegroup(name="cusparse-include") +filegroup(name="curand-include") +filegroup(name="cudnn-include") +""", + }, + ) + + # Create dummy files for the CUDA toolkit since they are still required by + # tensorflow/tsl/platform/default/build_config:cuda. + repository_ctx.file("cuda/cuda/include/cuda.h") + repository_ctx.file("cuda/cuda/include/cublas.h") + repository_ctx.file("cuda/cuda/include/cudnn.h") + repository_ctx.file("cuda/cuda/extras/CUPTI/include/cupti.h") + repository_ctx.file("cuda/cuda/nvml/include/nvml.h") + repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cuda", cpu_value)) + repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cudart", cpu_value)) + repository_ctx.file( + "cuda/cuda/lib/%s" % _lib_name("cudart_static", cpu_value), + ) + repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cublas", cpu_value)) + repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cublasLt", cpu_value)) + repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cusolver", cpu_value)) + repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cudnn", cpu_value)) + repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("curand", cpu_value)) + repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cufft", cpu_value)) + repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cupti", cpu_value)) + repository_ctx.file("cuda/cuda/lib/%s" % _lib_name("cusparse", cpu_value)) + + # Set up cuda_config.h, which is used by + # tensorflow/compiler/xla/stream_executor/dso_loader.cc. + repository_ctx.template( + "cuda/cuda/cuda_config.h", + repository_ctx.attr.cuda_config_tpl, + { + "%{cuda_version}": "", + "%{cudart_version}": "", + "%{cupti_version}": "", + "%{cublas_version}": "", + "%{cusolver_version}": "", + "%{curand_version}": "", + "%{cufft_version}": "", + "%{cusparse_version}": "", + "%{cudnn_version}": "", + "%{cuda_toolkit_path}": "", + "%{cuda_compute_capabilities}": "", + }, + ) + + # Set up cuda_config.py, which is used by gen_build_info to provide + # static build environment info to the API + repository_ctx.template( + "cuda/cuda/cuda_config.py", + repository_ctx.attr.cuda_config_py_tpl, + _py_tmpl_dict({}), + ) + + # If cuda_configure is not configured to build with GPU support, and the user + # attempts to build with --config=cuda, add a dummy build rule to intercept + # this and fail with an actionable error message. 
+ repository_ctx.file( + "crosstool/error_gpu_disabled.bzl", + _DUMMY_CROSSTOOL_BZL_FILE, + ) + repository_ctx.file("crosstool/BUILD", _DUMMY_CROSSTOOL_BUILD_FILE) + +def _create_local_cuda_repository(repository_ctx): + """Creates the repository containing files set up to build with CUDA.""" + cuda_config = _get_cuda_config(repository_ctx) + + # Set up BUILD file for cuda/ + repository_ctx.template( + "cuda/build_defs.bzl", + repository_ctx.attr.build_defs_tpl, + { + "%{cuda_is_configured}": "True", + "%{cuda_extra_copts}": _compute_cuda_extra_copts( + cuda_config.compute_capabilities, + ), + "%{cuda_gpu_architectures}": str(cuda_config.compute_capabilities), + "%{cuda_version}": cuda_config.cuda_version, + }, + ) + + repository_ctx.template( + "cuda/BUILD", + repository_ctx.attr.cuda_build_tpl, + { + "%{cudart_static_linkopt}": _cudart_static_linkopt(cuda_config.cpu_value), + "%{cub_actual}": ":cuda_headers", + }, + ) + + is_nvcc_and_clang = _use_nvcc_and_clang(repository_ctx) + tf_sysroot = _tf_sysroot(repository_ctx) + + # Set up crosstool/ + cc = _find_cc(repository_ctx) + host_compiler_includes = get_cxx_inc_directories( + repository_ctx, + cc, + tf_sysroot, + ) + + cuda_defines = {} + + # We do not support hermetic CUDA on Windows. + # This ensures the CROSSTOOL file parser is happy. + cuda_defines.update({ + "%{msvc_env_tmp}": "msvc_not_used", + "%{msvc_env_path}": "msvc_not_used", + "%{msvc_env_include}": "msvc_not_used", + "%{msvc_env_lib}": "msvc_not_used", + "%{msvc_cl_path}": "msvc_not_used", + "%{msvc_ml_path}": "msvc_not_used", + "%{msvc_link_path}": "msvc_not_used", + "%{msvc_lib_path}": "msvc_not_used", + "%{win_compiler_deps}": ":empty", + }) + + cuda_defines["%{builtin_sysroot}"] = tf_sysroot + cuda_defines["%{cuda_toolkit_path}"] = repository_ctx.attr.nvcc_binary.workspace_root + cuda_defines["%{compiler}"] = "clang" + cuda_defines["%{host_compiler_prefix}"] = "/usr/bin" + cuda_defines["%{linker_bin_path}"] = "" + cuda_defines["%{extra_no_canonical_prefixes_flags}"] = "" + cuda_defines["%{unfiltered_compile_flags}"] = "" + cuda_defines["%{cxx_builtin_include_directories}"] = to_list_of_strings(host_compiler_includes) + cuda_defines["%{cuda_nvcc_files}"] = "if_cuda([\"@{nvcc_archive}//:bin\", \"@{nvcc_archive}//:nvvm\"])".format(nvcc_archive = repository_ctx.attr.nvcc_binary.repo_name) + + if not is_nvcc_and_clang: + cuda_defines["%{host_compiler_path}"] = str(cc) + cuda_defines["%{host_compiler_warnings}"] = """ + # Some parts of the codebase set -Werror and hit this warning, so + # switch it off for now. 
+ "-Wno-invalid-partial-specialization" + """ + cuda_defines["%{compiler_deps}"] = ":cuda_nvcc_files" + repository_ctx.file( + "crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc", + "", + ) + else: + cuda_defines["%{host_compiler_path}"] = "clang/bin/crosstool_wrapper_driver_is_not_gcc" + cuda_defines["%{host_compiler_warnings}"] = "" + + nvcc_relative_path = "%s/%s" % (repository_ctx.attr.nvcc_binary.workspace_root, repository_ctx.attr.nvcc_binary.name) + cuda_defines["%{compiler_deps}"] = ":crosstool_wrapper_driver_is_not_gcc" + + wrapper_defines = { + "%{cpu_compiler}": str(cc), + "%{cuda_version}": cuda_config.cuda_version, + "%{nvcc_path}": nvcc_relative_path, + "%{host_compiler_path}": str(cc), + "%{use_clang_compiler}": "True", + } + repository_ctx.template( + "crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc", + repository_ctx.attr.crosstool_wrapper_driver_is_not_gcc_tpl, + wrapper_defines, + ) + + _verify_build_defines(cuda_defines) + + # Only expand template variables in the BUILD file + repository_ctx.template( + "crosstool/BUILD", + repository_ctx.attr.crosstool_build_tpl, + cuda_defines, + ) + + # No templating of cc_toolchain_config - use attributes and templatize the + # BUILD file. + repository_ctx.template( + "crosstool/cc_toolchain_config.bzl", + repository_ctx.attr.cc_toolchain_config_tpl, + {}, + ) + + # Set up cuda_config.h, which is used by + # tensorflow/compiler/xla/stream_executor/dso_loader.cc. + repository_ctx.template( + "cuda/cuda/cuda_config.h", + repository_ctx.attr.cuda_config_tpl, + { + "%{cuda_version}": cuda_config.cuda_version, + "%{cudart_version}": cuda_config.cudart_version, + "%{cupti_version}": cuda_config.cupti_version, + "%{cublas_version}": cuda_config.cublas_version, + "%{cusolver_version}": cuda_config.cusolver_version, + "%{curand_version}": cuda_config.curand_version, + "%{cufft_version}": cuda_config.cufft_version, + "%{cusparse_version}": cuda_config.cusparse_version, + "%{cudnn_version}": cuda_config.cudnn_version, + "%{cuda_toolkit_path}": "", + "%{cuda_compute_capabilities}": ", ".join([ + cc.split("_")[1] + for cc in cuda_config.compute_capabilities + ]), + }, + ) + + # Set up cuda_config.py, which is used by gen_build_info to provide + # static build environment info to the API + repository_ctx.template( + "cuda/cuda/cuda_config.py", + repository_ctx.attr.cuda_config_py_tpl, + _py_tmpl_dict({ + "cuda_version": cuda_config.cuda_version, + "cudnn_version": cuda_config.cudnn_version, + "cuda_compute_capabilities": cuda_config.compute_capabilities, + "cpu_compiler": str(cc), + }), + ) + +def _cuda_autoconf_impl(repository_ctx): + """Implementation of the cuda_autoconf repository rule.""" + build_file = repository_ctx.attr.local_config_cuda_build_file + + if not enable_cuda(repository_ctx): + _create_dummy_repository(repository_ctx) + else: + _create_local_cuda_repository(repository_ctx) + + repository_ctx.symlink(build_file, "BUILD") + +_CLANG_CUDA_COMPILER_PATH = "CLANG_CUDA_COMPILER_PATH" +_PYTHON_BIN_PATH = "PYTHON_BIN_PATH" +_TF_CUDA_COMPUTE_CAPABILITIES = "TF_CUDA_COMPUTE_CAPABILITIES" +_TF_CUDA_VERSION = "TF_CUDA_VERSION" +TF_NEED_CUDA = "TF_NEED_CUDA" +_TF_NVCC_CLANG = "TF_NVCC_CLANG" +_TF_SYSROOT = "TF_SYSROOT" + +_ENVIRONS = [ + _CLANG_CUDA_COMPILER_PATH, + TF_NEED_CUDA, + _TF_NVCC_CLANG, + _TF_CUDA_VERSION, + _TF_CUDA_COMPUTE_CAPABILITIES, + _TF_SYSROOT, + _PYTHON_BIN_PATH, + "TMP", + "TMPDIR", +] + +hermetic_cuda_configure = repository_rule( + implementation = _cuda_autoconf_impl, + environ = _ENVIRONS, + attrs = { 
+ "environ": attr.string_dict(), + "cublas_version": attr.label(default = Label("@cuda_cublas//:version.txt")), + "cudart_version": attr.label(default = Label("@cuda_cudart//:version.txt")), + "cudnn_version": attr.label(default = Label("@cuda_cudnn//:version.txt")), + "cufft_version": attr.label(default = Label("@cuda_cufft//:version.txt")), + "cupti_version": attr.label(default = Label("@cuda_cupti//:version.txt")), + "curand_version": attr.label(default = Label("@cuda_curand//:version.txt")), + "cusolver_version": attr.label(default = Label("@cuda_cusolver//:version.txt")), + "cusparse_version": attr.label(default = Label("@cuda_cusparse//:version.txt")), + "nvcc_binary": attr.label(default = Label("@cuda_nvcc//:bin/nvcc")), + "local_config_cuda_build_file": attr.label(default = Label("//third_party/gpus:local_config_cuda.BUILD")), + "build_defs_tpl": attr.label(default = Label("//third_party/gpus/cuda:build_defs.bzl.tpl")), + "cuda_build_tpl": attr.label(default = Label("//third_party/gpus/cuda:BUILD.hermetic.tpl")), + "dummy_cuda_build_tpl": attr.label(default = Label("//third_party/gpus/cuda:BUILD.tpl")), + "cuda_config_tpl": attr.label(default = Label("//third_party/gpus/cuda:cuda_config.h.tpl")), + "cuda_config_py_tpl": attr.label(default = Label("//third_party/gpus/cuda:cuda_config.py.tpl")), + "crosstool_wrapper_driver_is_not_gcc_tpl": attr.label(default = Label("//third_party/gpus/crosstool:clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl")), + "crosstool_build_tpl": attr.label(default = Label("//third_party/gpus/crosstool:BUILD.tpl")), + "cc_toolchain_config_tpl": attr.label(default = Label("//third_party/gpus/crosstool:cc_toolchain_config.bzl.tpl")), + }, +) +"""Detects and configures the hermetic CUDA toolchain. + +Add the following to your WORKSPACE FILE: + +```python +hermetic cuda_configure(name = "local_config_cuda") +``` + +Args: + name: A unique name for this workspace rule. 
+""" diff --git a/third_party/xla/third_party/tsl/third_party/gpus/rocm_configure.bzl b/third_party/xla/third_party/tsl/third_party/gpus/rocm_configure.bzl index 0fd4019fc5bb75..cf756b452e3950 100644 --- a/third_party/xla/third_party/tsl/third_party/gpus/rocm_configure.bzl +++ b/third_party/xla/third_party/tsl/third_party/gpus/rocm_configure.bzl @@ -22,12 +22,15 @@ load( "realpath", "which", ) +load( + ":compiler_common_tools.bzl", + "to_list_of_strings", +) load( ":cuda_configure.bzl", "enable_cuda", "make_copy_dir_rule", "make_copy_files_rule", - "to_list_of_strings", ) load( ":sycl_configure.bzl", diff --git a/third_party/xla/third_party/tsl/third_party/gpus/sycl_configure.bzl b/third_party/xla/third_party/tsl/third_party/gpus/sycl_configure.bzl index 05330b2fe53195..dd80694e7274f5 100644 --- a/third_party/xla/third_party/tsl/third_party/gpus/sycl_configure.bzl +++ b/third_party/xla/third_party/tsl/third_party/gpus/sycl_configure.bzl @@ -16,11 +16,14 @@ load( "realpath", "which", ) +load( + ":compiler_common_tools.bzl", + "to_list_of_strings", +) load( ":cuda_configure.bzl", "make_copy_dir_rule", "make_copy_files_rule", - "to_list_of_strings", ) _GCC_HOST_COMPILER_PATH = "GCC_HOST_COMPILER_PATH" diff --git a/third_party/xla/third_party/tsl/third_party/nccl/build_defs.bzl.tpl b/third_party/xla/third_party/tsl/third_party/nccl/build_defs.bzl.tpl index 53a6d4e1e41890..a0930df34ecec8 100644 --- a/third_party/xla/third_party/tsl/third_party/nccl/build_defs.bzl.tpl +++ b/third_party/xla/third_party/tsl/third_party/nccl/build_defs.bzl.tpl @@ -5,7 +5,6 @@ load("@bazel_tools//tools/cpp:toolchain_utils.bzl", "find_cpp_toolchain") # CUDA toolkit version as tuple (e.g. '(11, 1)'). _cuda_version = %{cuda_version} -_cuda_clang = %{cuda_clang} def _rdc_copts(): """Returns copts for compiling relocatable device code.""" @@ -121,25 +120,25 @@ _device_link = rule( "gpu_archs": attr.string_list(), "nvlink_args": attr.string_list(), "_nvlink": attr.label( - default = Label("@local_config_cuda//cuda:cuda/bin/nvlink"), + default = Label("%{nvlink_label}"), allow_single_file = True, executable = True, cfg = "host", ), "_fatbinary": attr.label( - default = Label("@local_config_cuda//cuda:cuda/bin/fatbinary"), + default = Label("%{fatbinary_label}"), allow_single_file = True, executable = True, cfg = "host", ), "_bin2c": attr.label( - default = Label("@local_config_cuda//cuda:cuda/bin/bin2c"), + default = Label("%{bin2c_label}"), allow_single_file = True, executable = True, cfg = "host", ), "_link_stub": attr.label( - default = Label("@local_config_cuda//cuda:cuda/bin/crt/link.stub"), + default = Label("%{link_stub_label}"), allow_single_file = True, ), }, @@ -189,7 +188,7 @@ _prune_relocatable_code = rule( "input": attr.label(mandatory = True, allow_files = True), "gpu_archs": attr.string_list(), "_nvprune": attr.label( - default = Label("@local_config_cuda//cuda:cuda/bin/nvprune"), + default = Label("%{nvprune_label}"), allow_single_file = True, executable = True, cfg = "host", diff --git a/third_party/xla/third_party/tsl/third_party/nccl/hermetic_nccl_configure.bzl b/third_party/xla/third_party/tsl/third_party/nccl/hermetic_nccl_configure.bzl new file mode 100644 index 00000000000000..b99cbcb08db58a --- /dev/null +++ b/third_party/xla/third_party/tsl/third_party/nccl/hermetic_nccl_configure.bzl @@ -0,0 +1,153 @@ +"""Repository rule for hermetic NCCL configuration. 
+ +`hermetic_nccl_configure` depends on the following environment variables: + + * `TF_NCCL_USE_STUB`: "1" if a NCCL stub that loads NCCL dynamically should + be used, "0" if NCCL should be linked in statically. + +""" + +load( + "//third_party/gpus:hermetic_cuda_configure.bzl", + "TF_NEED_CUDA", + "enable_cuda", + "get_cuda_version", +) +load( + "//third_party/remote_config:common.bzl", + "get_cpu_value", + "get_host_environ", +) + +_TF_NCCL_USE_STUB = "TF_NCCL_USE_STUB" + +_NCCL_DUMMY_BUILD_CONTENT = """ +filegroup( + name = "LICENSE", + visibility = ["//visibility:public"], +) + +cc_library( + name = "nccl", + visibility = ["//visibility:public"], +) + +cc_library( + name = "nccl_config", + hdrs = ["nccl_config.h"], + include_prefix = "third_party/nccl", + visibility = ["//visibility:public"], +) +""" + +_NCCL_ARCHIVE_BUILD_CONTENT = """ +filegroup( + name = "LICENSE", + data = ["@nccl_archive//:LICENSE.txt"], + visibility = ["//visibility:public"], +) + +alias( + name = "nccl", + actual = "@nccl_archive//:nccl", + visibility = ["//visibility:public"], +) + +alias( + name = "nccl_config", + actual = "@nccl_archive//:nccl_config", + visibility = ["//visibility:public"], +) +""" + +_NCCL_ARCHIVE_STUB_BUILD_CONTENT = """ +alias( + name = "nccl_lib", + actual = "@cuda_nccl//:nccl_lib", +) + +filegroup( + name = "LICENSE", + data = ["@nccl_archive//:LICENSE.txt"], + visibility = ["//visibility:public"], +) + +alias( + name = "nccl", + actual = "@nccl_archive//:nccl_via_stub", + visibility = ["//visibility:public"], +) + +alias( + name = "nccl_headers", + actual = "@nccl_archive//:nccl_headers", + visibility = ["//visibility:public"], +) + +alias( + name = "nccl_config", + actual = "@nccl_archive//:nccl_config", + visibility = ["//visibility:public"], +) +""" + +def _create_local_nccl_repository(repository_ctx): + cuda_version = get_cuda_version(repository_ctx) + if cuda_version == "12": + cuda_version = "12.3" + cuda_version = cuda_version.split(".") + + # Alias to open source build from @nccl_archive. + if get_host_environ(repository_ctx, _TF_NCCL_USE_STUB, "0") == "0": + repository_ctx.file("BUILD", _NCCL_ARCHIVE_BUILD_CONTENT) + else: + repository_ctx.file("BUILD", _NCCL_ARCHIVE_STUB_BUILD_CONTENT) + + repository_ctx.template("generated_names.bzl", repository_ctx.attr.generated_names_tpl, {}) + repository_ctx.template( + "build_defs.bzl", + repository_ctx.attr.build_defs_tpl, + { + "%{cuda_version}": "(%s, %s)" % tuple(cuda_version), + "%{nvlink_label}": "@cuda_nvcc//:nvlink", + "%{fatbinary_label}": "@cuda_nvcc//:fatbinary", + "%{bin2c_label}": "@cuda_nvcc//:bin2c", + "%{link_stub_label}": "@cuda_nvcc//:link_stub", + "%{nvprune_label}": "@cuda_nvprune//:nvprune", + }, + ) + +def _nccl_autoconf_impl(repository_ctx): + if (not enable_cuda(repository_ctx) or + get_cpu_value(repository_ctx) not in ("Linux", "FreeBSD")): + # Add a dummy build file to make bazel query happy. 
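+ # NCCL is only configured when CUDA is enabled on Linux or FreeBSD; on every + # other platform we emit empty stand-in targets and an empty TF_NCCL_VERSION.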
+ repository_ctx.file("BUILD", _NCCL_DUMMY_BUILD_CONTENT) + repository_ctx.file("nccl_config.h", "#define TF_NCCL_VERSION \"\"") + else: + _create_local_nccl_repository(repository_ctx) + +_ENVIRONS = [ + TF_NEED_CUDA, +] + +hermetic_nccl_configure = repository_rule( + environ = _ENVIRONS, + implementation = _nccl_autoconf_impl, + attrs = { + "environ": attr.string_dict(), + "generated_names_tpl": attr.label(default = Label("//third_party/nccl:generated_names.bzl.tpl")), + "build_defs_tpl": attr.label(default = Label("//third_party/nccl:build_defs.bzl.tpl")), + "system_build_tpl": attr.label(default = Label("//third_party/nccl:system.BUILD.tpl")), + }, +) +"""Downloads and configures the hermetic NCCL configuration. + +Add the following to your WORKSPACE FILE: + +```python +hermetic_nccl_configure(name = "local_config_nccl") +``` + +Args: + name: A unique name for this workspace rule. +""" diff --git a/third_party/xla/third_party/tsl/third_party/nccl/nccl_configure.bzl b/third_party/xla/third_party/tsl/third_party/nccl/nccl_configure.bzl index 22cf64d4771062..4da2513e03eb44 100644 --- a/third_party/xla/third_party/tsl/third_party/nccl/nccl_configure.bzl +++ b/third_party/xla/third_party/tsl/third_party/nccl/nccl_configure.bzl @@ -8,7 +8,6 @@ files. * `TF_CUDA_PATHS`: The base paths to look for CUDA and cuDNN. Default is `/usr/local/cuda,usr/`. - * `TF_CUDA_CLANG`: "1" if using Clang, "0" if using NVCC. * `TF_NCCL_USE_STUB`: "1" if a NCCL stub that loads NCCL dynamically should be used, "0" if NCCL should be linked in statically. @@ -33,7 +32,6 @@ _TF_CUDA_COMPUTE_CAPABILITIES = "TF_CUDA_COMPUTE_CAPABILITIES" _TF_NCCL_VERSION = "TF_NCCL_VERSION" _TF_NEED_CUDA = "TF_NEED_CUDA" _TF_CUDA_PATHS = "TF_CUDA_PATHS" -_TF_CUDA_CLANG = "TF_CUDA_CLANG" _TF_NCCL_USE_STUB = "TF_NCCL_USE_STUB" _DEFINE_NCCL_MAJOR = "#define NCCL_MAJOR" @@ -129,7 +127,11 @@ def _create_local_nccl_repository(repository_ctx): _label("build_defs.bzl.tpl"), { "%{cuda_version}": "(%s, %s)" % tuple(cuda_version), - "%{cuda_clang}": repr(get_host_environ(repository_ctx, _TF_CUDA_CLANG)), + "%{nvlink_label}": "@local_config_cuda//cuda:cuda/bin/nvlink", + "%{fatbinary_label}": "@local_config_cuda//cuda:cuda/bin/fatbinary", + "%{bin2c_label}": "@local_config_cuda//cuda:cuda/bin/bin2c", + "%{link_stub_label}": "@local_config_cuda//cuda:cuda/bin/crt/link.stub", + "%{nvprune_label}": "@local_config_cuda//cuda:cuda/bin/nvprune", }, ) else: @@ -181,7 +183,6 @@ _ENVIRONS = [ _TF_CUDA_COMPUTE_CAPABILITIES, _TF_NEED_CUDA, _TF_CUDA_PATHS, - _TF_CUDA_CLANG, ] remote_nccl_configure = repository_rule( diff --git a/third_party/xla/third_party/tsl/tools/toolchains/remote_config/configs.bzl b/third_party/xla/third_party/tsl/tools/toolchains/remote_config/configs.bzl index f2eecd61a5faf7..c105ee4544f51f 100644 --- a/third_party/xla/third_party/tsl/tools/toolchains/remote_config/configs.bzl +++ b/third_party/xla/third_party/tsl/tools/toolchains/remote_config/configs.bzl @@ -703,7 +703,7 @@ def initialize_rbe_configs(): "TF_CUDNN_VERSION": "8.9", "TF_ENABLE_XLA": "1", "TF_NEED_CUDA": "1", - "TF_NEED_TENSORRT": "1", + "TF_NEED_TENSORRT": "0", "TF_SYSROOT": "/dt9", "TF_TENSORRT_VERSION": "8.6", }, @@ -742,7 +742,7 @@ def initialize_rbe_configs(): "TF_CUDNN_VERSION": "8.9", "TF_ENABLE_XLA": "1", "TF_NEED_CUDA": "1", - "TF_NEED_TENSORRT": "1", + "TF_NEED_TENSORRT": "0", "TF_SYSROOT": "/dt9", "TF_TENSORRT_VERSION": "8.6", }, @@ -782,7 +782,7 @@ def initialize_rbe_configs(): "TF_ENABLE_XLA": "1", "TF_NEED_CUDA": "1", "TF_SYSROOT": "/dt9", - 
"TF_NEED_TENSORRT": "1", + "TF_NEED_TENSORRT": "0", "TF_TENSORRT_VERSION": "8.6", }, ) @@ -820,7 +820,7 @@ def initialize_rbe_configs(): "TF_ENABLE_XLA": "1", "TF_NEED_CUDA": "1", "TF_SYSROOT": "/dt9", - "TF_NEED_TENSORRT": "1", + "TF_NEED_TENSORRT": "0", "TF_TENSORRT_VERSION": "8.6", }, ) diff --git a/third_party/xla/third_party/tsl/tools/toolchains/remote_config/rbe_config.bzl b/third_party/xla/third_party/tsl/tools/toolchains/remote_config/rbe_config.bzl index 18a84d96c39f82..9ade984f45351d 100644 --- a/third_party/xla/third_party/tsl/tools/toolchains/remote_config/rbe_config.bzl +++ b/third_party/xla/third_party/tsl/tools/toolchains/remote_config/rbe_config.bzl @@ -1,8 +1,13 @@ """Macro that creates external repositories for remote config.""" -load("//third_party/gpus:cuda_configure.bzl", "remote_cuda_configure") +# If you need to use non-hermetic CUDA, replace the line below with +# load("//third_party/gpus:cuda_configure.bzl", "remote_cuda_configure") +load("//third_party/gpus:hermetic_cuda_configure.bzl", "hermetic_cuda_configure") load("//third_party/gpus:rocm_configure.bzl", "remote_rocm_configure") -load("//third_party/nccl:nccl_configure.bzl", "remote_nccl_configure") + +# If you need to use non-hermetic CUDA, replace the line below with +# load("//third_party/nccl:nccl_configure.bzl", "remote_nccl_configure") +load("//third_party/nccl:hermetic_nccl_configure.bzl", "hermetic_nccl_configure") load("//third_party/py:python_configure.bzl", "local_python_configure", "remote_python_configure") load("//third_party/remote_config:remote_platform_configure.bzl", "remote_platform_configure") load("//third_party/tensorrt:tensorrt_configure.bzl", "remote_tensorrt_configure") @@ -42,7 +47,7 @@ def _tensorflow_rbe_config(name, compiler, python_versions, os, rocm_version = N "TF_CUDNN_VERSION": cudnn_version, "TF_CUDA_VERSION": cuda_version, "CUDNN_INSTALL_PATH": cudnn_install_path if cudnn_install_path != None else "/usr/lib/x86_64-linux-gnu", - "TF_NEED_TENSORRT": "1", + "TF_NEED_TENSORRT": "0", "TF_TENSORRT_VERSION": tensorrt_version if tensorrt_version != None else "", "TENSORRT_INSTALL_PATH": tensorrt_install_path if tensorrt_install_path != None else "/usr/lib/x86_64-linux-gnu", "GCC_HOST_COMPILER_PATH": compiler if not compiler.endswith("clang") else "", @@ -58,13 +63,17 @@ def _tensorflow_rbe_config(name, compiler, python_versions, os, rocm_version = N "Pool": "default", } - remote_cuda_configure( + # If you need to use non-hermetic CUDA, replace the call below with + # remote_cuda_configure. + hermetic_cuda_configure( name = "%s_config_cuda" % name, environ = env, exec_properties = exec_properties, ) - remote_nccl_configure( + # If you need to use non-hermetic CUDA, replace the call below with + # remote_nccl_configure. + hermetic_nccl_configure( name = "%s_config_nccl" % name, environ = env, exec_properties = exec_properties, @@ -175,13 +184,15 @@ def sigbuild_tf_configs(name_container_map, env): "Pool": "default", } - remote_cuda_configure( + # TODO (ybaturina): DEPRECATED: replace with remote_cuda_configure for non-hermetic CUDA. + hermetic_cuda_configure( name = "%s_config_cuda" % name, environ = env, exec_properties = exec_properties, ) - remote_nccl_configure( + # TODO (ybaturina): DEPRECATED: replace with remote_nccl_configure for non-hermetic NCCL. 
+ hermetic_nccl_configure( name = "%s_config_nccl" % name, environ = env, exec_properties = exec_properties, diff --git a/third_party/xla/third_party/tsl/tsl/platform/default/BUILD b/third_party/xla/third_party/tsl/tsl/platform/default/BUILD index ca974552eca1ab..9db8ada08122f6 100644 --- a/third_party/xla/third_party/tsl/tsl/platform/default/BUILD +++ b/third_party/xla/third_party/tsl/tsl/platform/default/BUILD @@ -3,6 +3,7 @@ load("@bazel_skylib//:bzl_library.bzl", "bzl_library") load("@local_tsl//tsl/platform:rules_cc.bzl", "cc_library") load( "@local_xla//xla/tsl:tsl.bzl", + "if_hermetic_cuda_tools", "if_not_fuchsia", "if_not_windows", "internal_visibility", @@ -58,6 +59,9 @@ cc_library( srcs = ["cuda_libdevice_path.cc"], hdrs = ["//tsl/platform:cuda_libdevice_path.h"], compatible_with = [], + data = if_hermetic_cuda_tools([ + "@cuda_nvcc//:nvvm", + ]), tags = [ "manual", "no_oss", @@ -65,6 +69,7 @@ cc_library( ], deps = [ "//tsl/platform", + "//tsl/platform:env", "//tsl/platform:logging", "//tsl/platform:path", "//tsl/platform:types", diff --git a/third_party/xla/third_party/tsl/tsl/platform/default/cuda_libdevice_path.cc b/third_party/xla/third_party/tsl/tsl/platform/default/cuda_libdevice_path.cc index 46321e74b5dc38..ee95a3d17fd68e 100644 --- a/third_party/xla/third_party/tsl/tsl/platform/default/cuda_libdevice_path.cc +++ b/third_party/xla/third_party/tsl/tsl/platform/default/cuda_libdevice_path.cc @@ -31,6 +31,7 @@ limitations under the License. #if !defined(PLATFORM_GOOGLE) #include "third_party/gpus/cuda/cuda_config.h" +#include "tsl/platform/env.h" #endif #include "tsl/platform/logging.h" @@ -40,6 +41,17 @@ std::vector CandidateCudaRoots() { #if !defined(PLATFORM_GOOGLE) auto roots = std::vector{TF_CUDA_TOOLKIT_PATH, std::string("/usr/local/cuda")}; + std::string runfiles_suffix = "runfiles"; + std::string executable_path = tsl::Env::Default()->GetExecutablePath(); + std::string cuda_nvcc_dir = + io::JoinPath(executable_path + "." + runfiles_suffix, "cuda_nvcc"); + roots.emplace_back(cuda_nvcc_dir); + std::string runfiles_dir = tsl::Env::Default()->GetRunfilesDir(); + std::size_t runfiles_ind = runfiles_dir.rfind(runfiles_suffix); + cuda_nvcc_dir = io::JoinPath( + runfiles_dir.substr(0, runfiles_ind + runfiles_suffix.length()), + "cuda_nvcc"); + roots.emplace_back(cuda_nvcc_dir); #if defined(PLATFORM_POSIX) && !defined(__APPLE__) Dl_info info; @@ -53,6 +65,9 @@ std::vector CandidateCudaRoots() { // relative to the current binary for the wheel-based nvcc package. for (auto path : {"../nvidia/cuda_nvcc", "../../nvidia/cuda_nvcc"}) roots.emplace_back(io::JoinPath(dir, path)); + + // Also add the path to the copy of libdevice.10.bc we include with XLA. + roots.emplace_back(io::JoinPath(dir, "cuda")); } #endif // defined(PLATFORM_POSIX) && !defined(__APPLE__) diff --git a/third_party/xla/third_party/tsl/workspace2.bzl b/third_party/xla/third_party/tsl/workspace2.bzl index 001ce018d87066..7274961489c516 100644 --- a/third_party/xla/third_party/tsl/workspace2.bzl +++ b/third_party/xla/third_party/tsl/workspace2.bzl @@ -6,6 +6,7 @@ load("@bazel_skylib//lib:versions.bzl", "versions") # Import external repository rules. load("@bazel_tools//tools/build_defs/repo:java.bzl", "java_import_external") load("@io_bazel_rules_closure//closure:defs.bzl", "filegroup_external") +load("//third_party:cuda_repo.bzl", "cuda_distributives") load("//third_party:repo.bzl", "tf_http_archive", "tf_mirror_urls") # Import third party repository rules. See go/tfbr-thirdparty. 
@@ -17,14 +18,20 @@ load("//third_party/eigen3:workspace.bzl", eigen3 = "repo") load("//third_party/farmhash:workspace.bzl", farmhash = "repo") load("//third_party/gemmlowp:workspace.bzl", gemmlowp = "repo") load("//third_party/git:git_configure.bzl", "git_configure") -load("//third_party/gpus:cuda_configure.bzl", "cuda_configure") + +# If you need to use non-hermetic CUDA, replace the line below with +# load("//third_party/gpus:cuda_configure.bzl", "cuda_configure") +load("//third_party/gpus:hermetic_cuda_configure.bzl", "hermetic_cuda_configure") load("//third_party/gpus:rocm_configure.bzl", "rocm_configure") load("//third_party/gpus:sycl_configure.bzl", "sycl_configure") load("//third_party/hwloc:workspace.bzl", hwloc = "repo") load("//third_party/implib_so:workspace.bzl", implib_so = "repo") load("//third_party/llvm:setup.bzl", "llvm_setup") load("//third_party/nasm:workspace.bzl", nasm = "repo") -load("//third_party/nccl:nccl_configure.bzl", "nccl_configure") + +# If you need to use non-hermetic CUDA, replace the line below with +# load("//third_party/nccl:nccl_configure.bzl", "nccl_configure") +load("//third_party/nccl:hermetic_nccl_configure.bzl", "hermetic_nccl_configure") load("//third_party/py:python_configure.bzl", "python_configure") load("//third_party/py/ml_dtypes:workspace.bzl", ml_dtypes = "repo") load("//third_party/pybind11_abseil:workspace.bzl", pybind11_abseil = "repo") @@ -69,9 +76,15 @@ def _tf_toolchains(): # Note that we check the minimum bazel version in WORKSPACE. clang6_configure(name = "local_config_clang6") cc_download_clang_toolchain(name = "local_config_download_clang") - cuda_configure(name = "local_config_cuda") + + # If you need to use non-hermetic CUDA, replace the line below with + # cuda_configure(name = "local_config_cuda") + hermetic_cuda_configure(name = "local_config_cuda") tensorrt_configure(name = "local_config_tensorrt") - nccl_configure(name = "local_config_nccl") + + # If you need to use non-hermetic CUDA, replace the line below with + # nccl_configure(name = "local_config_nccl") + hermetic_nccl_configure(name = "local_config_nccl") git_configure(name = "local_config_git") syslibs_configure(name = "local_config_syslibs") python_configure(name = "local_config_python") @@ -597,6 +610,28 @@ def _tf_repositories(): urls = tf_mirror_urls("https://github.com/google/glog/archive/refs/tags/v0.4.0.tar.gz"), ) +_CUDA_12_3_NCCL_WHEEL_DICT = { + "x86_64-unknown-linux-gnu": { + "url": "https://files.pythonhosted.org/packages/38/00/d0d4e48aef772ad5aebcf70b73028f88db6e5640b36c38e90445b7a57c45/nvidia_nccl_cu12-2.19.3-py3-none-manylinux1_x86_64.whl", + "sha256": "a9734707a2c96443331c1e48c717024aa6678a0e2a4cb66b2c364d18cee6b48d", + }, + "aarch64-unknown-linux-gnu": { + "url": "https://files.pythonhosted.org/packages/c1/bb/d09dda47c881f9ff504afd6f9ca4f502ded6d8fc2f572cacc5e39da91c28/nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_aarch64.whl", + "sha256": "1fc150d5c3250b170b29410ba682384b14581db722b2531b0d8d33c595f33d01", + }, +} + +_CUDA_12_1_NCCL_WHEEL_DICT = { + "x86_64-unknown-linux-gnu": { + "url": "https://files.pythonhosted.org/packages/38/00/d0d4e48aef772ad5aebcf70b73028f88db6e5640b36c38e90445b7a57c45/nvidia_nccl_cu12-2.19.3-py3-none-manylinux1_x86_64.whl", + "sha256": "a9734707a2c96443331c1e48c717024aa6678a0e2a4cb66b2c364d18cee6b48d", + }, + "aarch64-unknown-linux-gnu": { + "url": "https://files.pythonhosted.org/packages/c1/bb/d09dda47c881f9ff504afd6f9ca4f502ded6d8fc2f572cacc5e39da91c28/nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_aarch64.whl", + 
"sha256": "1fc150d5c3250b170b29410ba682384b14581db722b2531b0d8d33c595f33d01", + }, +} + def workspace(): # Check the bazel version before executing any repository rules, in case # those rules rely on the version we require here. @@ -614,6 +649,10 @@ def workspace(): # don't already exist (at least if the external repository macros were # written according to common practice to query native.existing_rule()). _tf_repositories() + cuda_distributives(cuda_nccl_wheel_dict = { + "12.3.2": _CUDA_12_3_NCCL_WHEEL_DICT, + "12.1.1": _CUDA_12_1_NCCL_WHEEL_DICT, + }) # Alias so it can be loaded without assigning to a different symbol to prevent # shadowing previous loads and trigger a buildifier warning. diff --git a/third_party/xla/third_party/tsl/workspace3.bzl b/third_party/xla/third_party/tsl/workspace3.bzl index a1293f59a48885..adba216bc518a1 100644 --- a/third_party/xla/third_party/tsl/workspace3.bzl +++ b/third_party/xla/third_party/tsl/workspace3.bzl @@ -1,8 +1,31 @@ """TensorFlow workspace initialization. Consult the WORKSPACE on how to use it.""" load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") +load("//third_party:cuda_redist_json_repo.bzl", "cuda_redist_json") load("//third_party/llvm:workspace.bzl", llvm = "repo") +_CUDA_REDIST_JSON_DICT = { + "12.1.1": [ + "https://developer.download.nvidia.com/compute/cuda/redist/redistrib_12.1.1.json", + "bafea3cb83a4cf5c764eeedcaac0040d0d3c5db3f9a74550da0e7b6ac24d378c", + ], + "12.3.2": [ + "https://developer.download.nvidia.com/compute/cuda/redist/redistrib_12.3.2.json", + "1b6eacf335dd49803633fed53ef261d62c193e5a56eee5019e7d2f634e39e7ef", + ], +} + +_CUDNN_REDIST_JSON_DICT = { + "8.6": [ + "https://developer.download.nvidia.com/compute/cudnn/redist/redistrib_8.6.0.json", + "7f6f50bed4fd8216dc10d6ef505771dc0ecc99cce813993ab405cb507a21d51d", + ], + "8.9.7.29": [ + "https://developer.download.nvidia.com/compute/cudnn/redist/redistrib_8.9.7.29.json", + "a0734f26f068522464fa09b2f2c186dfbe6ad7407a88ea0c50dd331f0c3389ec", + ], +} + def workspace(): http_archive( name = "io_bazel_rules_closure", @@ -46,6 +69,13 @@ def workspace(): # but provides a script for setting up build rules via overlays. llvm("llvm-raw") + # Load JSON files for CUDA and cuDNN distribution versions. + cuda_redist_json( + name = "cuda_redist_json", + cuda_json_dict = _CUDA_REDIST_JSON_DICT, + cudnn_json_dict = _CUDNN_REDIST_JSON_DICT, + ) + # Alias so it can be loaded without assigning to a different symbol to prevent # shadowing previous loads and trigger a buildifier warning. 
tsl_workspace3 = workspace diff --git a/third_party/xla/tools/toolchains/remote_config/configs.bzl b/third_party/xla/tools/toolchains/remote_config/configs.bzl index f2eecd61a5faf7..c105ee4544f51f 100644 --- a/third_party/xla/tools/toolchains/remote_config/configs.bzl +++ b/third_party/xla/tools/toolchains/remote_config/configs.bzl @@ -703,7 +703,7 @@ def initialize_rbe_configs(): "TF_CUDNN_VERSION": "8.9", "TF_ENABLE_XLA": "1", "TF_NEED_CUDA": "1", - "TF_NEED_TENSORRT": "1", + "TF_NEED_TENSORRT": "0", "TF_SYSROOT": "/dt9", "TF_TENSORRT_VERSION": "8.6", }, @@ -742,7 +742,7 @@ def initialize_rbe_configs(): "TF_CUDNN_VERSION": "8.9", "TF_ENABLE_XLA": "1", "TF_NEED_CUDA": "1", - "TF_NEED_TENSORRT": "1", + "TF_NEED_TENSORRT": "0", "TF_SYSROOT": "/dt9", "TF_TENSORRT_VERSION": "8.6", }, @@ -782,7 +782,7 @@ def initialize_rbe_configs(): "TF_ENABLE_XLA": "1", "TF_NEED_CUDA": "1", "TF_SYSROOT": "/dt9", - "TF_NEED_TENSORRT": "1", + "TF_NEED_TENSORRT": "0", "TF_TENSORRT_VERSION": "8.6", }, ) @@ -820,7 +820,7 @@ def initialize_rbe_configs(): "TF_ENABLE_XLA": "1", "TF_NEED_CUDA": "1", "TF_SYSROOT": "/dt9", - "TF_NEED_TENSORRT": "1", + "TF_NEED_TENSORRT": "0", "TF_TENSORRT_VERSION": "8.6", }, ) diff --git a/third_party/xla/tools/toolchains/remote_config/rbe_config.bzl b/third_party/xla/tools/toolchains/remote_config/rbe_config.bzl index 18a84d96c39f82..9ade984f45351d 100644 --- a/third_party/xla/tools/toolchains/remote_config/rbe_config.bzl +++ b/third_party/xla/tools/toolchains/remote_config/rbe_config.bzl @@ -1,8 +1,13 @@ """Macro that creates external repositories for remote config.""" -load("//third_party/gpus:cuda_configure.bzl", "remote_cuda_configure") +# If you need to use non-hermetic CUDA, replace the line below with +# load("//third_party/gpus:cuda_configure.bzl", "remote_cuda_configure") +load("//third_party/gpus:hermetic_cuda_configure.bzl", "hermetic_cuda_configure") load("//third_party/gpus:rocm_configure.bzl", "remote_rocm_configure") -load("//third_party/nccl:nccl_configure.bzl", "remote_nccl_configure") + +# If you need to use non-hermetic CUDA, replace the line below with +# load("//third_party/nccl:nccl_configure.bzl", "remote_nccl_configure") +load("//third_party/nccl:hermetic_nccl_configure.bzl", "hermetic_nccl_configure") load("//third_party/py:python_configure.bzl", "local_python_configure", "remote_python_configure") load("//third_party/remote_config:remote_platform_configure.bzl", "remote_platform_configure") load("//third_party/tensorrt:tensorrt_configure.bzl", "remote_tensorrt_configure") @@ -42,7 +47,7 @@ def _tensorflow_rbe_config(name, compiler, python_versions, os, rocm_version = N "TF_CUDNN_VERSION": cudnn_version, "TF_CUDA_VERSION": cuda_version, "CUDNN_INSTALL_PATH": cudnn_install_path if cudnn_install_path != None else "/usr/lib/x86_64-linux-gnu", - "TF_NEED_TENSORRT": "1", + "TF_NEED_TENSORRT": "0", "TF_TENSORRT_VERSION": tensorrt_version if tensorrt_version != None else "", "TENSORRT_INSTALL_PATH": tensorrt_install_path if tensorrt_install_path != None else "/usr/lib/x86_64-linux-gnu", "GCC_HOST_COMPILER_PATH": compiler if not compiler.endswith("clang") else "", @@ -58,13 +63,17 @@ def _tensorflow_rbe_config(name, compiler, python_versions, os, rocm_version = N "Pool": "default", } - remote_cuda_configure( + # If you need to use non-hermetic CUDA, replace the call below with + # remote_cuda_configure. 
+ hermetic_cuda_configure( name = "%s_config_cuda" % name, environ = env, exec_properties = exec_properties, ) - remote_nccl_configure( + # If you need to use non-hermetic CUDA, replace the call below with + # remote_nccl_configure. + hermetic_nccl_configure( name = "%s_config_nccl" % name, environ = env, exec_properties = exec_properties, @@ -175,13 +184,15 @@ def sigbuild_tf_configs(name_container_map, env): "Pool": "default", } - remote_cuda_configure( + # TODO(ybaturina): non-hermetic CUDA is deprecated; replace with + # remote_cuda_configure only if it is still required. + hermetic_cuda_configure( name = "%s_config_cuda" % name, environ = env, exec_properties = exec_properties, ) - remote_nccl_configure( + # TODO(ybaturina): non-hermetic NCCL is deprecated; replace with + # remote_nccl_configure only if it is still required. + hermetic_nccl_configure( name = "%s_config_nccl" % name, environ = env, exec_properties = exec_properties, diff --git a/third_party/xla/xla/service/BUILD b/third_party/xla/xla/service/BUILD index 5c8aaea5723b70..ac1d1a87becb06 100644 --- a/third_party/xla/xla/service/BUILD +++ b/third_party/xla/xla/service/BUILD @@ -1382,6 +1382,7 @@ cc_library( ]) + if_cuda_is_configured([ "//xla/service/gpu:nvptx_compiler", "//xla/stream_executor/cuda:stream_executor_cuda", + "//xla/tsl:gpu_runtime_hermetic_cuda_deps", ]) + if_rocm_is_configured([ "//xla/service/gpu:amdgpu_compiler", "//xla/stream_executor/rocm:stream_executor_rocm", diff --git a/third_party/xla/xla/service/gpu/tests/add_preds.hlo b/third_party/xla/xla/service/gpu/tests/add_preds.hlo index 120b6a5ad686bf..b106b806c0470c 100644 --- a/third_party/xla/xla/service/gpu/tests/add_preds.hlo +++ b/third_party/xla/xla/service/gpu/tests/add_preds.hlo @@ -1,3 +1,4 @@ +// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../../cuda_nvcc" // RUN: hlo-opt %s --platform=gpu --stage=llvm-before-optimizations --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/%{GPU}.txtpb | FileCheck %s // CHECK: define void @fusion({{.*}}%[[ARG0:.*]], {{.*}}%[[ARG1:.*]], diff --git a/third_party/xla/xla/service/gpu/tests/calling_convention.hlo b/third_party/xla/xla/service/gpu/tests/calling_convention.hlo index c84e0194c347cb..ba00bd6423aa74 100644 --- a/third_party/xla/xla/service/gpu/tests/calling_convention.hlo +++ b/third_party/xla/xla/service/gpu/tests/calling_convention.hlo @@ -1,3 +1,4 @@ +// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../../cuda_nvcc" // RUN: hlo-opt %s --platform=gpu --stage=llvm-before-optimizations --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/%{GPU}.txtpb | FileCheck --check-prefixes=CHECK,CHECK-%{PTX} %s // Arguments are passed separately.
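The RUN lines added to these .hlo lit tests all follow one pattern: they export XLA_FLAGS so that --xla_gpu_cuda_data_dir points at the hermetic cuda_nvcc repository inside the test's runfiles tree, letting hlo-opt locate libdevice without a system CUDA installation. lit substitutes %S with the directory of the test source, so the ../ chain simply climbs from that directory to the runfiles root where external repositories are materialized: five segments for the tests under xla/service/gpu/tests here, four for the xla/tools/hlo_opt tests further below. A minimal Python sketch of the resolution, assuming the workspace directory sits directly under the runfiles root (the helper and the local_xla directory name are illustrative, not part of the patch):

import os

def cuda_data_dir(test_source_dir, levels_up):
    # Mirrors %S/../../../../../cuda_nvcc: climb from the test's source
    # directory up to the runfiles root, then enter the cuda_nvcc repo.
    return os.path.normpath(
        os.path.join(test_source_dir, *([".."] * levels_up), "cuda_nvcc"))

# A test under <runfiles>/local_xla/xla/service/gpu/tests is five levels deep:
print(cuda_data_dir("local_xla/xla/service/gpu/tests", 5))  # -> cuda_nvcc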
diff --git a/third_party/xla/xla/service/gpu/tests/copy.hlo b/third_party/xla/xla/service/gpu/tests/copy.hlo index beac8e6d36b115..997cefda91b22a 100644 --- a/third_party/xla/xla/service/gpu/tests/copy.hlo +++ b/third_party/xla/xla/service/gpu/tests/copy.hlo @@ -1,3 +1,4 @@ +// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../../cuda_nvcc" // RUN: hlo-opt %s --platform=gpu --stage=llvm-before-optimizations --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/%{GPU}.txtpb | FileCheck --check-prefixes=CHECK-%{PTX} %s // NOTE: Assertions have been autogenerated by utils/generate-test-checks.py diff --git a/third_party/xla/xla/service/gpu/tests/dynamic_update_slice_inplace.hlo b/third_party/xla/xla/service/gpu/tests/dynamic_update_slice_inplace.hlo index 3d0af18b081103..974fb26e5a9193 100644 --- a/third_party/xla/xla/service/gpu/tests/dynamic_update_slice_inplace.hlo +++ b/third_party/xla/xla/service/gpu/tests/dynamic_update_slice_inplace.hlo @@ -1,3 +1,4 @@ +// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../../cuda_nvcc" // RUN: hlo-opt %s --platform=gpu --stage=llvm-before-optimizations --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/%{GPU}.txtpb --split-input-file | FileCheck --check-prefixes=CHECK,CHECK-%{PTX} %s // NOTE: Assertions have been autogenerated by utils/generate-test-checks.py diff --git a/third_party/xla/xla/service/gpu/tests/element_wise_row_vectorization.hlo b/third_party/xla/xla/service/gpu/tests/element_wise_row_vectorization.hlo index b49e155da0a685..05d2b141757621 100644 --- a/third_party/xla/xla/service/gpu/tests/element_wise_row_vectorization.hlo +++ b/third_party/xla/xla/service/gpu/tests/element_wise_row_vectorization.hlo @@ -1,3 +1,4 @@ +// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../../cuda_nvcc" // RUN: hlo-opt %s --platform=gpu --stage=ptx --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/v100.txtpb --split-input-file | FileCheck %s // RUN: hlo-opt %s --platform=gpu --stage=llvm-before-optimizations --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/%{GPU}.txtpb --split-input-file | FileCheck --check-prefixes=CHECK-LLVM %s // We check that the row loads are vectorized. 
diff --git a/third_party/xla/xla/service/gpu/tests/fused_scatter.hlo b/third_party/xla/xla/service/gpu/tests/fused_scatter.hlo index 9a30436ebfa38c..e11711b8ba9556 100644 --- a/third_party/xla/xla/service/gpu/tests/fused_scatter.hlo +++ b/third_party/xla/xla/service/gpu/tests/fused_scatter.hlo @@ -1,3 +1,4 @@ +// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../../cuda_nvcc" // RUN: hlo-opt %s --platform=gpu --stage=llvm-before-optimizations --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/%{GPU}.txtpb | FileCheck --check-prefixes=CHECK,CHECK-%{PTX} %s // NOTE: Assertions have been autogenerated by utils/generate-test-checks.py diff --git a/third_party/xla/xla/service/gpu/tests/fused_slice.hlo b/third_party/xla/xla/service/gpu/tests/fused_slice.hlo index 4affcb0de7533b..b5abb7dafa5960 100644 --- a/third_party/xla/xla/service/gpu/tests/fused_slice.hlo +++ b/third_party/xla/xla/service/gpu/tests/fused_slice.hlo @@ -1,3 +1,4 @@ +// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../../cuda_nvcc" // RUN: hlo-opt %s --platform=gpu --stage=llvm-before-optimizations --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/%{GPU}.txtpb | FileCheck --check-prefixes=CHECK,CHECK-%{PTX} %s // NOTE: Assertions have been autogenerated by utils/generate-test-checks.py diff --git a/third_party/xla/xla/service/gpu/tests/kernel_reuse.hlo b/third_party/xla/xla/service/gpu/tests/kernel_reuse.hlo index 41734e06259a00..431edac2748f42 100644 --- a/third_party/xla/xla/service/gpu/tests/kernel_reuse.hlo +++ b/third_party/xla/xla/service/gpu/tests/kernel_reuse.hlo @@ -1,3 +1,4 @@ +// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../../cuda_nvcc" // RUN: hlo-opt %s --platform=gpu --stage=llvm-before-optimizations --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/%{GPU}.txtpb --split-input-file | FileCheck --check-prefixes=CHECK,CHECK-%{PTX} %s // All fusions must reuse the same kernel: diff --git a/third_party/xla/xla/service/gpu/tests/launch_dimensions.hlo b/third_party/xla/xla/service/gpu/tests/launch_dimensions.hlo index bcfa37733f7e67..ecdb8e91df4342 100644 --- a/third_party/xla/xla/service/gpu/tests/launch_dimensions.hlo +++ b/third_party/xla/xla/service/gpu/tests/launch_dimensions.hlo @@ -1,3 +1,4 @@ +// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../../cuda_nvcc" // RUN: hlo-opt %s --platform=gpu --stage=llvm-before-optimizations --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/%{GPU}.txtpb --split-input-file | FileCheck --check-prefixes=CHECK,CHECK-%{PTX} %s // This tests that we do not increase the grid launch size when // few_waves is enabled. 
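The same RUN-line change repeats across the remaining GPU lit tests below. For background, the flag works together with the runfiles fallback added to CandidateCudaRoots in cuda_libdevice_path.cc earlier in this patch, which derives the hermetic CUDA root in two ways: from the executable path plus a ".runfiles" suffix, and by truncating GetRunfilesDir() at its last "runfiles" component. A rough Python rendering of that path arithmetic (a sketch mirroring the C++ above, not the implementation itself):

import os

def hermetic_cuda_roots(executable_path, runfiles_dir):
    suffix = "runfiles"
    roots = []
    # <binary>.runfiles/cuda_nvcc, derived from the executable location.
    roots.append(os.path.join(executable_path + "." + suffix, "cuda_nvcc"))
    # Cut runfiles_dir back to its last "runfiles" component, then descend.
    ind = runfiles_dir.rfind(suffix)
    roots.append(os.path.join(runfiles_dir[:ind + len(suffix)], "cuda_nvcc"))
    return roots

# Both forms resolve to /tmp/hlo_opt_test.runfiles/cuda_nvcc:
print(hermetic_cuda_roots("/tmp/hlo_opt_test",
                          "/tmp/hlo_opt_test.runfiles/local_xla"))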
diff --git a/third_party/xla/xla/service/gpu/tests/pad_to_static.hlo b/third_party/xla/xla/service/gpu/tests/pad_to_static.hlo index 6e147df3928c09..5f2d6d64eb829d 100644 --- a/third_party/xla/xla/service/gpu/tests/pad_to_static.hlo +++ b/third_party/xla/xla/service/gpu/tests/pad_to_static.hlo @@ -1,3 +1,4 @@ +// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../../cuda_nvcc" // RUN: hlo-opt %s --platform=gpu --stage=llvm-before-optimizations --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/%{GPU}.txtpb | FileCheck --check-prefixes=CHECK,CHECK-%{PTX} %s // NOTE: Assertions have been autogenerated by utils/generate-test-checks.py diff --git a/third_party/xla/xla/service/gpu/tests/reduce_atomic_min.hlo b/third_party/xla/xla/service/gpu/tests/reduce_atomic_min.hlo index c7165c9e11763c..05537b327fdc57 100644 --- a/third_party/xla/xla/service/gpu/tests/reduce_atomic_min.hlo +++ b/third_party/xla/xla/service/gpu/tests/reduce_atomic_min.hlo @@ -1,3 +1,4 @@ +// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../../cuda_nvcc" // RUN: hlo-opt %s --platform=gpu --stage=llvm-before-optimizations --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/%{GPU}.txtpb | FileCheck %s --check-prefixes=CHECK,CHECK-%{PTX} // Check that for "min" we are still using atomics (CAS loop). diff --git a/third_party/xla/xla/service/gpu/tests/reduce_column_layout_change.hlo b/third_party/xla/xla/service/gpu/tests/reduce_column_layout_change.hlo index cb30643886de4e..1b30f13e0e1af9 100644 --- a/third_party/xla/xla/service/gpu/tests/reduce_column_layout_change.hlo +++ b/third_party/xla/xla/service/gpu/tests/reduce_column_layout_change.hlo @@ -1,3 +1,4 @@ +// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../../cuda_nvcc" // RUN: hlo-opt %s --platform=gpu --stage=llvm-before-optimizations --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/%{GPU}.txtpb | FileCheck %s --check-prefixes=CHECK,CHECK-%{PTX} HloModule reduce_with_layout_change, is_scheduled=true diff --git a/third_party/xla/xla/service/gpu/tests/reduce_f64_column.hlo b/third_party/xla/xla/service/gpu/tests/reduce_f64_column.hlo index 982e45863e2547..70138e77d9362b 100644 --- a/third_party/xla/xla/service/gpu/tests/reduce_f64_column.hlo +++ b/third_party/xla/xla/service/gpu/tests/reduce_f64_column.hlo @@ -1,3 +1,4 @@ +// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../../cuda_nvcc" // RUN: hlo-opt %s --platform=gpu --stage=llvm-before-optimizations --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/%{GPU}.txtpb | FileCheck %s --check-prefixes=CHECK,CHECK-%{PTX} HloModule m, is_scheduled=true diff --git a/third_party/xla/xla/service/gpu/tests/reduce_large_row_to_scalar.hlo b/third_party/xla/xla/service/gpu/tests/reduce_large_row_to_scalar.hlo index 736e583ad4c3c9..5fa7f4ed5b5c2c 100644 --- a/third_party/xla/xla/service/gpu/tests/reduce_large_row_to_scalar.hlo +++ b/third_party/xla/xla/service/gpu/tests/reduce_large_row_to_scalar.hlo @@ -1,3 +1,4 @@ +// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../../cuda_nvcc" // RUN: hlo-opt %s --platform=gpu --stage=llvm-before-optimizations --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/%{GPU}.txtpb --split-input-file | FileCheck %s --check-prefixes=CHECK,CHECK-%{PTX} HloModule LargeReduction, is_scheduled=true diff --git a/third_party/xla/xla/service/gpu/tests/reduce_row_vectorized.hlo b/third_party/xla/xla/service/gpu/tests/reduce_row_vectorized.hlo index 
bba7986d830fb3..7107fd24d491f2 100644 --- a/third_party/xla/xla/service/gpu/tests/reduce_row_vectorized.hlo +++ b/third_party/xla/xla/service/gpu/tests/reduce_row_vectorized.hlo @@ -1,3 +1,4 @@ +// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../../cuda_nvcc" // RUN: hlo-opt %s --platform=gpu --stage=llvm-before-optimizations --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/%{GPU}.txtpb | FileCheck %s --check-prefixes=CHECK,CHECK-%{PTX} HloModule RowReductionVectorized, is_scheduled=true diff --git a/third_party/xla/xla/service/gpu/tests/reduce_unnested.hlo b/third_party/xla/xla/service/gpu/tests/reduce_unnested.hlo index 844c3ded2ef024..919a4dc1e9c7a9 100644 --- a/third_party/xla/xla/service/gpu/tests/reduce_unnested.hlo +++ b/third_party/xla/xla/service/gpu/tests/reduce_unnested.hlo @@ -1,3 +1,4 @@ +// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../../cuda_nvcc" // RUN: hlo-opt %s --platform=gpu --stage=llvm-before-optimizations --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/%{GPU}.txtpb --split-input-file | FileCheck %s // CHECK: define void @fusion_row_reduction_too_small( diff --git a/third_party/xla/xla/service/gpu/tests/reduce_variadic_column.hlo b/third_party/xla/xla/service/gpu/tests/reduce_variadic_column.hlo index 36008daa5ceda8..64029a503506b8 100644 --- a/third_party/xla/xla/service/gpu/tests/reduce_variadic_column.hlo +++ b/third_party/xla/xla/service/gpu/tests/reduce_variadic_column.hlo @@ -1,3 +1,4 @@ +// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../../cuda_nvcc" // RUN: hlo-opt %s --platform=gpu --stage=llvm-before-optimizations --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/%{GPU}.txtpb | FileCheck %s --check-prefixes=CHECK,CHECK-%{PTX} HloModule Test, is_scheduled=true diff --git a/third_party/xla/xla/service/gpu/tests/reduction_vectorization_sm_all.hlo b/third_party/xla/xla/service/gpu/tests/reduction_vectorization_sm_all.hlo index 6a25580a4bcff9..a1d7eb6aa38619 100644 --- a/third_party/xla/xla/service/gpu/tests/reduction_vectorization_sm_all.hlo +++ b/third_party/xla/xla/service/gpu/tests/reduction_vectorization_sm_all.hlo @@ -1,3 +1,4 @@ +// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../../cuda_nvcc" // RUN: hlo-opt %s --platform=gpu --stage=ptx --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/%{GPU}.txtpb --split-input-file | FileCheck %s // RUN: hlo-opt %s --platform=gpu --stage=ptx --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/p100.txtpb --split-input-file | FileCheck %s --check-prefixes=CHECK-SM60 // RUN: hlo-opt %s --platform=gpu --stage=ptx --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/v100.txtpb --split-input-file | FileCheck %s --check-prefixes=CHECK-SM70 diff --git a/third_party/xla/xla/service/gpu/tests/rng_get_and_update_state.hlo b/third_party/xla/xla/service/gpu/tests/rng_get_and_update_state.hlo index e140b56af9d60c..7836f3ee1ddbe9 100644 --- a/third_party/xla/xla/service/gpu/tests/rng_get_and_update_state.hlo +++ b/third_party/xla/xla/service/gpu/tests/rng_get_and_update_state.hlo @@ -1,3 +1,4 @@ +// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../../cuda_nvcc" // RUN: hlo-opt %s --platform=gpu --stage=llvm-before-optimizations --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/%{GPU}.txtpb | FileCheck %s HloModule TestModule, is_scheduled=true diff --git a/third_party/xla/xla/service/gpu/tests/scatter.hlo 
b/third_party/xla/xla/service/gpu/tests/scatter.hlo index 20211bdbe892f4..b81113587814da 100644 --- a/third_party/xla/xla/service/gpu/tests/scatter.hlo +++ b/third_party/xla/xla/service/gpu/tests/scatter.hlo @@ -1,5 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/generate-test-checks.py +// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../../cuda_nvcc" // RUN: hlo-opt %s --platform=gpu --stage=llvm-before-optimizations --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/%{GPU}.txtpb --split-input-file | FileCheck --check-prefixes=CHECK,CHECK-%{PTX} %s // CHECK-LABEL: entry: diff --git a/third_party/xla/xla/service/gpu/tests/select_and_scatter.hlo b/third_party/xla/xla/service/gpu/tests/select_and_scatter.hlo index 08751943c13efb..587a605dca24ee 100644 --- a/third_party/xla/xla/service/gpu/tests/select_and_scatter.hlo +++ b/third_party/xla/xla/service/gpu/tests/select_and_scatter.hlo @@ -1,3 +1,4 @@ +// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../../cuda_nvcc" // RUN: hlo-opt %s --platform=gpu --stage=llvm-before-optimizations --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/%{GPU}.txtpb | FileCheck --check-prefixes=CHECK,CHECK-%{PTX} %s // NOTE: Assertions have been autogenerated by utils/generate-test-checks.py diff --git a/third_party/xla/xla/service/gpu/tests/single_instruction.hlo b/third_party/xla/xla/service/gpu/tests/single_instruction.hlo index c8378f746aa983..3fdbc565981679 100644 --- a/third_party/xla/xla/service/gpu/tests/single_instruction.hlo +++ b/third_party/xla/xla/service/gpu/tests/single_instruction.hlo @@ -1,3 +1,4 @@ +// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../../cuda_nvcc" // RUN: hlo-opt %s --platform=gpu --stage=ptx --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/%{GPU}.txtpb --split-input-file | FileCheck %s // RUN: hlo-opt %s --platform=gpu --stage=ptx --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/a100_80.txtpb --split-input-file | FileCheck %s --check-prefixes=CHECK-SM80 // RUN: hlo-opt %s --platform=gpu --stage=ptx --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/h100.txtpb --split-input-file | FileCheck %s --check-prefixes=CHECK-SM90 diff --git a/third_party/xla/xla/service/gpu/tests/slice_to_dynamic.hlo b/third_party/xla/xla/service/gpu/tests/slice_to_dynamic.hlo index 242bd749bdaf11..a62181874c323c 100644 --- a/third_party/xla/xla/service/gpu/tests/slice_to_dynamic.hlo +++ b/third_party/xla/xla/service/gpu/tests/slice_to_dynamic.hlo @@ -1,3 +1,4 @@ +// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../../cuda_nvcc" // RUN: hlo-opt %s --platform=gpu --stage=llvm-before-optimizations --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/%{GPU}.txtpb | FileCheck --check-prefixes=CHECK,CHECK-%{PTX} %s // NOTE: Assertions have been autogenerated by utils/generate-test-checks.py diff --git a/third_party/xla/xla/service/gpu/tests/transpose_021.hlo b/third_party/xla/xla/service/gpu/tests/transpose_021.hlo index 7d3e1fe0ffb9f6..ef8580fa62ac77 100644 --- a/third_party/xla/xla/service/gpu/tests/transpose_021.hlo +++ b/third_party/xla/xla/service/gpu/tests/transpose_021.hlo @@ -1,4 +1,5 @@ // NOTE: Assertions have been autogenerated by utils/generate-test-checks.py +// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../../cuda_nvcc" // RUN: hlo-opt %s --platform=gpu --stage=llvm-before-optimizations 
--xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/%{GPU}.txtpb --split-input-file | FileCheck --check-prefixes=CHECK,CHECK-%{PTX} %s HloModule Transpose, is_scheduled=true diff --git a/third_party/xla/xla/service/gpu/tests/transpose_021_extra_output.hlo b/third_party/xla/xla/service/gpu/tests/transpose_021_extra_output.hlo index 5e638321294f1e..659a9e2abf3dec 100644 --- a/third_party/xla/xla/service/gpu/tests/transpose_021_extra_output.hlo +++ b/third_party/xla/xla/service/gpu/tests/transpose_021_extra_output.hlo @@ -1,4 +1,5 @@ // NOTE: Assertions have been autogenerated by utils/generate-test-checks.py +// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../../cuda_nvcc" // RUN: hlo-opt %s --platform=gpu --stage=llvm-before-optimizations --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/%{GPU}.txtpb --split-input-file | FileCheck --check-prefixes=CHECK,CHECK-%{PTX} %s HloModule Transpose, is_scheduled=true diff --git a/third_party/xla/xla/service/gpu/tests/transpose_210.hlo b/third_party/xla/xla/service/gpu/tests/transpose_210.hlo index f37bd17ffe2a6e..1fb1dd139797da 100644 --- a/third_party/xla/xla/service/gpu/tests/transpose_210.hlo +++ b/third_party/xla/xla/service/gpu/tests/transpose_210.hlo @@ -1,4 +1,5 @@ // NOTE: Assertions have been autogenerated by utils/generate-test-checks.py +// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../../cuda_nvcc" // RUN: hlo-opt %s --platform=gpu --stage=llvm-before-optimizations --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/%{GPU}.txtpb --split-input-file | FileCheck --check-prefixes=CHECK,CHECK-%{PTX} %s HloModule Transpose, is_scheduled=true diff --git a/third_party/xla/xla/service/gpu/tests/transpose_210_extra_output.hlo b/third_party/xla/xla/service/gpu/tests/transpose_210_extra_output.hlo index a3831d2da1de52..b9693dfc388679 100644 --- a/third_party/xla/xla/service/gpu/tests/transpose_210_extra_output.hlo +++ b/third_party/xla/xla/service/gpu/tests/transpose_210_extra_output.hlo @@ -1,4 +1,5 @@ // NOTE: Assertions have been autogenerated by utils/generate-test-checks.py +// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../../cuda_nvcc" // RUN: hlo-opt %s --platform=gpu --stage=llvm-before-optimizations --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/%{GPU}.txtpb --split-input-file | FileCheck --check-prefixes=CHECK,CHECK-%{PTX} %s HloModule Transpose, is_scheduled=true diff --git a/third_party/xla/xla/service/gpu/tests/triton_naming.hlo b/third_party/xla/xla/service/gpu/tests/triton_naming.hlo index 2739e349181786..36704d7fcb280f 100644 --- a/third_party/xla/xla/service/gpu/tests/triton_naming.hlo +++ b/third_party/xla/xla/service/gpu/tests/triton_naming.hlo @@ -1,3 +1,4 @@ +// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../../cuda_nvcc" // RUN: hlo-opt %s --platform=gpu --stage=llvm-before-optimizations --xla_gpu_target_config_filename=%S/../../../tools/hlo_opt/gpu_specs/%{GPU}.txtpb | FileCheck --check-prefixes=CHECK-%{PTX} %s // CHECK-PTX: define void @triton_gemm_r( diff --git a/third_party/xla/xla/stream_executor/cuda/BUILD b/third_party/xla/xla/stream_executor/cuda/BUILD index 234649a31ea65b..30d0d3d48c51dd 100644 --- a/third_party/xla/xla/stream_executor/cuda/BUILD +++ b/third_party/xla/xla/stream_executor/cuda/BUILD @@ -24,7 +24,7 @@ load( "tf_additional_cudnn_plugin_copts", "tf_additional_gpu_compilation_copts", ) -load("//xla/tsl:tsl.bzl", "if_google", "if_nccl", "internal_visibility", "tsl_copts") 
+load("//xla/tsl:tsl.bzl", "if_google", "if_hermetic_cuda_tools", "if_nccl", "internal_visibility", "tsl_copts") package( # copybara:uncomment default_applicable_licenses = ["//tensorflow:license"], @@ -113,13 +113,31 @@ cuda_only_cc_library( # Buildozer can not remove dependencies inside select guards, so we have to use # an intermediate target. -cc_library(name = "ptxas_wrapper") +cc_library( + name = "ptxas_wrapper", + data = if_hermetic_cuda_tools( + ["@cuda_nvcc//:ptxas"], + [], + ), +) -cc_library(name = "nvlink_wrapper") +cc_library( + name = "nvlink_wrapper", + data = if_hermetic_cuda_tools( + ["@cuda_nvcc//:nvlink"], + [], + ), +) # Buildozer can not remove dependencies inside select guards, so we have to use # an intermediate target. -cc_library(name = "fatbinary_wrapper") +cc_library( + name = "fatbinary_wrapper", + data = if_hermetic_cuda_tools( + ["@cuda_nvcc//:fatbinary"], + [], + ), +) cuda_only_cc_library( name = "cuda_driver", diff --git a/third_party/xla/xla/tools/hlo_opt/gpu_hlo.hlo b/third_party/xla/xla/tools/hlo_opt/gpu_hlo.hlo index 0633b7e5ef7ce8..8ab7b9039faf0f 100755 --- a/third_party/xla/xla/tools/hlo_opt/gpu_hlo.hlo +++ b/third_party/xla/xla/tools/hlo_opt/gpu_hlo.hlo @@ -1,3 +1,4 @@ +// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../cuda_nvcc" // RUN: hlo-opt %s --platform=gpu --stage=hlo --xla_gpu_target_config_filename=%S/gpu_specs/%{GPU}.txtpb | FileCheck %s HloModule module diff --git a/third_party/xla/xla/tools/hlo_opt/gpu_hlo_backend.hlo b/third_party/xla/xla/tools/hlo_opt/gpu_hlo_backend.hlo index 61b6b9aa778b9e..66317c2d276c46 100644 --- a/third_party/xla/xla/tools/hlo_opt/gpu_hlo_backend.hlo +++ b/third_party/xla/xla/tools/hlo_opt/gpu_hlo_backend.hlo @@ -1,3 +1,4 @@ +// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../cuda_nvcc" // RUN: hlo-opt %s --platform=gpu --stage=hlo-backend --xla_gpu_target_config_filename=%S/gpu_specs/%{GPU}.txtpb | FileCheck %s HloModule module diff --git a/third_party/xla/xla/tools/hlo_opt/gpu_hlo_buffers.hlo b/third_party/xla/xla/tools/hlo_opt/gpu_hlo_buffers.hlo index e7b8321cc6480b..a706e7c75df0d1 100644 --- a/third_party/xla/xla/tools/hlo_opt/gpu_hlo_buffers.hlo +++ b/third_party/xla/xla/tools/hlo_opt/gpu_hlo_buffers.hlo @@ -1,3 +1,4 @@ +// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../cuda_nvcc" // RUN: hlo-opt %s --platform=gpu --stage=buffer-assignment --xla_gpu_target_config_filename=%S/gpu_specs/%{GPU}.txtpb | FileCheck %s HloModule m diff --git a/third_party/xla/xla/tools/hlo_opt/gpu_hlo_llvm.hlo b/third_party/xla/xla/tools/hlo_opt/gpu_hlo_llvm.hlo index 59800a9d170560..2eb00d4cac81b8 100644 --- a/third_party/xla/xla/tools/hlo_opt/gpu_hlo_llvm.hlo +++ b/third_party/xla/xla/tools/hlo_opt/gpu_hlo_llvm.hlo @@ -1,3 +1,4 @@ +// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../cuda_nvcc" // RUN: hlo-opt %s --platform=gpu --stage=llvm --xla_gpu_target_config_filename=%S/gpu_specs/%{GPU}.txtpb --split-input-file | FileCheck --check-prefixes=CHECK,CHECK-%{PTX} %s HloModule m diff --git a/third_party/xla/xla/tools/hlo_opt/gpu_hlo_ptx.hlo b/third_party/xla/xla/tools/hlo_opt/gpu_hlo_ptx.hlo index 5c6485a57813a8..fae7ed1437107d 100644 --- a/third_party/xla/xla/tools/hlo_opt/gpu_hlo_ptx.hlo +++ b/third_party/xla/xla/tools/hlo_opt/gpu_hlo_ptx.hlo @@ -1,3 +1,4 @@ +// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../cuda_nvcc" // RUN: hlo-opt %s --platform=CUDA --stage=ptx --xla_gpu_target_config_filename=%S/gpu_specs/%{GPU}.txtpb --split-input-file | FileCheck %s 
HloModule m diff --git a/third_party/xla/xla/tools/hlo_opt/gpu_hlo_unoptimized_llvm.hlo b/third_party/xla/xla/tools/hlo_opt/gpu_hlo_unoptimized_llvm.hlo index 6c8bc8bd54fe6a..63766db3e0b039 100644 --- a/third_party/xla/xla/tools/hlo_opt/gpu_hlo_unoptimized_llvm.hlo +++ b/third_party/xla/xla/tools/hlo_opt/gpu_hlo_unoptimized_llvm.hlo @@ -1,3 +1,4 @@ +// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../cuda_nvcc" // RUN: hlo-opt %s --platform=gpu --stage=llvm-before-optimizations --xla_gpu_target_config_filename=%S/gpu_specs/%{GPU}.txtpb | FileCheck %s // CHECK: fusion.in_bounds-true: diff --git a/third_party/xla/xla/tsl/BUILD b/third_party/xla/xla/tsl/BUILD index 8a7cb42086d5d5..47d112444da58e 100644 --- a/third_party/xla/xla/tsl/BUILD +++ b/third_party/xla/xla/tsl/BUILD @@ -1,7 +1,7 @@ load("@bazel_skylib//:bzl_library.bzl", "bzl_library") load("@bazel_skylib//lib:selects.bzl", "selects") load("@bazel_skylib//rules:common_settings.bzl", "bool_flag", "bool_setting") -load("tsl.bzl", "if_google", "if_oss") +load("tsl.bzl", "if_google", "if_hermetic_cuda_libs", "if_oss") # copybara:uncomment package(default_applicable_licenses = ["//tensorflow:license"]) @@ -551,3 +551,25 @@ cc_library( }), ) # copybara:comment_end + +cc_library( + name = "gpu_runtime_hermetic_cuda_deps", + tags = ["manual"], + visibility = ["//visibility:public"], + deps = if_hermetic_cuda_libs([ + "@cuda_cudart//:cudart", + "@cuda_cudnn//:cudnn", + "@cuda_cudnn//:cudnn_ops_infer", + "@cuda_cudnn//:cudnn_cnn_infer", + "@cuda_cudnn//:cudnn_ops_train", + "@cuda_cudnn//:cudnn_cnn_train", + "@cuda_cudnn//:cudnn_adv_infer", + "@cuda_cudnn//:cudnn_adv_train", + "@cuda_cublas//:cublas", + "@cuda_cublas//:cublasLt", + "@cuda_cusolver//:cusolver", + "@cuda_cufft//:cufft", + "@cuda_cusparse//:cusparse", + "@cuda_nvjitlink//:nvjitlink", + ]), +) diff --git a/third_party/xla/xla/tsl/cuda/BUILD.bazel b/third_party/xla/xla/tsl/cuda/BUILD.bazel index 6f0e9aefab72f0..0992a9bcf1c775 100644 --- a/third_party/xla/xla/tsl/cuda/BUILD.bazel +++ b/third_party/xla/xla/tsl/cuda/BUILD.bazel @@ -10,6 +10,10 @@ load( "cuda_rpath_flags", "if_cuda_is_configured", ) +load( + "//xla/tsl:tsl.bzl", + "if_hermetic_cuda_libs", +) load("//xla/tsl/cuda:stub.bzl", "cuda_stub") package( @@ -41,6 +45,8 @@ cc_library( "@local_tsl//tsl/platform:dso_loader", "@local_tsl//tsl/platform:logging", "@local_tsl//tsl/platform:load_library", + ]) + if_hermetic_cuda_libs([ + "@cuda_cublas//:cublas", ]), ) @@ -65,6 +71,8 @@ cc_library( "@local_tsl//tsl/platform:dso_loader", "@local_tsl//tsl/platform:logging", "@local_tsl//tsl/platform:load_library", + ]) + if_hermetic_cuda_libs([ + "@cuda_cublas//:cublasLt", ]), ) @@ -126,7 +134,9 @@ cc_library( "@local_tsl//tsl/platform:logging", ], "//conditions:default": [], - }), + }) + if_hermetic_cuda_libs([ + "@cuda_cudart//:cudart", + ]), ) cuda_stub( @@ -152,6 +162,14 @@ cc_library( "@local_tsl//tsl/platform:dso_loader", "@local_tsl//tsl/platform:logging", "@local_tsl//tsl/platform:load_library", + ]) + if_hermetic_cuda_libs([ + "@cuda_cudnn//:cudnn", + "@cuda_cudnn//:cudnn_ops_infer", + "@cuda_cudnn//:cudnn_cnn_infer", + "@cuda_cudnn//:cudnn_ops_train", + "@cuda_cudnn//:cudnn_cnn_train", + "@cuda_cudnn//:cudnn_adv_infer", + "@cuda_cudnn//:cudnn_adv_train", ]), ) @@ -189,6 +207,8 @@ cc_library( "@local_tsl//tsl/platform:dso_loader", "@local_tsl//tsl/platform:logging", "@local_tsl//tsl/platform:load_library", + ]) + if_hermetic_cuda_libs([ + "@cuda_cufft//:cufft", ]), ) @@ -216,6 +236,8 @@ cc_library( 
"@local_tsl//tsl/platform:dso_loader", "@local_tsl//tsl/platform:logging", "@local_tsl//tsl/platform:load_library", + ]) + if_hermetic_cuda_libs([ + "@cuda_cupti//:cupti", ]), ) @@ -241,6 +263,8 @@ cc_library( "@local_tsl//tsl/platform:dso_loader", "@local_tsl//tsl/platform:logging", "@local_tsl//tsl/platform:load_library", + ]) + if_hermetic_cuda_libs([ + "@cuda_cusolver//:cusolver", ]), ) @@ -266,6 +290,9 @@ cc_library( "@local_tsl//tsl/platform:dso_loader", "@local_tsl//tsl/platform:logging", "@local_tsl//tsl/platform:load_library", + ]) + if_hermetic_cuda_libs([ + "@cuda_cusparse//:cusparse", + "@cuda_nvjitlink//:nvjitlink", ]), ) @@ -293,5 +320,7 @@ cc_library( "@local_tsl//tsl/platform:dso_loader", "@local_tsl//tsl/platform:logging", "@local_tsl//tsl/platform:load_library", + ]) + if_hermetic_cuda_libs([ + "@cuda_nccl//:nccl", ]), ) diff --git a/third_party/xla/xla/tsl/tsl.bzl b/third_party/xla/xla/tsl/tsl.bzl index 5ff893e03edda8..505a4deb8a4518 100644 --- a/third_party/xla/xla/tsl/tsl.bzl +++ b/third_party/xla/xla/tsl/tsl.bzl @@ -224,6 +224,17 @@ def if_with_tpu_support(if_true, if_false = []): "//conditions:default": if_false, }) +# These configs are used to determine whether we should use the hermetic CUDA +# tools in cc_libraries (see go/hermetic-cuda). +# They are intended for the OSS builds only. +def if_hermetic_cuda_tools(if_true, if_false = []): + """Shorthand for select()'ing on whether we're building with hermetic CUDA tools.""" + return select({"@local_config_cuda//cuda:hermetic_cuda_tools": if_true, "//conditions:default": if_false}) # copybara:comment_replace return if_false + +def if_hermetic_cuda_libs(if_true, if_false = []): + """Shorthand for select()'ing on whether we need to include hermetic CUDA libraries.""" + return select({"@local_config_cuda//cuda:hermetic_cuda_tools_and_libs": if_true, "//conditions:default": if_false}) # copybara:comment_replace return if_false + def get_win_copts(is_external = False): WINDOWS_COPTS = [ # copybara:uncomment_begin(no MSVC flags in google)