Introduce hermetic CUDA in Google ML projects.
Instead of relying on pre-installed NVIDIA CUDA and cuDNN libraries and on environment variables pointing to their installation locations, Bazel now automatically downloads the CUDA and cuDNN redistributables into its cache and uses them during the build and test phases.

The Bazel version used in JAX is bumped from 6.1.2 to 6.5.0.

PiperOrigin-RevId: 616865795
tensorflower-gardener committed May 7, 2024
1 parent 027d6b3 commit 9b69f8b
Showing 127 changed files with 5,095 additions and 766 deletions.
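
As a rough sketch of the new workflow (illustrative only: it simply combines the cuda_clang_official config and the wheel target that appear in the hunks below), a GPU build no longer needs any local toolkit setup:

# Illustrative only -- no pre-installed CUDA, no CUDA_TOOLKIT_PATH or
# LD_LIBRARY_PATH exports; Bazel downloads CUDA 12.3 and cuDNN 8.9 (the
# versions pinned in .bazelrc below) into its cache during the build.
bazel build --config=cuda_clang_official //tensorflow/tools/pip_package:wheel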
22 changes: 6 additions & 16 deletions .bazelrc
@@ -226,12 +226,13 @@ build:cuda --repo_env TF_NEED_CUDA=1
build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
build:cuda --@local_config_cuda//:enable_cuda

build:no_cuda_libs --@local_config_cuda//cuda:include_hermetic_cuda_libs=false

# CUDA: This config refers to building CUDA op kernels with clang.
build:cuda_clang --config=cuda
# Enable TensorRT optimizations https://developer.nvidia.com/tensorrt
build:cuda_clang --config=tensorrt
build:cuda_clang --action_env=TF_CUDA_CLANG="1"
build:cuda_clang --@local_config_cuda//:cuda_compiler=clang
build:cuda_clang --copt=-Qunused-arguments
# Select supported compute capabilities (supported graphics cards).
# This is the same as the official TensorFlow builds.
# See https://developer.nvidia.com/cuda-gpus#compute
@@ -244,12 +245,10 @@ build:cuda_clang --repo_env=TF_CUDA_COMPUTE_CAPABILITIES="sm_60,sm_70,sm_80,sm_8

# Set up compilation CUDA version and paths and use the CUDA Clang toolchain.
build:cuda_clang_official --config=cuda_clang
build:cuda_clang_official --action_env=TF_CUDA_VERSION="12"
build:cuda_clang_official --action_env=TF_CUDNN_VERSION="8"
build:cuda_clang_official --action_env=CUDA_TOOLKIT_PATH="/usr/local/cuda-12.3"
build:cuda_clang_official --action_env=TF_CUDA_VERSION="12.3"
build:cuda_clang_official --action_env=TF_CUDNN_VERSION="8.9"
build:cuda_clang_official --action_env=GCC_HOST_COMPILER_PATH="/dt9/usr/bin/gcc"
build:cuda_clang_official --action_env=CLANG_CUDA_COMPILER_PATH="/usr/lib/llvm-17/bin/clang"
build:cuda_clang_official --action_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
build:cuda_clang_official --crosstool_top="@sigbuild-r2.17-clang_config_cuda//crosstool:toolchain"

# Build with nvcc for CUDA and clang for host
@@ -545,10 +544,6 @@ build:rbe_linux_cuda --config=cuda_clang_official
build:rbe_linux_cuda --config=rbe_linux_cpu
# For Remote build execution -- GPU configuration
build:rbe_linux_cuda --repo_env=REMOTE_GPU_TESTING=1
build:rbe_linux_cuda --repo_env=TF_CUDA_CONFIG_REPO="@sigbuild-r2.17-clang_config_cuda"
build:rbe_linux_cuda --repo_env=TF_TENSORRT_CONFIG_REPO="@sigbuild-r2.17-clang_config_tensorrt"
build:rbe_linux_cuda --repo_env=TF_NCCL_CONFIG_REPO="@sigbuild-r2.17-clang_config_nccl"
test:rbe_linux_cuda --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"

build:rbe_linux_cuda_nvcc --config=rbe_linux_cuda
build:rbe_linux_cuda_nvcc --config=nvcc_clang
@@ -633,7 +628,6 @@ build:release_cpu_linux_base --repo_env=BAZEL_COMPILER="/usr/lib/llvm-17/bin/cla
# Test-related settings below this point.
test:release_linux_base --build_tests_only --keep_going --test_output=errors --verbose_failures=true
test:release_linux_base --local_test_jobs=HOST_CPUS
test:release_linux_base --test_env=LD_LIBRARY_PATH
# Give only the list of failed tests at the end of the log
test:release_linux_base --test_summary=short

@@ -645,7 +639,6 @@ build:release_gpu_linux --config=release_cpu_linux
# Set up compilation CUDA version and paths and use the CUDA Clang toolchain.
# Note that linux cpu and cuda builds share the same toolchain now.
build:release_gpu_linux --config=cuda_clang_official
test:release_gpu_linux --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
# Local test jobs has to be 4 because parallel_gpu_execute is fragile, I think
test:release_gpu_linux --test_timeout=300,450,1200,3600 --local_test_jobs=4 --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute

@@ -674,11 +667,8 @@ test:unsupported_cpu_linux --config=release_base
build:unsupported_gpu_linux --config=cuda
build:unsupported_gpu_linux --config=unsupported_cpu_linux
build:unsupported_gpu_linux --action_env=TF_CUDA_VERSION="11"
build:unsupported_gpu_linux --action_env=TF_CUDNN_VERSION="8"
build:unsupported_gpu_linux --action_env=TF_CUDNN_VERSION="8.6"
build:unsupported_gpu_linux --repo_env=TF_CUDA_COMPUTE_CAPABILITIES="sm_35,sm_50,sm_60,sm_70,sm_75,compute_80"
build:unsupported_gpu_linux --config=tensorrt
build:unsupported_gpu_linux --action_env=CUDA_TOOLKIT_PATH="/usr/local/cuda-11.2"
build:unsupported_gpu_linux --action_env=LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda-11.1/lib64:/usr/local/tensorrt/lib"
build:unsupported_gpu_linux --action_env=GCC_HOST_COMPILER_PATH="/dt9/usr/bin/gcc"
build:unsupported_gpu_linux --crosstool_top=@ubuntu20.04-gcc9_manylinux2014-cuda11.2-cudnn8.1-tensorrt7.2_config_cuda//crosstool:toolchain

3 changes: 2 additions & 1 deletion ci/official/wheel.sh
@@ -27,7 +27,8 @@ if [[ "$TFCI_NIGHTLY_UPDATE_VERSION_ENABLE" == 1 ]]; then
export TFCI_BUILD_PIP_PACKAGE_ARGS="$(echo $TFCI_BUILD_PIP_PACKAGE_ARGS | sed 's/tensorflow/tf_nightly/')"
fi

tfrun bazel build $TFCI_BAZEL_COMMON_ARGS //tensorflow/tools/pip_package:wheel $TFCI_BUILD_PIP_PACKAGE_ARGS
tfrun bazel build $TFCI_BAZEL_COMMON_ARGS --config=no_cuda_libs \
//tensorflow/tools/pip_package:wheel $TFCI_BUILD_PIP_PACKAGE_ARGS
tfrun find ./bazel-bin/tensorflow/tools/pip_package -iname "*.whl" -exec cp {} $TFCI_OUTPUT_DIR \;
tfrun ./ci/official/utilities/rename_and_verify_wheels.sh
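
The added --config=no_cuda_libs (defined via include_hermetic_cuda_libs=false in the .bazelrc hunk above) presumably keeps the hermetically downloaded CUDA/cuDNN shared libraries out of the wheel itself. A sketch of a sanity check on the output -- the library-name pattern is an assumption, not part of this change:

# Illustrative only: confirm the wheel bundles no CUDA runtime libraries.
unzip -l "$TFCI_OUTPUT_DIR"/*.whl | grep -Ei 'libcud(art|nn)' \
  || echo "no CUDA libs bundled"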

210 changes: 13 additions & 197 deletions configure.py
@@ -16,7 +16,6 @@

import argparse
import errno
import glob
import json
import os
import platform
@@ -239,7 +238,7 @@ def setup_python(environ_cp):
write_to_bazelrc('build --python_path=\"{}"'.format(python_bin_path))
environ_cp['PYTHON_BIN_PATH'] = python_bin_path

# If choosen python_lib_path is from a path specified in the PYTHONPATH
# If chosen python_lib_path is from a path specified in the PYTHONPATH
# variable, need to tell bazel to include PYTHONPATH
if environ_cp.get('PYTHONPATH'):
python_paths = environ_cp.get('PYTHONPATH').split(':')
@@ -775,11 +774,6 @@ def get_ndk_api_level(environ_cp, android_ndk_home_path):
def set_gcc_host_compiler_path(environ_cp):
"""Set GCC_HOST_COMPILER_PATH."""
default_gcc_host_compiler_path = which('gcc') or ''
cuda_bin_symlink = '%s/bin/gcc' % environ_cp.get('CUDA_TOOLKIT_PATH')

if os.path.islink(cuda_bin_symlink):
# os.readlink is only available in linux
default_gcc_host_compiler_path = os.path.realpath(cuda_bin_symlink)

gcc_host_compiler_path = prompt_loop_or_load_from_env(
environ_cp,
@@ -937,17 +931,6 @@ def disable_clang_offsetof_extension(clang_version):
write_to_bazelrc('build --copt=-Wno-gnu-offsetof-extensions')


def set_tf_cuda_paths(environ_cp):
"""Set TF_CUDA_PATHS."""
ask_cuda_paths = (
'Please specify the comma-separated list of base paths to look for CUDA '
'libraries and headers. [Leave empty to use the default]: ')
tf_cuda_paths = get_from_env_or_user_or_default(environ_cp, 'TF_CUDA_PATHS',
ask_cuda_paths, '')
if tf_cuda_paths:
environ_cp['TF_CUDA_PATHS'] = tf_cuda_paths


def set_tf_cuda_version(environ_cp):
"""Set TF_CUDA_VERSION."""
ask_cuda_version = (
@@ -972,73 +955,10 @@ def set_tf_cudnn_version(environ_cp):
environ_cp['TF_CUDNN_VERSION'] = tf_cudnn_version


def set_tf_tensorrt_version(environ_cp):
"""Set TF_TENSORRT_VERSION."""
if not (is_linux() or is_windows()):
raise ValueError('Currently TensorRT is only supported on Linux platform.')

if not int(environ_cp.get('TF_NEED_TENSORRT', False)):
return

ask_tensorrt_version = (
'Please specify the TensorRT version you want to use. '
'[Leave empty to default to TensorRT %s]: ') % _DEFAULT_TENSORRT_VERSION
tf_tensorrt_version = get_from_env_or_user_or_default(
environ_cp, 'TF_TENSORRT_VERSION', ask_tensorrt_version,
_DEFAULT_TENSORRT_VERSION)
environ_cp['TF_TENSORRT_VERSION'] = tf_tensorrt_version


def set_tf_nccl_version(environ_cp):
"""Set TF_NCCL_VERSION."""
if not is_linux():
raise ValueError('Currently NCCL is only supported on Linux platform.')

if 'TF_NCCL_VERSION' in environ_cp:
return

ask_nccl_version = (
'Please specify the locally installed NCCL version you want to use. '
'[Leave empty to use http://github.com/nvidia/nccl]: ')
tf_nccl_version = get_from_env_or_user_or_default(environ_cp,
'TF_NCCL_VERSION',
ask_nccl_version, '')
environ_cp['TF_NCCL_VERSION'] = tf_nccl_version


def get_native_cuda_compute_capabilities(environ_cp):
"""Get native cuda compute capabilities.
Args:
environ_cp: copy of the os.environ.
Returns:
string of native cuda compute capabilities, separated by comma.
"""
device_query_bin = os.path.join(
environ_cp.get('CUDA_TOOLKIT_PATH'), 'extras/demo_suite/deviceQuery')
if os.path.isfile(device_query_bin) and os.access(device_query_bin, os.X_OK):
try:
output = run_shell(device_query_bin).split('\n')
pattern = re.compile('[0-9]*\\.[0-9]*')
output = [pattern.search(x) for x in output if 'Capability' in x]
output = ','.join(x.group() for x in output if x is not None)
except subprocess.CalledProcessError:
output = ''
else:
output = ''
return output


def set_tf_cuda_compute_capabilities(environ_cp):
"""Set TF_CUDA_COMPUTE_CAPABILITIES."""
while True:
native_cuda_compute_capabilities = get_native_cuda_compute_capabilities(
environ_cp)
if not native_cuda_compute_capabilities:
default_cuda_compute_capabilities = _DEFAULT_CUDA_COMPUTE_CAPABILITIES
else:
default_cuda_compute_capabilities = native_cuda_compute_capabilities
default_cuda_compute_capabilities = _DEFAULT_CUDA_COMPUTE_CAPABILITIES

ask_cuda_compute_capabilities = (
'Please specify a list of comma-separated CUDA compute capabilities '
@@ -1217,73 +1137,6 @@ def configure_ios(environ_cp):
symlink_force(filepath, new_filepath)


def validate_cuda_config(environ_cp):
"""Run find_cuda_config.py and return cuda_toolkit_path, or None."""

def maybe_encode_env(env):
"""Encodes unicode in env to str on Windows python 2.x."""
if not is_windows() or sys.version_info[0] != 2:
return env
for k, v in env.items():
if isinstance(k, unicode):
k = k.encode('ascii')
if isinstance(v, unicode):
v = v.encode('ascii')
env[k] = v
return env

cuda_libraries = ['cuda', 'cudnn']
if is_linux():
if int(environ_cp.get('TF_NEED_TENSORRT', False)):
cuda_libraries.append('tensorrt')
if environ_cp.get('TF_NCCL_VERSION', None):
cuda_libraries.append('nccl')
if is_windows():
if int(environ_cp.get('TF_NEED_TENSORRT', False)):
cuda_libraries.append('tensorrt')
print('WARNING: TensorRT support on Windows is experimental\n')

paths = glob.glob('**/third_party/gpus/find_cuda_config.py', recursive=True)
if not paths:
raise FileNotFoundError(
"Can't find 'find_cuda_config.py' script inside working directory")
proc = subprocess.Popen(
[environ_cp['PYTHON_BIN_PATH'], paths[0]] + cuda_libraries,
stdout=subprocess.PIPE,
env=maybe_encode_env(environ_cp))

if proc.wait():
# Errors from find_cuda_config.py were sent to stderr.
print('Asking for detailed CUDA configuration...\n')
return False

config = dict(
tuple(line.decode('ascii').rstrip().split(': ')) for line in proc.stdout)

print('Found CUDA %s in:' % config['cuda_version'])
print(' %s' % config['cuda_library_dir'])
print(' %s' % config['cuda_include_dir'])

print('Found cuDNN %s in:' % config['cudnn_version'])
print(' %s' % config['cudnn_library_dir'])
print(' %s' % config['cudnn_include_dir'])

if 'tensorrt_version' in config:
print('Found TensorRT %s in:' % config['tensorrt_version'])
print(' %s' % config['tensorrt_library_dir'])
print(' %s' % config['tensorrt_include_dir'])

if config.get('nccl_version', None):
print('Found NCCL %s in:' % config['nccl_version'])
print(' %s' % config['nccl_library_dir'])
print(' %s' % config['nccl_include_dir'])

print('\n')

environ_cp['CUDA_TOOLKIT_PATH'] = config['cuda_toolkit_path']
return True


def get_gcc_compiler(environ_cp):
gcc_env = environ_cp.get('CXX') or environ_cp.get('CC') or which('gcc')
if gcc_env is not None:
@@ -1388,57 +1241,20 @@ def main():
if (environ_cp.get('TF_NEED_CUDA') == '1' and
'TF_CUDA_CONFIG_REPO' not in environ_cp):

set_action_env_var(
environ_cp,
'TF_NEED_TENSORRT',
'TensorRT',
False,
bazel_config_name='tensorrt')

environ_save = dict(environ_cp)
for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS):

if validate_cuda_config(environ_cp):
cuda_env_names = [
'TF_CUDA_VERSION',
'TF_CUBLAS_VERSION',
'TF_CUDNN_VERSION',
'TF_TENSORRT_VERSION',
'TF_NCCL_VERSION',
'TF_CUDA_PATHS',
# Items below are for backwards compatibility when not using
# TF_CUDA_PATHS.
'CUDA_TOOLKIT_PATH',
'CUDNN_INSTALL_PATH',
'NCCL_INSTALL_PATH',
'NCCL_HDR_PATH',
'TENSORRT_INSTALL_PATH'
]
# Note: set_action_env_var above already writes to bazelrc.
for name in cuda_env_names:
if name in environ_cp:
write_action_env_to_bazelrc(name, environ_cp[name])
break

# Restore settings changed below if CUDA config could not be validated.
environ_cp = dict(environ_save)

set_tf_cuda_version(environ_cp)
set_tf_cudnn_version(environ_cp)
if is_windows():
set_tf_tensorrt_version(environ_cp)
if is_linux():
set_tf_tensorrt_version(environ_cp)
set_tf_nccl_version(environ_cp)

set_tf_cuda_paths(environ_cp)
cuda_env_names = [
'TF_CUDA_VERSION',
'TF_CUDNN_VERSION',
]
# Note: set_action_env_var above already writes to bazelrc.
for name in cuda_env_names:
if name in environ_cp:
write_action_env_to_bazelrc(name, environ_cp[name])
break

else:
raise UserInputError(
'Invalid CUDA setting were provided %d '
'times in a row. Assuming to be a scripting mistake.'
% _DEFAULT_PROMPT_ASK_ATTEMPTS
)
set_tf_cuda_version(environ_cp)
set_tf_cudnn_version(environ_cp)

set_tf_cuda_compute_capabilities(environ_cp)
if 'LD_LIBRARY_PATH' in environ_cp and environ_cp.get(
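With validate_cuda_config and the TensorRT/NCCL prompts gone, a non-interactive CUDA configure run only needs the version variables that main() still reads -- a sketch, assuming the environment-driven defaults shown above (the capability list is just an example value):

# Illustrative only: hermetic CUDA configure with no local toolkit paths.
TF_NEED_CUDA=1 TF_CUDA_VERSION=12.3 TF_CUDNN_VERSION=8.9 \
TF_CUDA_COMPUTE_CAPABILITIES=sm_70,compute_80 python configure.py
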
@@ -1,3 +1,4 @@
// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../../../../../cuda_nvcc"
// RUN: hlo_to_kernel --input=%s --output=%t --unroll_factors=4 --tile_sizes=256 --arch=sm_70

func.func @tanh(%arg0: tensor<*xf32>) -> tensor<*xf32> attributes {tf_entry} {
1 change: 1 addition & 0 deletions tensorflow/core/common_runtime/gpu/BUILD
@@ -158,6 +158,7 @@ tf_cuda_library(
"@local_config_cuda//cuda:cudnn_header",
"@local_xla//xla/stream_executor/cuda:cuda_platform",
"@local_xla//xla/stream_executor/gpu:gpu_stream",
"@local_xla//xla/tsl:gpu_runtime_hermetic_cuda_deps",
],
defines = if_linux_x86_64(["TF_PLATFORM_LINUX_X86_64"]),
features = ["-layering_check"],
