Introduce hermetic CUDA in Google ML projects.
Instead of relying on pre-installed NVIDIA CUDA and cuDNN libraries and on environment variables pointing to their installation locations, Bazel now automatically downloads the CUDA and cuDNN redistributables into its cache and uses them during the build and test phases.

The Bazel version used in JAX is bumped from 6.1.2 to 6.5.0.

PiperOrigin-RevId: 616865795
tensorflower-gardener committed May 7, 2024
1 parent 027d6b3 commit 9b69f8b
Showing 127 changed files with 5,095 additions and 766 deletions.
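
As a rough sketch of the new workflow (illustrative only: it simply combines the cuda_clang_official config and the wheel target that appear in the hunks below), a GPU build no longer needs any local toolkit setup:

# Illustrative only -- no pre-installed CUDA, no CUDA_TOOLKIT_PATH or
# LD_LIBRARY_PATH exports; Bazel downloads CUDA 12.3 and cuDNN 8.9 (the
# versions pinned in .bazelrc below) into its cache during the build.
bazel build --config=cuda_clang_official //tensorflow/tools/pip_package:wheel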
22 changes: 6 additions & 16 deletions .bazelrc
@@ -226,12 +226,13 @@ build:cuda --repo_env TF_NEED_CUDA=1
build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
build:cuda --@local_config_cuda//:enable_cuda

build:no_cuda_libs --@local_config_cuda//cuda:include_hermetic_cuda_libs=false

# CUDA: This config refers to building CUDA op kernels with clang.
build:cuda_clang --config=cuda
# Enable TensorRT optimizations https://developer.nvidia.com/tensorrt
build:cuda_clang --config=tensorrt
build:cuda_clang --action_env=TF_CUDA_CLANG="1"
build:cuda_clang --@local_config_cuda//:cuda_compiler=clang
build:cuda_clang --copt=-Qunused-arguments
# Select supported compute capabilities (supported graphics cards).
# This is the same as the official TensorFlow builds.
# See https://developer.nvidia.com/cuda-gpus#compute
@@ -244,12 +245,10 @@ build:cuda_clang --repo_env=TF_CUDA_COMPUTE_CAPABILITIES="sm_60,sm_70,sm_80,sm_8

# Set up compilation CUDA version and paths and use the CUDA Clang toolchain.
build:cuda_clang_official --config=cuda_clang
build:cuda_clang_official --action_env=TF_CUDA_VERSION="12"
build:cuda_clang_official --action_env=TF_CUDNN_VERSION="8"
build:cuda_clang_official --action_env=CUDA_TOOLKIT_PATH="/usr/local/cuda-12.3"
build:cuda_clang_official --action_env=TF_CUDA_VERSION="12.3"
build:cuda_clang_official --action_env=TF_CUDNN_VERSION="8.9"
build:cuda_clang_official --action_env=GCC_HOST_COMPILER_PATH="/dt9/usr/bin/gcc"
build:cuda_clang_official --action_env=CLANG_CUDA_COMPILER_PATH="/usr/lib/llvm-17/bin/clang"
build:cuda_clang_official --action_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
build:cuda_clang_official --crosstool_top="@sigbuild-r2.17-clang_config_cuda//crosstool:toolchain"

# Build with nvcc for CUDA and clang for host
@@ -545,10 +544,6 @@ build:rbe_linux_cuda --config=cuda_clang_official
build:rbe_linux_cuda --config=rbe_linux_cpu
# For Remote build execution -- GPU configuration
build:rbe_linux_cuda --repo_env=REMOTE_GPU_TESTING=1
build:rbe_linux_cuda --repo_env=TF_CUDA_CONFIG_REPO="@sigbuild-r2.17-clang_config_cuda"
build:rbe_linux_cuda --repo_env=TF_TENSORRT_CONFIG_REPO="@sigbuild-r2.17-clang_config_tensorrt"
build:rbe_linux_cuda --repo_env=TF_NCCL_CONFIG_REPO="@sigbuild-r2.17-clang_config_nccl"
test:rbe_linux_cuda --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"

build:rbe_linux_cuda_nvcc --config=rbe_linux_cuda
build:rbe_linux_cuda_nvcc --config=nvcc_clang
@@ -633,7 +628,6 @@ build:release_cpu_linux_base --repo_env=BAZEL_COMPILER="/usr/lib/llvm-17/bin/cla
# Test-related settings below this point.
test:release_linux_base --build_tests_only --keep_going --test_output=errors --verbose_failures=true
test:release_linux_base --local_test_jobs=HOST_CPUS
test:release_linux_base --test_env=LD_LIBRARY_PATH
# Give only the list of failed tests at the end of the log
test:release_linux_base --test_summary=short

@@ -645,7 +639,6 @@ build:release_gpu_linux --config=release_cpu_linux
# Set up compilation CUDA version and paths and use the CUDA Clang toolchain.
# Note that linux cpu and cuda builds share the same toolchain now.
build:release_gpu_linux --config=cuda_clang_official
test:release_gpu_linux --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
# Local test jobs has to be 4 because parallel_gpu_execute is fragile, I think
test:release_gpu_linux --test_timeout=300,450,1200,3600 --local_test_jobs=4 --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute

@@ -674,11 +667,8 @@ test:unsupported_cpu_linux --config=release_base
build:unsupported_gpu_linux --config=cuda
build:unsupported_gpu_linux --config=unsupported_cpu_linux
build:unsupported_gpu_linux --action_env=TF_CUDA_VERSION="11"
build:unsupported_gpu_linux --action_env=TF_CUDNN_VERSION="8"
build:unsupported_gpu_linux --action_env=TF_CUDNN_VERSION="8.6"
build:unsupported_gpu_linux --repo_env=TF_CUDA_COMPUTE_CAPABILITIES="sm_35,sm_50,sm_60,sm_70,sm_75,compute_80"
build:unsupported_gpu_linux --config=tensorrt
build:unsupported_gpu_linux --action_env=CUDA_TOOLKIT_PATH="/usr/local/cuda-11.2"
build:unsupported_gpu_linux --action_env=LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda-11.1/lib64:/usr/local/tensorrt/lib"
build:unsupported_gpu_linux --action_env=GCC_HOST_COMPILER_PATH="/dt9/usr/bin/gcc"
build:unsupported_gpu_linux --crosstool_top=@ubuntu20.04-gcc9_manylinux2014-cuda11.2-cudnn8.1-tensorrt7.2_config_cuda//crosstool:toolchain

3 changes: 2 additions & 1 deletion ci/official/wheel.sh
@@ -27,7 +27,8 @@ if [[ "$TFCI_NIGHTLY_UPDATE_VERSION_ENABLE" == 1 ]]; then
export TFCI_BUILD_PIP_PACKAGE_ARGS="$(echo $TFCI_BUILD_PIP_PACKAGE_ARGS | sed 's/tensorflow/tf_nightly/')"
fi

tfrun bazel build $TFCI_BAZEL_COMMON_ARGS //tensorflow/tools/pip_package:wheel $TFCI_BUILD_PIP_PACKAGE_ARGS
tfrun bazel build $TFCI_BAZEL_COMMON_ARGS --config=no_cuda_libs \
//tensorflow/tools/pip_package:wheel $TFCI_BUILD_PIP_PACKAGE_ARGS
tfrun find ./bazel-bin/tensorflow/tools/pip_package -iname "*.whl" -exec cp {} $TFCI_OUTPUT_DIR \;
tfrun ./ci/official/utilities/rename_and_verify_wheels.sh
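
The added --config=no_cuda_libs (defined via include_hermetic_cuda_libs=false in the .bazelrc hunk above) presumably keeps the hermetically downloaded CUDA/cuDNN shared libraries out of the wheel itself. A sketch of a sanity check on the output -- the library-name pattern is an assumption, not part of this change:

# Illustrative only: confirm the wheel bundles no CUDA runtime libraries.
unzip -l "$TFCI_OUTPUT_DIR"/*.whl | grep -Ei 'libcud(art|nn)' \
  || echo "no CUDA libs bundled"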

210 changes: 13 additions & 197 deletions configure.py
@@ -16,7 +16,6 @@

import argparse
import errno
import glob
import json
import os
import platform
@@ -239,7 +238,7 @@ def setup_python(environ_cp):
write_to_bazelrc('build --python_path=\"{}"'.format(python_bin_path))
environ_cp['PYTHON_BIN_PATH'] = python_bin_path

# If choosen python_lib_path is from a path specified in the PYTHONPATH
# If chosen python_lib_path is from a path specified in the PYTHONPATH
# variable, need to tell bazel to include PYTHONPATH
if environ_cp.get('PYTHONPATH'):
python_paths = environ_cp.get('PYTHONPATH').split(':')
@@ -775,11 +774,6 @@ def get_ndk_api_level(environ_cp, android_ndk_home_path):
def set_gcc_host_compiler_path(environ_cp):
"""Set GCC_HOST_COMPILER_PATH."""
default_gcc_host_compiler_path = which('gcc') or ''
cuda_bin_symlink = '%s/bin/gcc' % environ_cp.get('CUDA_TOOLKIT_PATH')

if os.path.islink(cuda_bin_symlink):
# os.readlink is only available in linux
default_gcc_host_compiler_path = os.path.realpath(cuda_bin_symlink)

gcc_host_compiler_path = prompt_loop_or_load_from_env(
environ_cp,
@@ -937,17 +931,6 @@ def disable_clang_offsetof_extension(clang_version):
write_to_bazelrc('build --copt=-Wno-gnu-offsetof-extensions')


def set_tf_cuda_paths(environ_cp):
"""Set TF_CUDA_PATHS."""
ask_cuda_paths = (
'Please specify the comma-separated list of base paths to look for CUDA '
'libraries and headers. [Leave empty to use the default]: ')
tf_cuda_paths = get_from_env_or_user_or_default(environ_cp, 'TF_CUDA_PATHS',
ask_cuda_paths, '')
if tf_cuda_paths:
environ_cp['TF_CUDA_PATHS'] = tf_cuda_paths


def set_tf_cuda_version(environ_cp):
"""Set TF_CUDA_VERSION."""
ask_cuda_version = (
@@ -972,73 +955,10 @@ def set_tf_cudnn_version(environ_cp):
environ_cp['TF_CUDNN_VERSION'] = tf_cudnn_version


def set_tf_tensorrt_version(environ_cp):
"""Set TF_TENSORRT_VERSION."""
if not (is_linux() or is_windows()):
raise ValueError('Currently TensorRT is only supported on Linux platform.')

if not int(environ_cp.get('TF_NEED_TENSORRT', False)):
return

ask_tensorrt_version = (
'Please specify the TensorRT version you want to use. '
'[Leave empty to default to TensorRT %s]: ') % _DEFAULT_TENSORRT_VERSION
tf_tensorrt_version = get_from_env_or_user_or_default(
environ_cp, 'TF_TENSORRT_VERSION', ask_tensorrt_version,
_DEFAULT_TENSORRT_VERSION)
environ_cp['TF_TENSORRT_VERSION'] = tf_tensorrt_version


def set_tf_nccl_version(environ_cp):
"""Set TF_NCCL_VERSION."""
if not is_linux():
raise ValueError('Currently NCCL is only supported on Linux platform.')

if 'TF_NCCL_VERSION' in environ_cp:
return

ask_nccl_version = (
'Please specify the locally installed NCCL version you want to use. '
'[Leave empty to use http://github.com/nvidia/nccl]: ')
tf_nccl_version = get_from_env_or_user_or_default(environ_cp,
'TF_NCCL_VERSION',
ask_nccl_version, '')
environ_cp['TF_NCCL_VERSION'] = tf_nccl_version


def get_native_cuda_compute_capabilities(environ_cp):
"""Get native cuda compute capabilities.
Args:
environ_cp: copy of the os.environ.
Returns:
string of native cuda compute capabilities, separated by comma.
"""
device_query_bin = os.path.join(
environ_cp.get('CUDA_TOOLKIT_PATH'), 'extras/demo_suite/deviceQuery')
if os.path.isfile(device_query_bin) and os.access(device_query_bin, os.X_OK):
try:
output = run_shell(device_query_bin).split('\n')
pattern = re.compile('[0-9]*\\.[0-9]*')
output = [pattern.search(x) for x in output if 'Capability' in x]
output = ','.join(x.group() for x in output if x is not None)
except subprocess.CalledProcessError:
output = ''
else:
output = ''
return output


def set_tf_cuda_compute_capabilities(environ_cp):
"""Set TF_CUDA_COMPUTE_CAPABILITIES."""
while True:
native_cuda_compute_capabilities = get_native_cuda_compute_capabilities(
environ_cp)
if not native_cuda_compute_capabilities:
default_cuda_compute_capabilities = _DEFAULT_CUDA_COMPUTE_CAPABILITIES
else:
default_cuda_compute_capabilities = native_cuda_compute_capabilities
default_cuda_compute_capabilities = _DEFAULT_CUDA_COMPUTE_CAPABILITIES

ask_cuda_compute_capabilities = (
'Please specify a list of comma-separated CUDA compute capabilities '
@@ -1217,73 +1137,6 @@ def configure_ios(environ_cp):
symlink_force(filepath, new_filepath)


def validate_cuda_config(environ_cp):
"""Run find_cuda_config.py and return cuda_toolkit_path, or None."""

def maybe_encode_env(env):
"""Encodes unicode in env to str on Windows python 2.x."""
if not is_windows() or sys.version_info[0] != 2:
return env
for k, v in env.items():
if isinstance(k, unicode):
k = k.encode('ascii')
if isinstance(v, unicode):
v = v.encode('ascii')
env[k] = v
return env

cuda_libraries = ['cuda', 'cudnn']
if is_linux():
if int(environ_cp.get('TF_NEED_TENSORRT', False)):
cuda_libraries.append('tensorrt')
if environ_cp.get('TF_NCCL_VERSION', None):
cuda_libraries.append('nccl')
if is_windows():
if int(environ_cp.get('TF_NEED_TENSORRT', False)):
cuda_libraries.append('tensorrt')
print('WARNING: TensorRT support on Windows is experimental\n')

paths = glob.glob('**/third_party/gpus/find_cuda_config.py', recursive=True)
if not paths:
raise FileNotFoundError(
"Can't find 'find_cuda_config.py' script inside working directory")
proc = subprocess.Popen(
[environ_cp['PYTHON_BIN_PATH'], paths[0]] + cuda_libraries,
stdout=subprocess.PIPE,
env=maybe_encode_env(environ_cp))

if proc.wait():
# Errors from find_cuda_config.py were sent to stderr.
print('Asking for detailed CUDA configuration...\n')
return False

config = dict(
tuple(line.decode('ascii').rstrip().split(': ')) for line in proc.stdout)

print('Found CUDA %s in:' % config['cuda_version'])
print(' %s' % config['cuda_library_dir'])
print(' %s' % config['cuda_include_dir'])

print('Found cuDNN %s in:' % config['cudnn_version'])
print(' %s' % config['cudnn_library_dir'])
print(' %s' % config['cudnn_include_dir'])

if 'tensorrt_version' in config:
print('Found TensorRT %s in:' % config['tensorrt_version'])
print(' %s' % config['tensorrt_library_dir'])
print(' %s' % config['tensorrt_include_dir'])

if config.get('nccl_version', None):
print('Found NCCL %s in:' % config['nccl_version'])
print(' %s' % config['nccl_library_dir'])
print(' %s' % config['nccl_include_dir'])

print('\n')

environ_cp['CUDA_TOOLKIT_PATH'] = config['cuda_toolkit_path']
return True


def get_gcc_compiler(environ_cp):
gcc_env = environ_cp.get('CXX') or environ_cp.get('CC') or which('gcc')
if gcc_env is not None:
@@ -1388,57 +1241,20 @@ def main():
if (environ_cp.get('TF_NEED_CUDA') == '1' and
'TF_CUDA_CONFIG_REPO' not in environ_cp):

set_action_env_var(
environ_cp,
'TF_NEED_TENSORRT',
'TensorRT',
False,
bazel_config_name='tensorrt')

environ_save = dict(environ_cp)
for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS):

if validate_cuda_config(environ_cp):
cuda_env_names = [
'TF_CUDA_VERSION',
'TF_CUBLAS_VERSION',
'TF_CUDNN_VERSION',
'TF_TENSORRT_VERSION',
'TF_NCCL_VERSION',
'TF_CUDA_PATHS',
# Items below are for backwards compatibility when not using
# TF_CUDA_PATHS.
'CUDA_TOOLKIT_PATH',
'CUDNN_INSTALL_PATH',
'NCCL_INSTALL_PATH',
'NCCL_HDR_PATH',
'TENSORRT_INSTALL_PATH'
]
# Note: set_action_env_var above already writes to bazelrc.
for name in cuda_env_names:
if name in environ_cp:
write_action_env_to_bazelrc(name, environ_cp[name])
break

# Restore settings changed below if CUDA config could not be validated.
environ_cp = dict(environ_save)

set_tf_cuda_version(environ_cp)
set_tf_cudnn_version(environ_cp)
if is_windows():
set_tf_tensorrt_version(environ_cp)
if is_linux():
set_tf_tensorrt_version(environ_cp)
set_tf_nccl_version(environ_cp)

set_tf_cuda_paths(environ_cp)
cuda_env_names = [
'TF_CUDA_VERSION',
'TF_CUDNN_VERSION',
]
# Note: set_action_env_var above already writes to bazelrc.
for name in cuda_env_names:
if name in environ_cp:
write_action_env_to_bazelrc(name, environ_cp[name])
break

else:
raise UserInputError(
'Invalid CUDA setting were provided %d '
'times in a row. Assuming to be a scripting mistake.'
% _DEFAULT_PROMPT_ASK_ATTEMPTS
)
set_tf_cuda_version(environ_cp)
set_tf_cudnn_version(environ_cp)

set_tf_cuda_compute_capabilities(environ_cp)
if 'LD_LIBRARY_PATH' in environ_cp and environ_cp.get(
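With validate_cuda_config and the TensorRT/NCCL prompts gone, a non-interactive CUDA configure run only needs the version variables that main() still reads -- a sketch, assuming the environment-driven defaults shown above (the capability list is just an example value):

# Illustrative only: hermetic CUDA configure with no local toolkit paths.
TF_NEED_CUDA=1 TF_CUDA_VERSION=12.3 TF_CUDNN_VERSION=8.9 \
TF_CUDA_COMPUTE_CAPABILITIES=sm_70,compute_80 python configure.py
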
@@ -1,3 +1,4 @@
// RUN: export XLA_FLAGS="--xla_gpu_cuda_data_dir=%S/../../../../../../../../cuda_nvcc"
// RUN: hlo_to_kernel --input=%s --output=%t --unroll_factors=4 --tile_sizes=256 --arch=sm_70

func.func @tanh(%arg0: tensor<*xf32>) -> tensor<*xf32> attributes {tf_entry} {
1 change: 1 addition & 0 deletions tensorflow/core/common_runtime/gpu/BUILD
@@ -158,6 +158,7 @@ tf_cuda_library(
"@local_config_cuda//cuda:cudnn_header",
"@local_xla//xla/stream_executor/cuda:cuda_platform",
"@local_xla//xla/stream_executor/gpu:gpu_stream",
"@local_xla//xla/tsl:gpu_runtime_hermetic_cuda_deps",
],
defines = if_linux_x86_64(["TF_PLATFORM_LINUX_X86_64"]),
features = ["-layering_check"],
