Skip to content

Commit

Permalink
Merge pull request #2641 from ROCm/develop-upstream-sync-240820
Browse files Browse the repository at this point in the history
Develop upstream sync 240820
  • Loading branch information
mmakevic-amd authored Sep 6, 2024
2 parents 71a8fc1 + 5bad423 commit 8f1cb0f
Show file tree
Hide file tree
Showing 2,567 changed files with 79,670 additions and 37,142 deletions.
60 changes: 38 additions & 22 deletions .bazelrc
Original file line number Diff line number Diff line change
Expand Up @@ -225,13 +225,16 @@ build:mkl_aarch64_threadpool -c opt
build:cuda --repo_env TF_NEED_CUDA=1
build:cuda --crosstool_top=@local_config_cuda//crosstool:toolchain
build:cuda --@local_config_cuda//:enable_cuda
# Default CUDA and CUDNN versions.
build:cuda --repo_env=HERMETIC_CUDA_VERSION="12.3.2"
build:cuda --repo_env=HERMETIC_CUDNN_VERSION="8.9.7.29"
# This flag is needed to include hermetic CUDA libraries for bazel tests.
test:cuda --@local_config_cuda//cuda:include_hermetic_cuda_libs=true

# CUDA: This config refers to building CUDA op kernels with clang.
build:cuda_clang --config=cuda
# Enable TensorRT optimizations https://developer.nvidia.com/tensorrt
build:cuda_clang --config=tensorrt
build:cuda_clang --action_env=TF_CUDA_CLANG="1"
build:cuda_clang --@local_config_cuda//:cuda_compiler=clang
build:cuda_clang --copt=-Qunused-arguments
# Select supported compute capabilities (supported graphics cards).
# This is the same as the official TensorFlow builds.
# See https://developer.nvidia.com/cuda-gpus#compute
Expand All @@ -240,22 +243,22 @@ build:cuda_clang --@local_config_cuda//:cuda_compiler=clang
# release while SASS is only forward compatible inside the current
# major release. Example: sm_80 kernels can run on sm_89 GPUs but
# not on sm_90 GPUs. compute_80 kernels though can also run on sm_90 GPUs.
build:cuda_clang --repo_env=TF_CUDA_COMPUTE_CAPABILITIES="sm_60,sm_70,sm_80,sm_89,compute_90"
build:cuda_clang --repo_env=HERMETIC_CUDA_COMPUTE_CAPABILITIES="sm_60,sm_70,sm_80,sm_89,compute_90"
# Set lld as the linker.
build:cuda_clang --host_linkopt="-fuse-ld=lld"
build:cuda_clang --host_linkopt="-lm"
build:cuda_clang --linkopt="-fuse-ld=lld"
build:cuda_clang --linkopt="-lm"

# Set up compilation CUDA version and paths and use the CUDA Clang toolchain.
build:cuda_clang_official --config=cuda_clang
build:cuda_clang_official --action_env=TF_CUDA_VERSION="12"
build:cuda_clang_official --action_env=TF_CUDNN_VERSION="8"
build:cuda_clang_official --action_env=CUDA_TOOLKIT_PATH="/usr/local/cuda-12.3"
build:cuda_clang_official --action_env=GCC_HOST_COMPILER_PATH="/dt9/usr/bin/gcc"
build:cuda_clang_official --repo_env=HERMETIC_CUDA_VERSION="12.3.2"
build:cuda_clang_official --repo_env=HERMETIC_CUDNN_VERSION="8.9.7.29"
build:cuda_clang_official --action_env=CLANG_CUDA_COMPILER_PATH="/usr/lib/llvm-18/bin/clang"
build:cuda_clang_official --action_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
build:cuda_clang_official --crosstool_top="@sigbuild-r2.17-clang_config_cuda//crosstool:toolchain"

# Build with nvcc for CUDA and clang for host
build:nvcc_clang --config=cuda
# Unfortunately, cuda_configure.bzl demands this for using nvcc + clang
build:nvcc_clang --action_env=TF_CUDA_CLANG="1"
build:nvcc_clang --action_env=TF_NVCC_CLANG="1"
build:nvcc_clang --@local_config_cuda//:cuda_compiler=nvcc

Expand Down Expand Up @@ -382,6 +385,13 @@ build:windows --features=archive_param_file
build:windows --copt=/d2ReducedOptimizeHugeFunctions
build:windows --host_copt=/d2ReducedOptimizeHugeFunctions

# Before VS 2017 15.8, the member "type" would non-conformingly have an
# alignment of only alignof(max_align_t). VS 2017 15.8 was fixed to handle this
# correctly, but the fix inherently changes layout and breaks binary
# compatibility (*only* for uses of aligned_storage with extended alignments).
build:windows --copt=-D_ENABLE_EXTENDED_ALIGNED_STORAGE
build:windows --host_copt=-D_ENABLE_EXTENDED_ALIGNED_STORAGE

# Enable the runfiles symlink tree on Windows. This makes it possible to build
# the pip package on Windows without an intermediate data-file archive, as the
# build_pip_package script in its current form (as of Aug 2023) uses the
Expand Down Expand Up @@ -569,10 +579,7 @@ build:rbe_linux_cuda --config=cuda_clang_official
build:rbe_linux_cuda --config=rbe_linux_cpu
# For Remote build execution -- GPU configuration
build:rbe_linux_cuda --repo_env=REMOTE_GPU_TESTING=1
build:rbe_linux_cuda --repo_env=TF_CUDA_CONFIG_REPO="@sigbuild-r2.17-clang_config_cuda"
build:rbe_linux_cuda --repo_env=TF_TENSORRT_CONFIG_REPO="@sigbuild-r2.17-clang_config_tensorrt"
build:rbe_linux_cuda --repo_env=TF_NCCL_CONFIG_REPO="@sigbuild-r2.17-clang_config_nccl"
test:rbe_linux_cuda --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"


# ROCm
# TODO(rocm) Is this actualy used?
Expand Down Expand Up @@ -609,6 +616,9 @@ build:rbe_win_clang --compiler=clang-cl
build:rbe_win_clang --linkopt=/FORCE:MULTIPLE
build:rbe_win_clang --host_linkopt=/FORCE:MULTIPLE

# TODO(belitskiy): Rename `rbe_win_clang` to this, once done switching presubmits.
build:rbe_windows_x86_cpu --config=rbe_win_clang

# END TF REMOTE BUILD EXECUTION OPTIONS

# TFLite build configs for generic embedded Linux
Expand Down Expand Up @@ -671,7 +681,6 @@ build:release_cpu_linux_base --linkopt="-fuse-ld=lld"
# Test-related settings below this point.
test:release_linux_base --build_tests_only --keep_going --test_output=errors --verbose_failures=true
test:release_linux_base --local_test_jobs=HOST_CPUS
test:release_linux_base --test_env=LD_LIBRARY_PATH
# Give only the list of failed tests at the end of the log
test:release_linux_base --test_summary=short

Expand All @@ -686,7 +695,6 @@ build:release_cpu_linux --config=release_cpu_linux_base
# Set up compilation CUDA version and paths and use the CUDA Clang toolchain.
# Note that linux cpu and cuda builds share the same toolchain now.
build:release_gpu_linux --config=cuda_clang_official
test:release_gpu_linux --test_env=LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
# Local test jobs has to be 4 because parallel_gpu_execute is fragile, I think
test:release_gpu_linux --test_timeout=300,450,1200,3600 --local_test_jobs=4 --run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute

Expand Down Expand Up @@ -717,9 +725,8 @@ build:unsupported_gpu_linux --config=unsupported_cpu_linux
build:unsupported_gpu_linux --action_env=TF_CUDA_VERSION="11"
build:unsupported_gpu_linux --action_env=TF_CUDNN_VERSION="8"
build:unsupported_gpu_linux --repo_env=TF_CUDA_COMPUTE_CAPABILITIES="sm_35,sm_50,sm_60,sm_70,sm_75,compute_80"
build:unsupported_gpu_linux --config=tensorrt
build:unsupported_gpu_linux --action_env=CUDA_TOOLKIT_PATH="/usr/local/cuda-11.2"
build:unsupported_gpu_linux --action_env=LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda-11.1/lib64:/usr/local/tensorrt/lib"
build:unsupported_gpu_linux --action_env=LD_LIBRARY_PATH="/usr/local/cuda:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:/usr/local/cuda-11.1/lib64"
build:unsupported_gpu_linux --action_env=GCC_HOST_COMPILER_PATH="/dt9/usr/bin/gcc"
build:unsupported_gpu_linux [email protected]_manylinux2014-cuda11.2-cudnn8.1-tensorrt7.2_config_cuda//crosstool:toolchain

Expand Down Expand Up @@ -829,17 +836,19 @@ test:macos_x86_wheel_test --config=macos_x86_wheel_test_filters -- //tensorflow/

# PYCPP TESTS run a suite of Python and C++ tests to verify general correctness over
# the whole TF code base. These are usually run continuously or upon presubmit.
# CPU PYCPP:
# LINUX CPU PYCPP:
test:linux_cpu_pycpp_test_filters --test_tag_filters=-no_oss,-oss_excluded,-oss_serial,-gpu,-tpu,-benchmark-test,-v1only
test:linux_cpu_pycpp_test_filters --build_tag_filters=-no_oss,-oss_excluded,-oss_serial,-gpu,-tpu,-benchmark-test,-v1only
test:linux_cpu_pycpp_test_filters --test_lang_filters=cc,py --test_size_filters=small,medium
test:linux_cpu_pycpp_test --config=linux_cpu_pycpp_test_filters -- //tensorflow/... -//tensorflow/python/integration_testing/... -//tensorflow/compiler/tf2tensorrt/... -//tensorflow/core/tpu/... -//tensorflow/lite/... -//tensorflow/tools/toolchains/...
# CUDA PYCPP:

# LINUX CUDA PYCPP:
test:linux_cuda_pycpp_test_filters --test_tag_filters=-no_oss,-oss_excluded,-oss_serial,-benchmark-test,-v1only,gpu,-no_gpu,-no_gpu_presubmit,-no_cuda11
test:linux_cuda_pycpp_test_filters --build_tag_filters=-no_oss,-oss_excluded,-oss_serial,-benchmark-test,-v1only,gpu,-no_gpu,-no_gpu_presubmit,-no_cuda11
test:linux_cuda_pycpp_test_filters --test_lang_filters=cc,py --test_size_filters=small,medium
test:linux_cuda_pycpp_test --config=linux_cuda_pycpp_test_filters -- //tensorflow/... -//tensorflow/python/integration_testing/... -//tensorflow/compiler/tf2tensorrt/... -//tensorflow/core/tpu/... -//tensorflow/lite/... -//tensorflow/tools/toolchains/...
# ARM64 PYCPP

# LINUX ARM64 PYCPP
# In Linux Arm64 presubmit/continuous build, we cross-compile the binaries on
# Linux x86 so that we can use RBE. Since tests still need to run on the single
# host Arm64 machine, the build becomes too slow (~30 min) to be a presubmit.
Expand Down Expand Up @@ -872,6 +881,13 @@ build:macos_x86_pycpp_test --config=macos_x86_pycpp_test_filters -- //tensorflow
# CROSS-COMPILE MACOS X86 PYCPP
build:cross_compile_macos_x86_pycpp_test --config=macos_x86_pycpp_test
build:cross_compile_macos_x86_pycpp_test -//tensorflow/core/kernels:quantized_conv_ops_test -//tensorflow/core/kernels:quantized_matmul_op_test -//tensorflow/python/ops:quantized_conv_ops_test -//tensorflow/tools/graph_transforms:transforms_test -//tensorflow/python/tools:aot_compiled_test
# WINDOWS X86-64 CPU PYCPP
test:windows_x86_cpu_pycpp_test_filters --test_tag_filters=-no_windows,-windows_excluded,-no_oss,-oss_excluded,-gpu,-tpu,-benchmark-test
test:windows_x86_cpu_pycpp_test_filters --build_tag_filters=-no_windows,-windows_excluded,-no_oss,-oss_excluded,-benchmark-test
test:windows_x86_cpu_pycpp_test_filters --test_lang_filters=cc,py --test_size_filters=small,medium --test_timeout="300,450,1200,3600"
test:windows_x86_cpu_pycpp_test_opts --copt=/d2ReducedOptimizeHugeFunctions --host_copt=/d2ReducedOptimizeHugeFunctions --dynamic_mode=off --build_tests_only
test:windows_x86_cpu_pycpp_test --config=windows_x86_cpu_pycpp_test_opts --config=windows_x86_cpu_pycpp_test_filters -- //tensorflow/... -//tensorflow/java/... -//tensorflow/lite/... -//tensorflow/compiler/...

# END TF TEST SUITE OPTIONS

# START CROSS-COMPILE CONFIGS
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/osv-scanner-scheduled.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ permissions:
jobs:
scan-scheduled:
if: github.repository == 'tensorflow/tensorflow'
uses: "google/osv-scanner-action/.github/workflows/[email protected].1"
uses: "google/osv-scanner-action/.github/workflows/[email protected].2"
with:
scan-args: |-
--lockfile=requirements.txt:./requirements_lock_3_9.txt
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pylint-presubmit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ jobs:
run: |
echo Changed files: ${{ steps.get_file_changes.outputs.files }}
- name: Set up Python 3.9
uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0
uses: actions/setup-python@39cd14951b08e74b54015e9e001cdefcf80e669f # v5.1.1
with:
python-version: "3.9"
- name: Install Python dependencies
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/scorecards-analysis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ jobs:
persist-credentials: false

- name: "Run analysis"
uses: ossf/scorecard-action@dc50aa9510b46c811795eb24b2f1ba02a914e534 # v2.3.3
uses: ossf/scorecard-action@62b2cac7ed8198b15735ed49ab1e5cf35480ba46 # v2.4.0
with:
results_file: results.sarif
results_format: sarif
Expand All @@ -55,7 +55,7 @@ jobs:
# Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF
# format to the repository Actions tab.
- name: "Upload artifact"
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4.3.3
uses: actions/upload-artifact@0b2256b8c012f0828dc542b3febcab082c67f72b # v4.3.4
with:
name: SARIF file
path: results.sarif
Expand All @@ -64,6 +64,6 @@ jobs:
# Upload the results to GitHub's code scanning dashboard (optional).
# Commenting out will disable upload of results to your repo's Code Scanning dashboard
- name: "Upload to code-scanning"
uses: github/codeql-action/upload-sarif@b611370bb5703a7efb587f9d136a52ea24c5c38c # v3.25.11
uses: github/codeql-action/upload-sarif@afb54ba388a7dca6ecae48f608c4ff05ff4cc77a # v3.25.15
with:
sarif_file: results.sarif
8 changes: 4 additions & 4 deletions .github/workflows/sigbuild-docker-branch.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,16 @@ jobs:
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@d70bba72b1f3fd22344832f00baa16ece964efeb # v3.3.0
uses: docker/setup-buildx-action@988b5a0280414f521da01fcc63a27aeeb4b104db # v3.6.1
-
name: Login to DockerHub
uses: docker/login-action@0d4c9c5ea7693da7b068278f7b52bda2a190a446 # v3.2.0
uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
-
name: Login to GCR
uses: docker/login-action@0d4c9c5ea7693da7b068278f7b52bda2a190a446 # v3.2.0
uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0
with:
registry: gcr.io
username: _json_key
Expand All @@ -67,7 +67,7 @@ jobs:
-
name: Build and push
id: docker_build
uses: docker/build-push-action@15560696de535e4014efeff63c48f16952e52dd1 # v6.2.0
uses: docker/build-push-action@5176d81f87c23d6fc96624dfdbcd9f3830bbe445 # v6.5.0
with:
push: true
context: ./tensorflow/tools/tf_sig_build_dockerfiles
Expand Down
16 changes: 13 additions & 3 deletions .github/workflows/sigbuild-docker-presubmit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,24 @@ jobs:
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@d70bba72b1f3fd22344832f00baa16ece964efeb # v3.3.0
uses: docker/setup-buildx-action@988b5a0280414f521da01fcc63a27aeeb4b104db # v3.6.1
-
name: Login to GCR
if: contains(github.event.pull_request.labels.*.name, 'build and push to gcr.io for staging')
uses: docker/login-action@0d4c9c5ea7693da7b068278f7b52bda2a190a446 # v3.2.0
uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0
with:
registry: gcr.io
username: _json_key
password: ${{ secrets.GCP_CREDS }}
-
name: Login to AR
# Once this is verified, change the label's name. For now, we will piggyback on gcr.io actions.
if: contains(github.event.pull_request.labels.*.name, 'build and push to gcr.io for staging')
uses: docker/login-action@0d4c9c5ea7693da7b068278f7b52bda2a190a446 # v3.2.0
with:
registry: us-central1-docker.pkg.dev
username: _json_key
password: ${{ secrets.GCP_CREDS }}
-
name: Grab the date to do cache busting (assumes same day OK to keep)
run: |
Expand All @@ -64,7 +73,7 @@ jobs:
-
name: Build containers, and push to GCR only if the 'build and push to gcr.io for staging' label is applied
id: docker_build
uses: docker/build-push-action@15560696de535e4014efeff63c48f16952e52dd1 # v6.2.0
uses: docker/build-push-action@5176d81f87c23d6fc96624dfdbcd9f3830bbe445 # v6.5.0
with:
push: ${{ contains(github.event.pull_request.labels.*.name, 'build and push to gcr.io for staging') }}
context: ./tensorflow/tools/tf_sig_build_dockerfiles
Expand All @@ -74,6 +83,7 @@ jobs:
CACHEBUSTER=${{ steps.date.outputs.DATE }}
tags: |
gcr.io/tensorflow-sigs/build:${{ github.event.number }}-${{ matrix.python-version }}
us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/build:${{ github.event.number }}-${{ matrix.python-version }}
cache-from: |
type=registry,ref=tensorflow/build:latest-${{ matrix.python-version }}
type=registry,ref=gcr.io/tensorflow-sigs/build:${{ github.event.number }}-${{ matrix.python-version }}
Expand Down
18 changes: 14 additions & 4 deletions .github/workflows/sigbuild-docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,20 +46,28 @@ jobs:
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@d70bba72b1f3fd22344832f00baa16ece964efeb # v3.3.0
uses: docker/setup-buildx-action@988b5a0280414f521da01fcc63a27aeeb4b104db # v3.6.1
-
name: Login to DockerHub
uses: docker/login-action@0d4c9c5ea7693da7b068278f7b52bda2a190a446 # v3.2.0
uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
-
name: Login to GCR
uses: docker/login-action@0d4c9c5ea7693da7b068278f7b52bda2a190a446 # v3.2.0
uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0
with:
registry: gcr.io
username: _json_key
password: ${{ secrets.GCP_CREDS }}
-
name: Login to AR
# Once this is verified, removed gcr.io actions.
uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567 # v3.3.0
with:
registry: us-central1-docker.pkg.dev
username: _json_key
password: ${{ secrets.GCP_CREDS }}
-
name: Grab the upcoming TF version to tag this container
run: |
Expand All @@ -74,7 +82,7 @@ jobs:
-
name: Build and push
id: docker_build
uses: docker/build-push-action@15560696de535e4014efeff63c48f16952e52dd1 # v6.2.0
uses: docker/build-push-action@5176d81f87c23d6fc96624dfdbcd9f3830bbe445 # v6.5.0
with:
push: true
context: ./tensorflow/tools/tf_sig_build_dockerfiles
Expand All @@ -87,6 +95,8 @@ jobs:
tensorflow/build:${{ steps.tf-version.outputs.TF_VERSION }}-${{ matrix.python-version }}
gcr.io/tensorflow-sigs/build:latest-${{ matrix.python-version }}
gcr.io/tensorflow-sigs/build:${{ steps.tf-version.outputs.TF_VERSION }}-${{ matrix.python-version }}
us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/build:latest-${{ matrix.python-version }}
us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/build:${{ steps.tf-version.outputs.TF_VERSION }}-${{ matrix.python-version }}
cache-from: type=registry,ref=tensorflow/build:latest-${{ matrix.python-version }}
cache-to: type=inline
-
Expand Down
20 changes: 14 additions & 6 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -253,13 +253,21 @@ There are two ways to run TensorFlow unit tests.
export flags="--config=opt -k"
```

If the tests are to be run on the GPU, add CUDA paths to LD_LIBRARY_PATH and
add the `cuda` option flag
If the tests are to be run on the GPU:
* For TensorFlow versions starting from v.2.18.0:
Add the `cuda` option flag.

```bash
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH"
export flags="--config=opt --config=cuda -k"
```
```bash
export flags="--config=opt --config=cuda -k"
```

* For TensorFlow versions prior v.2.18.0:
Add CUDA paths to LD_LIBRARY_PATH and add the `cuda` option flag.

```bash
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH"
export flags="--config=opt --config=cuda -k"
```

For example, to run all tests under tensorflow/python, do:

Expand Down
Loading

0 comments on commit 8f1cb0f

Please sign in to comment.