Merge pull request #2325 from ROCmSoftwarePlatform/to-2.13.1
Update 2.13 branch to 2.13.1
jayfurmanek authored Dec 18, 2023
2 parents 644d3a9 + 01e3857 commit 10c36c1
Showing 14 changed files with 103 additions and 46 deletions.
6 changes: 6 additions & 0 deletions RELEASE.md
@@ -1,3 +1,9 @@
+# Release 2.13.1
+
+### Bug Fixes and Other Changes
+
+* Refactor CpuExecutable to propagate LLVM errors.
+
 # Release 2.13.0
 
 ## TensorFlow
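The release-note bullet is terse; what it describes (in the diffs below) is replacing CpuExecutable's public constructors, which could only CHECK-crash when a JIT symbol lookup failed, with static factory functions that return StatusOr. A minimal, self-contained sketch of that pattern, using hypothetical JitExecutable/LookupSymbol names and plain absl rather than the real XLA classes:

```cpp
#include <memory>
#include <string>
#include <utility>

#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/strings/str_cat.h"

// Hypothetical stand-in for a fallible JIT symbol lookup.
bool LookupSymbol(const std::string& name) { return name == "entry"; }

class JitExecutable {
 public:
  // Fallible work happens here, before any object reaches the caller,
  // and failure becomes a Status instead of a CHECK crash.
  static absl::StatusOr<std::unique_ptr<JitExecutable>> Create(
      const std::string& entry_function_name) {
    if (!LookupSymbol(entry_function_name)) {
      return absl::InvalidArgumentError(
          absl::StrCat("Symbol ", entry_function_name, " not found."));
    }
    // The constructor itself stays trivial and infallible.
    return std::unique_ptr<JitExecutable>(
        new JitExecutable(entry_function_name));
  }

 private:
  explicit JitExecutable(std::string name) : name_(std::move(name)) {}
  std::string name_;
};
```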
1 change: 1 addition & 0 deletions tensorflow/compiler/tests/BUILD
@@ -2114,6 +2114,7 @@ tf_xla_py_test(
         "//tensorflow/python:framework",
         "//tensorflow/python:platform_test",
         "//tensorflow/python:training",
+        "//tensorflow/python/framework:errors",
         "//tensorflow/python/platform:client_testlib",
     ],
 )
17 changes: 17 additions & 0 deletions tensorflow/compiler/tests/xla_custom_call_ops_test.py
@@ -18,6 +18,7 @@
 from tensorflow.compiler.tf2xla.python import xla
 from tensorflow.python.eager import def_function
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors_impl
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_spec
 from tensorflow.python.ops import random_ops
@@ -46,6 +47,22 @@ def f(x, y):
       self.assertIn('custom_call_target="my_call"', hlo)
       self.assertIn('backend_config="my_backend_config"', hlo)
 
+  def testXlaCustomCallOpDoesntExist(self):
+    with ops.device('device:{}:0'.format(self.device)):
+
+      def f():
+        return xla.custom_call(
+            args=(1, 2),
+            target_name='my_non_existing_call_target',
+            dtype=dtypes.int32,
+            shape=(),
+            backend_config='my_backend_config',
+        )
+
+      with self.assertRaises(errors_impl.InvalidArgumentError):
+        compiled_f = def_function.function(f, jit_compile=True)
+        compiled_f()
+
   def testXlaCustomCallV2Op(self):
     with ops.device('device:{}:0'.format(self.device)):
 
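The new testXlaCustomCallOpDoesntExist works because the absl::InvalidArgumentError produced in C++ (see the cpu_executable.cc diff below) crosses into Python with its status code intact, and TensorFlow raises kInvalidArgument as errors_impl.InvalidArgumentError. A tiny status-side sketch in plain absl — not TensorFlow's actual binding code:

```cpp
#include <iostream>

#include "absl/status/status.h"

int main() {
  absl::Status s = absl::InvalidArgumentError("Symbol my_call not found.");
  // The status code, not the message, selects the Python exception type
  // (kInvalidArgument -> errors_impl.InvalidArgumentError).
  std::cout << (s.code() == absl::StatusCode::kInvalidArgument) << "\n";  // 1
  std::cout << s.message() << "\n";
  return 0;
}
```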
11 changes: 7 additions & 4 deletions tensorflow/compiler/xla/service/cpu/cpu_compiler.cc
@@ -1403,9 +1403,12 @@ CpuCompiler::CompileLegacyCpuExecutable(std::unique_ptr<HloModule> module) {
       std::move(llvm_context));
   cantFail((*jit)->AddModule(std::move(thread_safe_module)));
 
-  auto cpu_executable = std::make_unique<CpuExecutable>(
-      std::move(*jit), std::move(assignment), std::move(module), function_name,
-      std::move(hlo_profile_printer_data), std::move(hlo_profile_index_map));
+  TF_ASSIGN_OR_RETURN(
+      auto cpu_executable,
+      CpuExecutable::Create(std::move(*jit), std::move(assignment),
+                            std::move(module), function_name,
+                            std::move(hlo_profile_printer_data),
+                            std::move(hlo_profile_index_map)));
 
   if (embed_ir_in_executable) {
     cpu_executable->set_ir_module_string(ir_module_string);
@@ -1507,7 +1510,7 @@ CpuCompiler::CompileXlaRuntimeCpuExecutable(
         obj_file);
   }
 
-  return std::make_unique<CpuExecutable>(
+  return CpuExecutable::Create(
       std::move(hlo_module), std::move(hlo_profile_printer_data),
       std::move(hlo_profile_index_map), std::move(assignment),
       std::move(xla_runtime_executable));
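TF_ASSIGN_OR_RETURN, used in the new code above, evaluates a StatusOr expression, early-returns the error status if it failed, and otherwise moves the value into the declared variable. Hand-written, it is roughly the following — a sketch with hypothetical ParsePort/Serve functions; the real macro additionally hides the temporary behind a uniquely named local:

```cpp
#include <utility>

#include "absl/status/status.h"
#include "absl/status/statusor.h"

// Hypothetical fallible step standing in for CpuExecutable::Create.
absl::StatusOr<int> ParsePort(bool ok) {
  if (!ok) return absl::InvalidArgumentError("bad port");
  return 8080;
}

absl::Status Serve(bool ok) {
  // Hand-written equivalent of: TF_ASSIGN_OR_RETURN(int port, ParsePort(ok));
  absl::StatusOr<int> port_or = ParsePort(ok);
  if (!port_or.ok()) return port_or.status();  // propagate the error upward
  int port = std::move(port_or).value();

  (void)port;  // ... use port ...
  return absl::OkStatus();
}
```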
61 changes: 34 additions & 27 deletions tensorflow/compiler/xla/service/cpu/cpu_executable.cc
@@ -58,53 +58,60 @@ namespace cpu {
 
 namespace runtime = ::xla::runtime;
 
-CpuExecutable::CpuExecutable(
+StatusOr<std::unique_ptr<CpuExecutable>> CpuExecutable::Create(
     std::unique_ptr<SimpleOrcJIT> jit,
     std::unique_ptr<const BufferAssignment> assignment,
     std::unique_ptr<HloModule> hlo_module,
     const std::string& entry_function_name,
     std::unique_ptr<HloProfilePrinterData> hlo_profile_printer_data,
-    std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map)
-    : Executable(std::move(hlo_module), std::move(hlo_profile_printer_data),
-                 std::move(hlo_profile_index_map)),
-      jit_(std::move(jit)),
-      assignment_(std::move(assignment)),
-      module_name_(entry_function_name) {
-  if (assignment_) {
-    buffer_assignment_ =
-        std::make_shared<BufferAssignmentProto>(assignment_->ToProto());
-  }
-  if (has_module()) {
-    XlaDebugInfoManager::Get()->RegisterModule(
-        module().unique_id(), shared_module(), buffer_assignment_);
-  }
+    std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map) {
+  std::unique_ptr<CpuExecutable> executable(new CpuExecutable(
+      std::move(hlo_module), std::move(hlo_profile_printer_data),
+      std::move(hlo_profile_index_map), std::move(assignment)));
+  executable->jit_ = std::move(jit);
+  executable->module_name_ = entry_function_name;
 
   // Resolve symbols in the constructor rather than at execution time to avoid
   // races because FindSymbol is not thread safe.
   llvm::Expected<llvm::orc::ExecutorSymbolDef> sym =
-      jit_->FindCompiledSymbol(entry_function_name);
+      executable->jit_->FindCompiledSymbol(entry_function_name);
   // We expect to find the symbol provided with entry_function_name; otherwise
   // this is an internal error.
-  CHECK(sym->getAddress()) << "Symbol " << entry_function_name << " not found.";
+  if (!sym) {
+    return absl::InvalidArgumentError(
+        absl::StrCat("Symbol ", entry_function_name, " not found."));
+  }
   // getAddress can do work under the hood in the jit, so it needs to be
   // guarded by the mutex.
-  compute_function_ =
+  executable->compute_function_ =
       reinterpret_cast<ComputeFunctionType>(sym->getAddress().getValue());
   VLOG(1) << "compute_function_ at address "
-          << reinterpret_cast<void*>(compute_function_);
-  jit_->DoneCompiling();
+          << reinterpret_cast<void*>(executable->compute_function_);
+  executable->jit_->DoneCompiling();
+  return executable;
 }
 
-CpuExecutable::CpuExecutable(
+StatusOr<std::unique_ptr<CpuExecutable>> CpuExecutable::Create(
     std::unique_ptr<HloModule> hlo_module,
     std::unique_ptr<HloProfilePrinterData> hlo_profile_printer_data,
     std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map,
     std::unique_ptr<const BufferAssignment> assignment,
-    std::unique_ptr<XlaRuntimeCpuExecutable> xla_runtime_executable)
+    std::unique_ptr<XlaRuntimeCpuExecutable> xla_runtime_executable) {
+  std::unique_ptr<CpuExecutable> executable(new CpuExecutable(
+      std::move(hlo_module), std::move(hlo_profile_printer_data),
+      std::move(hlo_profile_index_map), std::move(assignment)));
+  executable->xla_runtime_executable_ = std::move(xla_runtime_executable);
+  return executable;
+}
+
+CpuExecutable::CpuExecutable(
+    std::unique_ptr<HloModule> hlo_module,
+    std::unique_ptr<HloProfilePrinterData> hlo_profile_printer_data,
+    std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map,
+    std::unique_ptr<const BufferAssignment> assignment)
     : Executable(std::move(hlo_module), std::move(hlo_profile_printer_data),
                  std::move(hlo_profile_index_map)),
-      assignment_(std::move(assignment)),
-      xla_runtime_executable_(std::move(xla_runtime_executable)) {
+      assignment_(std::move(assignment)) {
   if (assignment_) {
     buffer_assignment_ =
         std::make_shared<BufferAssignmentProto>(assignment_->ToProto());
@@ -328,9 +335,9 @@ StatusOr<std::unique_ptr<Executable>> CpuExecutable::LoadFromObjFile(
       std::move(executable_ptr), xla_framework_mapping,
       std::move(*ffi_modules_state));
 
-  return std::unique_ptr<Executable>(new CpuExecutable(
-      std::move(hlo_module), nullptr, nullptr, std::move(buffer_assignment),
-      std::move(xla_runtime_executable)));
+  return CpuExecutable::Create(std::move(hlo_module), nullptr, nullptr,
+                               std::move(buffer_assignment),
+                               std::move(xla_runtime_executable));
 }
 
 StatusOr<ExecutionOutput> CpuExecutable::CreateResultShapedBuffer(
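In the LoadFromObjFile hunk above, the function's return type is StatusOr&lt;std::unique_ptr&lt;Executable&gt;&gt; yet it returns CpuExecutable::Create(...) directly. That compiles because absl::StatusOr&lt;T&gt; converts from StatusOr&lt;U&gt; whenever T is constructible from U, as std::unique_ptr&lt;Executable&gt; is from std::unique_ptr&lt;CpuExecutable&gt;. A self-contained sketch with stand-in types, not the real XLA classes:

```cpp
#include <memory>

#include "absl/status/statusor.h"

// Stand-ins for the real XLA types.
struct Executable {
  virtual ~Executable() = default;
};

struct CpuExecutable : Executable {
  static absl::StatusOr<std::unique_ptr<CpuExecutable>> Create() {
    return std::unique_ptr<CpuExecutable>(new CpuExecutable());
  }
};

// StatusOr<unique_ptr<Derived>> converts to StatusOr<unique_ptr<Base>>,
// so the factory result can be returned as-is.
absl::StatusOr<std::unique_ptr<Executable>> Load() {
  return CpuExecutable::Create();
}
```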
23 changes: 14 additions & 9 deletions tensorflow/compiler/xla/service/cpu/cpu_executable.h
@@ -138,14 +138,15 @@ class XlaRuntimeCpuExecutable {
 // architecture, so JIT-ed code and host code share the same ABI.
 class CpuExecutable : public Executable {
  public:
-  CpuExecutable(std::unique_ptr<SimpleOrcJIT> jit,
-                std::unique_ptr<const BufferAssignment> assignment,
-                std::unique_ptr<HloModule> hlo_module,
-                const std::string& entry_function_name,
-                std::unique_ptr<HloProfilePrinterData> hlo_profile_printer_data,
-                std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map);
-  // XLA Runtime constructor.
-  CpuExecutable(
+  static StatusOr<std::unique_ptr<CpuExecutable>> Create(
+      std::unique_ptr<SimpleOrcJIT> jit,
+      std::unique_ptr<const BufferAssignment> assignment,
+      std::unique_ptr<HloModule> hlo_module,
+      const std::string& entry_function_name,
+      std::unique_ptr<HloProfilePrinterData> hlo_profile_printer_data,
+      std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map);
+  // XLA Runtime factory method.
+  static StatusOr<std::unique_ptr<CpuExecutable>> Create(
       std::unique_ptr<HloModule> hlo_module,
       std::unique_ptr<HloProfilePrinterData> hlo_profile_printer_data,
       std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map,
@@ -257,7 +258,7 @@ class CpuExecutable : public Executable {
   const InstructionValueSet& GetRootValueSet() const;
 
   // The JIT containing compiled modules.
-  const std::unique_ptr<SimpleOrcJIT> jit_;
+  std::unique_ptr<SimpleOrcJIT> jit_;
 
   // Buffer assignment for the buffers we need to allocate.
   const std::unique_ptr<const BufferAssignment> assignment_;
@@ -281,6 +282,10 @@
   // If not null, XLA Runtime is enabled.
   std::unique_ptr<XlaRuntimeCpuExecutable> xla_runtime_executable_;
 
+  CpuExecutable(std::unique_ptr<HloModule> hlo_module,
+                std::unique_ptr<HloProfilePrinterData> hlo_profile_printer_data,
+                std::unique_ptr<HloProfileIndexMap> hlo_profile_index_map,
+                std::unique_ptr<const BufferAssignment> assignment);
   CpuExecutable(const CpuExecutable&) = delete;
   CpuExecutable& operator=(const CpuExecutable&) = delete;
 };
2 changes: 1 addition & 1 deletion tensorflow/core/public/version.h
@@ -22,7 +22,7 @@ limitations under the License.
 // tensorflow/tools/pip_package/setup.py
 #define TF_MAJOR_VERSION 2
 #define TF_MINOR_VERSION 13
-#define TF_PATCH_VERSION 0
+#define TF_PATCH_VERSION 1
 
 // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1",
 // "-beta", "-rc", "-rc.1")
2 changes: 1 addition & 1 deletion tensorflow/tensorflow.bzl
@@ -71,7 +71,7 @@ def register_extension_info(**kwargs):
 # not contain rc or alpha, only numbers.
 # Also update tensorflow/core/public/version.h
 # and tensorflow/tools/pip_package/setup.py
-VERSION = "2.13.0"
+VERSION = "2.13.1"
 VERSION_MAJOR = VERSION.split(".")[0]
 two_gpu_tags = ["requires-gpu-nvidia:2", "notap", "manual", "no_pip"]
 
4 changes: 4 additions & 0 deletions tensorflow/tools/ci_build/build_scripts/ARM_SKIP_TESTS.sh
@@ -16,6 +16,10 @@
 set -x
 
 ARM_SKIP_TESTS="-//tensorflow/lite/... \
+-//tensorflow/core/platform:ram_file_system_test \
+-//tensorflow/python/compiler/xla:xla_test \
+-//tensorflow/python/data/experimental/kernel_tests:checkpoint_input_pipeline_hook_test \
+-//tensorflow/python/distribute:parameter_server_strategy_test \
 -//tensorflow/python/kernel_tests/nn_ops:atrous_conv2d_test \
 -//tensorflow/python/kernel_tests/nn_ops:conv_ops_test \
 "
4 changes: 2 additions & 2 deletions tensorflow/tools/pip_package/setup.py
@@ -47,8 +47,8 @@
 # result for pip.
 # Also update tensorflow/tensorflow.bzl and
 # tensorflow/core/public/version.h
-_VERSION = '2.13.0'
-_RC_VERSION = ''
+_VERSION = '2.13.1'
+
 
 # We use the same setup.py for all tensorflow_* packages and for the nightly
 # equivalents (tf_nightly_*). The package is controlled from the argument line
8 changes: 7 additions & 1 deletion tensorflow/tools/tf_sig_build_dockerfiles/Dockerfile
@@ -34,12 +34,18 @@ RUN /setup.sources.sh && /setup.packages.sh /devel.packages.txt && /setup.cuda.s
 # - buildifier: clean bazel build deps
 # - buildozer: clean bazel build deps
 # - gcloud SDK: communicate with Google Cloud Platform (GCP) for RBE, CI
+# - patchelf: Utility tool to modify existing ELF executables and libraries
 RUN git clone --branch v1.7.0 https://github.com/bats-core/bats-core.git && bats-core/install.sh /usr/local && rm -rf bats-core
 RUN wget https://github.com/bazelbuild/bazelisk/releases/download/v1.11.0/bazelisk-linux-amd64 -O /usr/local/bin/bazel && chmod +x /usr/local/bin/bazel
 RUN wget https://github.com/bazelbuild/buildtools/releases/download/3.5.0/buildifier -O /usr/local/bin/buildifier && chmod +x /usr/local/bin/buildifier
 RUN wget https://github.com/bazelbuild/buildtools/releases/download/3.5.0/buildozer -O /usr/local/bin/buildozer && chmod +x /usr/local/bin/buildozer
 RUN curl -sSL https://sdk.cloud.google.com > /tmp/gcloud && bash /tmp/gcloud --install-dir=~/usr/local/bin --disable-prompts
 
+# Download and install patchelf v0.18.0 from GitHub. The default Ubuntu focal
+# packages only provide the "0.10-2build1" version. We use patchelf to manipulate
+# certain shared libraries during the wheel building process (https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/pip_package/build_pip_package.sh#L255-L262).
+# When we use Patchelf versions <0.12, those shared libraries end up with a
+# corrupted PT_NOTE program header. This was fixed in v0.12, see https://github.com/NixOS/patchelf/commit/43a33482b501b0f5ee9da312aabfca3806570cc9.
+RUN wget https://github.com/NixOS/patchelf/releases/download/0.18.0/patchelf-0.18.0-x86_64.tar.gz && tar -zxvf patchelf-0.18.0-x86_64.tar.gz -C /usr && rm -rf patchelf-0.18.0-x86_64.tar.gz
 
 # All lines past this point are reset when $CACHEBUSTER is set. We need this
 # for Python specifically because we install some nightly packages which are
@@ -28,6 +28,7 @@ build-essential
 ca-certificates
 llvm-16
 clang-16
+lld-16
 clang-format-12
 colordiff
 curl
@@ -47,7 +48,6 @@ mlocate
 moreutils
 openjdk-11-jdk
 openjdk-11-jre-headless
-patchelf
 pkg-config
 python3-dev
 python3-setuptools
@@ -23,6 +23,10 @@ build --define=tf_api_version=2 --action_env=TF2_BEHAVIOR=1
 # Target the AVX instruction set
 build --copt=-mavx --host_copt=-mavx
 
+# Use lld as the linker
+build --linkopt="-fuse-ld=lld"
+build --linkopt="-lm"
+
 # Disable clang extention that rejects type definitions within offsetof.
 # This was added in clang-16 by https://reviews.llvm.org/D133574.
 # Can be removed once upb is updated, since a type definition is used within
@@ -30,6 +30,10 @@ build --copt=-mavx --host_copt=-mavx
 # See https://github.com/protocolbuffers/upb/blob/9effcbcb27f0a665f9f345030188c0b291e32482/upb/upb.c#L183.
 build --copt=-Wno-gnu-offsetof-extensions
 
+# Use lld as the linker
+build --linkopt="-fuse-ld=lld"
+build --linkopt="-lm"
+
 # Store performance profiling log in the mounted artifact directory.
 # The profile can be viewed by visiting chrome://tracing in a Chrome browser.
 # See https://docs.bazel.build/versions/main/skylark/performance.html#performance-profiling
