Skip to content

Commit

Permalink
Merge pull request #2565 from ROCm/develop-upstream-sync-240603
Browse files: browse the repository at this point in the history
Develop upstream sync 240603
  • Loading branch information
mmakevic-amd authored Jun 24, 2024
2 parents c467913 + 184dce5 commit 6d4a519
Show file tree
Hide file tree
Showing 1,140 changed files with 31,789 additions and 12,848 deletions.
1 change: 0 additions & 1 deletion .github/bot_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
# A list of assignees
assignees:
- sushreebarsa
- SuryanarayanaY
- tilakrayal
- Venkat6871
# A list of assignees for compiler folder
Expand Down
2 changes: 2 additions & 0 deletions RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
* Add TensorFlow to StableHLO converter to TensorFlow pip package.
* TensorRT support: this is the last release supporting TensorRT. It will be
removed in the next release.
* NumPy 2.0 support: TensorFlow is going to support NumPy 2.0 in the next
release. It may break some edge cases of TensorFlow API usage.

## Keras

Expand Down
2 changes: 1 addition & 1 deletion ci/official/envs/linux_arm64
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ TFCI_BAZEL_TARGET_SELECTING_CONFIG_PREFIX=linux_arm64
# despite lacking Nvidia CUDA support.
TFCI_BUILD_PIP_PACKAGE_ARGS="--repo_env=WHEEL_NAME=tensorflow"
TFCI_DOCKER_ENABLE=1
TFCI_DOCKER_IMAGE=gcr.io/tensorflow-sigs/build-arm64:tf-2-16-multi-python
TFCI_DOCKER_IMAGE=us-central1-docker.pkg.dev/tensorflow-sigs/tensorflow/build-arm64:tf-2-16-multi-python
TFCI_DOCKER_PULL_ENABLE=1
TFCI_DOCKER_REBUILD_ARGS="--target=tf ci/official/containers/linux_arm64"
TFCI_INDEX_HTML_ENABLE=1
Expand Down
1 change: 0 additions & 1 deletion tensorflow/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -1614,7 +1614,6 @@ genrule(
"//tensorflow/c/eager:headers",
"//tensorflow/cc:headers",
"//tensorflow/core:headers",
"@local_xla//xla/stream_executor:stream_executor_install_hdrs",
],
outs = ["include"],
cmd = """
Expand Down
1 change: 1 addition & 0 deletions tensorflow/c/experimental/ops/gen/common/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -41,5 +41,6 @@ tf_cc_tests(
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core:testlib",
"//tensorflow/core/platform:types",
],
)
4 changes: 1 addition & 3 deletions tensorflow/c/experimental/ops/gen/cpp/views/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,10 @@ cc_library(
deps = [
"//tensorflow/c/experimental/ops/gen/common",
"//tensorflow/c/experimental/ops/gen/model",
"//tensorflow/core:framework",
"//tensorflow/core:lib",
"//tensorflow/core:lib_internal",
"//tensorflow/core:op_gen_lib",
"//tensorflow/core:protos_all_cc",
"//tensorflow/core/platform:str_util",
"@com_google_absl//absl/log",
"@com_google_absl//absl/strings",
],
alwayslink = 1,
Expand Down
2 changes: 2 additions & 0 deletions tensorflow/c/experimental/ops/gen/cpp/views/arg_type_view.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ limitations under the License.
==============================================================================*/
#include "tensorflow/c/experimental/ops/gen/cpp/views/arg_type_view.h"

#include "tensorflow/c/experimental/ops/gen/model/arg_type.h"
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/platform/types.h"

namespace tensorflow {
namespace generator {
Expand Down
3 changes: 3 additions & 0 deletions tensorflow/c/experimental/ops/gen/cpp/views/arg_view.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ limitations under the License.
==============================================================================*/
#include "tensorflow/c/experimental/ops/gen/cpp/views/arg_view.h"

#include "tensorflow/c/experimental/ops/gen/model/arg_spec.h"
#include "tensorflow/core/platform/types.h"

namespace tensorflow {
namespace generator {
namespace cpp {
Expand Down
6 changes: 5 additions & 1 deletion tensorflow/c/experimental/ops/gen/cpp/views/attr_view.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,14 @@ limitations under the License.

#include <string>

#include "absl/log/log.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/substitute.h"
#include "tensorflow/c/experimental/ops/gen/common/case_format.h"
#include "tensorflow/c/experimental/ops/gen/common/view_util.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/attr_value.pb.h"
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/platform/types.h"

namespace tensorflow {
namespace generator {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ limitations under the License.
#include "tensorflow/c/experimental/ops/gen/cpp/views/arg_type_view.h"
#include "tensorflow/c/experimental/ops/gen/cpp/views/arg_view.h"
#include "tensorflow/c/experimental/ops/gen/cpp/views/attr_view.h"
#include "tensorflow/c/experimental/ops/gen/model/arg_spec.h"
#include "tensorflow/c/experimental/ops/gen/model/attr_spec.h"
#include "tensorflow/core/platform/types.h"

namespace tensorflow {
namespace generator {
Expand Down
2 changes: 2 additions & 0 deletions tensorflow/c/experimental/ops/gen/cpp/views/op_view.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ limitations under the License.
#include "tensorflow/c/experimental/ops/gen/common/view_util.h"
#include "tensorflow/c/experimental/ops/gen/cpp/views/arg_view.h"
#include "tensorflow/c/experimental/ops/gen/cpp/views/attr_view.h"
#include "tensorflow/c/experimental/ops/gen/cpp/views/op_argument_view.h"
#include "tensorflow/c/experimental/ops/gen/model/op_spec.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/platform/logging.h"

Expand Down
1 change: 0 additions & 1 deletion tensorflow/c/experimental/stream_executor/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ cc_library(
"@local_tsl//tsl/platform:statusor",
"@local_xla//xla/stream_executor",
"@local_xla//xla/stream_executor:stream_executor_interface",
"@local_xla//xla/stream_executor:stream_interface",
],
)

Expand Down
51 changes: 18 additions & 33 deletions tensorflow/c/experimental/stream_executor/stream_executor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -186,15 +186,15 @@ void HostCallbackTrampoline(void* ctx, TF_Status* status) {
delete host_ctx;
}

class CStreamExecutor : public StreamExecutor {
class CStreamExecutor : public StreamExecutorCommon {
public:
explicit CStreamExecutor(Platform* se_platform, SP_Device device,
SP_DeviceFns* device_fns,
SP_StreamExecutor* stream_executor,
SP_Platform* platform, SP_PlatformFns* platform_fns,
SP_TimerFns* timer_fns, const std::string& name,
int visible_device_count)
: StreamExecutor(se_platform),
: StreamExecutorCommon(se_platform),
device_(std::move(device)),
device_fns_(device_fns),
stream_executor_(stream_executor),
Expand Down Expand Up @@ -316,8 +316,7 @@ class CStreamExecutor : public StreamExecutor {
absl::Status MemZero(Stream* stream, DeviceMemoryBase* location,
uint64 size) override {
OwnedTFStatus c_status(TF_NewStatus());
SP_Stream stream_handle =
static_cast<CStream*>(stream->implementation())->Handle();
SP_Stream stream_handle = static_cast<CStream*>(stream)->Handle();
SP_DeviceMemoryBase device_mem = DeviceMemoryBaseToC(location);
stream_executor_->mem_zero(&device_, stream_handle, &device_mem, size,
c_status.get());
Expand All @@ -326,8 +325,7 @@ class CStreamExecutor : public StreamExecutor {
absl::Status Memset(Stream* stream, DeviceMemoryBase* location, uint8 pattern,
uint64 size) override {
OwnedTFStatus c_status(TF_NewStatus());
SP_Stream stream_handle =
static_cast<CStream*>(stream->implementation())->Handle();
SP_Stream stream_handle = static_cast<CStream*>(stream)->Handle();
SP_DeviceMemoryBase device_mem = DeviceMemoryBaseToC(location);
stream_executor_->memset(&device_, stream_handle, &device_mem, pattern,
size, c_status.get());
Expand All @@ -336,8 +334,7 @@ class CStreamExecutor : public StreamExecutor {
absl::Status Memset32(Stream* stream, DeviceMemoryBase* location,
uint32 pattern, uint64 size) override {
OwnedTFStatus c_status(TF_NewStatus());
SP_Stream stream_handle =
static_cast<CStream*>(stream->implementation())->Handle();
SP_Stream stream_handle = static_cast<CStream*>(stream)->Handle();
SP_DeviceMemoryBase device_mem = DeviceMemoryBaseToC(location);
stream_executor_->memset32(&device_, stream_handle, &device_mem, pattern,
size, c_status.get());
Expand All @@ -346,8 +343,7 @@ class CStreamExecutor : public StreamExecutor {
absl::Status Memcpy(Stream* stream, void* host_dst,
const DeviceMemoryBase& gpu_src, uint64 size) override {
OwnedTFStatus c_status(TF_NewStatus());
SP_Stream stream_handle =
static_cast<CStream*>(stream->implementation())->Handle();
SP_Stream stream_handle = static_cast<CStream*>(stream)->Handle();
SP_DeviceMemoryBase device_mem_src = DeviceMemoryBaseToC(&gpu_src);
stream_executor_->memcpy_dtoh(&device_, stream_handle, host_dst,
&device_mem_src, size, c_status.get());
Expand All @@ -359,8 +355,7 @@ class CStreamExecutor : public StreamExecutor {
absl::Status Memcpy(Stream* stream, DeviceMemoryBase* gpu_dst,
const void* host_src, uint64 size) override {
OwnedTFStatus c_status(TF_NewStatus());
SP_Stream stream_handle =
static_cast<CStream*>(stream->implementation())->Handle();
SP_Stream stream_handle = static_cast<CStream*>(stream)->Handle();
SP_DeviceMemoryBase device_mem_dst = DeviceMemoryBaseToC(gpu_dst);
stream_executor_->memcpy_htod(&device_, stream_handle, &device_mem_dst,
host_src, size, c_status.get());
Expand All @@ -373,8 +368,7 @@ class CStreamExecutor : public StreamExecutor {
const DeviceMemoryBase& gpu_src,
uint64 size) override {
OwnedTFStatus c_status(TF_NewStatus());
SP_Stream stream_handle =
static_cast<CStream*>(stream->implementation())->Handle();
SP_Stream stream_handle = static_cast<CStream*>(stream)->Handle();
SP_DeviceMemoryBase device_mem_dst = DeviceMemoryBaseToC(gpu_dst);
SP_DeviceMemoryBase device_mem_src = DeviceMemoryBaseToC(&gpu_src);
stream_executor_->memcpy_dtod(&device_, stream_handle, &device_mem_dst,
Expand All @@ -387,20 +381,17 @@ class CStreamExecutor : public StreamExecutor {
}
bool HostCallback(Stream* stream,
absl::AnyInvocable<absl::Status() &&> callback) override {
SP_Stream stream_handle =
static_cast<CStream*>(stream->implementation())->Handle();
SP_Stream stream_handle = static_cast<CStream*>(stream)->Handle();
HostCallbackContext* ctx = new HostCallbackContext{std::move(callback)};
return stream_executor_->host_callback(&device_, stream_handle,
&HostCallbackTrampoline, ctx);
}
absl::Status RecordEvent(Stream* stream, Event* event) override {
SP_Stream stream_handle =
static_cast<CStream*>(stream->implementation())->Handle();
SP_Stream stream_handle = static_cast<CStream*>(stream)->Handle();
return static_cast<CEvent*>(event)->Record(stream_handle);
}
absl::Status WaitForEvent(Stream* stream, Event* event) override {
SP_Stream stream_handle =
static_cast<CStream*>(stream->implementation())->Handle();
SP_Stream stream_handle = static_cast<CStream*>(stream)->Handle();
SP_Event event_handle = static_cast<CEvent*>(event)->Handle();
OwnedTFStatus c_status(TF_NewStatus());
stream_executor_->wait_for_event(&device_, stream_handle, event_handle,
Expand All @@ -409,14 +400,12 @@ class CStreamExecutor : public StreamExecutor {
return s;
}
void DeallocateStream(Stream* stream) override {
static_cast<CStream*>(stream->implementation())->Destroy();
static_cast<CStream*>(stream)->Destroy();
}
bool CreateStreamDependency(Stream* dependent, Stream* other) override {
OwnedTFStatus c_status(TF_NewStatus());
SP_Stream dependent_handle =
static_cast<CStream*>(dependent->implementation())->Handle();
SP_Stream other_handle =
static_cast<CStream*>(other->implementation())->Handle();
SP_Stream dependent_handle = static_cast<CStream*>(dependent)->Handle();
SP_Stream other_handle = static_cast<CStream*>(other)->Handle();
stream_executor_->create_stream_dependency(&device_, dependent_handle,
other_handle, c_status.get());
if (TF_GetCode(c_status.get()) != TF_OK) {
Expand All @@ -435,8 +424,7 @@ class CStreamExecutor : public StreamExecutor {

absl::Status BlockHostUntilDone(Stream* stream) override {
OwnedTFStatus c_status(TF_NewStatus());
SP_Stream stream_handle =
static_cast<CStream*>(stream->implementation())->Handle();
SP_Stream stream_handle = static_cast<CStream*>(stream)->Handle();

// If `block_host_until_done` is set, use it.
if (stream_executor_->block_host_until_done != nullptr) {
Expand All @@ -463,20 +451,17 @@ class CStreamExecutor : public StreamExecutor {

absl::Status GetStatus(Stream* stream) override {
OwnedTFStatus c_status(TF_NewStatus());
SP_Stream stream_handle =
static_cast<CStream*>(stream->implementation())->Handle();
SP_Stream stream_handle = static_cast<CStream*>(stream)->Handle();
stream_executor_->get_stream_status(&device_, stream_handle,
c_status.get());
return StatusFromTF_Status(c_status.get());
}

absl::Status EnablePeerAccessTo(StreamExecutorInterface* other) override {
absl::Status EnablePeerAccessTo(StreamExecutor* other) override {
return tsl::errors::Unimplemented(
"EnablePeerAccessTo is not supported by pluggable device.");
}
bool CanEnablePeerAccessTo(StreamExecutorInterface* other) override {
return false;
}
bool CanEnablePeerAccessTo(StreamExecutor* other) override { return false; }

bool DeviceMemoryUsage(int64_t* free, int64_t* total) const override {
return stream_executor_->device_memory_usage(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ limitations under the License.
#include "tensorflow/c/tf_status_helper.h"
#include "xla/stream_executor/executor_cache.h"
#include "xla/stream_executor/platform.h"
#include "xla/stream_executor/stream_common.h"
#include "xla/stream_executor/stream_executor.h"
#include "xla/stream_executor/stream_executor_interface.h"
#include "xla/stream_executor/stream_interface.h"
#include "tsl/platform/statusor.h"

namespace stream_executor {
Expand Down Expand Up @@ -97,16 +97,17 @@ class CPlatform : public Platform {
stream_executor::ExecutorCache executor_cache_;
};

class CStream : public Stream {
class CStream : public StreamCommon {
public:
CStream(SP_Device* device, SP_StreamExecutor* stream_executor,
StreamExecutor* executor)
: Stream(executor),
: StreamCommon(executor),
device_(device),
stream_executor_(stream_executor),
stream_handle_(nullptr) {}
~CStream() override {
parent()->BlockHostUntilDone(this).IgnoreError();
parent()->DeallocateStream(this);
Destroy();
}

Expand Down
2 changes: 1 addition & 1 deletion tensorflow/c/kernels.cc
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,7 @@ SP_Stream TF_GetStream(TF_OpKernelContext* ctx, TF_Status* status) {
} else { // Is a PluggableDevice
TF_SetStatus(status, TF_OK, "");
auto c_stream = static_cast<stream_executor::CStream*>(
cc_ctx->op_device_context()->stream()->implementation());
cc_ctx->op_device_context()->stream());
return c_stream->Handle();
}
#endif // defined(IS_MOBILE_PLATFORM) || defined(IS_SLIM_BUILD)
Expand Down
Binary file not shown.
7 changes: 6 additions & 1 deletion tensorflow/compiler/jit/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -956,9 +956,10 @@ cc_library(
":shape_inference_helpers",
"//tensorflow/core:core_cpu_internal",
"//tensorflow/core:framework",
"//tensorflow/core:graph",
"//tensorflow/core:lib",
"//tensorflow/core:protos_all_cc",
"@com_google_absl//absl/log",
"@com_google_absl//absl/strings",
"@local_xla//xla:statusor",
],
)
Expand Down Expand Up @@ -988,11 +989,15 @@ tf_cc_test(
"//tensorflow/cc:cc_ops_internal",
"//tensorflow/cc:ops",
"//tensorflow/cc:resource_variable_ops",
"//tensorflow/cc:scope",
"//tensorflow/core:core_cpu_base",
"//tensorflow/core:framework",
"//tensorflow/core:ops",
"//tensorflow/core:portable_gif_internal",
"//tensorflow/core:test",
"//tensorflow/core:test_main",
"//tensorflow/core/kernels:constant_op",
"@local_tsl//tsl/platform:status",
],
)

Expand Down
Loading

0 comments on commit 6d4a519

Please sign in to comment.