Skip to content
This repository has been archived by the owner on Jan 26, 2024. It is now read-only.

Commit

Permalink
SWDEV-351980 - Move activity_ to the ProfilingInfo
Browse files Browse the repository at this point in the history
The activity_ is only instantiated if profiling is enabled.

Remove the HIP private global record ID. Instead, use the correlation ID
stored in the hip_api_data_t by the profiler while the last HIP function
is in scope.

For NDRange and Copy commands, store the kernel name and byte size
(respectively) in the record.

General cleanups to improve the code's readability.

(cherry picked from commit 3e2681b)

SWDEV-351980 - Enable profiling for commands reporting activities

Profiling should be enabled for any command reporting activities as the
activity record captures the profilingInfo's start and end timestamps.

Since IS_PROFILER_ON is only used to determine whether API tracing is
enabled, there is no need to expose it globally; it should be a property
of the activity_prof::CallbacksTable.

(cherry picked from commit 6853a11)

SWDEV-351980 - Consolidate registration tables in the roctracer library

Remove the activity_prof::CallbacksTable. The table was redundant with
the information already stored in the roctracer library. Instead, use a
single callback into the roctracer library both to query whether an
activity is enabled and to report it.

(cherry picked from commit b73e175)

Change-Id: If452a42113547fae4426eabe692b677e6fc415d5
  • Loading branch information
lmoriche committed Oct 13, 2022
1 parent e445c92 commit 666b996
Show file tree
Hide file tree
Showing 10 changed files with 197 additions and 212 deletions.
2 changes: 0 additions & 2 deletions device/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1246,8 +1246,6 @@ class VirtualDevice : public amd::HeapObject {
//! Submits a stream operation command. The default implementation only calls
//! ShouldNotReachHere(); presumably devices that support the command override it (confirm).
virtual void submitStreamOperation(amd::StreamOperationCommand& cmd) { ShouldNotReachHere(); }
//! Submits a virtual map command. The default implementation only calls
//! ShouldNotReachHere(); presumably devices that support the command override it (confirm).
virtual void submitVirtualMap(amd::VirtualMapCommand& cmd) { ShouldNotReachHere(); }

//! Pure virtual hook taking a single enable flag; every concrete virtual device must
//! implement it. NOTE(review): presumably toggles the "profiler attached" state — confirm.
virtual void profilerAttach(bool enable) = 0;

//! Requests storage for kernel arguments of the given size and alignment.
//! The default implementation allocates nothing and returns nullptr.
virtual address allocKernelArguments(size_t size, size_t alignment) { return nullptr; }

//! Get the blit manager object
Expand Down
3 changes: 2 additions & 1 deletion device/rocm/rocvirtual.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "device/rocm/rocmemory.hpp"
#include "device/rocm/rocblit.hpp"
#include "device/rocm/roccounters.hpp"
#include "platform/activity.hpp"
#include "platform/kernel.hpp"
#include "platform/context.hpp"
#include "platform/command.hpp"
Expand Down Expand Up @@ -167,7 +168,7 @@ bool HsaAmdSignalHandler(hsa_signal_value_t value, void* arg) {
return false;
}

if (ts->gpu()->isProfilerAttached()) {
if (activity_prof::IsEnabled(OP_ID_DISPATCH)) {
amd::Command* head = ts->getParsedCommand();
if (head == nullptr) {
head = ts->command().GetBatchHead();
Expand Down
5 changes: 0 additions & 5 deletions device/rocm/rocvirtual.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -396,10 +396,6 @@ class VirtualGPU : public device::VirtualDevice {

//! Returns the timestamp_ pointer currently held by this object.
Timestamp* timestamp() const { return timestamp_; }

//! Stores \p enable in the profilerAttached_ state bit (false when the argument is omitted).
void profilerAttach(bool enable = false) { profilerAttached_ = enable; }

//! Returns the current value of the profilerAttached_ state bit.
bool isProfilerAttached() const { return profilerAttached_; }

//! Reports whether the callback handler is still pending, as answered by
//! barriers_.IsHandlerPending(). The callback processes the commands, collects
//! profiling data and updates refcounts.
bool isHandlerPending() const { return barriers_.IsHandlerPending(); }
Expand Down Expand Up @@ -483,7 +479,6 @@ class VirtualGPU : public device::VirtualDevice {
uint32_t cooperative_ : 1; //!< Cooperative launch is enabled
uint32_t addSystemScope_ : 1; //!< Insert a system scope to the next aql
uint32_t tracking_created_ : 1; //!< Enabled if tracking object was properly initialized
uint32_t profilerAttached_ : 1; //!< Indicates if profiler is attached
uint32_t retainExternalSignals_ : 1; //!< Indicate to retain external signal array
};
uint32_t state_;
Expand Down
169 changes: 128 additions & 41 deletions platform/activity.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,47 +19,134 @@
THE SOFTWARE. */

#include "platform/activity.hpp"
#include "platform/command.hpp"
#include "platform/commandqueue.hpp"

ACTIVITY_PROF_INSTANCES();

#define CASE_STRING(X, C) case X: case_string = #C ;break;

const char* getOclCommandKindString(uint32_t op) {
const char* case_string;

switch(static_cast<cl_command_type>(op)) {
CASE_STRING(0, InternalMarker)
CASE_STRING(CL_COMMAND_MARKER, Marker)
CASE_STRING(CL_COMMAND_NDRANGE_KERNEL, KernelExecution)
CASE_STRING(CL_COMMAND_READ_BUFFER, CopyDeviceToHost)
CASE_STRING(CL_COMMAND_WRITE_BUFFER, CopyHostToDevice)
CASE_STRING(CL_COMMAND_COPY_BUFFER, CopyDeviceToDevice)
CASE_STRING(CL_COMMAND_READ_BUFFER_RECT, CopyDeviceToHost2D)
CASE_STRING(CL_COMMAND_WRITE_BUFFER_RECT, CopyHostToDevice2D)
CASE_STRING(CL_COMMAND_COPY_BUFFER_RECT, CopyDeviceToDevice2D)
CASE_STRING(CL_COMMAND_FILL_BUFFER, FillBuffer)
CASE_STRING(CL_COMMAND_TASK, Task)
CASE_STRING(CL_COMMAND_NATIVE_KERNEL, NativeKernel)
CASE_STRING(CL_COMMAND_READ_IMAGE, ReadImage)
CASE_STRING(CL_COMMAND_WRITE_IMAGE, WriteImage)
CASE_STRING(CL_COMMAND_COPY_IMAGE, CopyImage)
CASE_STRING(CL_COMMAND_COPY_IMAGE_TO_BUFFER, CopyImageToBuffer)
CASE_STRING(CL_COMMAND_COPY_BUFFER_TO_IMAGE, CopyBufferToImage)
CASE_STRING(CL_COMMAND_MAP_BUFFER, MapBuffer)
CASE_STRING(CL_COMMAND_MAP_IMAGE, MapImage)
CASE_STRING(CL_COMMAND_UNMAP_MEM_OBJECT, UnmapMemObject)
CASE_STRING(CL_COMMAND_ACQUIRE_GL_OBJECTS, AcquireGLObjects)
CASE_STRING(CL_COMMAND_RELEASE_GL_OBJECTS, ReleaseGLObjects)
CASE_STRING(CL_COMMAND_USER, User)
CASE_STRING(CL_COMMAND_BARRIER, Barrier)
CASE_STRING(CL_COMMAND_MIGRATE_MEM_OBJECTS, MigrateMemObjects)
CASE_STRING(CL_COMMAND_FILL_IMAGE, FillImage)
CASE_STRING(CL_COMMAND_SVM_FREE, SvmFree)
CASE_STRING(CL_COMMAND_SVM_MEMCPY, SvmMemcpy)
CASE_STRING(CL_COMMAND_SVM_MEMFILL, SvmMemFill)
CASE_STRING(CL_COMMAND_SVM_MAP, SvmMap)
CASE_STRING(CL_COMMAND_SVM_UNMAP, SvmUnmap)
default: case_string = "Unknown command type";
#include <atomic>

namespace activity_prof {

// Definition of the report_activity callback holder. Its type is taken from the earlier
// declaration via decltype, and it starts out as nullptr. IsEnabled() and ReportActivity()
// load it before use and treat a null value as "nothing to call".
decltype(report_activity) report_activity{nullptr};

#if USE_PROF_API

// Thread-local correlation id, initialized to 0 on every thread.
// Linux: __thread storage using the "initial-exec" TLS model.
// Windows: __declspec(thread) storage.
// No definition is emitted on any other platform.
#if defined(__linux__)
__thread activity_correlation_id_t correlation_id __attribute__((tls_model("initial-exec"))) = 0;
#elif defined(_WIN32)
__declspec(thread) activity_correlation_id_t correlation_id = 0;
#endif // defined(_WIN32)

//! Collapses a 3D extent into a single linear size.
//!
//! The result starts from the first component; the second and third components are
//! multiplied in only when they are non-zero, so an unused (zero) dimension does not
//! zero out the product.
//!
//! \param size3d  Extent to collapse.
//! \return        size3d[0] times every non-zero component among size3d[1] and size3d[2].
static inline size_t linearSize(const amd::Coord3D& size3d) {
  size_t total = size3d[0];
  for (int dim = 1; dim <= 2; ++dim) {
    if (size3d[dim] != 0) {
      total *= size3d[dim];
    }
  }
  return total;
}

//! Asks the registered callback whether activity reporting is enabled for an operation.
//!
//! \param operation_id  Operation to query; ids that are not below OP_ID_NUMBER are rejected.
//! \return true only when a callback is registered and it returns 0 for a query made with
//!         a null record; false otherwise.
bool IsEnabled(OpId operation_id) {
  if (operation_id >= OP_ID_NUMBER) {
    return false;
  }

  const auto callback = report_activity.load(std::memory_order_relaxed);
  if (!callback) {
    // No callback has been registered, so nothing can be enabled.
    return false;
  }

  // A null record turns the call into a pure "is this enabled?" query.
  return callback(ACTIVITY_DOMAIN_HIP_OPS, operation_id, nullptr) == 0;
}

//! Builds an activity record for \p command and passes it to the registered callback.
//!
//! Nothing is reported when the command type does not map to an operation id below
//! OP_ID_NUMBER, or when no callback is registered. Profiling must already be enabled
//! for the command and the command must have a queue (both are asserted).
//!
//! \param command  Command whose profiling info (correlation id, start and end
//!                 timestamps) is copied into the record.
void ReportActivity(const amd::Command& command) {
  assert(command.profilingInfo().enabled_ && "profiling must be enabled for this command");

  const auto operation_id = OperationId(command.type());
  if (operation_id >= OP_ID_NUMBER) {
    // Commands that are not a profiler activity (dispatch, memcopy, etc...) have nothing
    // to report to the profiler.
    return;
  }

  const auto report = report_activity.load(std::memory_order_relaxed);
  if (!report) {
    return;
  }

  const auto* queue = command.queue();
  assert(queue != nullptr);

  const auto& timing = command.profilingInfo();
  activity_record_t record{
      ACTIVITY_DOMAIN_HIP_OPS,  // activity domain
      command.type(),           // activity kind
      operation_id,             // operation id
      timing.correlation_id_,   // activity correlation id
      timing.start_,            // begin timestamp, ns
      timing.end_,              // end timestamp, ns
      {{
          static_cast<int>(queue->device().index()),  // device id
          queue->vdev()->index()                      // queue id
      }},
      {}  // copied data size for memcpy, or kernel name for dispatch
  };

  // Fill in the per-kind payload: the kernel name for dispatches, the byte count for copies.
  switch (command.type()) {
    case CL_COMMAND_NDRANGE_KERNEL: {
      const auto& dispatch = static_cast<const amd::NDRangeKernelCommand&>(command);
      record.kernel_name = dispatch.kernel().name().c_str();
      break;
    }
    case CL_COMMAND_READ_BUFFER:
    case CL_COMMAND_READ_BUFFER_RECT: {
      const auto& read = static_cast<const amd::ReadMemoryCommand&>(command);
      record.bytes = linearSize(read.size());
      break;
    }
    case CL_COMMAND_WRITE_BUFFER:
    case CL_COMMAND_WRITE_BUFFER_RECT: {
      const auto& write = static_cast<const amd::WriteMemoryCommand&>(command);
      record.bytes = linearSize(write.size());
      break;
    }
    case CL_COMMAND_COPY_BUFFER:
    case CL_COMMAND_COPY_BUFFER_RECT: {
      const auto& copy = static_cast<const amd::CopyMemoryCommand&>(command);
      record.bytes = linearSize(copy.size());
      break;
    }
    default:
      break;
  }

  report(ACTIVITY_DOMAIN_HIP_OPS, operation_id, &record);
}

#endif // USE_PROF_API

} // namespace activity_prof

// Expands to a switch case that returns the stringified label C for command type X.
#define CASE_STRING(X, C)                                                                          \
  case X:                                                                                          \
    return #C

//! Maps a command type to a short, human-readable kind name.
//!
//! \param commandType  Command type to describe (0 is reported as "InternalMarker").
//! \return A string literal naming the command kind, or "Unknown command kind" when the
//!         type is not one of the cases listed below.
const char* getOclCommandKindString(cl_command_type commandType) {
  switch (commandType) {
    CASE_STRING(0, InternalMarker);
    CASE_STRING(CL_COMMAND_MARKER, Marker);
    CASE_STRING(CL_COMMAND_NDRANGE_KERNEL, KernelExecution);
    CASE_STRING(CL_COMMAND_READ_BUFFER, CopyDeviceToHost);
    CASE_STRING(CL_COMMAND_WRITE_BUFFER, CopyHostToDevice);
    CASE_STRING(CL_COMMAND_COPY_BUFFER, CopyDeviceToDevice);
    CASE_STRING(CL_COMMAND_READ_BUFFER_RECT, CopyDeviceToHost2D);
    CASE_STRING(CL_COMMAND_WRITE_BUFFER_RECT, CopyHostToDevice2D);
    CASE_STRING(CL_COMMAND_COPY_BUFFER_RECT, CopyDeviceToDevice2D);
    CASE_STRING(CL_COMMAND_FILL_BUFFER, FillBuffer);
    CASE_STRING(CL_COMMAND_TASK, Task);
    CASE_STRING(CL_COMMAND_NATIVE_KERNEL, NativeKernel);
    CASE_STRING(CL_COMMAND_READ_IMAGE, ReadImage);
    CASE_STRING(CL_COMMAND_WRITE_IMAGE, WriteImage);
    CASE_STRING(CL_COMMAND_COPY_IMAGE, CopyImage);
    CASE_STRING(CL_COMMAND_COPY_IMAGE_TO_BUFFER, CopyImageToBuffer);
    CASE_STRING(CL_COMMAND_COPY_BUFFER_TO_IMAGE, CopyBufferToImage);
    CASE_STRING(CL_COMMAND_MAP_BUFFER, MapBuffer);
    CASE_STRING(CL_COMMAND_MAP_IMAGE, MapImage);
    CASE_STRING(CL_COMMAND_UNMAP_MEM_OBJECT, UnmapMemObject);
    CASE_STRING(CL_COMMAND_ACQUIRE_GL_OBJECTS, AcquireGLObjects);
    CASE_STRING(CL_COMMAND_RELEASE_GL_OBJECTS, ReleaseGLObjects);
    CASE_STRING(CL_COMMAND_USER, User);
    CASE_STRING(CL_COMMAND_BARRIER, Barrier);
    CASE_STRING(CL_COMMAND_MIGRATE_MEM_OBJECTS, MigrateMemObjects);
    CASE_STRING(CL_COMMAND_FILL_IMAGE, FillImage);
    CASE_STRING(CL_COMMAND_SVM_FREE, SvmFree);
    CASE_STRING(CL_COMMAND_SVM_MEMCPY, SvmMemcpy);
    CASE_STRING(CL_COMMAND_SVM_MEMFILL, SvmMemFill);
    CASE_STRING(CL_COMMAND_SVM_MAP, SvmMap);
    CASE_STRING(CL_COMMAND_SVM_UNMAP, SvmUnmap);
    CASE_STRING(ROCCLR_COMMAND_STREAM_WAIT_VALUE, StreamWait);
    CASE_STRING(ROCCLR_COMMAND_STREAM_WRITE_VALUE, StreamWrite);
    default:
      break;
  }
  // Every recognised type has already returned from inside the switch.
  return "Unknown command kind";
}
Loading

0 comments on commit 666b996

Please sign in to comment.