diff --git a/device/device.hpp b/device/device.hpp index 72d9c8ce..a3da76b1 100644 --- a/device/device.hpp +++ b/device/device.hpp @@ -1246,8 +1246,6 @@ class VirtualDevice : public amd::HeapObject { virtual void submitStreamOperation(amd::StreamOperationCommand& cmd) { ShouldNotReachHere(); } virtual void submitVirtualMap(amd::VirtualMapCommand& cmd) { ShouldNotReachHere(); } - virtual void profilerAttach(bool enable) = 0; - virtual address allocKernelArguments(size_t size, size_t alignment) { return nullptr; } //! Get the blit manager object diff --git a/device/rocm/rocvirtual.cpp b/device/rocm/rocvirtual.cpp index 8c2af93b..570d97fb 100644 --- a/device/rocm/rocvirtual.cpp +++ b/device/rocm/rocvirtual.cpp @@ -25,6 +25,7 @@ #include "device/rocm/rocmemory.hpp" #include "device/rocm/rocblit.hpp" #include "device/rocm/roccounters.hpp" +#include "platform/activity.hpp" #include "platform/kernel.hpp" #include "platform/context.hpp" #include "platform/command.hpp" @@ -167,7 +168,7 @@ bool HsaAmdSignalHandler(hsa_signal_value_t value, void* arg) { return false; } - if (ts->gpu()->isProfilerAttached()) { + if (activity_prof::IsEnabled(OP_ID_DISPATCH)) { amd::Command* head = ts->getParsedCommand(); if (head == nullptr) { head = ts->command().GetBatchHead(); diff --git a/device/rocm/rocvirtual.hpp b/device/rocm/rocvirtual.hpp index 307d9150..7e9c3454 100644 --- a/device/rocm/rocvirtual.hpp +++ b/device/rocm/rocvirtual.hpp @@ -396,10 +396,6 @@ class VirtualGPU : public device::VirtualDevice { Timestamp* timestamp() const { return timestamp_; } - void profilerAttach(bool enable = false) { profilerAttached_ = enable; } - - bool isProfilerAttached() const { return profilerAttached_; } - //! Indicates the status of the callback handler. The callback would process the commands //! and would collect profiling data, update refcounts bool isHandlerPending() const { return barriers_.IsHandlerPending(); } @@ -483,7 +479,6 @@ class VirtualGPU : public device::VirtualDevice { uint32_t cooperative_ : 1; //!< Cooperative launch is enabled uint32_t addSystemScope_ : 1; //!< Insert a system scope to the next aql uint32_t tracking_created_ : 1; //!< Enabled if tracking object was properly initialized - uint32_t profilerAttached_ : 1; //!< Indicates if profiler is attached uint32_t retainExternalSignals_ : 1; //!< Indicate to retain external signal array }; uint32_t state_; diff --git a/platform/activity.cpp b/platform/activity.cpp index 2e19ef27..2242ee27 100644 --- a/platform/activity.cpp +++ b/platform/activity.cpp @@ -19,47 +19,134 @@ THE SOFTWARE. */ #include "platform/activity.hpp" +#include "platform/command.hpp" +#include "platform/commandqueue.hpp" -ACTIVITY_PROF_INSTANCES(); - -#define CASE_STRING(X, C) case X: case_string = #C ;break; - -const char* getOclCommandKindString(uint32_t op) { - const char* case_string; - - switch(static_cast(op)) { - CASE_STRING(0, InternalMarker) - CASE_STRING(CL_COMMAND_MARKER, Marker) - CASE_STRING(CL_COMMAND_NDRANGE_KERNEL, KernelExecution) - CASE_STRING(CL_COMMAND_READ_BUFFER, CopyDeviceToHost) - CASE_STRING(CL_COMMAND_WRITE_BUFFER, CopyHostToDevice) - CASE_STRING(CL_COMMAND_COPY_BUFFER, CopyDeviceToDevice) - CASE_STRING(CL_COMMAND_READ_BUFFER_RECT, CopyDeviceToHost2D) - CASE_STRING(CL_COMMAND_WRITE_BUFFER_RECT, CopyHostToDevice2D) - CASE_STRING(CL_COMMAND_COPY_BUFFER_RECT, CopyDeviceToDevice2D) - CASE_STRING(CL_COMMAND_FILL_BUFFER, FillBuffer) - CASE_STRING(CL_COMMAND_TASK, Task) - CASE_STRING(CL_COMMAND_NATIVE_KERNEL, NativeKernel) - CASE_STRING(CL_COMMAND_READ_IMAGE, ReadImage) - CASE_STRING(CL_COMMAND_WRITE_IMAGE, WriteImage) - CASE_STRING(CL_COMMAND_COPY_IMAGE, CopyImage) - CASE_STRING(CL_COMMAND_COPY_IMAGE_TO_BUFFER, CopyImageToBuffer) - CASE_STRING(CL_COMMAND_COPY_BUFFER_TO_IMAGE, CopyBufferToImage) - CASE_STRING(CL_COMMAND_MAP_BUFFER, MapBuffer) - CASE_STRING(CL_COMMAND_MAP_IMAGE, MapImage) - CASE_STRING(CL_COMMAND_UNMAP_MEM_OBJECT, UnmapMemObject) - CASE_STRING(CL_COMMAND_ACQUIRE_GL_OBJECTS, AcquireGLObjects) - CASE_STRING(CL_COMMAND_RELEASE_GL_OBJECTS, ReleaseGLObjects) - CASE_STRING(CL_COMMAND_USER, User) - CASE_STRING(CL_COMMAND_BARRIER, Barrier) - CASE_STRING(CL_COMMAND_MIGRATE_MEM_OBJECTS, MigrateMemObjects) - CASE_STRING(CL_COMMAND_FILL_IMAGE, FillImage) - CASE_STRING(CL_COMMAND_SVM_FREE, SvmFree) - CASE_STRING(CL_COMMAND_SVM_MEMCPY, SvmMemcpy) - CASE_STRING(CL_COMMAND_SVM_MEMFILL, SvmMemFill) - CASE_STRING(CL_COMMAND_SVM_MAP, SvmMap) - CASE_STRING(CL_COMMAND_SVM_UNMAP, SvmUnmap) - default: case_string = "Unknown command type"; +#include + +namespace activity_prof { + +decltype(report_activity) report_activity{nullptr}; + +#if USE_PROF_API + +#if defined(__linux__) +__thread activity_correlation_id_t correlation_id __attribute__((tls_model("initial-exec"))) = 0; +#elif defined(_WIN32) +__declspec(thread) activity_correlation_id_t correlation_id = 0; +#endif // defined(_WIN32) + +static inline size_t linearSize(const amd::Coord3D& size3d) { + size_t size = size3d[0]; + if (size3d[1] != 0) size *= size3d[1]; + if (size3d[2] != 0) size *= size3d[2]; + return size; +} + +bool IsEnabled(OpId operation_id) { + if (operation_id < OP_ID_NUMBER) + if (auto report = report_activity.load(std::memory_order_relaxed)) + return report(ACTIVITY_DOMAIN_HIP_OPS, operation_id, nullptr) == 0; + return false; +} + +void ReportActivity(const amd::Command& command) { + assert(command.profilingInfo().enabled_ && "profiling must be enabled for this command"); + auto operation_id = OperationId(command.type()); + if (operation_id >= OP_ID_NUMBER) + // This command does not translate into a profiler activity (dispatch, memcopy, etc...), there + // is nothing to report to the profiler. + return; + + auto function = report_activity.load(std::memory_order_relaxed); + if (!function) return; + + const auto* queue = command.queue(); + assert(queue != nullptr); + + activity_record_t record{ + ACTIVITY_DOMAIN_HIP_OPS, // activity domain + command.type(), // activity kind + operation_id, // operation id + command.profilingInfo().correlation_id_, // activity correlation id + command.profilingInfo().start_, // begin timestamp, ns + command.profilingInfo().end_, // end timestamp, ns + {{ + static_cast(queue->device().index()), // device id + queue->vdev()->index() // queue id + }}, + {} // copied data size for memcpy, or kernel name for dispatch + }; + + switch (command.type()) { + case CL_COMMAND_NDRANGE_KERNEL: + record.kernel_name = + static_cast(command).kernel().name().c_str(); + break; + case CL_COMMAND_READ_BUFFER: + case CL_COMMAND_READ_BUFFER_RECT: + record.bytes = linearSize(static_cast(command).size()); + break; + case CL_COMMAND_WRITE_BUFFER: + case CL_COMMAND_WRITE_BUFFER_RECT: + record.bytes = linearSize(static_cast(command).size()); + break; + case CL_COMMAND_COPY_BUFFER: + case CL_COMMAND_COPY_BUFFER_RECT: + record.bytes = linearSize(static_cast(command).size()); + break; + default: + break; + } + + function(ACTIVITY_DOMAIN_HIP_OPS, operation_id, &record); +} + +#endif // USE_PROF_API + +} // namespace activity_prof + +#define CASE_STRING(X, C) \ + case X: \ + return #C + +const char* getOclCommandKindString(cl_command_type commandType) { + switch (commandType) { + CASE_STRING(0, InternalMarker); + CASE_STRING(CL_COMMAND_MARKER, Marker); + CASE_STRING(CL_COMMAND_NDRANGE_KERNEL, KernelExecution); + CASE_STRING(CL_COMMAND_READ_BUFFER, CopyDeviceToHost); + CASE_STRING(CL_COMMAND_WRITE_BUFFER, CopyHostToDevice); + CASE_STRING(CL_COMMAND_COPY_BUFFER, CopyDeviceToDevice); + CASE_STRING(CL_COMMAND_READ_BUFFER_RECT, CopyDeviceToHost2D); + CASE_STRING(CL_COMMAND_WRITE_BUFFER_RECT, CopyHostToDevice2D); + CASE_STRING(CL_COMMAND_COPY_BUFFER_RECT, CopyDeviceToDevice2D); + CASE_STRING(CL_COMMAND_FILL_BUFFER, FillBuffer); + CASE_STRING(CL_COMMAND_TASK, Task); + CASE_STRING(CL_COMMAND_NATIVE_KERNEL, NativeKernel); + CASE_STRING(CL_COMMAND_READ_IMAGE, ReadImage); + CASE_STRING(CL_COMMAND_WRITE_IMAGE, WriteImage); + CASE_STRING(CL_COMMAND_COPY_IMAGE, CopyImage); + CASE_STRING(CL_COMMAND_COPY_IMAGE_TO_BUFFER, CopyImageToBuffer); + CASE_STRING(CL_COMMAND_COPY_BUFFER_TO_IMAGE, CopyBufferToImage); + CASE_STRING(CL_COMMAND_MAP_BUFFER, MapBuffer); + CASE_STRING(CL_COMMAND_MAP_IMAGE, MapImage); + CASE_STRING(CL_COMMAND_UNMAP_MEM_OBJECT, UnmapMemObject); + CASE_STRING(CL_COMMAND_ACQUIRE_GL_OBJECTS, AcquireGLObjects); + CASE_STRING(CL_COMMAND_RELEASE_GL_OBJECTS, ReleaseGLObjects); + CASE_STRING(CL_COMMAND_USER, User); + CASE_STRING(CL_COMMAND_BARRIER, Barrier); + CASE_STRING(CL_COMMAND_MIGRATE_MEM_OBJECTS, MigrateMemObjects); + CASE_STRING(CL_COMMAND_FILL_IMAGE, FillImage); + CASE_STRING(CL_COMMAND_SVM_FREE, SvmFree); + CASE_STRING(CL_COMMAND_SVM_MEMCPY, SvmMemcpy); + CASE_STRING(CL_COMMAND_SVM_MEMFILL, SvmMemFill); + CASE_STRING(CL_COMMAND_SVM_MAP, SvmMap); + CASE_STRING(CL_COMMAND_SVM_UNMAP, SvmUnmap); + CASE_STRING(ROCCLR_COMMAND_STREAM_WAIT_VALUE, StreamWait); + CASE_STRING(ROCCLR_COMMAND_STREAM_WRITE_VALUE, StreamWrite); + default: + break; }; - return case_string; + return "Unknown command kind"; }; diff --git a/platform/activity.hpp b/platform/activity.hpp index bcda9e93..e6256807 100644 --- a/platform/activity.hpp +++ b/platform/activity.hpp @@ -20,170 +20,71 @@ #pragma once -#include "thread/monitor.hpp" +#include "top.hpp" #include +#include #include -#include +#include +#include + +namespace amd { +class Command; +} // namespace amd #define USE_PROF_API 1 #if USE_PROF_API + enum OpId { OP_ID_DISPATCH = 0, OP_ID_COPY = 1, OP_ID_BARRIER = 2, OP_ID_NUMBER = 3 }; #include "prof_protocol.h" -// Statically allocated table of callbacks and global unique ID of each operation -#define ACTIVITY_PROF_INSTANCES() \ - namespace activity_prof { \ - CallbacksTable::table_t CallbacksTable::table_{}; \ - std::atomic ActivityProf::globe_record_id_(0); \ - } // activity_prof - namespace activity_prof { -typedef activity_correlation_id_t record_id_t; -typedef activity_op_t op_id_t; -typedef uint32_t command_id_t; - -typedef activity_id_callback_t id_callback_fun_t; -typedef activity_async_callback_t callback_fun_t; -typedef void* callback_arg_t; - -// Activity callbacks table -class CallbacksTable { - public: - struct table_t { - id_callback_fun_t id_callback; - callback_fun_t op_callback; - callback_arg_t arg; - std::atomic enabled[OP_ID_NUMBER]; - }; - - // Initialize record id callback and activity callback - static void init(const id_callback_fun_t& id_callback, const callback_fun_t& op_callback, - const callback_arg_t& arg) { - table_.id_callback = id_callback; - table_.op_callback = op_callback; - table_.arg = arg; - } - - static bool SetEnabled(const op_id_t& op_id, const bool& enable) { - bool ret = true; - if (op_id < OP_ID_NUMBER) { - table_.enabled[op_id].store(enable, std::memory_order_release); - } else { - ret = false; - } - return ret; - } - - static bool IsEnabled(const op_id_t& op_id) { - return table_.enabled[op_id].load(std::memory_order_acquire); - } - static id_callback_fun_t get_id_callback() { return table_.id_callback; } - static callback_fun_t get_op_callback() { return table_.op_callback; } - static callback_arg_t get_arg() { return table_.arg; } - - private: - static table_t table_; -}; - -// Activity profile class -class ActivityProf { - public: - // Domain ID - static constexpr int ACTIVITY_DOMAIN_ID = ACTIVITY_DOMAIN_HIP_VDI; - - ActivityProf() : command_id_(0), queue_id_(0), device_id_(0), record_id_(0), enabled_(false) {} - - // Initialization - void Initialize(const command_id_t command_id, const uint32_t queue_id, - const uint32_t device_id) { - activity_op_t op_id = (command_id == CL_COMMAND_NDRANGE_KERNEL) ? OP_ID_DISPATCH : OP_ID_COPY; - enabled_ = CallbacksTable::IsEnabled(op_id); - if (IsEnabled()) { - command_id_ = command_id; - queue_id_ = queue_id; - device_id_ = device_id; - record_id_ = globe_record_id_.fetch_add(1, std::memory_order_relaxed); - (CallbacksTable::get_id_callback())(record_id_); - } +extern std::atomic + report_activity; + +#if defined(__linux__) +extern __thread activity_correlation_id_t correlation_id __attribute__((tls_model("initial-exec"))); +#elif defined(_WIN32) +extern __declspec(thread) activity_correlation_id_t correlation_id; +#endif // defined(_WIN32) + +constexpr OpId OperationId(cl_command_type commandType) { + switch (commandType) { + case CL_COMMAND_NDRANGE_KERNEL: + return OP_ID_DISPATCH; + case CL_COMMAND_READ_BUFFER: + case CL_COMMAND_READ_BUFFER_RECT: + case CL_COMMAND_WRITE_BUFFER: + case CL_COMMAND_WRITE_BUFFER_RECT: + case CL_COMMAND_COPY_BUFFER: + case CL_COMMAND_COPY_BUFFER_RECT: + case CL_COMMAND_FILL_BUFFER: + case CL_COMMAND_READ_IMAGE: + case CL_COMMAND_WRITE_IMAGE: + case CL_COMMAND_COPY_IMAGE: + case CL_COMMAND_FILL_IMAGE: + case CL_COMMAND_COPY_BUFFER_TO_IMAGE: + case CL_COMMAND_COPY_IMAGE_TO_BUFFER: + return OP_ID_COPY; + case CL_COMMAND_MARKER: + return OP_ID_BARRIER; + default: + return OP_ID_NUMBER; } +} - template inline void ReportEventTimestamps(T& obj, const size_t bytes = 0) { - if (IsEnabled()) { - uint64_t start = obj.profilingInfo().start_; - uint64_t end = obj.profilingInfo().end_; - callback(obj.type(), start, end, bytes); - } - } - - bool IsEnabled() const { return enabled_; } - - private: - // Activity callback routine - void callback(const command_id_t command_id, - const uint64_t begin_ts, const uint64_t end_ts, const size_t bytes) { - activity_op_t op_id = (command_id == CL_COMMAND_NDRANGE_KERNEL) ? OP_ID_DISPATCH : OP_ID_COPY; - activity_record_t record { - ACTIVITY_DOMAIN_ID, // domain id - (activity_kind_t)command_id, // activity kind - op_id, // operation id - record_id_, // activity correlation id - begin_ts, // begin timestamp, ns - end_ts, // end timestamp, ns - { - { - static_cast(device_id_), // device id - queue_id_ // queue id - } - }, - bytes // copied data size, for memcpy - }; - (CallbacksTable::get_op_callback())(op_id, &record, CallbacksTable::get_arg()); - } - - command_id_t command_id_; //!< Command ID, executed on the queue - uint32_t queue_id_; //!< Queue ID, associated with this command - uint32_t device_id_; //!< Device ID, associated with this command - record_id_t record_id_; //!< Uniqueue execution ID(counter) of this command - bool enabled_; //!< Activity profiling is enabled - - // Global record ID - static std::atomic globe_record_id_; //!< GLobal counter of all executed commands -}; +bool IsEnabled(OpId operation_id); +void ReportActivity(const amd::Command& command); } // namespace activity_prof -#else -#define ACTIVITY_PROF_INSTANCES() - -namespace activity_prof { -typedef uint32_t op_id_t; -typedef uint32_t command_id_t; - -typedef void* id_callback_fun_t; -typedef void* callback_fun_t; -typedef void* callback_arg_t; - -struct CallbacksTable { - static void init(const id_callback_fun_t& id_callback, const callback_fun_t& op_callback, - const callback_arg_t& arg) {} - static bool SetEnabled(const op_id_t& op_id, const bool& enable) { return false; } -}; - -class ActivityProf { - public: - ActivityProf() {} - inline void Initialize(const command_id_t command_id, const uint32_t queue_id, - const uint32_t device_id) {} - template inline void ReportEventTimestamps(T& obj, const size_t bytes = 0) {} - inline bool IsEnabled() { return false; } -}; +#else // !USE_PROF_API -} // namespace activity_prof +static inline void ReportActivity(const amd::Command& command) {} -#endif +#endif // !USE_PROF_API -const char* getOclCommandKindString(uint32_t op); +const char* getOclCommandKindString(cl_command_type kind); diff --git a/platform/command.cpp b/platform/command.cpp index 2a6729d3..26336b31 100644 --- a/platform/command.cpp +++ b/platform/command.cpp @@ -26,6 +26,7 @@ * \date October 2008 */ +#include "platform/activity.hpp" #include "platform/command.hpp" #include "platform/commandqueue.hpp" #include "device/device.hpp" @@ -43,14 +44,13 @@ namespace amd { // ================================================================================================ -Event::Event(HostQueue& queue) +Event::Event(HostQueue& queue, bool profilingEnabled) : callbacks_(NULL), status_(CL_INT_MAX), hw_event_(nullptr), notify_event_(nullptr), device_(&queue.device()), - profilingInfo_(IS_PROFILER_ON || queue.properties().test(CL_QUEUE_PROFILING_ENABLE) || - Agent::shouldPostEventEvents()), + profilingInfo_(profilingEnabled), event_scope_(Device::kCacheStateInvalid) { notified_.clear(); } @@ -162,7 +162,8 @@ bool Event::setStatus(int32_t status, uint64_t timeStamp) { releaseResources(); } - activity_.ReportEventTimestamps(command()); + if (profilingInfo().enabled_) activity_prof::ReportActivity(command()); + // Broadcast all the waiters. if (referenceCount() > 1) { signal(); @@ -311,7 +312,8 @@ const Event::EventWaitList Event::nullWaitList(0); // ================================================================================================ Command::Command(HostQueue& queue, cl_command_type type, const EventWaitList& eventWaitList, uint32_t commandWaitBits, const Event* waitingEvent) - : Event(queue), + : Event(queue, activity_prof::IsEnabled(activity_prof::OperationId(type)) || + queue.properties().test(CL_QUEUE_PROFILING_ENABLE) || Agent::shouldPostEventEvents()), queue_(&queue), next_(nullptr), type_(type), @@ -323,7 +325,6 @@ Command::Command(HostQueue& queue, cl_command_type type, for (const auto &event: eventWaitList) { event->retain(); } - if (type != 0) activity_.Initialize(type, queue.vdev()->index(), queue.device().index()); } // ================================================================================================ diff --git a/platform/command.hpp b/platform/command.hpp index 72888a75..65195573 100644 --- a/platform/command.hpp +++ b/platform/command.hpp @@ -108,6 +108,7 @@ class Event : public RuntimeObject { if (enabled) { clear(); callback_ = nullptr; + correlation_id_ = activity_prof::correlation_id; } } @@ -118,6 +119,7 @@ class Event : public RuntimeObject { bool enabled_; //!< Profiling enabled for the wave limiter uint32_t waves_; //!< The number of waves used in a dispatch ProfilingCallback* callback_; + uint64_t correlation_id_; bool marker_ts_; //!< TS marker void clear() { @@ -137,13 +139,11 @@ class Event : public RuntimeObject { } } profilingInfo_; - activity_prof::ActivityProf activity_; //!< Activity profiling - //! Construct a new event. Event(); //! Construct a new event associated to the given command \a queue. - Event(HostQueue& queue); + Event(HostQueue& queue, bool profilingEnabled = false); //! Destroy the event. virtual ~Event(); @@ -164,6 +164,7 @@ class Event : public RuntimeObject { profilingInfo_.enabled_ = true; profilingInfo_.clear(); profilingInfo_.callback_ = nullptr; + profilingInfo_.correlation_id_ = activity_prof::correlation_id; } public: diff --git a/platform/prof_protocol.h b/platform/prof_protocol.h index ae23fa5a..5471ab06 100644 --- a/platform/prof_protocol.h +++ b/platform/prof_protocol.h @@ -27,9 +27,10 @@ typedef enum { ACTIVITY_DOMAIN_HSA_API = 0, // HSA API domain ACTIVITY_DOMAIN_HSA_OPS = 1, // HSA async activity domain - ACTIVITY_DOMAIN_HCC_OPS = 2, // HCC async activity domain + ACTIVITY_DOMAIN_HIP_OPS = 2, // HIP async activity domain + ACTIVITY_DOMAIN_HCC_OPS = ACTIVITY_DOMAIN_HIP_OPS, // HCC async activity domain + ACTIVITY_DOMAIN_HIP_VDI = ACTIVITY_DOMAIN_HIP_OPS, // HIP VDI domain ACTIVITY_DOMAIN_HIP_API = 3, // HIP API domain - ACTIVITY_DOMAIN_HIP_VDI = ACTIVITY_DOMAIN_HCC_OPS, // HIP VDI domain ACTIVITY_DOMAIN_KFD_API = 4, // KFD API domain ACTIVITY_DOMAIN_EXT_API = 5, // External ID domain ACTIVITY_DOMAIN_ROCTX = 6, // ROCTX domain @@ -42,7 +43,7 @@ typedef enum { ACTIVITY_EXT_OP_EXTERN_ID = 1 } activity_ext_op_t; -// API calback type +// API callback type typedef void (*activity_rtapi_callback_t)(uint32_t domain, uint32_t cid, const void* data, void* arg); typedef uint32_t activity_kind_t; typedef uint32_t activity_op_t; @@ -78,13 +79,15 @@ struct activity_record_t { activity_correlation_id_t external_id; // external correlatino id }; }; + union { size_t bytes; // data size bytes + const char* kernel_name; + }; }; // Activity sync calback type -typedef void* (*activity_sync_callback_t)(uint32_t cid, activity_record_t* record, const void* data, void* arg); +typedef void (*activity_sync_callback_t)(uint32_t cid, activity_record_t* record, const void* data, void* arg); // Activity async calback type -typedef void (*activity_id_callback_t)(activity_correlation_id_t id); -typedef void (*activity_async_callback_t)(uint32_t op, void* record, void* arg); +typedef void (*activity_async_callback_t)(uint32_t op, activity_record_t* record, void* arg); #endif // INC_EXT_PROF_PROTOCOL_H_ diff --git a/utils/flags.cpp b/utils/flags.cpp index 616743d5..d97182f1 100644 --- a/utils/flags.cpp +++ b/utils/flags.cpp @@ -82,7 +82,6 @@ namespace amd { #endif // __APPLE__ bool IS_HIP = false; -std::atomic_bool IS_PROFILER_ON(false); #if !defined(_WIN32) && defined(WITH_PAL_DEVICE) bool IS_LEGACY = true; diff --git a/utils/flags.hpp b/utils/flags.hpp index a1927eb0..94ee1957 100644 --- a/utils/flags.hpp +++ b/utils/flags.hpp @@ -280,7 +280,6 @@ release(bool, GPU_STREAMOPS_CP_WAIT, false, \ namespace amd { extern bool IS_HIP; -extern std::atomic_bool IS_PROFILER_ON; extern bool IS_LEGACY;