Skip to content

Commit

Permalink
enable ov_cpu_plugin_trace_log, cherry-pick from usstq@a81cc1a0a90328…
Browse files Browse the repository at this point in the history
…8fa0ceebed30ec8da88b2b77db;How to use: export OV_CPU_PROFILE=1, ov_profile.json file will be generated at current path
  • Loading branch information
liubo-intel committed Aug 28, 2024
1 parent 4343b04 commit 3003dc7
Show file tree
Hide file tree
Showing 4 changed files with 257 additions and 1 deletion.
7 changes: 6 additions & 1 deletion src/plugins/intel_cpu/src/graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include "utils/node_dumper.h"
#include "utils/verbose.h"
#include "utils/precision_support.h"
#include "utils/profiler.hpp"

#include <oneapi/dnnl/dnnl.hpp>
#include "common/primitive_desc_iface.hpp"
Expand Down Expand Up @@ -1030,6 +1031,7 @@ bool Graph::ProcessDynNodes() {
}

void Graph::PushInputData(const std::size_t& index, const ov::SoPtr<ITensor>& input) {
PROFILE(_prof, "Graph::PushInputData");
if (!IsReady()) OPENVINO_THROW("Wrong state. Topology not ready.");
auto input_itr = inputNodesMap.find(index);
if (input_itr != inputNodesMap.end()) {
Expand Down Expand Up @@ -1062,6 +1064,7 @@ void Graph::PushInputData(const std::size_t& index, const ov::SoPtr<ITensor>& in

// suppose always being shared infer_request intel_cpu::Tensor to Graph if isDynamic.
void Graph::PullOutputData(std::unordered_map<std::size_t, ov::SoPtr<ITensor>>& output) {
PROFILE(_prof, "Graph::PullOutputData");
if (!IsReady())
OPENVINO_THROW("Wrong state. Topology not ready.");

Expand Down Expand Up @@ -1362,6 +1365,7 @@ void Graph::InferDynamic(SyncInferRequest* request, UpdateStrategy&& update) {
VERBOSE(node, getConfig().debugCaps.verbose);
PERF(node, getConfig().collectPerfCounters);

PROFILE(_prof, node->getTypeStr(), node->getName());
if (request)
request->throw_if_canceled();
try {
Expand Down Expand Up @@ -1474,7 +1478,8 @@ void Graph::Infer(SyncInferRequest* request) {
OPENVINO_ASSERT(IsReady(), "Wrong state of the ov::intel_cpu::Graph. Topology is not ready: ", static_cast<int>(status));
}

if (infer_count != -1) infer_count++;
// if (infer_count != -1) infer_count++;
infer_count++;
}

void Graph::SortTopologically() {
Expand Down
3 changes: 3 additions & 0 deletions src/plugins/intel_cpu/src/node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include "memory_desc/dnnl_blocked_memory_desc.h"
#include <common/primitive_desc.hpp>
#include <common/primitive_desc_iface.hpp>
#include "utils/profiler.hpp"

using namespace dnnl;
using namespace openvino;
Expand Down Expand Up @@ -551,6 +552,7 @@ void Node::updateShapes() {
getName());
try {
if (needShapeInfer()) {
PROFILE(_prof, "updateShape", getName());
auto result = shapeInfer();
if (ShapeInferStatus::success == result.status) {
redefineOutputMemory(result.dims);
Expand All @@ -568,6 +570,7 @@ void Node::updateDynamicParams() {
" with name: ",
getName());
try {
PROFILE(_prof, "updateDynamicParams", getName());
if (isExecutable()) {
if (needPrepareParams()) {
OPENVINO_ASSERT(inputShapesDefined(),
Expand Down
165 changes: 165 additions & 0 deletions src/plugins/intel_cpu/src/utils/profiler.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
#include "profiler.hpp"

#include <algorithm>
#include <cassert>
#include <cmath>
#include <fstream>
#include <iostream>
#include <map>
#include <numeric>
#include <set>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

#ifdef OV_CPU_WITH_PROFILER
namespace ov {
namespace intel_cpu {

uint64_t tsc_ticks_per_second;
uint64_t tsc_ticks_base;
inline uint64_t tsc_to_usec(uint64_t tsc_ticks) {
return (tsc_ticks - tsc_ticks_base) * 1000000 / tsc_ticks_per_second;
}

void init_tsc() {
static std::once_flag flag;
std::call_once(flag, []() {
uint64_t start_ticks = __rdtsc();
std::this_thread::sleep_for(std::chrono::seconds(1));
tsc_ticks_per_second = (__rdtsc() - start_ticks);
std::cout << "[OV_CPU_PROFILE] tsc_ticks_per_second = " << tsc_ticks_per_second << std::endl;
tsc_ticks_base = __rdtsc();
});
}

// to minimize the overhead of profiler when it's not being enabled,
// the inst is not put inside a singleton function to save extra
// cost in multi-threading safety checks.

struct chromeTrace {
std::ostream& os;
int fake_tid;
uint64_t ts;
chromeTrace(std::ostream& os, int fake_tid) : os(os), fake_tid(fake_tid) {}
void addCompleteEvent(std::string name,
std::string cat,
uint64_t start,
uint64_t dur) {
// chrome tracing will show & group-by to name, so we use cat as name
os << "{\"ph\": \"X\", \"name\": \"" << cat << "\", \"cat\":\"" << name << "\","
<< "\"pid\": " << fake_tid << ", \"tid\": 0,"
<< "\"ts\": " << start << ", \"dur\": " << dur << "},\n";
}
};

thread_local ProfilerManager profilerManagerInstance;

static std::atomic_int totalProfilerManagers{0};

void ProfileData::record_end(ProfileData* p) {
p->end = __rdtsc();
}

bool not_finalized = true;

struct ProfilerManagerFinalizer {
std::mutex g_mutex;
std::set<ProfilerManager*> all_managers;
const char* dump_file_name = "ov_profile.json";
bool dump_file_over = false;

~ProfilerManagerFinalizer() {
if (not_finalized)
finalize();
}

void finalize() {
if (!not_finalized)
return;

std::lock_guard<std::mutex> guard(g_mutex);
if (dump_file_over || all_managers.empty())
return;

// start dump
std::ofstream fw;
fw.open(dump_file_name, std::ios::out);
fw << "{\n";
fw << "\"schemaVersion\": 1,\n";
fw << "\"traceEvents\": [\n";
fw.flush();

for (auto& pthis : all_managers) {
if (!pthis->enabled)
continue;
auto data_size = pthis->all_data.size();
if (!data_size)
continue;

// open output file
chromeTrace ct(fw, pthis->serial);
for (auto& d : pthis->all_data) {
ct.addCompleteEvent(d.name,
d.cat,
tsc_to_usec(d.start),
tsc_to_usec(d.end) - tsc_to_usec(d.start));
}
pthis->all_data.clear();
std::cout << "[OV_CPU_PROFILE] #" << pthis->serial << "(" << pthis << ") finalize: dumpped "
<< data_size << std::endl;
}
all_managers.clear();

fw << R"({
"name": "Profiler End",
"ph": "i",
"s": "g",
"pid": "Traces",
"tid": "Trace OV Profiler",
"ts":)"
<< tsc_to_usec(__rdtsc()) << "}",
fw << "]\n";
fw << "}\n";
auto total_size = fw.tellp();
fw.close();
dump_file_over = true;
not_finalized = false;
std::cout << "[OV_CPU_PROFILE] Dumpped " << total_size / (1024 * 1024) << " (MB) to " << dump_file_name
<< std::endl;
}

void register_manager(ProfilerManager* pthis) {
std::lock_guard<std::mutex> guard(g_mutex);
std::stringstream ss;
ss << "[OV_CPU_PROFILE] #" << pthis->serial << "(" << this << ") : is registed." << std::endl;
std::cout << ss.str();
all_managers.emplace(pthis);
}
} finalizer;

ProfilerManager::ProfilerManager() {
const char* str_enable = std::getenv("OV_CPU_PROFILE");
if (!str_enable)
str_enable = "0";
enabled = atoi(str_enable) > 0;
if (enabled) {
init_tsc();
tid = std::this_thread::get_id();
serial = totalProfilerManagers.fetch_add(1);
finalizer.register_manager(this);
}
}

ProfilerManager::~ProfilerManager() {
if (not_finalized)
finalizer.finalize();
}

} // namespace intel_cpu
} // namespace ov
#endif
83 changes: 83 additions & 0 deletions src/plugins/intel_cpu/src/utils/profiler.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// Copyright (C) 2018-2022 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once
#define OV_CPU_WITH_PROFILER
#ifdef OV_CPU_WITH_PROFILER

#include <atomic>
#include <chrono>
#include <cstddef>
#include <deque>
#include <fstream>
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <mutex>
#include <thread>
#include <vector>

extern "C" {
#ifdef _WIN32
# include <intrin.h>
#else
# include <x86intrin.h>
#endif
}

namespace ov {
namespace intel_cpu {

struct ProfileData {
uint64_t start;
uint64_t end;
std::string cat;
std::string name;

ProfileData(const std::string& cat, const std::string& name) : cat(cat), name(name) {
start = __rdtsc();
}
static void record_end(ProfileData* p);
};

typedef std::unique_ptr<ProfileData, void (*)(ProfileData*)> ProfileDataHandle;

class chromeTrace;
struct ProfilerManagerFinalizer;

class ProfilerManager {
bool enabled;
std::deque<ProfileData> all_data;
std::thread::id tid;
int serial;

public:
ProfilerManager();
~ProfilerManager();

ProfileDataHandle startProfile(const std::string& cat, const std::string& name = {}) {
if (!enabled) {
return ProfileDataHandle(nullptr, [](ProfileData*) {});
}
all_data.emplace_back(cat, name);
return ProfileDataHandle(&all_data.back(), ProfileData::record_end);
}

friend class ProfilerManagerFinalizer;
};

extern thread_local ProfilerManager profilerManagerInstance;

} // namespace intel_cpu
} // namespace ov

# define PROFILE(var_name, ...) \
auto var_name = ov::intel_cpu::profilerManagerInstance.startProfile(__VA_ARGS__); \
(void)var_name;
#else

# define PROFILE(var_name, ...)

#endif

0 comments on commit 3003dc7

Please sign in to comment.