Skip to content

Commit

Permalink
Add support for NEC SX-Aurora counters
Browse files Browse the repository at this point in the history
Besides sampling, there are several performance monitor counters that we
can read, such as L1 cache misses.

This closes #237, as sampling and PMCs are probably the only two things
we can reasonably read with lo2s.
  • Loading branch information
cvonelm committed Jul 11, 2024
1 parent 161b93c commit a044508
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 12 deletions.
8 changes: 8 additions & 0 deletions include/lo2s/measurement_scope.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ enum class MeasurementScopeType
SAMPLE,
GROUP_METRIC,
USERSPACE_METRIC,
NEC_METRIC,
BIO,
SYSCALL,
UNKNOWN
Expand Down Expand Up @@ -57,6 +58,11 @@ struct MeasurementScope
return { MeasurementScopeType::GROUP_METRIC, s };
}

static MeasurementScope nec_metric(ExecutionScope s)
{
return { MeasurementScopeType::NEC_METRIC, s };
}

static MeasurementScope userspace_metric(ExecutionScope s)
{
return { MeasurementScopeType::USERSPACE_METRIC, s };
Expand Down Expand Up @@ -95,6 +101,8 @@ struct MeasurementScope
case MeasurementScopeType::GROUP_METRIC:
case MeasurementScopeType::USERSPACE_METRIC:
return fmt::format("metrics for {}", scope.name());
case MeasurementScopeType::NEC_METRIC:
return fmt::format("metrics for NEC {}", scope.name());
case MeasurementScopeType::SAMPLE:
return fmt::format("samples for {}", scope.name());
case MeasurementScopeType::BIO:
Expand Down
3 changes: 2 additions & 1 deletion include/lo2s/monitor/nec_thread_monitor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,14 @@

#include <lo2s/monitor/poll_monitor.hpp>
#include <lo2s/perf/calling_context_manager.hpp>
#include <lo2s/perf/counter/metric_writer.hpp>
#include <lo2s/trace/trace.hpp>

namespace lo2s
{
namespace nec
{
class NecThreadMonitor : public monitor::PollMonitor
class NecThreadMonitor : public monitor::PollMonitor, perf::counter::MetricWriter
{
public:
NecThreadMonitor(Thread thread, trace::Trace& trace, NecDevice device);
Expand Down
9 changes: 6 additions & 3 deletions include/lo2s/trace/reg_keys.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,15 +126,18 @@ using ByEventDescription = SimpleKeyType<perf::EventDescription, ByEventDescript
struct ByCounterCollectionTag
{
};

using ByCounterCollection = SimpleKeyType<perf::counter::CounterCollection, ByCounterCollectionTag>;

struct ByNecDeviceTag
{
};

using ByNecDevice = SimpleKeyType<NecDevice, ByNecDeviceTag>;

struct ByMeasurementScopeTypeTag
{
};
using ByMeasurementScopeType = SimpleKeyType<MeasurementScopeType, ByMeasurementScopeTypeTag>;

template <typename Definition>
struct Holder
{
Expand All @@ -156,7 +159,7 @@ template <>
struct Holder<otf2::definition::metric_class>
{
using type = otf2::lookup_definition_holder<otf2::definition::metric_class, ByString,
ByCounterCollection>;
ByCounterCollection, ByMeasurementScopeType>;
};

template <>
Expand Down
47 changes: 47 additions & 0 deletions include/lo2s/trace/trace.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,55 @@ class Trace
otf2::common::metric_type::other, otf2::common::metric_mode::accumulated_start,
otf2::common::type::Double, otf2::common::base_type::decimal, 0, intern(event.unit));
}

otf2::definition::metric_class& perf_metric_class(MeasurementScope scope)
{

if (scope.type == MeasurementScopeType::NEC_METRIC)
{
if (registry_.has<otf2::definition::metric_class>(ByMeasurementScopeType(scope.type)))
{
return registry_.get<otf2::definition::metric_class>(
ByMeasurementScopeType(scope.type));
}
auto& metric_class = registry_.emplace<otf2::definition::metric_class>(
ByMeasurementScopeType(scope.type), otf2::common::metric_occurence::async,
otf2::common::recorder_kind::abstract);

// https://sxauroratsubasa.sakura.ne.jp/documents/guide/pdfs/Aurora_ISA_guide.pdf
// (page 38)
const std::vector<std::pair<std::string, std::string>> nec_counters = {
{ "execution_count", "Execution Count (EX)" },
{ "vector_execution_count", "Vector execution count (VX)" },
{ "fpec", "Floating point data element count (FPEC)" },
{ "vector_elements_count", "Vector elements count (VE)" },
{ "vecc", "Vector execution clock count (VECC)" },
{ "l1mcc", "L1 cache miss clocc count (L1MCC)" },
{ "vector_elements_count2", "Vector elements count 2 (VE2)" },
{ "varec", "Vector arithmetic execution clock count (VAREC)" },
{ "vldec", "Vector load execution clock count (VLDEC)" },
{ "pccc", "Port conflict clock count (PCCC)" },
{ "vlpc", "Vector Load packet count (VLPC)" },
{ "vlec", "Vector load element count (VLEC)" },
{ "vlcme", "Vector load cache miss element count (VLCME)" },
{ "fmaec", "Fused multiply add element count (FMAEC)" },
{ "ptcc", "Power throttling clock count (PTCC)" },
{ "ttcc", "Thermal throttling clock coung (TTCC)" }
};

for (const auto& counter : nec_counters)
{
auto& member = registry_.emplace<otf2::definition::metric_member>(
ByString(counter.first), intern(counter.first), intern(counter.second),
otf2::common::metric_type::other, otf2::common::metric_mode::accumulated_start,
otf2::common::type::uint64, otf2::common::base_type::decimal, 0, intern("#"));

metric_class.add_member(member);
}

return metric_class;
}

const perf::counter::CounterCollection& counter_collection =
perf::counter::CounterProvider::instance().collection_for(scope);

Expand Down
38 changes: 30 additions & 8 deletions src/monitor/nec_thread_monitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,23 +19,26 @@
* along with lo2s. If not, see <http://www.gnu.org/licenses/>.
*/

#include <chrono>
#include <lo2s/monitor/nec_thread_monitor.hpp>

#include <chrono>

#include <cassert>

extern "C"
{
#include <libved.h>
#include <veosinfo/veosinfo.h>
}

#include <libved.h>

namespace lo2s
{
namespace nec
{
NecThreadMonitor::NecThreadMonitor(Thread thread, trace::Trace& trace, NecDevice device)
: PollMonitor(trace, fmt::format("VE{} {}", device, thread.as_pid_t()),
std::chrono::duration_cast<std::chrono::nanoseconds>(config().nec_read_interval)),
perf::counter::MetricWriter(MeasurementScope::nec_metric(thread.as_scope()), trace),
nec_read_interval_(config().nec_read_interval), otf2_writer_(trace.nec_writer(device, thread)),
nec_thread_(thread), trace_(trace), device_(device), cctx_manager_(trace)
{
Expand All @@ -45,10 +48,19 @@ NecThreadMonitor::NecThreadMonitor(Thread thread, trace::Trace& trace, NecDevice

void NecThreadMonitor::monitor([[maybe_unused]] int fd)
{
static int reg[] = { VE_USR_IC };
uint64_t val;
static int reg[] = {
VE_USR_PMC00, VE_USR_PMC01, VE_USR_PMC02, VE_USR_PMC03, VE_USR_PMC04, VE_USR_PMC05,
VE_USR_PMC06, VE_USR_PMC07, VE_USR_PMC08, VE_USR_PMC09, VE_USR_PMC10, VE_USR_PMC11,
VE_USR_PMC12, VE_USR_PMC13, VE_USR_PMC14, VE_USR_PMC15, VE_USR_IC,
};

constexpr size_t num_counters = sizeof(reg) / sizeof(int);

auto ret = ve_get_regvals(device_.as_int(), nec_thread_.as_pid_t(), 1, reg, &val);
uint64_t val[num_counters];

assert(reg[num_counters - 1] == VE_USR_IC);

auto ret = ve_get_regvals(device_.as_int(), nec_thread_.as_pid_t(), num_counters, reg, val);

if (ret == -1)
{
Expand All @@ -57,8 +69,18 @@ void NecThreadMonitor::monitor([[maybe_unused]] int fd)
}

otf2::chrono::time_point tp = lo2s::time::now();
otf2_writer_.write_calling_context_sample(tp, cctx_manager_.sample_ref(val), 2,
trace_.nec_interrupt_generator().ref());
otf2_writer_.write_calling_context_sample(tp, cctx_manager_.sample_ref(val[num_counters - 1]),
2, trace_.nec_interrupt_generator().ref());

metric_event_.timestamp(tp);

otf2::event::metric::values& values = metric_event_.raw_values();

for (size_t i = 0; i < num_counters - 1; i++)
{
values[i] = val[i];
}
writer_.write(metric_event_);
}

void NecThreadMonitor::finalize_thread()
Expand Down

0 comments on commit a044508

Please sign in to comment.