Skip to content

Commit

Permalink
Merge pull request #299 from tud-zih-energy/issue-253-nec-sampling
Browse files Browse the repository at this point in the history
feat(sampling): Implement sampling for NEC vector accelerators
  • Loading branch information
bmario authored Jan 19, 2024
2 parents b4d5f99 + e84a063 commit 44ffb06
Show file tree
Hide file tree
Showing 19 changed files with 573 additions and 8 deletions.
15 changes: 14 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ find_package(Doxygen COMPONENTS dot)
find_package(x86_energy 2.0 CONFIG)
find_package(StdFilesystem REQUIRED)
find_package(Sensors)
find_package(Veosinfo)
find_package(PkgConfig)

if(PkgConfig_FOUND)
Expand All @@ -118,7 +119,8 @@ CMAKE_DEPENDENT_OPTION(USE_SENSORS "Use the libsensors to read system metrics."
add_feature_info("USE_SENSORS" USE_SENSORS "Use the libsensors to read system metrics.")
CMAKE_DEPENDENT_OPTION(USE_LIBAUDIT "Use libaudit for syscall name resolution" ON Audit_FOUND OFF)
add_feature_info("USE_LIBAUDIT" USE_LIBAUDIT "Use libaudit for syscall name resolution.")

# USE_VEOSINFO defaults to ON, but is only offered when Veosinfo_FOUND is true;
# otherwise CMAKE_DEPENDENT_OPTION forces it to OFF.
CMAKE_DEPENDENT_OPTION(USE_VEOSINFO "Use libveosinfo to sample NEC SX-Aurora Tsubasa cards." ON "Veosinfo_FOUND" OFF)
# Report the resulting setting in the feature summary.
add_feature_info("USE_VEOSINFO" USE_VEOSINFO "Use libveosinfo to sample NEC SX-Aurora Tsubasa cards.")
# system configuration checks
CHECK_INCLUDE_FILES(linux/hw_breakpoint.h HAVE_HW_BREAKPOINT_H)
CHECK_STRUCT_HAS_MEMBER("struct perf_event_attr" clockid linux/perf_event.h HAVE_PERF_EVENT_ATTR_CLOCKID)
Expand Down Expand Up @@ -267,6 +269,17 @@ if (USE_SENSORS)
endif()
endif()

# NEC SX-Aurora support: when enabled, define HAVE_VEOSINFO, link libveosinfo
# and compile the NEC monitor sources into lo2s.
if(USE_VEOSINFO)
    if(NOT Veosinfo_FOUND)
        # The feature was requested but the library could not be located.
        message(SEND_ERROR "Veosinfo not found but requested.")
    else()
        target_compile_definitions(lo2s PUBLIC HAVE_VEOSINFO)
        target_link_libraries(lo2s PRIVATE Veosinfo::veosinfo)
        target_sources(lo2s PRIVATE
            src/monitor/nec_thread_monitor.cpp
            src/monitor/nec_monitor_main.cpp)
    endif()
endif()

if (USE_LIBAUDIT)
if (Audit_FOUND)
target_compile_definitions(lo2s PUBLIC HAVE_LIBAUDIT)
Expand Down
18 changes: 18 additions & 0 deletions cmake/FindVeosinfo.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@

# FindVeosinfo
# ------------
# Locates libveosinfo (header veosinfo/veosinfo.h and library "veosinfo").
#
# Result variables:
#   Veosinfo_FOUND         - set by find_package_handle_standard_args
#   Veosinfo_INCLUDE_DIRS  - directory containing veosinfo/veosinfo.h
#   Veosinfo_LIBRARIES     - full path to the veosinfo library
#
# Targets (created only when the package is found):
#   Veosinfo::veosinfo     - alias of the INTERFACE target libveosinfo

find_path(Veosinfo_INCLUDE_DIRS veosinfo/veosinfo.h
    PATHS ENV C_INCLUDE_PATH ENV CPATH
    PATH_SUFFIXES include)

# The keyword is HINTS. A bare "HINT" is not a keyword and would be treated as
# a literal directory named "HINT" to search in.
find_library(Veosinfo_LIBRARIES veosinfo
    HINTS ENV LIBRARY_PATH ENV LD_LIBRARY_PATH)

include(FindPackageHandleStandardArgs)

find_package_handle_standard_args(Veosinfo DEFAULT_MSG Veosinfo_LIBRARIES Veosinfo_INCLUDE_DIRS)

# Guard the target creation so that calling find_package(Veosinfo) more than
# once does not fail with "target already exists".
if(Veosinfo_FOUND AND NOT TARGET Veosinfo::veosinfo)
    add_library(libveosinfo INTERFACE)
    target_link_libraries(libveosinfo INTERFACE ${Veosinfo_LIBRARIES})
    target_include_directories(libveosinfo SYSTEM INTERFACE ${Veosinfo_INCLUDE_DIRS})
    add_library(Veosinfo::veosinfo ALIAS libveosinfo)
endif()

mark_as_advanced(Veosinfo_LIBRARIES Veosinfo_INCLUDE_DIRS)
4 changes: 4 additions & 0 deletions include/lo2s/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ struct Config
// syscalls
bool use_syscalls = false;
std::vector<int64_t> syscall_filter;
// NEC SX-Aurora Tsubasa
bool use_nec;
std::chrono::microseconds nec_read_interval;
std::chrono::milliseconds nec_check_interval;
};

const Config& config();
Expand Down
7 changes: 6 additions & 1 deletion include/lo2s/monitor/main_monitor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,10 @@
#endif
#include <lo2s/mmap.hpp>
#include <lo2s/monitor/io_monitor.hpp>
#ifdef HAVE_VEOSINFO
#include <lo2s/monitor/nec_monitor_main.hpp>
#endif
#include <lo2s/monitor/tracepoint_monitor.hpp>
#include <lo2s/perf/bio/writer.hpp>
#include <lo2s/process_info.hpp>
#include <lo2s/trace/trace.hpp>
#include <lo2s/types.hpp>
Expand Down Expand Up @@ -82,6 +84,9 @@ class MainMonitor
#ifdef HAVE_SENSORS
std::unique_ptr<metric::sensors::Recorder> sensors_recorder_;
#endif
#ifdef HAVE_VEOSINFO
std::vector<std::unique_ptr<nec::NecMonitorMain>> nec_monitors_;
#endif
};
} // namespace monitor
} // namespace lo2s
69 changes: 69 additions & 0 deletions include/lo2s/monitor/nec_monitor_main.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/*
* This file is part of the lo2s software.
* Linux OTF2 sampling
*
* Copyright (c) 2017,
* Technische Universitaet Dresden, Germany
*
* lo2s is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* lo2s is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with lo2s. If not, see <http://www.gnu.org/licenses/>.
*/

#pragma once

#include <lo2s/monitor/nec_thread_monitor.hpp>
#include <lo2s/monitor/threaded_monitor.hpp>
#include <lo2s/trace/trace.hpp>
#include <lo2s/types.hpp>

#include <atomic>
#include <filesystem>
#include <map>
#include <optional>
#include <string>
#include <utility>
#include <vector>

extern "C"
{
#include <veosinfo/veosinfo.h>

#include <libved.h>
}

namespace lo2s
{
namespace nec
{
/**
 * Threaded monitor associated with one NecDevice.
 *
 * Only the declaration is visible in this header; the behaviour of run(),
 * stop() and the private helpers is defined in the corresponding source file.
 * Judging by its members, it keeps one NecThreadMonitor per Thread -- confirm
 * against the implementation.
 */
class NecMonitorMain : public monitor::ThreadedMonitor
{
public:
    /**
     * @param trace  trace object, stored by reference (must outlive this monitor)
     * @param device the NEC device this monitor is associated with
     */
    NecMonitorMain(trace::Trace& trace, NecDevice device);

    void stop() override;

protected:
    /// Group name reported for this monitor.
    std::string group() const override
    {
        return "nec::MonitorMain";
    }

    void run() override;
    void finalize_thread() override;

private:
    /// Returns the device belonging to @p thread, or an empty optional.
    /// NOTE(review): exact lookup semantics live in the .cpp -- confirm.
    std::optional<NecDevice> get_device_of(Thread thread);

    /// Returns the threads associated with @p device.
    /// NOTE(review): exact lookup semantics live in the .cpp -- confirm.
    std::vector<Thread> get_tasks_of(NecDevice device);

    // Member order is significant: it determines initialization order.
    std::map<Thread, NecThreadMonitor> monitors_;
    trace::Trace& trace_;
    NecDevice device_;
    // Explicitly initialized: before C++20 a default-constructed std::atomic
    // holds an indeterminate value. A constructor mem-initializer still wins.
    std::atomic<bool> stopped_{ false };
    ve_nodeinfo nodeinfo_;
};
} // namespace nec
} // namespace lo2s
58 changes: 58 additions & 0 deletions include/lo2s/monitor/nec_thread_monitor.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* This file is part of the lo2s software.
* Linux OTF2 sampling
*
* Copyright (c) 2016,
* Technische Universitaet Dresden, Germany
*
* lo2s is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* lo2s is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with lo2s. If not, see <http://www.gnu.org/licenses/>.
*/

#pragma once

#include <chrono>

#include <lo2s/monitor/poll_monitor.hpp>
#include <lo2s/perf/calling_context_manager.hpp>
#include <lo2s/trace/trace.hpp>

namespace lo2s
{
namespace nec
{
// Poll-based monitor (derives from monitor::PollMonitor) tied to one Thread on
// one NecDevice. Only the declaration is visible here; sampling behaviour is
// implemented in the corresponding source file.
class NecThreadMonitor : public monitor::PollMonitor
{
public:
// thread: the thread this monitor is created for
// trace:  trace object, kept by reference in trace_
// device: the NEC device this monitor is created for
NecThreadMonitor(Thread thread, trace::Trace& trace, NecDevice device);

protected:
// Group name reported for this monitor.
std::string group() const override
{
return "nec::ThreadMonitor";
}

void finalize_thread() override;

// Overrides the base-class hook that receives a file descriptor.
// NOTE(review): which fd is passed and when is decided by PollMonitor -- confirm.
void monitor(int fd) override;

private:
// Member order is significant: it determines initialization order.
// NOTE(review): presumably taken from Config::nec_read_interval -- confirm.
std::chrono::microseconds nec_read_interval_;
otf2::writer::local& otf2_writer_;
Thread nec_thread_;
trace::Trace& trace_;
NecDevice device_;
perf::CallingContextManager cctx_manager_;
};
} // namespace nec
} // namespace lo2s
2 changes: 1 addition & 1 deletion include/lo2s/monitor/poll_monitor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ class PollMonitor : public ThreadedMonitor

protected:
void run() override;
void monitor() override;
void monitor();

void add_fd(int fd);

Expand Down
1 change: 0 additions & 1 deletion include/lo2s/monitor/threaded_monitor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ class ThreadedMonitor
virtual void run() = 0;

void thread_main();
virtual void monitor() = 0;

void register_thread();

Expand Down
21 changes: 21 additions & 0 deletions include/lo2s/topology.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <fstream>
#include <iterator>
#include <map>
#include <regex>
#include <set>
#include <sstream>
#include <stdexcept>
Expand Down Expand Up @@ -70,6 +71,26 @@ class Topology
return cpus_;
}

/**
 * Enumerates the NEC devices listed under /sys/class/ve.
 *
 * Every directory entry whose full path has the form /sys/class/ve/ve<N>
 * (N being a decimal number) yields NecDevice(N).
 *
 * @return the set of devices found; empty if /sys/class/ve is not a directory
 *         (e.g. no VE driver is loaded on this host).
 */
const std::set<NecDevice> nec_devices() const
{
    std::set<NecDevice> devices;

    const std::filesystem::path ve_class_dir("/sys/class/ve");

    // directory_iterator throws filesystem_error for a missing directory, so
    // treat "no such directory" as "no devices" instead of failing.
    if (!std::filesystem::is_directory(ve_class_dir))
    {
        return devices;
    }

    // One or more digits, so that device indices above 9 are matched as well.
    const std::regex nec_regex("/sys/class/ve/ve(\\d+)");

    for (const auto& dir_entry : std::filesystem::directory_iterator(ve_class_dir))
    {
        // Must outlive nec_match, which refers into this string.
        const auto path = dir_entry.path().string();

        std::smatch nec_match;
        if (std::regex_match(path, nec_match, nec_regex))
        {
            devices.emplace(NecDevice(std::stoi(nec_match[1])));
        }
    }

    return devices;
}

Core core_of(Cpu cpu) const
{
return cpu_to_core_.at(cpu);
Expand Down
19 changes: 15 additions & 4 deletions include/lo2s/trace/reg_keys.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,11 @@ struct ByThreadTag
};
using ByThread = SimpleKeyType<Thread, ByThreadTag>;

// Empty tag type: makes ByNecThread a distinct key type from the other
// Thread-based keys (such as ByThread), even though both wrap a Thread.
struct ByNecThreadTag
{
};
using ByNecThread = SimpleKeyType<Thread, ByNecThreadTag>;

struct ByProcessTag
{
};
Expand Down Expand Up @@ -124,6 +129,12 @@ struct ByCounterCollectionTag

using ByCounterCollection = SimpleKeyType<perf::counter::CounterCollection, ByCounterCollectionTag>;

// Empty tag type used to form the ByNecDevice key type.
struct ByNecDeviceTag
{
};

// Key type wrapping a NecDevice.
using ByNecDevice = SimpleKeyType<NecDevice, ByNecDeviceTag>;

template <typename Definition>
struct Holder
{
Expand All @@ -132,8 +143,8 @@ struct Holder
template <>
struct Holder<otf2::definition::system_tree_node>
{
using type = otf2::lookup_definition_holder<otf2::definition::system_tree_node, ByCore,
ByProcess, ByBlockDevice, ByCpu, ByPackage>;
using type = otf2::lookup_definition_holder<otf2::definition::system_tree_node, ByNecDevice,
ByCore, ByProcess, ByBlockDevice, ByCpu, ByPackage>;
};
template <>
struct Holder<otf2::definition::regions_group>
Expand Down Expand Up @@ -174,13 +185,13 @@ struct Holder<otf2::definition::location_group>
{
using type =
otf2::lookup_definition_holder<otf2::definition::location_group, ByMeasurementScope,
ByExecutionScope, ByBlockDevice>;
ByExecutionScope, ByNecThread, ByBlockDevice>;
};
template <>
struct Holder<otf2::definition::location>
{
using type = otf2::lookup_definition_holder<otf2::definition::location, ByExecutionScope,
ByMeasurementScope, ByBlockDevice>;
ByMeasurementScope, ByNecThread, ByBlockDevice>;
};
template <>
struct Holder<otf2::definition::region>
Expand Down
9 changes: 9 additions & 0 deletions include/lo2s/trace/trace.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ class Trace
otf2::writer::local& syscall_writer(const Cpu& cpu);
otf2::writer::local& bio_writer(BlockDevice dev);
otf2::writer::local& create_metric_writer(const std::string& name);
otf2::writer::local& nec_writer(NecDevice device, const Thread& nec_thread);

otf2::definition::io_handle& block_io_handle(BlockDevice dev);

Expand Down Expand Up @@ -225,6 +226,12 @@ class Trace
{
return interrupt_generator_;
}

// Returns, by value, the interrupt generator definition referred to by the
// nec_interrupt_generator_ weak reference.
// NOTE(review): where and whether that reference is set is not visible here --
// confirm it is always assigned before this is called.
const otf2::definition::interrupt_generator nec_interrupt_generator() const
{
return nec_interrupt_generator_;
}

const otf2::definition::system_tree_node& system_tree_cpu_node(Cpu cpu) const
{
return registry_.get<otf2::definition::system_tree_node>(ByCpu(cpu));
Expand Down Expand Up @@ -327,6 +334,8 @@ class Trace

otf2::definition::interrupt_generator& interrupt_generator_;

otf2::definition::detail::weak_ref<otf2::definition::interrupt_generator>
nec_interrupt_generator_;
// TODO add location groups (processes), read path from /proc/self/exe symlink

std::map<Thread, std::string> thread_names_;
Expand Down
Loading

0 comments on commit 44ffb06

Please sign in to comment.