Skip to content

Commit

Permalink
Merge pull request ceph#50302 from weirdwiz/rbd-perf-counters
Browse files (browse the repository at this point in the history)
rbd-mirror: switch to labeled perf counters

Reviewed-by: Juan Miguel Olmo <[email protected]>
Reviewed-by: Ilya Dryomov <[email protected]>
  • Loading branch information
idryomov authored Mar 24, 2023
2 parents 76177ab + 1a1477b commit b89782a
Show file tree
Hide file tree
Showing 4 changed files with 102 additions and 51 deletions.
17 changes: 11 additions & 6 deletions src/tools/rbd_mirror/Types.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,19 @@ template <typename> struct MirrorStatusUpdater;
// Performance counters
//
// Index space for the rbd-mirror perf counters. The enumerators form two
// contiguous ranges, each bracketed by a *_first / *_last pair; those pairs
// are passed to PerfCountersBuilder as the lower and upper bounds of a
// counter set, and the enumerators in between are the indices used with
// add_*() at registration time and with inc()/tinc()/set()/tset() at
// update time.
//
// NOTE(review): this listing appears to carry both the pre-rename
// (l_rbd_mirror_replay*, l_rbd_mirror_snapshot_replay_*) and the post-rename
// (l_rbd_mirror_journal_*, l_rbd_mirror_snapshot_snapshots / _sync_*)
// enumerators side by side -- confirm against the actual header before
// relying on any implied numeric value.
enum {
// journal-based mirroring range; explicit base value for the whole enum
l_rbd_mirror_journal_first = 27000,
l_rbd_mirror_replay,
l_rbd_mirror_replay_bytes,
l_rbd_mirror_replay_latency,
l_rbd_mirror_journal_entries,
l_rbd_mirror_journal_replay_bytes,
l_rbd_mirror_journal_replay_latency,
// upper bound handed to PerfCountersBuilder for the journal range
l_rbd_mirror_journal_last,
// snapshot-based mirroring range
l_rbd_mirror_snapshot_first,
l_rbd_mirror_snapshot_replay_snapshots,
l_rbd_mirror_snapshot_replay_snapshots_time,
l_rbd_mirror_snapshot_replay_bytes,
l_rbd_mirror_snapshot_snapshots,
l_rbd_mirror_snapshot_sync_time,
l_rbd_mirror_snapshot_sync_bytes,
// per-image only counters below
// (the process-wide snapshot builder in main.cc uses
// l_rbd_mirror_snapshot_remote_timestamp as its upper bound, so the
// enumerators from here on are registered only by the per-image builder in
// snapshot/Replayer.cc; keep them after the shared counters)
l_rbd_mirror_snapshot_remote_timestamp,
l_rbd_mirror_snapshot_local_timestamp,
l_rbd_mirror_snapshot_last_sync_time,
l_rbd_mirror_snapshot_last_sync_bytes,
// upper bound handed to PerfCountersBuilder for the per-image snapshot range
l_rbd_mirror_snapshot_last,
};

Expand Down
40 changes: 27 additions & 13 deletions src/tools/rbd_mirror/image_replayer/journal/Replayer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#include "Replayer.h"
#include "common/debug.h"
#include "common/errno.h"
#include "common/perf_counters.h"
#include "common/perf_counters_key.h"
#include "common/Timer.h"
#include "librbd/Journal.h"
#include "librbd/Utils.h"
Expand Down Expand Up @@ -1159,9 +1161,11 @@ void Replayer<I>::handle_process_entry_safe(

auto latency = ceph_clock_now() - replay_start_time;
if (g_journal_perf_counters) {
g_journal_perf_counters->inc(l_rbd_mirror_replay);
g_journal_perf_counters->inc(l_rbd_mirror_replay_bytes, replay_bytes);
g_journal_perf_counters->tinc(l_rbd_mirror_replay_latency, latency);
g_journal_perf_counters->inc(l_rbd_mirror_journal_entries);
g_journal_perf_counters->inc(l_rbd_mirror_journal_replay_bytes,
replay_bytes);
g_journal_perf_counters->tinc(l_rbd_mirror_journal_replay_latency,
latency);
}

auto ctx = new LambdaContext(
Expand All @@ -1170,9 +1174,9 @@ void Replayer<I>::handle_process_entry_safe(
schedule_flush_local_replay_task();

if (m_perf_counters) {
m_perf_counters->inc(l_rbd_mirror_replay);
m_perf_counters->inc(l_rbd_mirror_replay_bytes, replay_bytes);
m_perf_counters->tinc(l_rbd_mirror_replay_latency, latency);
m_perf_counters->inc(l_rbd_mirror_journal_entries);
m_perf_counters->inc(l_rbd_mirror_journal_replay_bytes, replay_bytes);
m_perf_counters->tinc(l_rbd_mirror_journal_replay_latency, latency);
}

m_event_replay_tracker.finish_op();
Expand Down Expand Up @@ -1270,13 +1274,23 @@ void Replayer<I>::register_perf_counters() {

auto cct = static_cast<CephContext *>(m_state_builder->local_image_ctx->cct);
auto prio = cct->_conf.get_val<int64_t>("rbd_mirror_image_perf_stats_prio");
PerfCountersBuilder plb(g_ceph_context, "rbd_mirror_image_" + m_image_spec,
l_rbd_mirror_journal_first, l_rbd_mirror_journal_last);
plb.add_u64_counter(l_rbd_mirror_replay, "replay", "Replays", "r", prio);
plb.add_u64_counter(l_rbd_mirror_replay_bytes, "replay_bytes",
"Replayed data", "rb", prio, unit_t(UNIT_BYTES));
plb.add_time_avg(l_rbd_mirror_replay_latency, "replay_latency",
"Replay latency", "rl", prio);

auto local_image_ctx = m_state_builder->local_image_ctx;
std::string labels = ceph::perf_counters::key_create(
"rbd_mirror_journal_image",
{{"pool", local_image_ctx->md_ctx.get_pool_name()},
{"namespace", local_image_ctx->md_ctx.get_namespace()},
{"image", local_image_ctx->name}});

PerfCountersBuilder plb(g_ceph_context, labels, l_rbd_mirror_journal_first,
l_rbd_mirror_journal_last);
plb.add_u64_counter(l_rbd_mirror_journal_entries, "entries",
"Number of entries replayed", nullptr, prio);
plb.add_u64_counter(l_rbd_mirror_journal_replay_bytes, "replay_bytes",
"Total bytes replayed", nullptr, prio,
unit_t(UNIT_BYTES));
plb.add_time_avg(l_rbd_mirror_journal_replay_latency, "replay_latency",
"Replay latency", nullptr, prio);
m_perf_counters = plb.create_perf_counters();
g_ceph_context->get_perfcounters_collection()->add(m_perf_counters);
}
Expand Down
61 changes: 45 additions & 16 deletions src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#include "Replayer.h"
#include "common/debug.h"
#include "common/errno.h"
#include "common/perf_counters.h"
#include "common/perf_counters_key.h"
#include "include/stringify.h"
#include "common/Timer.h"
#include "cls/rbd/cls_rbd_client.h"
Expand Down Expand Up @@ -254,6 +256,10 @@ bool Replayer<I>::get_replay_status(std::string* description,
json_spirit::mObject root_obj;
root_obj["replay_state"] = replay_state;
root_obj["remote_snapshot_timestamp"] = remote_snap_info->timestamp.sec();
if (m_perf_counters) {
m_perf_counters->tset(l_rbd_mirror_snapshot_remote_timestamp,
remote_snap_info->timestamp);
}

auto matching_remote_snap_id = util::compute_remote_snap_id(
m_state_builder->local_image_ctx->image_lock,
Expand All @@ -269,6 +275,10 @@ bool Replayer<I>::get_replay_status(std::string* description,
// synced and not the consistency point in time.
root_obj["local_snapshot_timestamp"] =
matching_remote_snap_it->second.timestamp.sec();
if (m_perf_counters) {
m_perf_counters->tset(l_rbd_mirror_snapshot_local_timestamp,
matching_remote_snap_it->second.timestamp);
}
}

matching_remote_snap_it = m_state_builder->remote_image_ctx->snap_info.find(
Expand Down Expand Up @@ -1119,16 +1129,19 @@ void Replayer<I>::handle_copy_image(int r) {
m_last_snapshot_sync_seconds = duration.sec();

if (g_snapshot_perf_counters) {
g_snapshot_perf_counters->inc(l_rbd_mirror_snapshot_replay_bytes,
g_snapshot_perf_counters->inc(l_rbd_mirror_snapshot_sync_bytes,
m_snapshot_bytes);
g_snapshot_perf_counters->inc(l_rbd_mirror_snapshot_replay_snapshots);
g_snapshot_perf_counters->tinc(
l_rbd_mirror_snapshot_replay_snapshots_time, duration);
g_snapshot_perf_counters->inc(l_rbd_mirror_snapshot_snapshots);
g_snapshot_perf_counters->tinc(l_rbd_mirror_snapshot_sync_time,
duration);
}
if (m_perf_counters) {
m_perf_counters->inc(l_rbd_mirror_snapshot_replay_bytes, m_snapshot_bytes);
m_perf_counters->inc(l_rbd_mirror_snapshot_replay_snapshots);
m_perf_counters->tinc(l_rbd_mirror_snapshot_replay_snapshots_time, duration);
m_perf_counters->inc(l_rbd_mirror_snapshot_sync_bytes, m_snapshot_bytes);
m_perf_counters->inc(l_rbd_mirror_snapshot_snapshots);
m_perf_counters->tinc(l_rbd_mirror_snapshot_sync_time, duration);
m_perf_counters->tset(l_rbd_mirror_snapshot_last_sync_time, duration);
m_perf_counters->set(l_rbd_mirror_snapshot_last_sync_bytes,
m_snapshot_bytes);
}
}

Expand Down Expand Up @@ -1563,16 +1576,32 @@ void Replayer<I>::register_perf_counters() {

auto cct = static_cast<CephContext *>(m_state_builder->local_image_ctx->cct);
auto prio = cct->_conf.get_val<int64_t>("rbd_mirror_image_perf_stats_prio");
PerfCountersBuilder plb(g_ceph_context,
"rbd_mirror_snapshot_image_" + m_image_spec,
l_rbd_mirror_snapshot_first,

auto local_image_ctx = m_state_builder->local_image_ctx;
std::string labels = ceph::perf_counters::key_create(
"rbd_mirror_snapshot_image",
{{"pool", local_image_ctx->md_ctx.get_pool_name()},
{"namespace", local_image_ctx->md_ctx.get_namespace()},
{"image", local_image_ctx->name}});

PerfCountersBuilder plb(g_ceph_context, labels, l_rbd_mirror_snapshot_first,
l_rbd_mirror_snapshot_last);
plb.add_u64_counter(l_rbd_mirror_snapshot_replay_snapshots,
"snapshots", "Snapshots", "r", prio);
plb.add_time_avg(l_rbd_mirror_snapshot_replay_snapshots_time,
"snapshots_time", "Snapshots time", "rl", prio);
plb.add_u64_counter(l_rbd_mirror_snapshot_replay_bytes, "replay_bytes",
"Replayed data", "rb", prio, unit_t(UNIT_BYTES));
plb.add_u64_counter(l_rbd_mirror_snapshot_snapshots, "snapshots",
"Number of snapshots synced", nullptr, prio);
plb.add_time_avg(l_rbd_mirror_snapshot_sync_time, "sync_time",
"Average sync time", nullptr, prio);
plb.add_u64_counter(l_rbd_mirror_snapshot_sync_bytes, "sync_bytes",
"Total bytes synced", nullptr, prio, unit_t(UNIT_BYTES));
plb.add_time(l_rbd_mirror_snapshot_remote_timestamp, "remote_timestamp",
"Timestamp of the remote snapshot", nullptr, prio);
plb.add_time(l_rbd_mirror_snapshot_local_timestamp, "local_timestamp",
"Timestamp of the local snapshot", nullptr, prio);
plb.add_time(l_rbd_mirror_snapshot_last_sync_time, "last_sync_time",
"Time taken to sync the last snapshot", nullptr, prio);
plb.add_u64(l_rbd_mirror_snapshot_last_sync_bytes, "last_sync_bytes",
"Bytes synced for the last snapshot", nullptr, prio,
unit_t(UNIT_BYTES));

m_perf_counters = plb.create_perf_counters();
g_ceph_context->get_perfcounters_collection()->add(m_perf_counters);
}
Expand Down
35 changes: 19 additions & 16 deletions src/tools/rbd_mirror/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,27 +68,30 @@ int main(int argc, const char **argv)
auto prio =
g_ceph_context->_conf.get_val<int64_t>("rbd_mirror_perf_stats_prio");
{
PerfCountersBuilder plb(g_ceph_context, "rbd_mirror",
PerfCountersBuilder plb(g_ceph_context, "rbd_mirror_journal",
rbd::mirror::l_rbd_mirror_journal_first,
rbd::mirror::l_rbd_mirror_journal_last);
plb.add_u64_counter(rbd::mirror::l_rbd_mirror_replay, "replay", "Replays",
"r", prio);
plb.add_u64_counter(rbd::mirror::l_rbd_mirror_replay_bytes, "replay_bytes",
"Replayed data", "rb", prio, unit_t(UNIT_BYTES));
plb.add_time_avg(rbd::mirror::l_rbd_mirror_replay_latency, "replay_latency",
"Replay latency", "rl", prio);
plb.add_u64_counter(rbd::mirror::l_rbd_mirror_journal_entries, "entries",
"Number of entries replayed", nullptr, prio);
plb.add_u64_counter(rbd::mirror::l_rbd_mirror_journal_replay_bytes,
"replay_bytes", "Total bytes replayed", nullptr, prio,
unit_t(UNIT_BYTES));
plb.add_time_avg(rbd::mirror::l_rbd_mirror_journal_replay_latency,
"replay_latency", "Replay latency", nullptr, prio);
g_journal_perf_counters = plb.create_perf_counters();
}
{
PerfCountersBuilder plb(g_ceph_context, "rbd_mirror_snapshot",
rbd::mirror::l_rbd_mirror_snapshot_first,
rbd::mirror::l_rbd_mirror_snapshot_last);
plb.add_u64_counter(rbd::mirror::l_rbd_mirror_snapshot_replay_snapshots,
"snapshots", "Snapshots", "r", prio);
plb.add_time_avg(rbd::mirror::l_rbd_mirror_snapshot_replay_snapshots_time,
"snapshots_time", "Snapshots time", "rl", prio);
plb.add_u64_counter(rbd::mirror::l_rbd_mirror_snapshot_replay_bytes,
"replay_bytes", "Replayed data", "rb", prio,
PerfCountersBuilder plb(
g_ceph_context, "rbd_mirror_snapshot",
rbd::mirror::l_rbd_mirror_snapshot_first,
rbd::mirror::l_rbd_mirror_snapshot_remote_timestamp);
plb.add_u64_counter(rbd::mirror::l_rbd_mirror_snapshot_snapshots,
"snapshots", "Number of snapshots synced", nullptr,
prio);
plb.add_time_avg(rbd::mirror::l_rbd_mirror_snapshot_sync_time, "sync_time",
"Average sync time", nullptr, prio);
plb.add_u64_counter(rbd::mirror::l_rbd_mirror_snapshot_sync_bytes,
"sync_bytes", "Total bytes synced", nullptr, prio,
unit_t(UNIT_BYTES));
g_snapshot_perf_counters = plb.create_perf_counters();
}
Expand Down

0 comments on commit b89782a

Please sign in to comment.