From 1a1477b9fd7f3fcf7eb66c8a768476bea1322d1d Mon Sep 17 00:00:00 2001 From: Divyansh Kamboj Date: Fri, 17 Mar 2023 14:49:14 +0530 Subject: [PATCH] rbd-mirror: add and rename perf counters for journal and snapshot mirroring This commit renames the existing snapshot- and journal-based performance counters in the rbd-mirror daemon to better reflect their purpose. Additionally, new perf counters have been added to provide more detailed information about the synchronization of snapshots between the source and destination clusters. This commit also switches to using labels instead of embedding the image spec in the counter name. Signed-off-by: Divyansh Kamboj --- src/tools/rbd_mirror/Types.h | 17 ++++-- .../image_replayer/journal/Replayer.cc | 40 ++++++++---- .../image_replayer/snapshot/Replayer.cc | 61 ++++++++++++++----- src/tools/rbd_mirror/main.cc | 35 ++++++----- 4 files changed, 102 insertions(+), 51 deletions(-) diff --git a/src/tools/rbd_mirror/Types.h b/src/tools/rbd_mirror/Types.h index 7b2a3b5cea5fa..9bba58fb130c3 100644 --- a/src/tools/rbd_mirror/Types.h +++ b/src/tools/rbd_mirror/Types.h @@ -21,14 +21,19 @@ template struct MirrorStatusUpdater; // Performance counters enum { l_rbd_mirror_journal_first = 27000, - l_rbd_mirror_replay, - l_rbd_mirror_replay_bytes, - l_rbd_mirror_replay_latency, + l_rbd_mirror_journal_entries, + l_rbd_mirror_journal_replay_bytes, + l_rbd_mirror_journal_replay_latency, l_rbd_mirror_journal_last, l_rbd_mirror_snapshot_first, - l_rbd_mirror_snapshot_replay_snapshots, - l_rbd_mirror_snapshot_replay_snapshots_time, - l_rbd_mirror_snapshot_replay_bytes, + l_rbd_mirror_snapshot_snapshots, + l_rbd_mirror_snapshot_sync_time, + l_rbd_mirror_snapshot_sync_bytes, + // per-image only counters below + l_rbd_mirror_snapshot_remote_timestamp, + l_rbd_mirror_snapshot_local_timestamp, + l_rbd_mirror_snapshot_last_sync_time, + l_rbd_mirror_snapshot_last_sync_bytes, l_rbd_mirror_snapshot_last, }; diff --git 
a/src/tools/rbd_mirror/image_replayer/journal/Replayer.cc b/src/tools/rbd_mirror/image_replayer/journal/Replayer.cc index 3ce9104d247bd..20560038ca7d9 100644 --- a/src/tools/rbd_mirror/image_replayer/journal/Replayer.cc +++ b/src/tools/rbd_mirror/image_replayer/journal/Replayer.cc @@ -4,6 +4,8 @@ #include "Replayer.h" #include "common/debug.h" #include "common/errno.h" +#include "common/perf_counters.h" +#include "common/perf_counters_key.h" #include "common/Timer.h" #include "librbd/Journal.h" #include "librbd/Utils.h" @@ -1159,9 +1161,11 @@ void Replayer::handle_process_entry_safe( auto latency = ceph_clock_now() - replay_start_time; if (g_journal_perf_counters) { - g_journal_perf_counters->inc(l_rbd_mirror_replay); - g_journal_perf_counters->inc(l_rbd_mirror_replay_bytes, replay_bytes); - g_journal_perf_counters->tinc(l_rbd_mirror_replay_latency, latency); + g_journal_perf_counters->inc(l_rbd_mirror_journal_entries); + g_journal_perf_counters->inc(l_rbd_mirror_journal_replay_bytes, + replay_bytes); + g_journal_perf_counters->tinc(l_rbd_mirror_journal_replay_latency, + latency); } auto ctx = new LambdaContext( @@ -1170,9 +1174,9 @@ void Replayer::handle_process_entry_safe( schedule_flush_local_replay_task(); if (m_perf_counters) { - m_perf_counters->inc(l_rbd_mirror_replay); - m_perf_counters->inc(l_rbd_mirror_replay_bytes, replay_bytes); - m_perf_counters->tinc(l_rbd_mirror_replay_latency, latency); + m_perf_counters->inc(l_rbd_mirror_journal_entries); + m_perf_counters->inc(l_rbd_mirror_journal_replay_bytes, replay_bytes); + m_perf_counters->tinc(l_rbd_mirror_journal_replay_latency, latency); } m_event_replay_tracker.finish_op(); @@ -1270,13 +1274,23 @@ void Replayer::register_perf_counters() { auto cct = static_cast(m_state_builder->local_image_ctx->cct); auto prio = cct->_conf.get_val("rbd_mirror_image_perf_stats_prio"); - PerfCountersBuilder plb(g_ceph_context, "rbd_mirror_image_" + m_image_spec, - l_rbd_mirror_journal_first, l_rbd_mirror_journal_last); - 
plb.add_u64_counter(l_rbd_mirror_replay, "replay", "Replays", "r", prio); - plb.add_u64_counter(l_rbd_mirror_replay_bytes, "replay_bytes", - "Replayed data", "rb", prio, unit_t(UNIT_BYTES)); - plb.add_time_avg(l_rbd_mirror_replay_latency, "replay_latency", - "Replay latency", "rl", prio); + + auto local_image_ctx = m_state_builder->local_image_ctx; + std::string labels = ceph::perf_counters::key_create( + "rbd_mirror_journal_image", + {{"pool", local_image_ctx->md_ctx.get_pool_name()}, + {"namespace", local_image_ctx->md_ctx.get_namespace()}, + {"image", local_image_ctx->name}}); + + PerfCountersBuilder plb(g_ceph_context, labels, l_rbd_mirror_journal_first, + l_rbd_mirror_journal_last); + plb.add_u64_counter(l_rbd_mirror_journal_entries, "entries", + "Number of entries replayed", nullptr, prio); + plb.add_u64_counter(l_rbd_mirror_journal_replay_bytes, "replay_bytes", + "Total bytes replayed", nullptr, prio, + unit_t(UNIT_BYTES)); + plb.add_time_avg(l_rbd_mirror_journal_replay_latency, "replay_latency", + "Replay latency", nullptr, prio); m_perf_counters = plb.create_perf_counters(); g_ceph_context->get_perfcounters_collection()->add(m_perf_counters); } diff --git a/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc b/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc index 65caf28cff12a..b4da280bf1be8 100644 --- a/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc +++ b/src/tools/rbd_mirror/image_replayer/snapshot/Replayer.cc @@ -4,6 +4,8 @@ #include "Replayer.h" #include "common/debug.h" #include "common/errno.h" +#include "common/perf_counters.h" +#include "common/perf_counters_key.h" #include "include/stringify.h" #include "common/Timer.h" #include "cls/rbd/cls_rbd_client.h" @@ -254,6 +256,10 @@ bool Replayer::get_replay_status(std::string* description, json_spirit::mObject root_obj; root_obj["replay_state"] = replay_state; root_obj["remote_snapshot_timestamp"] = remote_snap_info->timestamp.sec(); + if (m_perf_counters) { + 
m_perf_counters->tset(l_rbd_mirror_snapshot_remote_timestamp, + remote_snap_info->timestamp); + } auto matching_remote_snap_id = util::compute_remote_snap_id( m_state_builder->local_image_ctx->image_lock, @@ -269,6 +275,10 @@ bool Replayer::get_replay_status(std::string* description, // synced and not the consistency point in time. root_obj["local_snapshot_timestamp"] = matching_remote_snap_it->second.timestamp.sec(); + if (m_perf_counters) { + m_perf_counters->tset(l_rbd_mirror_snapshot_local_timestamp, + matching_remote_snap_it->second.timestamp); + } } matching_remote_snap_it = m_state_builder->remote_image_ctx->snap_info.find( @@ -1119,16 +1129,19 @@ void Replayer::handle_copy_image(int r) { m_last_snapshot_sync_seconds = duration.sec(); if (g_snapshot_perf_counters) { - g_snapshot_perf_counters->inc(l_rbd_mirror_snapshot_replay_bytes, + g_snapshot_perf_counters->inc(l_rbd_mirror_snapshot_sync_bytes, m_snapshot_bytes); - g_snapshot_perf_counters->inc(l_rbd_mirror_snapshot_replay_snapshots); - g_snapshot_perf_counters->tinc( - l_rbd_mirror_snapshot_replay_snapshots_time, duration); + g_snapshot_perf_counters->inc(l_rbd_mirror_snapshot_snapshots); + g_snapshot_perf_counters->tinc(l_rbd_mirror_snapshot_sync_time, + duration); } if (m_perf_counters) { - m_perf_counters->inc(l_rbd_mirror_snapshot_replay_bytes, m_snapshot_bytes); - m_perf_counters->inc(l_rbd_mirror_snapshot_replay_snapshots); - m_perf_counters->tinc(l_rbd_mirror_snapshot_replay_snapshots_time, duration); + m_perf_counters->inc(l_rbd_mirror_snapshot_sync_bytes, m_snapshot_bytes); + m_perf_counters->inc(l_rbd_mirror_snapshot_snapshots); + m_perf_counters->tinc(l_rbd_mirror_snapshot_sync_time, duration); + m_perf_counters->tset(l_rbd_mirror_snapshot_last_sync_time, duration); + m_perf_counters->set(l_rbd_mirror_snapshot_last_sync_bytes, + m_snapshot_bytes); } } @@ -1563,16 +1576,32 @@ void Replayer::register_perf_counters() { auto cct = static_cast(m_state_builder->local_image_ctx->cct); auto prio = 
cct->_conf.get_val("rbd_mirror_image_perf_stats_prio"); - PerfCountersBuilder plb(g_ceph_context, - "rbd_mirror_snapshot_image_" + m_image_spec, - l_rbd_mirror_snapshot_first, + + auto local_image_ctx = m_state_builder->local_image_ctx; + std::string labels = ceph::perf_counters::key_create( + "rbd_mirror_snapshot_image", + {{"pool", local_image_ctx->md_ctx.get_pool_name()}, + {"namespace", local_image_ctx->md_ctx.get_namespace()}, + {"image", local_image_ctx->name}}); + + PerfCountersBuilder plb(g_ceph_context, labels, l_rbd_mirror_snapshot_first, l_rbd_mirror_snapshot_last); - plb.add_u64_counter(l_rbd_mirror_snapshot_replay_snapshots, - "snapshots", "Snapshots", "r", prio); - plb.add_time_avg(l_rbd_mirror_snapshot_replay_snapshots_time, - "snapshots_time", "Snapshots time", "rl", prio); - plb.add_u64_counter(l_rbd_mirror_snapshot_replay_bytes, "replay_bytes", - "Replayed data", "rb", prio, unit_t(UNIT_BYTES)); + plb.add_u64_counter(l_rbd_mirror_snapshot_snapshots, "snapshots", + "Number of snapshots synced", nullptr, prio); + plb.add_time_avg(l_rbd_mirror_snapshot_sync_time, "sync_time", + "Average sync time", nullptr, prio); + plb.add_u64_counter(l_rbd_mirror_snapshot_sync_bytes, "sync_bytes", + "Total bytes synced", nullptr, prio, unit_t(UNIT_BYTES)); + plb.add_time(l_rbd_mirror_snapshot_remote_timestamp, "remote_timestamp", + "Timestamp of the remote snapshot", nullptr, prio); + plb.add_time(l_rbd_mirror_snapshot_local_timestamp, "local_timestamp", + "Timestamp of the local snapshot", nullptr, prio); + plb.add_time(l_rbd_mirror_snapshot_last_sync_time, "last_sync_time", + "Time taken to sync the last snapshot", nullptr, prio); + plb.add_u64(l_rbd_mirror_snapshot_last_sync_bytes, "last_sync_bytes", + "Bytes synced for the last snapshot", nullptr, prio, + unit_t(UNIT_BYTES)); + m_perf_counters = plb.create_perf_counters(); g_ceph_context->get_perfcounters_collection()->add(m_perf_counters); } diff --git a/src/tools/rbd_mirror/main.cc 
b/src/tools/rbd_mirror/main.cc index 9fecb17f44da3..85e95e6b6c3c1 100644 --- a/src/tools/rbd_mirror/main.cc +++ b/src/tools/rbd_mirror/main.cc @@ -68,27 +68,30 @@ int main(int argc, const char **argv) auto prio = g_ceph_context->_conf.get_val("rbd_mirror_perf_stats_prio"); { - PerfCountersBuilder plb(g_ceph_context, "rbd_mirror", + PerfCountersBuilder plb(g_ceph_context, "rbd_mirror_journal", rbd::mirror::l_rbd_mirror_journal_first, rbd::mirror::l_rbd_mirror_journal_last); - plb.add_u64_counter(rbd::mirror::l_rbd_mirror_replay, "replay", "Replays", - "r", prio); - plb.add_u64_counter(rbd::mirror::l_rbd_mirror_replay_bytes, "replay_bytes", - "Replayed data", "rb", prio, unit_t(UNIT_BYTES)); - plb.add_time_avg(rbd::mirror::l_rbd_mirror_replay_latency, "replay_latency", - "Replay latency", "rl", prio); + plb.add_u64_counter(rbd::mirror::l_rbd_mirror_journal_entries, "entries", + "Number of entries replayed", nullptr, prio); + plb.add_u64_counter(rbd::mirror::l_rbd_mirror_journal_replay_bytes, + "replay_bytes", "Total bytes replayed", nullptr, prio, + unit_t(UNIT_BYTES)); + plb.add_time_avg(rbd::mirror::l_rbd_mirror_journal_replay_latency, + "replay_latency", "Replay latency", nullptr, prio); g_journal_perf_counters = plb.create_perf_counters(); } { - PerfCountersBuilder plb(g_ceph_context, "rbd_mirror_snapshot", - rbd::mirror::l_rbd_mirror_snapshot_first, - rbd::mirror::l_rbd_mirror_snapshot_last); - plb.add_u64_counter(rbd::mirror::l_rbd_mirror_snapshot_replay_snapshots, - "snapshots", "Snapshots", "r", prio); - plb.add_time_avg(rbd::mirror::l_rbd_mirror_snapshot_replay_snapshots_time, - "snapshots_time", "Snapshots time", "rl", prio); - plb.add_u64_counter(rbd::mirror::l_rbd_mirror_snapshot_replay_bytes, - "replay_bytes", "Replayed data", "rb", prio, + PerfCountersBuilder plb( + g_ceph_context, "rbd_mirror_snapshot", + rbd::mirror::l_rbd_mirror_snapshot_first, + rbd::mirror::l_rbd_mirror_snapshot_remote_timestamp); + 
plb.add_u64_counter(rbd::mirror::l_rbd_mirror_snapshot_snapshots, + "snapshots", "Number of snapshots synced", nullptr, + prio); + plb.add_time_avg(rbd::mirror::l_rbd_mirror_snapshot_sync_time, "sync_time", + "Average sync time", nullptr, prio); + plb.add_u64_counter(rbd::mirror::l_rbd_mirror_snapshot_sync_bytes, + "sync_bytes", "Total bytes synced", nullptr, prio, unit_t(UNIT_BYTES)); g_snapshot_perf_counters = plb.create_perf_counters(); }