Track dataset size (elastic#1827)
With this commit we introduce a new metric "Dataset size" that tracks
not only local disk usage but also the disk usage of partially mounted
snapshots. This also allows tracking disk usage in Serverless.
danielmitterdorfer authored Jan 30, 2024
1 parent 2c59d03 commit 5e6b1bc
Showing 6 changed files with 24 additions and 0 deletions.
1 change: 1 addition & 0 deletions docs/metrics.rst
@@ -150,6 +150,7 @@ Rally stores the following metrics:
* ``flush_total_time``: Cumulative time used for index flush of primary shards, as reported by the index stats API. Note that this is not wall clock time. These metrics records also have a ``per-shard`` property that contains the times across primary shards in an array.
* ``flush_total_count``: Cumulative number of flushes of primary shards, as reported by the index stats API under ``_all/primaries``.
* ``final_index_size_bytes``: Final resulting index size on the file system after all nodes have been shut down at the end of the benchmark. It includes all files in the nodes' data directories (actual index files and translog).
* ``dataset_size_in_bytes``: Total data set size in bytes of the index. This includes the size of shards not stored fully on nodes, such as the cache for partially mounted indices (a retrieval sketch follows this list).
* ``store_size_in_bytes``: The size in bytes of the index (excluding the translog), as reported by the index stats API.
* ``translog_size_in_bytes``: The size in bytes of the translog, as reported by the index stats API.
* ``ml_processing_time``: A structure containing the minimum, mean, median and maximum bucket processing time in milliseconds per machine learning job. These metrics are only available if a machine learning job has been created in the respective benchmark.
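For illustration, the new ``dataset_size_in_bytes`` value comes straight from the index stats API's ``total_data_set_size_in_bytes`` field. A minimal sketch of reading it with the Python Elasticsearch client (the client setup and endpoint are assumptions, not part of this commit):

from elasticsearch import Elasticsearch

es = Elasticsearch("http://localhost:9200")  # assumed local cluster

# Store-level index stats across all indices.
stats = es.indices.stats(metric="store")

# Unlike size_in_bytes, this also counts shard data not held locally,
# e.g. the cache for partially mounted indices.
dataset_size = stats["_all"]["total"]["store"]["total_data_set_size_in_bytes"]
print(f"dataset_size_in_bytes: {dataset_size}")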
6 changes: 6 additions & 0 deletions docs/summary_report.rst
@@ -151,6 +151,12 @@ Total ZGC Pauses GC count
* **Definition**: The total number of Stop-The-World pauses performed by ZGC across the whole cluster as reported by the node stats API.
* **Corresponding metrics key**: ``node_total_zgc_pauses_gc_count``

Dataset size
------------

* **Definition**: Total data set size in bytes of the index. This includes the size of shards not stored fully on nodes, such as the cache for partially mounted indices.
* **Corresponding metrics key**: ``dataset_size_in_bytes``
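In Rally's summary report this surfaces as one more row in the disk usage section. An illustrative excerpt with made-up values:

|                  Dataset size |      |  104.4 |   GB |
|                    Store size |      |   98.7 |   GB |
|                 Translog size |      |    2.5 |   GB |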

Store size
----------

2 changes: 2 additions & 0 deletions esrally/metrics.py
@@ -2078,6 +2078,7 @@ def __call__(self):
result.memory_norms = self.median("segments_norms_memory_in_bytes")
result.memory_points = self.median("segments_points_memory_in_bytes")
result.memory_stored_fields = self.median("segments_stored_fields_memory_in_bytes")
result.dataset_size = self.sum("dataset_size_in_bytes")
result.store_size = self.sum("store_size_in_bytes")
result.translog_size = self.sum("translog_size_in_bytes")
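The ``self.sum("dataset_size_in_bytes")`` call aggregates the metric over all stored samples (``self.median`` works analogously). A simplified, hypothetical sketch of that kind of aggregation over plain metric records:

def sum_metric(records, name):
    # Sum every sample of the named metric; None if there are no samples.
    values = [r["value"] for r in records if r["name"] == name]
    return sum(values) if values else None

records = [{"name": "dataset_size_in_bytes", "value": 112113867510}]
assert sum_metric(records, "dataset_size_in_bytes") == 112113867510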

@@ -2264,6 +2265,7 @@ def __init__(self, d=None):
self.memory_norms = self.v(d, "memory_norms")
self.memory_points = self.v(d, "memory_points")
self.memory_stored_fields = self.v(d, "memory_stored_fields")
self.dataset_size = self.v(d, "dataset_size")
self.store_size = self.v(d, "store_size")
self.translog_size = self.v(d, "translog_size")
self.segment_count = self.v(d, "segment_count")
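Here ``self.v(d, "dataset_size")`` restores the field from a serialized stats document; presumably a defaulting lookup along these lines (a sketch, not the actual implementation):

def v(d, k, default=None):
    # Return d[k] when the document and key exist, else the default.
    return d.get(k, default) if d else default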
10 changes: 10 additions & 0 deletions esrally/reporter.py
@@ -292,6 +292,7 @@ def _report_gc_metrics(self, stats):

def _report_disk_usage(self, stats):
return self._join(
self._line("Dataset size", "", stats.dataset_size, "GB", convert.bytes_to_gb),
self._line("Store size", "", stats.store_size, "GB", convert.bytes_to_gb),
self._line("Translog size", "", stats.translog_size, "GB", convert.bytes_to_gb),
)
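``convert.bytes_to_gb`` renders the raw byte counts in gigabytes for the report. Assuming a binary gigabyte (1024^3 bytes), the conversion amounts to:

def bytes_to_gb(num_bytes):
    # Binary gigabytes assumed; returns None when the metric is absent.
    if num_bytes is not None:
        return num_bytes / (1024 * 1024 * 1024)
    return None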
@@ -886,6 +887,15 @@ def _count_metric(metric_prefix, description):

def _report_disk_usage(self, baseline_stats, contender_stats):
return self._join(
self._line(
"Dataset size",
baseline_stats.dataset_size,
contender_stats.dataset_size,
"",
"GB",
treat_increase_as_improvement=False,
formatter=convert.bytes_to_gb,
),
self._line(
"Store size",
baseline_stats.store_size,
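In the comparison report, each ``self._line(...)`` pairs the baseline and contender values, and ``treat_increase_as_improvement=False`` marks a growing dataset size as a regression. A hypothetical sketch of that comparison logic:

def compare(baseline, contender, treat_increase_as_improvement):
    delta = contender - baseline
    if delta == 0:
        return delta, None  # no change either way
    # A positive delta is only an improvement if increases are desirable.
    return delta, (delta > 0) == treat_increase_as_improvement

# Dataset size grew, so the contender did not improve:
assert compare(100, 104, treat_increase_as_improvement=False) == (4, False)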
3 changes: 3 additions & 0 deletions esrally/telemetry.py
@@ -2039,6 +2039,9 @@ def on_benchmark_stop(self):
self.add_metrics(self.extract_value(p, ["segments", "terms_memory_in_bytes"]), "segments_terms_memory_in_bytes", "byte")
self.add_metrics(self.extract_value(p, ["segments", "norms_memory_in_bytes"]), "segments_norms_memory_in_bytes", "byte")
self.add_metrics(self.extract_value(p, ["segments", "points_memory_in_bytes"]), "segments_points_memory_in_bytes", "byte")
self.add_metrics(
self.extract_value(index_stats, ["_all", "total", "store", "total_data_set_size_in_bytes"]), "dataset_size_in_bytes", "byte"
)
self.add_metrics(self.extract_value(index_stats, ["_all", "total", "store", "size_in_bytes"]), "store_size_in_bytes", "byte")
self.add_metrics(self.extract_value(index_stats, ["_all", "total", "translog", "size_in_bytes"]), "translog_size_in_bytes", "byte")

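``extract_value`` walks the nested index stats response along the given key path. A minimal sketch of such a traversal (the exact fallback behavior is an assumption):

def extract_value(stats, path, default=0):
    value = stats
    for key in path:
        try:
            value = value[key]
        except (KeyError, TypeError):
            return default
    return value

index_stats = {"_all": {"total": {"store": {"total_data_set_size_in_bytes": 112113867510}}}}
assert extract_value(index_stats, ["_all", "total", "store", "total_data_set_size_in_bytes"]) == 112113867510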
2 changes: 2 additions & 0 deletions tests/telemetry_test.py
@@ -4025,6 +4025,7 @@ def test_stores_available_index_stats(self, metrics_store_cluster_value, metrics
"total": {
"store": {
"size_in_bytes": 2113867510,
"total_data_set_size_in_bytes": 112113867510,
},
"translog": {
"operations": 6840000,
@@ -4208,6 +4209,7 @@ def test_stores_available_index_stats(self, metrics_store_cluster_value, metrics
mock.call("segments_terms_memory_in_bytes", 256, "byte"),
# we don't have norms, so nothing should have been called
mock.call("store_size_in_bytes", 2113867510, "byte"),
mock.call("dataset_size_in_bytes", 112113867510, "byte"),
mock.call("translog_size_in_bytes", 2647984713, "byte"),
],
any_order=True,
