Track dataset size #1827

Merged · 1 commit merged on Jan 30, 2024
1 change: 1 addition & 0 deletions docs/metrics.rst
@@ -150,6 +150,7 @@ Rally stores the following metrics:
* ``flush_total_time``: Cumulative time used for index flush of primary shards, as reported by the index stats API. Note that this is not wall-clock time. These metric records also have a ``per-shard`` property that contains the times across primary shards in an array.
* ``flush_total_count``: Cumulative number of flushes of primary shards, as reported by the index stats API under ``_all/primaries``.
* ``final_index_size_bytes``: Final resulting index size on the file system after all nodes have been shut down at the end of the benchmark. It includes all files in the nodes' data directories (actual index files and translog).
* ``dataset_size_in_bytes``: Total data set size in bytes of the index. This includes the size of shards not stored fully on nodes, such as the cache for partially mounted indices.
* ``store_size_in_bytes``: The size in bytes of the index (excluding the translog), as reported by the index stats API.
* ``translog_size_in_bytes``: The size in bytes of the translog, as reported by the index stats API.
* ``ml_processing_time``: A structure containing the minimum, mean, median and maximum bucket processing time in milliseconds per machine learning job. These metrics are only available if a machine learning job has been created in the respective benchmark.
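For context, the new `dataset_size_in_bytes` metric maps to the `total_data_set_size_in_bytes` field that the index stats API reports next to `size_in_bytes` (see the telemetry change further down). A minimal sketch of reading both values directly, assuming the official `elasticsearch` Python client and a locally reachable cluster:

```python
from elasticsearch import Elasticsearch

# Assumes a locally reachable cluster; adjust URL/auth for your environment.
es = Elasticsearch("http://localhost:9200")

# Store stats aggregated across all indices ("_all") and all shard copies ("total").
store = es.indices.stats(metric="store")["_all"]["total"]["store"]

# size_in_bytes: bytes actually held on the nodes' local disks.
# total_data_set_size_in_bytes: also counts shards not stored fully on the nodes,
# e.g. the cache for partially mounted (searchable snapshot) indices.
print("store_size_in_bytes:  ", store["size_in_bytes"])
print("dataset_size_in_bytes:", store["total_data_set_size_in_bytes"])
```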
6 changes: 6 additions & 0 deletions docs/summary_report.rst
@@ -151,6 +151,12 @@ Total ZGC Pauses GC count
* **Definition**: The total number of Stop-The-World pauses performed by ZGC across the whole cluster as reported by the node stats API.
* **Corresponding metrics key**: ``node_total_zgc_pauses_gc_count``

Dataset size
------------

* **Definition**: Total data set size in bytes of the index. This includes the size of shards not stored fully on nodes, such as the cache for partially mounted indices.
* **Corresponding metrics key**: ``dataset_size_in_bytes``

Store size
----------

2 changes: 2 additions & 0 deletions esrally/metrics.py
@@ -2078,6 +2078,7 @@ def __call__(self):
result.memory_norms = self.median("segments_norms_memory_in_bytes")
result.memory_points = self.median("segments_points_memory_in_bytes")
result.memory_stored_fields = self.median("segments_stored_fields_memory_in_bytes")
result.dataset_size = self.sum("dataset_size_in_bytes")
result.store_size = self.sum("store_size_in_bytes")
result.translog_size = self.sum("translog_size_in_bytes")

@@ -2264,6 +2265,7 @@ def __init__(self, d=None):
self.memory_norms = self.v(d, "memory_norms")
self.memory_points = self.v(d, "memory_points")
self.memory_stored_fields = self.v(d, "memory_stored_fields")
self.dataset_size = self.v(d, "dataset_size")
self.store_size = self.v(d, "store_size")
self.translog_size = self.v(d, "translog_size")
self.segment_count = self.v(d, "segment_count")
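The aggregation above mirrors the existing handling of `store_size` and `translog_size`: per-node `dataset_size_in_bytes` samples are summed into a single `dataset_size` field on the summary stats object. A simplified, hypothetical illustration of that idea (not Rally's actual implementation, which reads the samples from the metrics store):

```python
# Hypothetical per-node samples; the values are illustrative only.
samples = [
    {"name": "dataset_size_in_bytes", "value": 112_113_867_510},
    {"name": "dataset_size_in_bytes", "value": 98_000_000_000},
    {"name": "store_size_in_bytes", "value": 2_113_867_510},
]

def sum_metric(samples, name):
    """Sum all samples of one metric across nodes; None if the metric is absent."""
    values = [s["value"] for s in samples if s["name"] == name]
    return sum(values) if values else None

dataset_size = sum_metric(samples, "dataset_size_in_bytes")
print(dataset_size)  # 210113867510
```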
10 changes: 10 additions & 0 deletions esrally/reporter.py
@@ -292,6 +292,7 @@ def _report_gc_metrics(self, stats):

def _report_disk_usage(self, stats):
return self._join(
self._line("Dataset size", "", stats.dataset_size, "GB", convert.bytes_to_gb),
self._line("Store size", "", stats.store_size, "GB", convert.bytes_to_gb),
self._line("Translog size", "", stats.translog_size, "GB", convert.bytes_to_gb),
)
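As with the neighbouring lines, the raw byte count is converted for display with `convert.bytes_to_gb`. A rough stand-in for that helper, assuming a binary (1024³) gigabyte like the other size lines in the report (the actual function lives in `esrally/utils/convert.py`):

```python
# Hedged stand-in for esrally.utils.convert.bytes_to_gb; assumes 1 GB == 1024 ** 3 bytes.
def bytes_to_gb(num_bytes):
    if num_bytes is None:
        return None
    return num_bytes / (1024 ** 3)

# Example with the dataset size used in the telemetry test fixture below.
print(f"Dataset size: {bytes_to_gb(112_113_867_510):.2f} GB")  # ~104.41 GB
```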
@@ -886,6 +887,15 @@ def _count_metric(metric_prefix, description):

def _report_disk_usage(self, baseline_stats, contender_stats):
return self._join(
self._line(
"Dataset size",
baseline_stats.dataset_size,
contender_stats.dataset_size,
"",
"GB",
treat_increase_as_improvement=False,
formatter=convert.bytes_to_gb,
),
self._line(
"Store size",
baseline_stats.store_size,
3 changes: 3 additions & 0 deletions esrally/telemetry.py
@@ -2039,6 +2039,9 @@ def on_benchmark_stop(self):
self.add_metrics(self.extract_value(p, ["segments", "terms_memory_in_bytes"]), "segments_terms_memory_in_bytes", "byte")
self.add_metrics(self.extract_value(p, ["segments", "norms_memory_in_bytes"]), "segments_norms_memory_in_bytes", "byte")
self.add_metrics(self.extract_value(p, ["segments", "points_memory_in_bytes"]), "segments_points_memory_in_bytes", "byte")
self.add_metrics(
self.extract_value(index_stats, ["_all", "total", "store", "total_data_set_size_in_bytes"]), "dataset_size_in_bytes", "byte"
)
self.add_metrics(self.extract_value(index_stats, ["_all", "total", "store", "size_in_bytes"]), "store_size_in_bytes", "byte")
self.add_metrics(self.extract_value(index_stats, ["_all", "total", "translog", "size_in_bytes"]), "translog_size_in_bytes", "byte")

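The two extraction paths added above read from the `store` section of the index stats response. A minimal, illustrative excerpt of that structure (field names match the paths above; the values are the ones used in the test fixture below):

```python
# Illustrative excerpt of an index stats response; only the fields read here are shown.
index_stats = {
    "_all": {
        "total": {
            "store": {
                "size_in_bytes": 2113867510,
                "total_data_set_size_in_bytes": 112113867510,
            },
            "translog": {"size_in_bytes": 2647984713},
        }
    }
}

# Walk the same path the telemetry device extracts.
value = index_stats
for key in ["_all", "total", "store", "total_data_set_size_in_bytes"]:
    value = value[key]
print(value)  # 112113867510
```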
2 changes: 2 additions & 0 deletions tests/telemetry_test.py
@@ -4025,6 +4025,7 @@ def test_stores_available_index_stats(self, metrics_store_cluster_value, metrics
"total": {
"store": {
"size_in_bytes": 2113867510,
"total_data_set_size_in_bytes": 112113867510,
},
"translog": {
"operations": 6840000,
@@ -4208,6 +4209,7 @@ def test_stores_available_index_stats(self, metrics_store_cluster_value, metrics
mock.call("segments_terms_memory_in_bytes", 256, "byte"),
# we don't have norms, so nothing should have been called
mock.call("store_size_in_bytes", 2113867510, "byte"),
mock.call("dataset_size_in_bytes", 112113867510, "byte"),
mock.call("translog_size_in_bytes", 2647984713, "byte"),
],
any_order=True,