diff --git a/docs/metrics.rst b/docs/metrics.rst
index af0ac6c5b..9f9627d22 100644
--- a/docs/metrics.rst
+++ b/docs/metrics.rst
@@ -150,6 +150,7 @@ Rally stores the following metrics:
 * ``flush_total_time``: Cumulative time used for index flush of primary shards, as reported by the index stats API. Note that this is not Wall clock time.  These metrics records also have a ``per-shard`` property that contains the times across primary shards in an array.
 * ``flush_total_count``: Cumulative number of flushes of primary shards, as reported by index stats API under ``_all/primaries``.
 * ``final_index_size_bytes``: Final resulting index size on the file system after all nodes have been shutdown at the end of the benchmark. It includes all files in the nodes' data directories (actual index files and translog).
+* ``dataset_size_in_bytes``: Total data set size in bytes of the index, as reported by the index stats API under ``_all/total``. This includes the size of shards that are not fully stored on the nodes, such as the cache for partially mounted indices.
 * ``store_size_in_bytes``: The size in bytes of the index (excluding the translog), as reported by the index stats API.
 * ``translog_size_in_bytes``: The size in bytes of the translog, as reported by the index stats API.
 * ``ml_processing_time``: A structure containing the minimum, mean, median and maximum bucket processing time in milliseconds per machine learning job. These metrics are only available if a machine learning job has been created in the respective benchmark.
diff --git a/docs/summary_report.rst b/docs/summary_report.rst
index f5882a0a3..394a3cad1 100644
--- a/docs/summary_report.rst
+++ b/docs/summary_report.rst
@@ -151,6 +151,12 @@ Total ZGC Pauses GC count
 * **Definition**: The total number of Stop-The-World pauses performed by ZGC across the whole cluster as reported by the node stats API.
 * **Corresponding metrics key**: ``node_total_zgc_pauses_gc_count``
 
+Dataset size
+------------
+
+* **Definition**: Total data set size in bytes of the index, as reported by the index stats API. This includes the size of shards that are not fully stored on the nodes, such as the cache for partially mounted indices.
+* **Corresponding metrics key**: ``dataset_size_in_bytes``
+
 Store size
 ----------
 
diff --git a/esrally/metrics.py b/esrally/metrics.py
index 61e00d4dd..03fe531e5 100644
--- a/esrally/metrics.py
+++ b/esrally/metrics.py
@@ -2078,6 +2078,7 @@ def __call__(self):
         result.memory_norms = self.median("segments_norms_memory_in_bytes")
         result.memory_points = self.median("segments_points_memory_in_bytes")
         result.memory_stored_fields = self.median("segments_stored_fields_memory_in_bytes")
+        result.dataset_size = self.sum("dataset_size_in_bytes")
         result.store_size = self.sum("store_size_in_bytes")
         result.translog_size = self.sum("translog_size_in_bytes")
 
@@ -2264,6 +2265,7 @@ def __init__(self, d=None):
         self.memory_norms = self.v(d, "memory_norms")
         self.memory_points = self.v(d, "memory_points")
         self.memory_stored_fields = self.v(d, "memory_stored_fields")
+        self.dataset_size = self.v(d, "dataset_size")
         self.store_size = self.v(d, "store_size")
         self.translog_size = self.v(d, "translog_size")
         self.segment_count = self.v(d, "segment_count")
diff --git a/esrally/reporter.py b/esrally/reporter.py
index 73bd7bef9..900b11cb7 100644
--- a/esrally/reporter.py
+++ b/esrally/reporter.py
@@ -292,6 +292,7 @@ def _report_gc_metrics(self, stats):
 
     def _report_disk_usage(self, stats):
         return self._join(
+            self._line("Dataset size", "", stats.dataset_size, "GB", convert.bytes_to_gb),
             self._line("Store size", "", stats.store_size, "GB", convert.bytes_to_gb),
             self._line("Translog size", "", stats.translog_size, "GB", convert.bytes_to_gb),
         )
@@ -886,6 +887,15 @@ def _count_metric(metric_prefix, description):
 
     def _report_disk_usage(self, baseline_stats, contender_stats):
         return self._join(
+            self._line(
+                "Dataset size",
+                baseline_stats.dataset_size,
+                contender_stats.dataset_size,
+                "",
+                "GB",
+                treat_increase_as_improvement=False,
+                formatter=convert.bytes_to_gb,
+            ),
             self._line(
                 "Store size",
                 baseline_stats.store_size,
diff --git a/esrally/telemetry.py b/esrally/telemetry.py
index 9343fead9..2c1b3c75f 100644
--- a/esrally/telemetry.py
+++ b/esrally/telemetry.py
@@ -2039,6 +2039,9 @@ def on_benchmark_stop(self):
         self.add_metrics(self.extract_value(p, ["segments", "terms_memory_in_bytes"]), "segments_terms_memory_in_bytes", "byte")
         self.add_metrics(self.extract_value(p, ["segments", "norms_memory_in_bytes"]), "segments_norms_memory_in_bytes", "byte")
         self.add_metrics(self.extract_value(p, ["segments", "points_memory_in_bytes"]), "segments_points_memory_in_bytes", "byte")
+        self.add_metrics(
+            self.extract_value(index_stats, ["_all", "total", "store", "total_data_set_size_in_bytes"]), "dataset_size_in_bytes", "byte"
+        )
         self.add_metrics(self.extract_value(index_stats, ["_all", "total", "store", "size_in_bytes"]), "store_size_in_bytes", "byte")
         self.add_metrics(self.extract_value(index_stats, ["_all", "total", "translog", "size_in_bytes"]), "translog_size_in_bytes", "byte")
 
diff --git a/tests/telemetry_test.py b/tests/telemetry_test.py
index 5dfe96a5d..c14b79c26 100644
--- a/tests/telemetry_test.py
+++ b/tests/telemetry_test.py
@@ -4025,6 +4025,7 @@ def test_stores_available_index_stats(self, metrics_store_cluster_value, metrics
                 "total": {
                     "store": {
                         "size_in_bytes": 2113867510,
+                        "total_data_set_size_in_bytes": 112113867510,
                     },
                     "translog": {
                         "operations": 6840000,
@@ -4208,6 +4209,7 @@ def test_stores_available_index_stats(self, metrics_store_cluster_value, metrics
                 mock.call("segments_terms_memory_in_bytes", 256, "byte"),
                 # we don't have norms, so nothing should have been called
                 mock.call("store_size_in_bytes", 2113867510, "byte"),
+                mock.call("dataset_size_in_bytes", 112113867510, "byte"),
                 mock.call("translog_size_in_bytes", 2647984713, "byte"),
             ],
             any_order=True,