diff --git a/docs/telemetry.rst b/docs/telemetry.rst
index cab91f26c..2c97dc35d 100644
--- a/docs/telemetry.rst
+++ b/docs/telemetry.rst
@@ -37,6 +37,7 @@ If you invoke ``esrally list telemetry``, it will show which telemetry devices a
     data-stream-stats        Data Stream Stats           Regularly samples data stream stats
     ingest-pipeline-stats    Ingest Pipeline Stats       Reports Ingest Pipeline stats at the end of the benchmark.
     disk-usage-stats         Disk usage of each field    Runs the indices disk usage API after benchmarking
+    geoip-stats              GeoIp Stats                 Writes geo ip stats to the metrics store at the end of the benchmark.
 
 Keep in mind that each telemetry device may incur a runtime overhead which can skew results.
 
@@ -414,3 +415,15 @@ Example of recorded documents given two nodes in the target cluster::
 
 .. note:: This telemetry device is only applicable to `Elastic Serverless `_ and requires elevated privileges only available to Elastic developers.
+
+geoip-stats
+----------------
+
+The geoip-stats telemetry device fetches data from the `GeoIP Stats API `_ at the end of the run, and stores GeoIP cache stats as node-level metrics. This is available only in Elasticsearch 8.14.0 and higher. Stored metrics include:
+
+* ``geoip_cache_count``: The number of items in the cache.
+* ``geoip_cache_hits``: The number of times an IP address was found in the cache.
+* ``geoip_cache_misses``: The number of times an IP address was not found in the cache.
+* ``geoip_cache_evictions``: The number of times an entry was evicted from the cache because the max cache size had been reached.
+* ``geoip_cache_hits_time_in_millis``: The total amount of time spent fetching data from the cache, for cache hits only.
+* ``geoip_cache_misses_time_in_millis``: The total amount of time spent fetching data from the cache, for cache misses only. This includes time spent fetching data from the GeoIP database.
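For reference, the cache stats listed above come straight from the GeoIP Stats API. Below is a minimal sketch (not part of the patch) of fetching them with the elasticsearch-py client; the endpoint and the printed fields are illustrative only::

    from elasticsearch import Elasticsearch

    # Hypothetical endpoint; substitute the cluster under test.
    es = Elasticsearch("http://localhost:9200")

    # Same API the telemetry device calls: GET _ingest/geoip/stats
    response = es.ingest.geo_ip_stats()

    for node_id, node in response.body.get("nodes", {}).items():
        cache_stats = node.get("cache_stats", {})
        print(node_id, cache_stats.get("hits"), cache_stats.get("misses"), cache_stats.get("evictions"))
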
diff --git a/esrally/driver/driver.py b/esrally/driver/driver.py
index dc267106e..e762e42df 100644
--- a/esrally/driver/driver.py
+++ b/esrally/driver/driver.py
@@ -650,6 +650,7 @@ def prepare_telemetry(self, es, enable, index_names, data_stream_names, build_ha
                 telemetry.IngestPipelineStats(es, self.metrics_store),
                 telemetry.DiskUsageStats(telemetry_params, es_default, self.metrics_store, index_names, data_stream_names),
                 telemetry.BlobStoreStats(telemetry_params, es, self.metrics_store),
+                telemetry.GeoIpStats(es_default, self.metrics_store),
             ]
         else:
             devices = []
diff --git a/esrally/telemetry.py b/esrally/telemetry.py
index 2c1b3c75f..85d5fe752 100644
--- a/esrally/telemetry.py
+++ b/esrally/telemetry.py
@@ -48,6 +48,7 @@ def list_telemetry():
             DataStreamStats,
             IngestPipelineStats,
             DiskUsageStats,
+            GeoIpStats,
         ]
     ]
     console.println(tabulate.tabulate(devices, ["Command", "Name", "Description"]))
@@ -2551,3 +2552,40 @@ def object_store_stats(self, stats):
 
     def operational_backup_service_stats(self, stats):
         return flatten_stats_fields(prefix="operational_backup", stats=stats.get("operational_backup_service_stats", {}))
+
+
+class GeoIpStats(TelemetryDevice):
+    internal = False
+    serverless_status = serverless.Status.Internal
+    command = "geoip-stats"
+    human_name = "GeoIp Stats"
+    help = "Writes geo ip stats to the metrics store at the end of the benchmark."
+
+    def __init__(self, client, metrics_store):
+        super().__init__()
+        self.client = client
+        self.metrics_store = metrics_store
+
+    def on_benchmark_stop(self):
+        self.logger.info("Gathering GeoIp stats at benchmark end")
+        # First, build a map of node id to node name, because the geoip stats API doesn't return the node name:
+        try:
+            nodes_info = self.client.nodes.info(node_id="_all")["nodes"]
+        except Exception:
+            self.logger.exception("Could not retrieve nodes info")
+            nodes_info = {}
+        node_id_to_name_dict = {}
+        for node_id, node in nodes_info.items():
+            node_id_to_name_dict[node_id] = node["name"]
+        geoip_stats = self.client.ingest.geo_ip_stats()
+        stats_dict = geoip_stats.body
+        nodes_dict = stats_dict["nodes"]
+        for node_id, node in nodes_dict.items():
+            node_name = node_id_to_name_dict.get(node_id, node_id)  # fall back to the node id if the name lookup failed
+            cache_stats = node["cache_stats"]
+            self.metrics_store.put_value_node_level(node_name, "geoip_cache_count", cache_stats["count"])
+            self.metrics_store.put_value_node_level(node_name, "geoip_cache_hits", cache_stats["hits"])
+            self.metrics_store.put_value_node_level(node_name, "geoip_cache_misses", cache_stats["misses"])
+            self.metrics_store.put_value_node_level(node_name, "geoip_cache_evictions", cache_stats["evictions"])
+            self.metrics_store.put_value_node_level(node_name, "geoip_cache_hits_time_in_millis", cache_stats["hits_time_in_millis"])
+            self.metrics_store.put_value_node_level(node_name, "geoip_cache_misses_time_in_millis", cache_stats["misses_time_in_millis"])
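
To make the wiring above concrete, here is a minimal, self-contained usage sketch that drives ``GeoIpStats`` against stubbed client and metrics-store objects. The node id, node name, and all numbers are made up, and the stubs only mimic the small slice of the client API the device actually touches::

    from unittest import mock

    from esrally import telemetry

    # Stub client: nodes.info() supplies the id -> name mapping, and
    # ingest.geo_ip_stats() returns an object whose .body holds a hypothetical
    # GeoIP stats payload shaped like the real API response.
    client = mock.Mock()
    client.nodes.info.return_value = {"nodes": {"node-0-id": {"name": "rally-node-0"}}}
    client.ingest.geo_ip_stats.return_value = mock.Mock(
        body={
            "nodes": {
                "node-0-id": {
                    "cache_stats": {
                        "count": 100,
                        "hits": 950,
                        "misses": 50,
                        "evictions": 0,
                        "hits_time_in_millis": 5,
                        "misses_time_in_millis": 57,
                    }
                }
            }
        }
    )
    metrics_store = mock.Mock()

    device = telemetry.GeoIpStats(client, metrics_store)
    device.on_benchmark_stop()

    # One node-level metric is written per cache stat, e.g.
    # call("rally-node-0", "geoip_cache_hits", 950)
    for recorded_call in metrics_store.put_value_node_level.call_args_list:
        print(recorded_call)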