From 287c3f463987e5eee5c52079bf136a976523a48c Mon Sep 17 00:00:00 2001 From: Yadnesh Kulkarni Date: Tue, 26 Nov 2024 06:28:07 -0500 Subject: [PATCH] Explicitly enable metrics needed for Kepler dashboard - Disable estimated idle power metrics - Enable container, VM and process metrics - Time out healthcheck curl command after 5 seconds - In the install task, start the edpm_kepler service instead of enabling and restarting it --- .../files/healthchecks/exporter/healthcheck | 3 ++- roles/edpm_telemetry_power_monitoring/tasks/install.yml | 3 +-- .../templates/kepler.json.j2 | 7 ++++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/roles/edpm_telemetry_power_monitoring/files/healthchecks/exporter/healthcheck b/roles/edpm_telemetry_power_monitoring/files/healthchecks/exporter/healthcheck index 5f6baf851..53a669f60 100644 --- a/roles/edpm_telemetry_power_monitoring/files/healthchecks/exporter/healthcheck +++ b/roles/edpm_telemetry_power_monitoring/files/healthchecks/exporter/healthcheck @@ -16,9 +16,10 @@ # under the License. URL="http://0.0.0.0:8888/healthz" +TIMEOUT=5 # Timeout in seconds # Get the HTTP status code and response body using curl -RESPONSE=$(curl -s -w "%{http_code}" $URL) +RESPONSE=$(curl -s -w "%{http_code}" $URL --max-time $TIMEOUT) BODY=${RESPONSE:0:-3} # Extract the body (all but the last 3 characters) HTTP_CODE=${RESPONSE: -3} # Extract the last 3 characters as the HTTP status code diff --git a/roles/edpm_telemetry_power_monitoring/tasks/install.yml b/roles/edpm_telemetry_power_monitoring/tasks/install.yml index 9d6aa547e..16dfc31ea 100644 --- a/roles/edpm_telemetry_power_monitoring/tasks/install.yml +++ b/roles/edpm_telemetry_power_monitoring/tasks/install.yml @@ -63,8 +63,7 @@ become: true ansible.builtin.systemd: name: edpm_kepler.service - enabled: true - state: restarted + state: started - name: List deployed health check scripts ansible.builtin.find: diff --git a/roles/edpm_telemetry_power_monitoring/templates/kepler.json.j2 b/roles/edpm_telemetry_power_monitoring/templates/kepler.json.j2 index 8c7e04e1e..3b839ab63
100644 --- a/roles/edpm_telemetry_power_monitoring/templates/kepler.json.j2 +++ b/roles/edpm_telemetry_power_monitoring/templates/kepler.json.j2 @@ -3,13 +3,14 @@ "privileged": "true", "restart": "always", "ports": ["8888:8888"], - "command": "-v 2", + "command": "-v=2", "recreate": true, "environment": { "ENABLE_GPU": "true", + "EXPOSE_CONTAINER_METRICS": "true", "ENABLE_PROCESS_METRICS": "true", - "EXPOSE_ESTIMATED_IDLE_POWER_METRICS": "true", "EXPOSE_VM_METRICS": "true", + "EXPOSE_ESTIMATED_IDLE_POWER_METRICS": "false", "LIBVIRT_METADATA_URI": "http://openstack.org/xmlns/libvirt/nova/1.1" }, {% if edpm_telemetry_power_monitoring_healthcheck %} @@ -21,7 +22,7 @@ "volumes": [ "/lib/modules:/lib/modules:ro", "/run/libvirt:/run/libvirt:shared,ro", - "/sys/:/sys/", + "/sys:/sys", "/proc:/proc" ] }