From 33803634e75a5ffa9ccd5dd37e492a77273cbc56 Mon Sep 17 00:00:00 2001 From: Dirk Sammel Date: Wed, 13 Sep 2023 14:33:37 +0200 Subject: [PATCH] add cpu_time_unit to config and implement conversion function for cpu_time --- CHANGELOG.md | 2 + media/website/content/_index.md | 94 ++++++------------- plugins/apel/src/auditor_apel_plugin/core.py | 19 ++++ .../apel/tests/test_auditor_apel_plugin.py | 43 +++++++++ 4 files changed, 92 insertions(+), 66 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e1f59935..2a38def4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +### Breaking changes +- Apel plugin: `cpu_time_unit` has to be present in the config file. See [Documentation](https://github.com/ALU-Schumacher/AUDITOR/blob/main/media/website/content/_index.md#apel-plugin) ([@dirksammel](https://github.com/dirksammel)) ### Added ### Changed diff --git a/media/website/content/_index.md b/media/website/content/_index.md index a8ca3eda..a4d6c44f 100644 --- a/media/website/content/_index.md +++ b/media/website/content/_index.md @@ -407,74 +407,35 @@ See below for all currently available collectors. The APEL plugin creates job summary records and sends them to APEL. 
The following fields need to be present in the config file: -``` -[logging] -log_level = - -[paths] -time_db_path = - -[intervals] -report_interval = - -[site] -publish_since = -sites_to_report = -site_name_mapping = -default_submit_host = -infrastructure_type = -benchmark_type = - -[auditor] -auditor_ip = -auditor_port = -auditor_timeout = -benchmark_name = -cores_name = -cpu_time_name = -nnodes_name = -meta_key_site = -meta_key_submithost = -meta_key_voms = -meta_key_username = - -[authentication] -auth_url = -ams_url = -client_cert = -client_key = -ca_path = -verify_ca = -``` - | Parameter | Description | -| --------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `log_level` | Can be set to `DEBUG`, `INFO`, `WARNING`, `ERROR`, or `CRITICAL` (with decreasing verbosity). | +|-----------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `log_level` | Can be set to `DEBUG`, `INFO`, `WARNING`, `ERROR`, or `CRITICAL` (with decreasing verbosity). | | `time_db_path` | Path of the `time.db`. The database should be located at a persistent path and stores the end time of the latest reported job, and the time of the latest report to APEL. | -| `report_interval` | Time in seconds between reports to APEL. | -| `publish_since` | Date and time (UTC) after which jobs will be published. Only relevant for first run when no `time.db` is present yet. | -| `sites_to_report` | List of sites that will be reported. Uses the site name as stored in the AUDITOR records. | -| `site_name_mapping` | Maps the site name as stored in the AUDITOR record to the name of the site in the GOCDB. | -| `default_submit_host` | Default submit host if this information is missing in the AUDITOR record. 
| -| `infrastructure_type` | Origin of the job, can be set to `grid` or `local`. | -| `benchmark_type` | Name of the benchmark that will be reported to APEL. | -| `auditor_ip` | IP of the AUDITOR instance. | -| `auditor_port` | Port of the AUDITOR instance. | -| `auditor_timeout` | Time in seconds after which the connection to the AUDITOR instance times out. | -| `benchmark_name` | Name of the `benchmark` field in the AUDITOR records. | -| `cores_name` | Name of the `cores` field in the AUDITOR records. | -| `cpu_time_name` | Name of the field that stores the total CPU time in the AUDITOR records. | -| `nnodes_name` | Name of the field that stores the number of nodes in the AUDITOR records. | -| `meta_key_site` | Name of the field that stores the name of the site in the AUDITOR records. | -| `meta_key_submithost` | Name of the field that stores the submithost in the AUDITOR records. | -| `meta_key_voms` | Name of the field that stores the VOMS information in the AUDITOR records. | -| `meta_key_user` | Name of the field that stores the GlobalUserName in the AUDITOR records. | -| `auth_url` | URL from which the APEL authentication token is received. | -| `ams_url` | URL to which the reports are sent. | -| `client_cert` | Path of the host certificate. | -| `client_key` | Path of the host key. | -| `ca_path` | Path of the local certificate folder. | -| `verify_ca` | Controls the verification of the certificate of the APEL server. Can be set to `True` or `False` (the latter might be necessary for local test setups). | +| `report_interval` | Time in seconds between reports to APEL. | +| `publish_since` | Date and time (UTC) after which jobs will be published. Only relevant for first run when no `time.db` is present yet. | +| `sites_to_report` | List of sites that will be reported. Uses the site name as stored in the AUDITOR records. | +| `site_name_mapping` | Maps the site name as stored in the AUDITOR record to the name of the site in the GOCDB. 
| +| `default_submit_host` | Default submit host if this information is missing in the AUDITOR record. | +| `infrastructure_type` | Origin of the job, can be set to `grid` or `local`. | +| `benchmark_type` | Name of the benchmark that will be reported to APEL. | +| `auditor_ip` | IP of the AUDITOR instance. | +| `auditor_port` | Port of the AUDITOR instance. | +| `auditor_timeout` | Time in seconds after which the connection to the AUDITOR instance times out. | +| `benchmark_name` | Name of the `benchmark` field in the AUDITOR records. | +| `cores_name` | Name of the `cores` field in the AUDITOR records. | +| `cpu_time_name` | Name of the field that stores the total CPU time in the AUDITOR records. | +| `cpu_time_unit` | Unit of total CPU time in the AUDITOR records, can be `seconds` or `milliseconds`. | +| `nnodes_name` | Name of the field that stores the number of nodes in the AUDITOR records. | +| `meta_key_site` | Name of the field that stores the name of the site in the AUDITOR records. | +| `meta_key_submithost` | Name of the field that stores the submithost in the AUDITOR records. | +| `meta_key_voms` | Name of the field that stores the VOMS information in the AUDITOR records. | +| `meta_key_username` | Name of the field that stores the GlobalUserName in the AUDITOR records. | +| `auth_url` | URL from which the APEL authentication token is received. | +| `ams_url` | URL to which the reports are sent. | +| `client_cert` | Path of the host certificate. | +| `client_key` | Path of the host key. | +| `ca_path` | Path of the local certificate folder. | +| `verify_ca` | Controls the verification of the certificate of the APEL server. Can be set to `True` or `False` (the latter might be necessary for local test setups). 
| Example config: @@ -504,6 +465,7 @@ auditor_timeout = 60 benchmark_name = hepscore23 cores_name = Cores cpu_time_name = TotalCPU +cpu_time_unit = milliseconds nnodes_name = NNodes meta_key_site = site_id meta_key_submithost = headnode diff --git a/plugins/apel/src/auditor_apel_plugin/core.py b/plugins/apel/src/auditor_apel_plugin/core.py index f207adae..ff2a1823 100644 --- a/plugins/apel/src/auditor_apel_plugin/core.py +++ b/plugins/apel/src/auditor_apel_plugin/core.py @@ -339,6 +339,8 @@ def create_summary_db(config, records): logging.critical(f"no {cpu_time_name} in components") raise + cputime = convert_to_seconds(cputime, config) + try: nodecount = component_dict[nnodes_name].amount except KeyError: @@ -650,6 +652,7 @@ def build_payload(msg): def send_payload(config, token, payload): ams_url = config["authentication"].get("ams_url") verify_ca = config["authentication"].getboolean("verify_ca") + if verify_ca: ca_path = config["authentication"].get("ca_path") else: @@ -664,3 +667,19 @@ def send_payload(config, token, payload): ) return post + + +def convert_to_seconds(cpu_time, config): + cpu_time_name = config["auditor"].get("cpu_time_name") + cpu_time_unit = config["auditor"].get("cpu_time_unit") + + if cpu_time_unit == "seconds": + return cpu_time + elif cpu_time_unit == "milliseconds": + return round(cpu_time / 1000) + else: + logging.critical( + f"Unknown unit for {cpu_time_name}: {cpu_time_unit}. " + "Possible values are seconds or milliseconds." 
+ ) + raise ValueError diff --git a/plugins/apel/tests/test_auditor_apel_plugin.py b/plugins/apel/tests/test_auditor_apel_plugin.py index 243a2089..cdf6dd6e 100644 --- a/plugins/apel/tests/test_auditor_apel_plugin.py +++ b/plugins/apel/tests/test_auditor_apel_plugin.py @@ -13,6 +13,7 @@ replace_record_string, get_records, get_site_id, + convert_to_seconds, ) from datetime import datetime, timezone import sqlite3 @@ -350,6 +351,7 @@ def test_create_summary_db(self): benchmark_name = "hepscore" cores_name = "Cores" cpu_time_name = "TotalCPU" + cpu_time_unit = "seconds" nnodes_name = "NNodes" meta_key_site = "site_id" meta_key_submithost = "headnode" @@ -369,6 +371,7 @@ def test_create_summary_db(self): "benchmark_name": benchmark_name, "cores_name": cores_name, "cpu_time_name": cpu_time_name, + "cpu_time_unit": cpu_time_unit, "nnodes_name": nnodes_name, "meta_key_site": meta_key_site, "meta_key_submithost": meta_key_submithost, @@ -518,6 +521,7 @@ def test_create_summary_db_fail(self): benchmark_name = "hepscore" cores_name = "Cores" cpu_time_name = "TotalCPU" + cpu_time_unit = "seconds" nnodes_name = "NNodes" meta_key_site = "site_id" meta_key_submithost = "headnode" @@ -537,6 +541,7 @@ def test_create_summary_db_fail(self): "benchmark_name": benchmark_name, "cores_name": cores_name, "cpu_time_name": cpu_time_name, + "cpu_time_unit": cpu_time_unit, "nnodes_name": nnodes_name, "meta_key_site": meta_key_site, "meta_key_submithost": meta_key_submithost, @@ -1028,3 +1033,41 @@ def test_get_site_id_fail(self): with pytest.raises(Exception) as pytest_error: get_site_id(rec_2, conf) assert pytest_error.type == AttributeError + + def test_convert_to_seconds(self): + cpu_time_name = "TotalCPU" + cpu_time_unit = "seconds" + + conf = configparser.ConfigParser() + conf["auditor"] = { + "cpu_time_name": cpu_time_name, + "cpu_time_unit": cpu_time_unit, + } + + result = convert_to_seconds(1100, conf) + assert result == 1100 + + result = convert_to_seconds(1500, conf) + assert 
result == 1500 + + conf["auditor"]["cpu_time_unit"] = "milliseconds" + + result = convert_to_seconds(1100, conf) + assert result == 1 + + result = convert_to_seconds(1500, conf) + assert result == 2 + + def test_convert_to_seconds_fail(self): + cpu_time_name = "TotalCPU" + cpu_time_unit = "hours" + + conf = configparser.ConfigParser() + conf["auditor"] = { + "cpu_time_name": cpu_time_name, + "cpu_time_unit": cpu_time_unit, + } + + with pytest.raises(Exception) as pytest_error: + convert_to_seconds(1100, conf) + assert pytest_error.type == ValueError