diff --git a/slurm/changelog.d/19117.fixed b/slurm/changelog.d/19117.fixed new file mode 100644 index 0000000000000..11da713cd1f7d --- /dev/null +++ b/slurm/changelog.d/19117.fixed @@ -0,0 +1 @@ +Fix issue in which the sacct params kept growing with each iteration diff --git a/slurm/datadog_checks/slurm/check.py b/slurm/datadog_checks/slurm/check.py index f06b2b9cca423..f5f1e0a7b94e0 100644 --- a/slurm/datadog_checks/slurm/check.py +++ b/slurm/datadog_checks/slurm/check.py @@ -316,18 +316,20 @@ def process_sdiag(self, output): self.gauge('sdiag.enabled', 1) def _update_sacct_params(self): + sacct_params = SACCT_PARAMS.copy() if self.last_run_time is not None: now = get_timestamp() delta = now - self.last_run_time start_time_param = f"--starttime=now-{int(delta)}seconds" - SACCT_PARAMS.append(start_time_param) + sacct_params = [param for param in sacct_params if not param.startswith('--starttime')] + sacct_params.append(start_time_param) + self.log.debug("Updating sacct command with new timestamp: %s", start_time_param) self.last_run_time = get_timestamp() # Update the sacct command with the dynamic SACCT_PARAMS - self.log.debug("Updating sacct command with new timestamp: %s", start_time_param) - self.sacct_cmd = self.get_slurm_command('sacct', SACCT_PARAMS) + self.sacct_cmd = self.get_slurm_command('sacct', sacct_params) def _process_sinfo_cpu_state(self, cpus_state, namespace, tags): # "0/2/0/2" diff --git a/slurm/tests/test_unit.py b/slurm/tests/test_unit.py index 4c56ffcf0cd73..5562b4540db63 100644 --- a/slurm/tests/test_unit.py +++ b/slurm/tests/test_unit.py @@ -1,11 +1,13 @@ # (C) Datadog, Inc. 2024-present # All rights reserved # Licensed under a 3-clause BSD style license (see LICENSE) +import time from unittest.mock import patch import pytest from datadog_checks.slurm import SlurmCheck +from datadog_checks.slurm.constants import SACCT_PARAMS from .common import ( DEFAULT_SINFO_PATH, @@ -50,6 +52,23 @@ def test_sinfo_command_params(collection_level, gpu_stats, expected_params, inst assert check.sinfo_partition_cmd == expected_params +def test_acct_command_params(instance): + # Mock the instance configuration + instance['collect_sacct_stats'] = True + + check = SlurmCheck('slurm', {}, [instance]) + base_cmd = ['/usr/bin/sacct'] + SACCT_PARAMS + + # Test to ensure that the sacct is being constructed correctly + loops = [0, 1, 2] + for loop in loops: + if loop > 0: + time.sleep(loop) + check._update_sacct_params() + expected_cmd = base_cmd + ([f'--starttime=now-{loop}seconds'] if loop > 0 else []) + assert check.sacct_cmd == expected_cmd + + @pytest.mark.parametrize( "expected_metrics, binary", [