Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport 7.60.x] Sy/slurm sacct #19118

Open
wants to merge 1 commit into
base: 7.60.x
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions slurm/changelog.d/19117.fixed
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix an issue in which the sacct command parameters kept growing with each check iteration
8 changes: 5 additions & 3 deletions slurm/datadog_checks/slurm/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,18 +316,20 @@ def process_sdiag(self, output):
self.gauge('sdiag.enabled', 1)

def _update_sacct_params(self):
    """Rebuild ``self.sacct_cmd`` for the current check run.

    On the first run (``self.last_run_time`` is ``None``) the command is
    built from an unmodified copy of ``SACCT_PARAMS``. On later runs a
    single ``--starttime=now-<N>seconds`` argument is added, where ``N`` is
    the whole number of seconds elapsed since the previous run.

    Side effects: sets ``self.last_run_time`` to the current timestamp and
    replaces ``self.sacct_cmd``.
    """
    # Always work on a copy: appending to the module-level SACCT_PARAMS
    # would make the parameter list grow by one entry on every run.
    sacct_params = SACCT_PARAMS.copy()
    if self.last_run_time is not None:
        now = get_timestamp()
        delta = now - self.last_run_time
        start_time_param = f"--starttime=now-{int(delta)}seconds"

        # Drop any existing --starttime so exactly one is ever passed.
        sacct_params = [param for param in sacct_params if not param.startswith('--starttime')]
        sacct_params.append(start_time_param)
        # Logged inside the branch: start_time_param only exists here.
        self.log.debug("Updating sacct command with new timestamp: %s", start_time_param)

    self.last_run_time = get_timestamp()

    # Build the sacct command from the per-run parameter list.
    self.sacct_cmd = self.get_slurm_command('sacct', sacct_params)

def _process_sinfo_cpu_state(self, cpus_state, namespace, tags):
# "0/2/0/2"
Expand Down
19 changes: 19 additions & 0 deletions slurm/tests/test_unit.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
# (C) Datadog, Inc. 2024-present
# All rights reserved
# Licensed under a 3-clause BSD style license (see LICENSE)
import time
from unittest.mock import patch

import pytest

from datadog_checks.slurm import SlurmCheck
from datadog_checks.slurm.constants import SACCT_PARAMS

from .common import (
DEFAULT_SINFO_PATH,
Expand Down Expand Up @@ -50,6 +52,23 @@ def test_sinfo_command_params(collection_level, gpu_stats, expected_params, inst
assert check.sinfo_partition_cmd == expected_params


def test_acct_command_params(instance):
    """Check that the sacct command carries at most one ``--starttime`` argument.

    The check's clock is replaced by a controlled value instead of sleeping,
    so the test is fast and does not depend on wall-clock timing.
    """
    # Enable sacct collection on the instance configuration
    instance['collect_sacct_stats'] = True

    check = SlurmCheck('slurm', {}, [instance])
    original_params = list(SACCT_PARAMS)
    base_cmd = ['/usr/bin/sacct'] + original_params

    # Whole seconds keep the float subtraction in the check exact.
    fake_now = float(int(time.time()))
    with patch('datadog_checks.slurm.check.get_timestamp') as mock_timestamp:
        for elapsed in range(3):
            # Advance the fake clock by `elapsed` seconds since the previous run.
            fake_now += elapsed
            mock_timestamp.return_value = fake_now
            check._update_sacct_params()
            expected_cmd = base_cmd + ([f'--starttime=now-{elapsed}seconds'] if elapsed > 0 else [])
            assert check.sacct_cmd == expected_cmd

    # The shared module-level parameter list must never be mutated.
    assert SACCT_PARAMS == original_params


@pytest.mark.parametrize(
"expected_metrics, binary",
[
Expand Down
Loading