diff --git a/.gitattributes b/.gitattributes index b99b9c5..d5777ba 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,8 @@ lustrefs-exporter/src/snapshots/lustrefs_exporter__tests__valid_fixture_lustre-2.14.0_ddn125__ds86.txt.snap filter=lfs diff=lfs merge=lfs -text lustre-collector/src/fixtures/valid/lustre-2.14.0_ddn125/ds86.txt filter=lfs diff=lfs merge=lfs -text lustre-collector/src/snapshots/lustre_collector__parser__tests__valid_fixture_lustre-2.14.0_ddn125__ds86.txt.snap filter=lfs diff=lfs merge=lfs -text +lustre-collector/src/snapshots/lustre_collector__parser__tests__params_jobstats_only.snap filter=lfs diff=lfs merge=lfs -text +lustre-collector/src/snapshots/lustre_collector__parser__tests__params_no_jobstats.snap filter=lfs diff=lfs merge=lfs -text +lustre-collector/src/snapshots/lustre_collector__parser__tests__valid_fixture_lustre-2.14.0_ddn125__jobstats_only.txt.snap filter=lfs diff=lfs merge=lfs -text +lustrefs-exporter/src/snapshots/lustrefs_exporter__tests__valid_fixture_lustre-2.14.0_ddn125__jobstats_only.txt.snap filter=lfs diff=lfs merge=lfs -text +lustre-collector/src/fixtures/valid/lustre-2.14.0_ddn125/jobstats_only.txt filter=lfs diff=lfs merge=lfs -text diff --git a/lustre-collector/src/fixtures/valid/lustre-2.14.0_ddn125/jobstats_only.txt b/lustre-collector/src/fixtures/valid/lustre-2.14.0_ddn125/jobstats_only.txt new file mode 100644 index 0000000..775d921 --- /dev/null +++ b/lustre-collector/src/fixtures/valid/lustre-2.14.0_ddn125/jobstats_only.txt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fc3c374852fe01b926645291b3005625b962321a4288f398cc5a5f73857fa79 +size 421040329 diff --git a/lustre-collector/src/lib.rs b/lustre-collector/src/lib.rs index 12bada7..dc023ca 100644 --- a/lustre-collector/src/lib.rs +++ b/lustre-collector/src/lib.rs @@ -35,6 +35,7 @@ fn check_output(records: Vec, state: &str) -> Result, Lustre let params = crate::parser::params().join(" "); if !state.is_empty() { + println!("Left : {}", state); return Err(io::Error::new( io::ErrorKind::InvalidInput, format!("Content left in input buffer. Please run and supply to support: `lctl get_param {params}`"), diff --git a/lustre-collector/src/mds/job_stats.rs b/lustre-collector/src/mds/job_stats.rs index 2c0b849..00c4c7c 100644 --- a/lustre-collector/src/mds/job_stats.rs +++ b/lustre-collector/src/mds/job_stats.rs @@ -4,7 +4,7 @@ use crate::types::{JobStatMdt, JobStatsMdt}; use combine::{ - attempt, + attempt, eof, error::{ParseError, StreamError}, optional, parser::{ @@ -22,9 +22,10 @@ where { ( optional(newline()), // If Jobstats are present, the whole yaml blob will be on a newline - take_until(attempt((newline(), alpha_num()))), + take_until(attempt((newline(), alpha_num()).map(drop).or(eof()))), ) - .skip(newline()) + .skip(optional(newline())) + .skip(optional(eof())) .and_then(|(_, x): (_, String)| { serde_yaml::from_str(&x) .map(|x: JobStatsMdt| x.job_stats) diff --git a/lustre-collector/src/mds/mdt_parser.rs b/lustre-collector/src/mds/mdt_parser.rs index c8df2d5..ec585e6 100644 --- a/lustre-collector/src/mds/mdt_parser.rs +++ b/lustre-collector/src/mds/mdt_parser.rs @@ -61,6 +61,20 @@ pub(crate) fn params() -> Vec { .collect() } +pub(crate) fn params_no_jobstats() -> Vec { + [ + format!("mdt.*.{STATS}"), + format!("mdt.*MDT*.{NUM_EXPORTS}"), + format!("mdt.*MDT*.{EXPORTS_PARAMS}"), + ] + .into_iter() + .collect() +} + +pub(crate) fn params_jobstats_only() -> Vec { + vec![format!("mdt.*.{JOB_STATS}")] +} + fn target_name() -> impl Parser where I: Stream, diff --git a/lustre-collector/src/mds/mod.rs b/lustre-collector/src/mds/mod.rs index a6ea0c5..15a39d5 100644 --- a/lustre-collector/src/mds/mod.rs +++ b/lustre-collector/src/mds/mod.rs @@ -17,6 +17,17 @@ pub(crate) fn params() -> Vec { .collect() } +pub(crate) fn params_no_jobstats() -> Vec { + mds_parser::params() + .into_iter() + .chain(mdt_parser::params_no_jobstats()) + .collect() +} + +pub(crate) fn params_jobstats_only() -> Vec { + mdt_parser::params_jobstats_only() +} + pub(crate) fn parse() -> impl Parser where I: Stream, diff --git a/lustre-collector/src/oss/mod.rs b/lustre-collector/src/oss/mod.rs index 636e299..c8d9a6d 100644 --- a/lustre-collector/src/oss/mod.rs +++ b/lustre-collector/src/oss/mod.rs @@ -16,6 +16,17 @@ pub(crate) fn params() -> Vec { .collect() } +pub(crate) fn params_no_jobstats() -> Vec { + obdfilter_parser::obd_params_no_jobstats() + .into_iter() + .chain(oss_parser::params()) + .collect() +} + +pub(crate) fn params_jobstats_only() -> Vec { + obdfilter_parser::obd_params_jobstats_only() +} + pub(crate) fn parse() -> impl Parser where I: Stream, diff --git a/lustre-collector/src/oss/obdfilter_parser.rs b/lustre-collector/src/oss/obdfilter_parser.rs index edb5a3d..60d24cb 100644 --- a/lustre-collector/src/oss/obdfilter_parser.rs +++ b/lustre-collector/src/oss/obdfilter_parser.rs @@ -39,6 +39,16 @@ pub(crate) const OBD_STATS: [&str; 7] = [ EXPORTS_PARAMS, ]; +pub(crate) const OBD_STATS_NO_JOBSTATS: [&str; 6] = [ + STATS, + NUM_EXPORTS, + TOT_DIRTY, + TOT_GRANTED, + TOT_PENDING, + EXPORTS_PARAMS, +]; + +pub(crate) const OBD_STATS_JOBSTATS_ONLY: [&str; 1] = [JOBSTATS]; /// Takes OBD_STATS and produces a list of params for /// consumption in proper ltcl get_param format. pub(crate) fn obd_params() -> Vec { @@ -48,6 +58,21 @@ pub(crate) fn obd_params() -> Vec { .collect() } +/// Takes OBD_STATS and produces a list of params for +/// consumption in proper ltcl get_param format. +pub(crate) fn obd_params_no_jobstats() -> Vec { + OBD_STATS_NO_JOBSTATS + .iter() + .map(|x| format!("obdfilter.*OST*.{x}")) + .collect() +} +pub(crate) fn obd_params_jobstats_only() -> Vec { + OBD_STATS_JOBSTATS_ONLY + .iter() + .map(|x| format!("obdfilter.*OST*.{x}")) + .collect() +} + /// Parses the name of a target fn target_name() -> impl Parser where diff --git a/lustre-collector/src/parser.rs b/lustre-collector/src/parser.rs index 89fd897..81c15b5 100644 --- a/lustre-collector/src/parser.rs +++ b/lustre-collector/src/parser.rs @@ -26,6 +26,28 @@ pub fn params() -> Vec { .collect() } +pub fn params_no_jobstats() -> Vec { + top_level_parser::top_level_params() + .into_iter() + .chain(client_count_parser::params()) + .chain(osd_parser::params()) + .chain(mgs_parser::params()) + .chain(oss::params_no_jobstats()) + .chain(mds::params_no_jobstats()) + .chain(ldlm::params()) + .chain(llite::params()) + .chain(mdd_parser::params()) + .chain(quota::params()) + .collect() +} + +pub fn params_jobstats_only() -> Vec { + oss::params_jobstats_only() + .into_iter() + .chain(mds::params_jobstats_only()) + .collect() +} + pub fn parse() -> impl Parser> where I: Stream, @@ -90,6 +112,16 @@ mod tests { assert_debug_snapshot!(params()); } + #[test] + fn test_params_no_jobstats() { + assert_debug_snapshot!(params_no_jobstats()); + } + + #[test] + fn test_params_jobstats_only() { + assert_debug_snapshot!(params_jobstats_only()); + } + #[test] fn test_mdt_output() { let x = r#"memused=343719411 diff --git a/lustre-collector/src/snapshots/lustre_collector__parser__tests__params_jobstats_only.snap b/lustre-collector/src/snapshots/lustre_collector__parser__tests__params_jobstats_only.snap new file mode 100644 index 0000000..0dfae84 --- /dev/null +++ b/lustre-collector/src/snapshots/lustre_collector__parser__tests__params_jobstats_only.snap @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcff55dec6c4dca94f8d0809ca8fa1c7b30d3a83031be83eac6d089266c73ae3 +size 142 diff --git a/lustre-collector/src/snapshots/lustre_collector__parser__tests__params_no_jobstats.snap b/lustre-collector/src/snapshots/lustre_collector__parser__tests__params_no_jobstats.snap new file mode 100644 index 0000000..78c57d9 --- /dev/null +++ b/lustre-collector/src/snapshots/lustre_collector__parser__tests__params_no_jobstats.snap @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c82b55131bdd48ebbb40f0f64d60f5ae59af93e66f0a895db258f9465626228b +size 2134 diff --git a/lustre-collector/src/snapshots/lustre_collector__parser__tests__valid_fixture_lustre-2.14.0_ddn125__jobstats_only.txt.snap b/lustre-collector/src/snapshots/lustre_collector__parser__tests__valid_fixture_lustre-2.14.0_ddn125__jobstats_only.txt.snap new file mode 100644 index 0000000..1fa370f --- /dev/null +++ b/lustre-collector/src/snapshots/lustre_collector__parser__tests__valid_fixture_lustre-2.14.0_ddn125__jobstats_only.txt.snap @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94770df5fdc730ece2db29c92c985274f669321f349a00ceb4b606399cf214b8 +size 836493071 diff --git a/lustrefs-exporter/src/main.rs b/lustrefs-exporter/src/main.rs index 4a1ce0e..cd7a02e 100644 --- a/lustrefs-exporter/src/main.rs +++ b/lustrefs-exporter/src/main.rs @@ -2,12 +2,18 @@ // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. +use std::{sync::Arc, time::Duration}; + use clap::Parser; use lustre_collector::{parse_lctl_output, parse_lnetctl_output, parse_lnetctl_stats, parser}; use lustrefs_exporter::build_lustre_stats; use prometheus_exporter_base::prelude::*; -use tokio::process::Command; +use tokio::{ + process::Command, + sync::Mutex, + time::{interval, MissedTickBehavior}, +}; const LUSTREFS_EXPORTER_PORT: &str = "32221"; @@ -31,6 +37,52 @@ async fn main() { authorization: Authorization::None, }; + let mtx_record = Arc::new(Mutex::new(None)); + let mtx_record2 = Arc::clone(&mtx_record); + + let mut ticker = interval(Duration::from_secs(10)); + ticker.set_missed_tick_behavior(MissedTickBehavior::Skip); + + tokio::spawn(async move { + loop { + ticker.tick().await; + + let lctl = match Command::new("lctl") + .arg("get_param") + .args(parser::params_jobstats_only()) + .kill_on_drop(true) + .output() + .await + { + Ok(r) => r, + Err(e) => { + tracing::debug!("Failed to retrieve jobstats parameters. {e}"); + continue; + } + }; + + // Offload CPU-intensive parsing to a blocking task + let parsed_result = + tokio::task::spawn_blocking(move || parse_lctl_output(&lctl.stdout)).await; + + match parsed_result { + Ok(Ok(r)) => { + let stat = build_lustre_stats(r); + let mut lock = mtx_record.lock().await; + *lock = Some(stat); + } + Ok(Err(e)) => { + tracing::debug!("Failed to parse jobstats information. {e}"); + continue; + } + Err(e) => { + tracing::debug!("Failed to execute parse_lctl_output in blocking task. {e}"); + continue; + } + } + } + }); + render_prometheus(server_opts, Options, |request, options| async move { tracing::debug!(?request, ?options); @@ -38,7 +90,7 @@ async fn main() { let lctl = Command::new("lctl") .arg("get_param") - .args(parser::params()) + .args(parser::params_no_jobstats()) .kill_on_drop(true) .output() .await?; @@ -69,7 +121,14 @@ async fn main() { output.append(&mut lnetctl_stats_record); - Ok(build_lustre_stats(output)) + let lustre_stats = build_lustre_stats(output); + + let jobstats = { mtx_record2.lock().await.clone() }; + + match jobstats { + Some(jobstats) => Ok([lustre_stats, jobstats].join("\n")), + None => Ok(lustre_stats), + } }) .await; } diff --git a/lustrefs-exporter/src/snapshots/lustrefs_exporter__tests__valid_fixture_lustre-2.14.0_ddn125__jobstats_only.txt.snap b/lustrefs-exporter/src/snapshots/lustrefs_exporter__tests__valid_fixture_lustre-2.14.0_ddn125__jobstats_only.txt.snap new file mode 100644 index 0000000..6baa2de --- /dev/null +++ b/lustrefs-exporter/src/snapshots/lustrefs_exporter__tests__valid_fixture_lustre-2.14.0_ddn125__jobstats_only.txt.snap @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:080f85583318a8f3bc0dded58536eaccf81d465ac2dd25db016994a0f3fe86fd +size 333242973