Skip to content

Commit

Permalink
in_node_exporter_metrics: add support for thermal_zone. (#7522)
Browse files Browse the repository at this point in the history
---------

Signed-off-by: Phillip Whelan <[email protected]>
  • Loading branch information
pwhelan authored Dec 20, 2023
1 parent a1faa4f commit 0ab459a
Show file tree
Hide file tree
Showing 9 changed files with 480 additions and 6 deletions.
1 change: 1 addition & 0 deletions plugins/in_node_exporter_metrics/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ set(src
ne_utils.c
ne_config.c
ne_systemd.c
ne_thermalzone.c
ne.c
)

Expand Down
7 changes: 7 additions & 0 deletions plugins/in_node_exporter_metrics/ne.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#include "ne_systemd.h"
#include "ne_processes.h"
#include "ne_nvme.h"
#include "ne_thermalzone.h"

/*
* Update the metrics, this function is invoked every time 'scrape_interval'
Expand Down Expand Up @@ -192,6 +193,7 @@ static int in_ne_init(struct flb_input_instance *in,
mk_list_add(&systemd_collector._head, &ctx->collectors);
mk_list_add(&processes_collector._head, &ctx->collectors);
mk_list_add(&nvme_collector._head, &ctx->collectors);
mk_list_add(&thermalzone_collector._head, &ctx->collectors);

mk_list_foreach(head, &ctx->collectors) {
coll = mk_list_entry(head, struct flb_ne_collector, _head);
Expand Down Expand Up @@ -402,6 +404,11 @@ static struct flb_config_map config_map[] = {
0, FLB_FALSE, 0,
"scrape interval to collect processes metrics from the node."
},
{
FLB_CONFIG_MAP_TIME, "collector.thermalzone.scrape_interval", "0",
0, FLB_FALSE, 0,
"scrape interval to collect thermal zone metrics from the node."
},

{
FLB_CONFIG_MAP_TIME, "collector.nvme.scrape_interval", "0",
Expand Down
7 changes: 6 additions & 1 deletion plugins/in_node_exporter_metrics/ne.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
/* Default enabled metrics */

#ifdef __linux__
#define NE_DEFAULT_ENABLED_METRICS "cpu,cpufreq,meminfo,diskstats,filesystem,uname,stat,time,loadavg,vmstat,netdev,filefd,systemd,nvme"
#define NE_DEFAULT_ENABLED_METRICS "cpu,cpufreq,meminfo,diskstats,filesystem,uname,stat,time,loadavg,vmstat,netdev,filefd,systemd,nvme,thermal_zone"
#elif __APPLE__
#define NE_DEFAULT_ENABLED_METRICS "cpu,loadavg,meminfo,diskstats,uname,netdev"
#endif
Expand Down Expand Up @@ -206,6 +206,11 @@ struct flb_ne {

/* nvme */
struct cmt_gauge *nvme_info;

/* thermal zone */
struct cmt_gauge *thermalzone_temp;
struct cmt_gauge *cooling_device_cur_state;
struct cmt_gauge *cooling_device_max_state;
};

struct flb_ne_collector {
Expand Down
32 changes: 32 additions & 0 deletions plugins/in_node_exporter_metrics/ne_thermalzone.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */

/* Fluent Bit
* ==========
* Copyright (C) 2023 The Fluent Bit Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifdef __linux__
#include "ne_thermalzone_linux.c"
#else

#include "ne.h"

/*
 * Placeholder descriptor for the "thermal_zone" collector, compiled only in
 * the #else branch of the __linux__ check above. All callbacks are NULL, so
 * this definition merely provides the symbol declared in ne_thermalzone.h.
 */
struct flb_ne_collector thermalzone_collector = {
.name = "thermal_zone",
.cb_init = NULL,
.cb_update = NULL,
.cb_exit = NULL
};
#endif
27 changes: 27 additions & 0 deletions plugins/in_node_exporter_metrics/ne_thermalzone.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */

/* Fluent Bit
* ==========
* Copyright (C) 2023 The Fluent Bit Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef FLB_IN_NE_THERMALZONE_H
#define FLB_IN_NE_THERMALZONE_H

#include "ne.h"

extern struct flb_ne_collector thermalzone_collector;

#endif
267 changes: 267 additions & 0 deletions plugins/in_node_exporter_metrics/ne_thermalzone_linux.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,267 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */

/* Fluent Bit
* ==========
* Copyright (C) 2015-2022 The Fluent Bit Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <fluent-bit/flb_info.h>
#include <fluent-bit/flb_input_plugin.h>

#include "ne.h"
#include "ne_utils.h"
#include "ne_thermalzone_linux.h"

#include <unistd.h>

/*
* See kernel documentation for a description:
* https://www.kernel.org/doc/html/latest/driver-api/thermal/sysfs-api.html
*
* Be sure to consult the correct version of the documentation; older versions are available here:
* https://github.com/torvalds/linux/tree/master/Documentation
*/
/*
* Thermal zone stats, reads /sys/class/thermal/thermal_zone*
* ----------------------------------------------------------
*/

/*
 * Create the three gauges used by the thermal_zone collector and store them
 * in the plugin context:
 *
 *   - node_thermal_zone_temp            labels: zone, type
 *   - node_cooling_device_cur_state     labels: name, type
 *   - node_cooling_device_max_state     labels: name, type
 *
 * Returns 0 when every gauge was created, -1 (after logging an error) as soon
 * as one of them could not be created.
 */
static int ne_thermalzone_init(struct flb_ne *ctx)
{
    char *zone_labels[] = {"zone", "type"};
    char *cur_state_labels[] = {"name", "type"};
    char *max_state_labels[] = {"name", "type"};
    struct cmt_gauge *gauge;

    /* Temperature of each thermal zone */
    gauge = cmt_gauge_create(ctx->cmt, "node", "thermal_zone", "temp",
                             "Zone temperature in Celsius",
                             2, zone_labels);
    ctx->thermalzone_temp = gauge;
    if (gauge == NULL) {
        flb_plg_error(ctx->ins, "could not initialize thermal zone metrics");
        return -1;
    }

    /* Current throttle state of each cooling device */
    gauge = cmt_gauge_create(ctx->cmt, "node", "cooling_device", "cur_state",
                             "Current throttle state of the cooling device",
                             2, cur_state_labels);
    ctx->cooling_device_cur_state = gauge;
    if (gauge == NULL) {
        flb_plg_error(ctx->ins, "could not initialize cooling device cur_state metric");
        return -1;
    }

    /* Maximum throttle state of each cooling device */
    gauge = cmt_gauge_create(ctx->cmt, "node", "cooling_device", "max_state",
                             "Maximum throttle state of the cooling device",
                             2, max_state_labels);
    ctx->cooling_device_max_state = gauge;
    if (gauge == NULL) {
        flb_plg_error(ctx->ins, "could not initialize cooling device max_state metric");
        return -1;
    }

    return 0;
}

/*
 * Refresh the thermal zone temperature gauge.
 *
 * Scans ctx->path_sysfs for directories matching THERMAL_ZONE_PATTERN and,
 * for every match, reads its "temp" and "type" files. The temperature is
 * divided by 1000 before being stored in ctx->thermalzone_temp with the
 * labels {zone, type}. The "zone" label is the entry path with the
 * "<path_sysfs><THERMAL_ZONE_BASE>" prefix removed when the entry starts with
 * that prefix, otherwise the full entry path.
 *
 * Entries whose "temp" or "type" file cannot be read are skipped.
 *
 * Returns 0 on success (including when no zone is found), -1 when the scan
 * fails or the prefix string cannot be built.
 */
static int ne_thermalzone_update_thermal_zones(struct flb_ne *ctx)
{
    uint64_t tstamp;
    int ret;
    int rc = -1;
    /*
     * NOTE(review): the value is parsed as unsigned; confirm how
     * ne_utils_file_read_uint64() behaves for sub-zero temperatures.
     */
    uint64_t temp = 0;
    struct mk_list *head;
    struct mk_list list;
    struct flb_slist_entry *entry;
    flb_sds_t type;
    flb_sds_t full_path_sysfs;
    int path_sysfs_len;
    size_t prefix_len;
    char *num;

    tstamp = cfl_time_now();

    ret = ne_utils_path_scan(ctx, ctx->path_sysfs, THERMAL_ZONE_PATTERN, NE_SCAN_DIR, &list);
    if (ret != 0) {
        return -1;
    }

    if (mk_list_size(&list) == 0) {
        return 0;
    }

    path_sysfs_len = strlen(ctx->path_sysfs);

    full_path_sysfs = flb_sds_create_size(strlen(THERMAL_ZONE_BASE) +
                                          path_sysfs_len + 8);
    if (full_path_sysfs == NULL) {
        flb_slist_destroy(&list);
        return -1;
    }

    /*
     * Drop a single trailing '/' from the sysfs path. The length check keeps
     * an empty path from being indexed at position -1.
     */
    if (path_sysfs_len > 0 && ctx->path_sysfs[path_sysfs_len - 1] == '/') {
        path_sysfs_len--;
    }

    /* Set the full_path to the sysfs path */
    if (flb_sds_cat_safe(&full_path_sysfs, ctx->path_sysfs, path_sysfs_len) < 0) {
        goto cleanup;
    }

    /* Concatenate the base for all thermalzone objects */
    if (flb_sds_cat_safe(&full_path_sysfs, THERMAL_ZONE_BASE,
                         strlen(THERMAL_ZONE_BASE)) < 0) {
        goto cleanup;
    }

    /* The prefix does not change while iterating: measure it once */
    prefix_len = strlen(full_path_sysfs);

    /* Process entries */
    mk_list_foreach(head, &list) {
        entry = mk_list_entry(head, struct flb_slist_entry, _head);

        /* Raw temperature reported by the zone */
        ret = ne_utils_file_read_uint64(ctx->path_sysfs,
                                        entry->str,
                                        "temp", NULL,
                                        &temp);
        if (ret != 0) {
            continue;
        }

        /* Zone type, used as a label */
        ret = ne_utils_file_read_sds(ctx->path_sysfs, entry->str, "type", NULL, &type);
        if (ret != 0) {
            flb_plg_error(ctx->ins, "unable to get type for zone: %s", entry->str);
            continue;
        }

        /* Use what follows the common prefix as the zone identifier */
        if (strncmp(entry->str, full_path_sysfs, prefix_len) == 0) {
            num = &entry->str[prefix_len];
        }
        else {
            num = entry->str;
        }

        cmt_gauge_set(ctx->thermalzone_temp, tstamp, ((double) temp)/1000.0,
                      2, (char *[]) {num, type});

        flb_sds_destroy(type);
    }

    rc = 0;

cleanup:
    flb_slist_destroy(&list);
    flb_sds_destroy(full_path_sysfs);

    return rc;
}

/*
 * Refresh the cooling device gauges.
 *
 * Scans ctx->path_sysfs for directories matching COOLING_DEVICE_PATTERN and,
 * for every match, reads its "cur_state", "max_state" and "type" files. The
 * two state values are stored in ctx->cooling_device_cur_state and
 * ctx->cooling_device_max_state with the labels {name, type}. The "name"
 * label is the entry path with the "<path_sysfs><COOLING_DEVICE_BASE>" prefix
 * removed when the entry starts with that prefix, otherwise the full entry
 * path.
 *
 * Entries for which any of the three files cannot be read are skipped.
 *
 * Returns 0 on success (including when no device is found), -1 when the scan
 * fails or the prefix string cannot be built.
 */
static int ne_thermalzone_update_cooling_devices(struct flb_ne *ctx)
{
    uint64_t tstamp;
    int ret;
    int rc = -1;
    uint64_t cur_state = 0;
    uint64_t max_state = 0;
    struct mk_list *head;
    struct mk_list list;
    struct flb_slist_entry *entry;
    flb_sds_t type;
    char *num;
    flb_sds_t full_path_sysfs;
    int path_sysfs_len;
    size_t prefix_len;

    tstamp = cfl_time_now();

    ret = ne_utils_path_scan(ctx, ctx->path_sysfs, COOLING_DEVICE_PATTERN, NE_SCAN_DIR, &list);
    if (ret != 0) {
        return -1;
    }

    if (mk_list_size(&list) == 0) {
        return 0;
    }

    path_sysfs_len = strlen(ctx->path_sysfs);

    full_path_sysfs = flb_sds_create_size(strlen(COOLING_DEVICE_BASE) +
                                          path_sysfs_len + 8);
    if (full_path_sysfs == NULL) {
        flb_slist_destroy(&list);
        return -1;
    }

    /*
     * Drop a single trailing '/' from the sysfs path. The length check keeps
     * an empty path from being indexed at position -1.
     */
    if (path_sysfs_len > 0 && ctx->path_sysfs[path_sysfs_len - 1] == '/') {
        path_sysfs_len--;
    }

    /* Start the prefix with the sysfs path */
    if (flb_sds_cat_safe(&full_path_sysfs, ctx->path_sysfs, path_sysfs_len) < 0) {
        goto cleanup;
    }

    /* Append the base shared by all cooling device objects */
    if (flb_sds_cat_safe(&full_path_sysfs, COOLING_DEVICE_BASE,
                         strlen(COOLING_DEVICE_BASE)) < 0) {
        goto cleanup;
    }

    /* The prefix does not change while iterating: measure it once */
    prefix_len = strlen(full_path_sysfs);

    /* Process entries */
    mk_list_foreach(head, &list) {
        entry = mk_list_entry(head, struct flb_slist_entry, _head);

        /* Current throttle state */
        ret = ne_utils_file_read_uint64(ctx->path_sysfs,
                                        entry->str,
                                        "cur_state", NULL,
                                        &cur_state);
        if (ret != 0) {
            continue;
        }

        /* Maximum throttle state */
        ret = ne_utils_file_read_uint64(ctx->path_sysfs,
                                        entry->str,
                                        "max_state", NULL,
                                        &max_state);
        if (ret != 0) {
            continue;
        }

        /* Device type, used as a label */
        ret = ne_utils_file_read_sds(ctx->path_sysfs, entry->str, "type", NULL, &type);
        if (ret != 0) {
            flb_plg_error(ctx->ins, "unable to get type for cooling device: %s",
                          entry->str);
            continue;
        }

        /* Use what follows the common prefix as the device identifier */
        if (strncmp(entry->str, full_path_sysfs, prefix_len) == 0) {
            num = &entry->str[prefix_len];
        }
        else {
            num = entry->str;
        }

        cmt_gauge_set(ctx->cooling_device_cur_state, tstamp, ((double)cur_state),
                      2, (char *[]) {num, type});
        cmt_gauge_set(ctx->cooling_device_max_state, tstamp, ((double)max_state),
                      2, (char *[]) {num, type});
        flb_sds_destroy(type);
    }

    rc = 0;

cleanup:
    flb_slist_destroy(&list);
    flb_sds_destroy(full_path_sysfs);

    return rc;
}

/*
 * Update callback of the thermal_zone collector.
 *
 * Refreshes the thermal zone gauge first; only when that succeeds are the
 * cooling device gauges refreshed. The result of the first failing step (or
 * of the cooling device step) is returned to the caller.
 */
static int ne_thermalzone_update(struct flb_input_instance *ins, struct flb_config *config, void *in_context)
{
    struct flb_ne *ctx = (struct flb_ne *) in_context;
    int zones_status;

    zones_status = ne_thermalzone_update_thermal_zones(ctx);
    if (zones_status == 0) {
        return ne_thermalzone_update_cooling_devices(ctx);
    }

    return zones_status;
}

/*
 * Descriptor of the "thermal_zone" collector for Linux: cb_init creates the
 * thermal zone and cooling device gauges, cb_update refreshes them. No exit
 * callback is provided.
 */
struct flb_ne_collector thermalzone_collector = {
.name = "thermal_zone",
.cb_init = ne_thermalzone_init,
.cb_update = ne_thermalzone_update,
.cb_exit = NULL
};
Loading

0 comments on commit 0ab459a

Please sign in to comment.