Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

in_node_exporter_metrics: add support for thermal_zone. #7522

Merged
merged 13 commits into from
Dec 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions plugins/in_node_exporter_metrics/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ set(src
ne_utils.c
ne_config.c
ne_systemd.c
ne_thermalzone.c
ne.c
)

Expand Down
7 changes: 7 additions & 0 deletions plugins/in_node_exporter_metrics/ne.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#include "ne_systemd.h"
#include "ne_processes.h"
#include "ne_nvme.h"
#include "ne_thermalzone.h"

/*
* Update the metrics, this function is invoked every time 'scrape_interval'
Expand Down Expand Up @@ -192,6 +193,7 @@ static int in_ne_init(struct flb_input_instance *in,
mk_list_add(&systemd_collector._head, &ctx->collectors);
mk_list_add(&processes_collector._head, &ctx->collectors);
mk_list_add(&nvme_collector._head, &ctx->collectors);
mk_list_add(&thermalzone_collector._head, &ctx->collectors);

mk_list_foreach(head, &ctx->collectors) {
coll = mk_list_entry(head, struct flb_ne_collector, _head);
Expand Down Expand Up @@ -402,6 +404,11 @@ static struct flb_config_map config_map[] = {
0, FLB_FALSE, 0,
"scrape interval to collect processes metrics from the node."
},
{
FLB_CONFIG_MAP_TIME, "collector.thermalzone.scrape_interval", "0",
pwhelan marked this conversation as resolved.
Show resolved Hide resolved
0, FLB_FALSE, 0,
"scrape interval to collect thermal zone metrics from the node."
},

{
FLB_CONFIG_MAP_TIME, "collector.nvme.scrape_interval", "0",
Expand Down
7 changes: 6 additions & 1 deletion plugins/in_node_exporter_metrics/ne.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
/* Default enabled metrics */

#ifdef __linux__
#define NE_DEFAULT_ENABLED_METRICS "cpu,cpufreq,meminfo,diskstats,filesystem,uname,stat,time,loadavg,vmstat,netdev,filefd,systemd,nvme"
#define NE_DEFAULT_ENABLED_METRICS "cpu,cpufreq,meminfo,diskstats,filesystem,uname,stat,time,loadavg,vmstat,netdev,filefd,systemd,nvme,thermal_zone"
#elif __APPLE__
#define NE_DEFAULT_ENABLED_METRICS "cpu,loadavg,meminfo,diskstats,uname,netdev"
#endif
Expand Down Expand Up @@ -206,6 +206,11 @@ struct flb_ne {

/* nvme */
struct cmt_gauge *nvme_info;

/* thermal zone */
struct cmt_gauge *thermalzone_temp;
struct cmt_gauge *cooling_device_cur_state;
struct cmt_gauge *cooling_device_max_state;
};

struct flb_ne_collector {
Expand Down
32 changes: 32 additions & 0 deletions plugins/in_node_exporter_metrics/ne_thermalzone.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */

/* Fluent Bit
* ==========
* Copyright (C) 2023 The Fluent Bit Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifdef __linux__
#include "ne_thermalzone_linux.c"
#else

#include "ne.h"

/*
 * Placeholder collector used when the plugin is not built for Linux: it
 * carries the "thermal_zone" name but sets no init, update or exit callback.
 */
struct flb_ne_collector thermalzone_collector = {
.name = "thermal_zone",
.cb_init = NULL,
.cb_update = NULL,
.cb_exit = NULL
};
#endif
27 changes: 27 additions & 0 deletions plugins/in_node_exporter_metrics/ne_thermalzone.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */

/* Fluent Bit
* ==========
* Copyright (C) 2023 The Fluent Bit Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef FLB_IN_NE_THERMALZONE_H
#define FLB_IN_NE_THERMALZONE_H

#include "ne.h"

/*
 * Thermal zone collector descriptor. It is defined in ne_thermalzone.c,
 * either directly or through the Linux implementation that file includes.
 */
extern struct flb_ne_collector thermalzone_collector;

#endif
267 changes: 267 additions & 0 deletions plugins/in_node_exporter_metrics/ne_thermalzone_linux.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,267 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */

/* Fluent Bit
* ==========
* Copyright (C) 2015-2022 The Fluent Bit Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <fluent-bit/flb_info.h>
#include <fluent-bit/flb_input_plugin.h>

#include "ne.h"
#include "ne_utils.h"
#include "ne_thermalzone_linux.h"

#include <unistd.h>

/*
* See kernel documentation for a description:
* https://www.kernel.org/doc/html/latest/driver-api/thermal/sysfs-api.html
*
* Make sure to pick the correct version of the documentation; older versions are here:
* https://github.com/torvalds/linux/tree/master/Documentation
*/
/*
* Thermal zone stats, reads /sys/class/thermal/thermal_zone*
* ----------------------------------------------------------
*/

/*
 * Create the three gauges used by the thermal zone collector and store them
 * in the plugin context:
 *
 *   - thermalzone_temp          (labels: zone, type)
 *   - cooling_device_cur_state  (labels: name, type)
 *   - cooling_device_max_state  (labels: name, type)
 *
 * Returns 0 when all gauges were created. If any creation fails, an error is
 * logged and -1 is returned immediately.
 */
static int ne_thermalzone_init(struct flb_ne *ctx)
{
    char *zone_labels[] = {"zone", "type"};
    char *device_labels[] = {"name", "type"};
    struct cmt_gauge *gauge;

    /* temperature of each thermal zone */
    gauge = cmt_gauge_create(ctx->cmt, "node", "thermal_zone", "temp",
                             "Zone temperature in Celsius",
                             2, zone_labels);
    ctx->thermalzone_temp = gauge;
    if (gauge == NULL) {
        flb_plg_error(ctx->ins, "could not initialize thermal zone metrics");
        return -1;
    }

    /* current throttle state of each cooling device */
    gauge = cmt_gauge_create(ctx->cmt, "node", "cooling_device", "cur_state",
                             "Current throttle state of the cooling device",
                             2, device_labels);
    ctx->cooling_device_cur_state = gauge;
    if (gauge == NULL) {
        flb_plg_error(ctx->ins, "could not initialize cooling device cur_state metric");
        return -1;
    }

    /* maximum throttle state of each cooling device */
    gauge = cmt_gauge_create(ctx->cmt, "node", "cooling_device", "max_state",
                             "Maximum throttle state of the cooling device",
                             2, device_labels);
    ctx->cooling_device_max_state = gauge;
    if (gauge == NULL) {
        flb_plg_error(ctx->ins, "could not initialize cooling device max_state metric");
        return -1;
    }

    return 0;
}

/*
 * Refresh the thermal zone temperature gauge.
 *
 * Scans ctx->path_sysfs for entries matching THERMAL_ZONE_PATTERN. For every
 * entry, the "temp" and "type" files are read and the temperature gauge is
 * set to temp / 1000.0 (the gauge is described as Celsius), labelled with:
 *
 *   - zone: the entry path with the "<path_sysfs><THERMAL_ZONE_BASE>" prefix
 *           removed, or the whole entry path if it does not start with it.
 *   - type: the content of the entry's "type" file.
 *
 * Entries whose "temp" cannot be read are skipped silently; entries whose
 * "type" cannot be read are skipped after logging an error.
 *
 * Returns 0 on success (including when no entry was found) and -1 when the
 * scan fails or the prefix string cannot be built.
 */
static int ne_thermalzone_update_thermal_zones(struct flb_ne *ctx)
{
    uint64_t tstamp;
    int ret;
    int result = -1;
    uint64_t temp = 0;
    struct mk_list *head;
    struct mk_list list;
    struct flb_slist_entry *entry;
    flb_sds_t type = NULL;
    flb_sds_t full_path_sysfs;
    int path_sysfs_len;
    size_t prefix_len;
    char *num;

    tstamp = cfl_time_now();

    ret = ne_utils_path_scan(ctx, ctx->path_sysfs, THERMAL_ZONE_PATTERN, NE_SCAN_DIR, &list);
    if (ret != 0) {
        return -1;
    }

    if (mk_list_size(&list) == 0) {
        return 0;
    }

    full_path_sysfs = flb_sds_create_size(strlen(THERMAL_ZONE_BASE) +
                                          strlen(ctx->path_sysfs) + 8);
    if (full_path_sysfs == NULL) {
        flb_slist_destroy(&list);
        return -1;
    }

    /*
     * Drop a single trailing '/' from the sysfs path. The length check
     * prevents reading before the start of an empty string.
     */
    path_sysfs_len = strlen(ctx->path_sysfs);
    if (path_sysfs_len > 0 && ctx->path_sysfs[path_sysfs_len - 1] == '/') {
        path_sysfs_len--;
    }

    /* Set the full_path to the sysfs path */
    if (flb_sds_cat_safe(&full_path_sysfs, ctx->path_sysfs, path_sysfs_len) < 0) {
        goto cleanup;
    }

    /* Concatenate the base for all thermalzone objects */
    if (flb_sds_cat_safe(&full_path_sysfs, THERMAL_ZONE_BASE,
                         strlen(THERMAL_ZONE_BASE)) < 0) {
        goto cleanup;
    }

    /* The prefix does not change while iterating: measure it once */
    prefix_len = strlen(full_path_sysfs);

    /* Process entries */
    mk_list_foreach(head, &list) {
        entry = mk_list_entry(head, struct flb_slist_entry, _head);

        /*
         * Zone temperature.
         * NOTE(review): the value is read as unsigned; presumably a zone
         * reporting a negative temperature is skipped or misread - verify
         * against ne_utils_file_read_uint64().
         */
        ret = ne_utils_file_read_uint64(ctx->path_sysfs,
                                        entry->str,
                                        "temp", NULL,
                                        &temp);
        if (ret != 0) {
            continue;
        }

        /* Zone type, used as a label */
        ret = ne_utils_file_read_sds(ctx->path_sysfs, entry->str, "type", NULL, &type);
        if (ret != 0) {
            flb_plg_error(ctx->ins, "unable to get type for zone: %s", entry->str);
            continue;
        }

        /* Use what follows the common prefix as the zone label */
        if (strncmp(entry->str, full_path_sysfs, prefix_len) == 0) {
            num = &entry->str[prefix_len];
        }
        else {
            num = entry->str;
        }

        cmt_gauge_set(ctx->thermalzone_temp, tstamp, ((double) temp)/1000.0,
                      2, (char *[]) {num, type});

        flb_sds_destroy(type);
        type = NULL;
    }

    result = 0;

cleanup:
    flb_slist_destroy(&list);
    flb_sds_destroy(full_path_sysfs);

    return result;
}

/*
 * Refresh the cooling device state gauges.
 *
 * Scans ctx->path_sysfs for entries matching COOLING_DEVICE_PATTERN. For
 * every entry, the "cur_state", "max_state" and "type" files are read and the
 * cur_state / max_state gauges are set, labelled with:
 *
 *   - name: the entry path with the "<path_sysfs><COOLING_DEVICE_BASE>"
 *           prefix removed, or the whole entry path if it does not start
 *           with it.
 *   - type: the content of the entry's "type" file.
 *
 * Entries whose "cur_state" or "max_state" cannot be read are skipped
 * silently; entries whose "type" cannot be read are skipped after logging an
 * error.
 *
 * Returns 0 on success (including when no entry was found) and -1 when the
 * scan fails or the prefix string cannot be built.
 */
static int ne_thermalzone_update_cooling_devices(struct flb_ne *ctx)
{
    uint64_t tstamp;
    int ret;
    int result = -1;
    uint64_t cur_state = 0;
    uint64_t max_state = 0;
    struct mk_list *head;
    struct mk_list list;
    struct flb_slist_entry *entry;
    flb_sds_t type = NULL;
    char *num;
    flb_sds_t full_path_sysfs;
    int path_sysfs_len;
    size_t prefix_len;

    tstamp = cfl_time_now();

    ret = ne_utils_path_scan(ctx, ctx->path_sysfs, COOLING_DEVICE_PATTERN, NE_SCAN_DIR, &list);
    if (ret != 0) {
        return -1;
    }

    if (mk_list_size(&list) == 0) {
        return 0;
    }

    full_path_sysfs = flb_sds_create_size(strlen(COOLING_DEVICE_BASE) +
                                          strlen(ctx->path_sysfs) + 8);
    if (full_path_sysfs == NULL) {
        flb_slist_destroy(&list);
        return -1;
    }

    /*
     * Drop a single trailing '/' from the sysfs path. The length check
     * prevents reading before the start of an empty string.
     */
    path_sysfs_len = strlen(ctx->path_sysfs);
    if (path_sysfs_len > 0 && ctx->path_sysfs[path_sysfs_len - 1] == '/') {
        path_sysfs_len--;
    }

    /* Set the full_path to the sysfs path */
    if (flb_sds_cat_safe(&full_path_sysfs, ctx->path_sysfs, path_sysfs_len) < 0) {
        goto cleanup;
    }

    /* Concatenate the base for all cooling device objects */
    if (flb_sds_cat_safe(&full_path_sysfs, COOLING_DEVICE_BASE,
                         strlen(COOLING_DEVICE_BASE)) < 0) {
        goto cleanup;
    }

    /* The prefix does not change while iterating: measure it once */
    prefix_len = strlen(full_path_sysfs);

    /* Process entries */
    mk_list_foreach(head, &list) {
        entry = mk_list_entry(head, struct flb_slist_entry, _head);

        /* Current throttle state */
        ret = ne_utils_file_read_uint64(ctx->path_sysfs,
                                        entry->str,
                                        "cur_state", NULL,
                                        &cur_state);
        if (ret != 0) {
            continue;
        }

        /* Maximum throttle state */
        ret = ne_utils_file_read_uint64(ctx->path_sysfs,
                                        entry->str,
                                        "max_state", NULL,
                                        &max_state);
        if (ret != 0) {
            continue;
        }

        /* Device type, used as a label */
        ret = ne_utils_file_read_sds(ctx->path_sysfs, entry->str, "type", NULL, &type);
        if (ret != 0) {
            flb_plg_error(ctx->ins, "unable to get type for cooling device: %s",
                          entry->str);
            continue;
        }

        /* Use what follows the common prefix as the device name label */
        if (strncmp(entry->str, full_path_sysfs, prefix_len) == 0) {
            num = &entry->str[prefix_len];
        }
        else {
            num = entry->str;
        }

        cmt_gauge_set(ctx->cooling_device_cur_state, tstamp, ((double)cur_state),
                      2, (char *[]) {num, type});
        cmt_gauge_set(ctx->cooling_device_max_state, tstamp, ((double)max_state),
                      2, (char *[]) {num, type});

        flb_sds_destroy(type);
        type = NULL;
    }

    result = 0;

cleanup:
    flb_slist_destroy(&list);
    flb_sds_destroy(full_path_sysfs);

    return result;
}

/*
 * Update callback of the thermal zone collector.
 *
 * Refreshes the thermal zone metrics first and, only if that succeeded, the
 * cooling device metrics. Returns 0 when both steps succeed, otherwise the
 * non-zero result of the first step that failed.
 */
static int ne_thermalzone_update(struct flb_input_instance *ins, struct flb_config *config, void *in_context)
{
    struct flb_ne *ne = (struct flb_ne *) in_context;
    int status;

    status = ne_thermalzone_update_thermal_zones(ne);
    if (status == 0) {
        status = ne_thermalzone_update_cooling_devices(ne);
    }

    return status;
}

/*
 * Collector descriptor for "thermal_zone": ne_thermalzone_init creates the
 * gauges and ne_thermalzone_update refreshes the thermal zone and cooling
 * device values. No exit callback is set.
 */
struct flb_ne_collector thermalzone_collector = {
.name = "thermal_zone",
.cb_init = ne_thermalzone_init,
.cb_update = ne_thermalzone_update,
.cb_exit = NULL
};
Loading
Loading