diff --git a/plugins/in_node_exporter_metrics/CMakeLists.txt b/plugins/in_node_exporter_metrics/CMakeLists.txt index 03dde74a36f..ec30f187260 100644 --- a/plugins/in_node_exporter_metrics/CMakeLists.txt +++ b/plugins/in_node_exporter_metrics/CMakeLists.txt @@ -16,6 +16,7 @@ set(src ne_utils.c ne_config.c ne_systemd.c + ne_thermalzone.c ne.c ) diff --git a/plugins/in_node_exporter_metrics/ne.c b/plugins/in_node_exporter_metrics/ne.c index efd12c213c3..411bec43399 100644 --- a/plugins/in_node_exporter_metrics/ne.c +++ b/plugins/in_node_exporter_metrics/ne.c @@ -43,6 +43,7 @@ #include "ne_systemd.h" #include "ne_processes.h" #include "ne_nvme.h" +#include "ne_thermalzone.h" /* * Update the metrics, this function is invoked every time 'scrape_interval' @@ -192,6 +193,7 @@ static int in_ne_init(struct flb_input_instance *in, mk_list_add(&systemd_collector._head, &ctx->collectors); mk_list_add(&processes_collector._head, &ctx->collectors); mk_list_add(&nvme_collector._head, &ctx->collectors); + mk_list_add(&thermalzone_collector._head, &ctx->collectors); mk_list_foreach(head, &ctx->collectors) { coll = mk_list_entry(head, struct flb_ne_collector, _head); @@ -402,6 +404,11 @@ static struct flb_config_map config_map[] = { 0, FLB_FALSE, 0, "scrape interval to collect processes metrics from the node." }, + { + FLB_CONFIG_MAP_TIME, "collector.thermalzone.scrape_interval", "0", + 0, FLB_FALSE, 0, + "scrape interval to collect thermal zone metrics from the node." 
+ }, { FLB_CONFIG_MAP_TIME, "collector.nvme.scrape_interval", "0", diff --git a/plugins/in_node_exporter_metrics/ne.h b/plugins/in_node_exporter_metrics/ne.h index e6c627e3d2d..6f8ae328704 100644 --- a/plugins/in_node_exporter_metrics/ne.h +++ b/plugins/in_node_exporter_metrics/ne.h @@ -33,7 +33,7 @@ /* Default enabled metrics */ #ifdef __linux__ -#define NE_DEFAULT_ENABLED_METRICS "cpu,cpufreq,meminfo,diskstats,filesystem,uname,stat,time,loadavg,vmstat,netdev,filefd,systemd,nvme" +#define NE_DEFAULT_ENABLED_METRICS "cpu,cpufreq,meminfo,diskstats,filesystem,uname,stat,time,loadavg,vmstat,netdev,filefd,systemd,nvme,thermal_zone" #elif __APPLE__ #define NE_DEFAULT_ENABLED_METRICS "cpu,loadavg,meminfo,diskstats,uname,netdev" #endif @@ -206,6 +206,11 @@ struct flb_ne { /* nvme */ struct cmt_gauge *nvme_info; + + /* thermal zone */ + struct cmt_gauge *thermalzone_temp; + struct cmt_gauge *cooling_device_cur_state; + struct cmt_gauge *cooling_device_max_state; }; struct flb_ne_collector { diff --git a/plugins/in_node_exporter_metrics/ne_thermalzone.c b/plugins/in_node_exporter_metrics/ne_thermalzone.c new file mode 100644 index 00000000000..9c540740c78 --- /dev/null +++ b/plugins/in_node_exporter_metrics/ne_thermalzone.c @@ -0,0 +1,32 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ + +/* Fluent Bit + * ========== + * Copyright (C) 2023 The Fluent Bit Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#ifdef __linux__
+#include "ne_thermalzone_linux.c"
+#else
+
+#include "ne.h"
+/* Non-Linux builds: same collector name, but no callbacks are provided. */
+struct flb_ne_collector thermalzone_collector = {
+    .name = "thermal_zone",
+    .cb_init = NULL,
+    .cb_update = NULL,
+    .cb_exit = NULL
+};
+#endif
diff --git a/plugins/in_node_exporter_metrics/ne_thermalzone.h b/plugins/in_node_exporter_metrics/ne_thermalzone.h
new file mode 100644
index 00000000000..fa335cefeca
--- /dev/null
+++ b/plugins/in_node_exporter_metrics/ne_thermalzone.h
@@ -0,0 +1,27 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+
+/* Fluent Bit
+ *  ==========
+ *  Copyright (C) 2023 The Fluent Bit Authors
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+#ifndef FLB_IN_NE_THERMALZONE_H
+#define FLB_IN_NE_THERMALZONE_H
+
+#include "ne.h"
+/* Defined in ne_thermalzone.c; ne.c adds it to the collector list. */
+extern struct flb_ne_collector thermalzone_collector;
+
+#endif
diff --git a/plugins/in_node_exporter_metrics/ne_thermalzone_linux.c b/plugins/in_node_exporter_metrics/ne_thermalzone_linux.c
new file mode 100644
index 00000000000..f4f38479077
--- /dev/null
+++ b/plugins/in_node_exporter_metrics/ne_thermalzone_linux.c
@@ -0,0 +1,267 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+
+/* Fluent Bit
+ *  ==========
+ *  Copyright (C) 2015-2022 The Fluent Bit Authors
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+#include <fluent-bit/flb_info.h>
+#include <fluent-bit/flb_input_plugin.h>
+
+#include "ne.h"
+#include "ne_utils.h"
+#include "ne_thermalzone_linux.h"
+
+#include <unistd.h>
+
+/*
+ * See kernel documentation for a description:
+ * https://www.kernel.org/doc/html/latest/driver-api/thermal/sysfs-api.html
+ *
+ * Ensure to pick the correct version of the documentation, older versions here:
+ * https://github.com/torvalds/linux/tree/master/Documentation
+ */
+/*
+ * Thermal zone stats, reads /sys/class/thermal/thermal_zone*
+ * ----------------------------------------------------------
+ */
+
+/* Create the gauges; returns 0 on success, -1 on the first failure. */
+static int ne_thermalzone_init(struct flb_ne *ctx)
+{
+    ctx->thermalzone_temp = cmt_gauge_create(ctx->cmt, "node", "thermal_zone", "temp",
+                                             "Zone temperature in Celsius",
+                                             2, (char *[]) {"zone", "type"});
+    if (!ctx->thermalzone_temp) {
+        flb_plg_error(ctx->ins, "could not initialize thermal zone metrics");
+        return -1;
+    }
+
+    ctx->cooling_device_cur_state = cmt_gauge_create(ctx->cmt,
+                                                     "node", "cooling_device", "cur_state",
+                                                     "Current throttle state of the cooling device",
+                                                     2, (char *[]) {"name", "type"});
+    if (!ctx->cooling_device_cur_state) {
+        flb_plg_error(ctx->ins, "could not initialize cooling device cur_state metric");
+        return -1;
+    }
+
+    ctx->cooling_device_max_state = cmt_gauge_create(ctx->cmt,
+                                                     "node", "cooling_device", "max_state",
+                                                     "Maximum throttle state of the cooling device",
+                                                     2, (char *[]) {"name", "type"});
+    if (!ctx->cooling_device_max_state) {
+        flb_plg_error(ctx->ins, "could not initialize cooling device max_state metric");
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Build "<path_sysfs><base>", the common prefix of every scanned entry.
+ * One trailing '/' of path_sysfs is dropped so the result has no "//".
+ * Returns a new SDS owned by the caller, or NULL on failure.
+ */
+static flb_sds_t ne_thermalzone_prefix(struct flb_ne *ctx, const char *base)
+{
+    size_t mount_len = strlen(ctx->path_sysfs);
+    size_t base_len = strlen(base);
+    flb_sds_t prefix;
+
+    prefix = flb_sds_create_size(mount_len + base_len + 8);
+    if (prefix == NULL) {
+        return NULL;
+    }
+
+    /* Guard the index: an empty path_sysfs has no last character */
+    if (mount_len > 0 && ctx->path_sysfs[mount_len - 1] == '/') {
+        mount_len--;
+    }
+
+    if (flb_sds_cat_safe(&prefix, ctx->path_sysfs, mount_len) < 0 ||
+        flb_sds_cat_safe(&prefix, base, base_len) < 0) {
+        flb_sds_destroy(prefix);
+        return NULL;
+    }
+
+    return prefix;
+}
+
+/*
+ * Set the thermalzone_temp gauge for every scanned thermal_zone* entry.
+ * Entries whose "temp" or "type" file cannot be read are skipped.
+ * Returns 0 on success or empty scan, -1 on scan/allocation failure.
+ */
+static int ne_thermalzone_update_thermal_zones(struct flb_ne *ctx)
+{
+    int ret;
+    uint64_t tstamp;
+    uint64_t temp = 0;
+    size_t prefix_len;
+    char *zone;
+    flb_sds_t type;
+    flb_sds_t prefix;
+    struct mk_list list;
+    struct mk_list *head;
+    struct flb_slist_entry *entry;
+
+    tstamp = cfl_time_now();
+
+    ret = ne_utils_path_scan(ctx, ctx->path_sysfs, THERMAL_ZONE_PATTERN, NE_SCAN_DIR, &list);
+    if (ret != 0) {
+        return -1;
+    }
+
+    if (mk_list_size(&list) == 0) {
+        return 0;
+    }
+
+    prefix = ne_thermalzone_prefix(ctx, THERMAL_ZONE_BASE);
+    if (prefix == NULL) {
+        flb_slist_destroy(&list);
+        return -1;
+    }
+    prefix_len = strlen(prefix);
+
+    /* Process entries */
+    mk_list_foreach(head, &list) {
+        entry = mk_list_entry(head, struct flb_slist_entry, _head);
+
+        /*
+         * Raw reading, divided by 1000 below to report Celsius.
+         * NOTE(review): read as unsigned - confirm sub-zero readings work.
+         */
+        ret = ne_utils_file_read_uint64(ctx->path_sysfs, entry->str,
+                                        "temp", NULL, &temp);
+        if (ret != 0) {
+            continue;
+        }
+
+        ret = ne_utils_file_read_sds(ctx->path_sysfs, entry->str, "type", NULL, &type);
+        if (ret != 0) {
+            flb_plg_error(ctx->ins, "unable to get type for zone: %s", entry->str);
+            continue;
+        }
+
+        zone = strncmp(entry->str, prefix, prefix_len) == 0 ?
+               entry->str + prefix_len : entry->str;
+        cmt_gauge_set(ctx->thermalzone_temp, tstamp, ((double) temp)/1000.0,
+                      2, (char *[]) {zone, type});
+
+        flb_sds_destroy(type);
+    }
+
+    flb_slist_destroy(&list);
+    flb_sds_destroy(prefix);
+
+    return 0;
+}
+
+/*
+ * Set the cur_state/max_state gauges for every scanned cooling_device*.
+ * Entries with an unreadable cur_state, max_state or type are skipped.
+ * Returns 0 on success or empty scan, -1 on scan/allocation failure.
+ */
+static int ne_thermalzone_update_cooling_devices(struct flb_ne *ctx)
+{
+    int ret;
+    uint64_t tstamp;
+    uint64_t cur_state = 0;
+    uint64_t max_state = 0;
+    size_t prefix_len;
+    char *name;
+    flb_sds_t type;
+    flb_sds_t prefix;
+    struct mk_list list;
+    struct mk_list *head;
+    struct flb_slist_entry *entry;
+
+    tstamp = cfl_time_now();
+
+    ret = ne_utils_path_scan(ctx, ctx->path_sysfs, COOLING_DEVICE_PATTERN, NE_SCAN_DIR, &list);
+    if (ret != 0) {
+        return -1;
+    }
+
+    if (mk_list_size(&list) == 0) {
+        return 0;
+    }
+
+    prefix = ne_thermalzone_prefix(ctx, COOLING_DEVICE_BASE);
+    if (prefix == NULL) {
+        flb_slist_destroy(&list);
+        return -1;
+    }
+    prefix_len = strlen(prefix);
+
+    /* Process entries */
+    mk_list_foreach(head, &list) {
+        entry = mk_list_entry(head, struct flb_slist_entry, _head);
+
+        ret = ne_utils_file_read_uint64(ctx->path_sysfs, entry->str,
+                                        "cur_state", NULL, &cur_state);
+        if (ret != 0) {
+            continue;
+        }
+
+        ret = ne_utils_file_read_uint64(ctx->path_sysfs, entry->str,
+                                        "max_state", NULL, &max_state);
+        if (ret != 0) {
+            continue;
+        }
+
+        ret = ne_utils_file_read_sds(ctx->path_sysfs, entry->str, "type", NULL, &type);
+        if (ret != 0) {
+            flb_plg_error(ctx->ins, "unable to get type for cooling device: %s", entry->str);
+            continue;
+        }
+
+        name = strncmp(entry->str, prefix, prefix_len) == 0 ?
+               entry->str + prefix_len : entry->str;
+        cmt_gauge_set(ctx->cooling_device_cur_state, tstamp, ((double)cur_state),
+                      2, (char *[]) {name, type});
+        cmt_gauge_set(ctx->cooling_device_max_state, tstamp, ((double)max_state),
+                      2, (char *[]) {name, type});
+
+        flb_sds_destroy(type);
+    }
+
+    flb_slist_destroy(&list);
+    flb_sds_destroy(prefix);
+
+    return 0;
+}
+
+/* Update callback: zones first; cooling devices only if that pass succeeds. */
+static int ne_thermalzone_update(struct flb_input_instance *ins, struct flb_config *config, void *in_context)
+{
+    int ret;
+    struct flb_ne *ctx = (struct flb_ne *)in_context;
+
+    ret = ne_thermalzone_update_thermal_zones(ctx);
+    if (ret != 0) {
+        return ret;
+    }
+    return ne_thermalzone_update_cooling_devices(ctx);
+}
+
+/* Collector descriptor; ne.c adds it to the list of collectors. */
+struct flb_ne_collector thermalzone_collector = {
+    .name = "thermal_zone",
+    .cb_init = ne_thermalzone_init,
+    .cb_update = ne_thermalzone_update,
+    .cb_exit = NULL
+};
diff --git a/plugins/in_node_exporter_metrics/ne_thermalzone_linux.h b/plugins/in_node_exporter_metrics/ne_thermalzone_linux.h
new file mode 100644
index 00000000000..83cb248acbb
--- /dev/null
+++ b/plugins/in_node_exporter_metrics/ne_thermalzone_linux.h
@@ -0,0 +1,30 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+
+/* Fluent Bit
+ *  ==========
+ *  Copyright (C) 2015-2022 The Fluent Bit Authors
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+#ifndef FLB_IN_NE_THERMALZONE_LINUX_H
+#define FLB_IN_NE_THERMALZONE_LINUX_H
+
+#include "ne.h"
+
+#define THERMAL_ZONE_BASE "/class/thermal/thermal_zone"
+#define THERMAL_ZONE_PATTERN THERMAL_ZONE_BASE "[0-9]*"
+#define COOLING_DEVICE_BASE "/class/thermal/cooling_device"
+#define COOLING_DEVICE_PATTERN COOLING_DEVICE_BASE "[0-9]*"
+
+#endif
diff --git a/plugins/in_node_exporter_metrics/ne_utils.c b/plugins/in_node_exporter_metrics/ne_utils.c
index 54cb2e2da1c..f3034076270 100644
--- a/plugins/in_node_exporter_metrics/ne_utils.c
+++ b/plugins/in_node_exporter_metrics/ne_utils.c
@@ -91,18 +91,33 @@ int ne_utils_file_read_uint64(const char *mount,
     }
 
     len = strlen(path);
-    flb_sds_cat_safe(&p, path, len);
+    if (flb_sds_cat_safe(&p, path, len) < 0) {
+        flb_sds_destroy(p);
+        return -1;
+    }
 
     if (join_a) {
-        flb_sds_cat_safe(&p, "/", 1);
+        if (flb_sds_cat_safe(&p, "/", 1) < 0) {
+            flb_sds_destroy(p);
+            return -1;
+        }
         len = strlen(join_a);
-        flb_sds_cat_safe(&p, join_a, len);
+        if (flb_sds_cat_safe(&p, join_a, len) < 0) {
+            flb_sds_destroy(p);
+            return -1;
+        }
     }
 
     if (join_b) {
-        flb_sds_cat_safe(&p, "/", 1);
+        if (flb_sds_cat_safe(&p, "/", 1) < 0) {
+            flb_sds_destroy(p);
+            return -1;
+        }
         len = strlen(join_b);
-        flb_sds_cat_safe(&p, join_b, len);
+        if (flb_sds_cat_safe(&p, join_b, len) < 0) {
+            flb_sds_destroy(p);
+            return -1;
+        }
     }
 
     fd = open(p, O_RDONLY);
@@ -178,6 +193,99 @@ int ne_utils_file_read_lines(const char *mount, const char *path, struct mk_list
     return 0;
 }
 
+/*
+ * Read up to 32 bytes from <mount><path>[/join_a][/join_b] and return them
+ * in a new SDS with trailing CR/LF characters removed.
+ *
+ * The mount point is not prepended when 'path' already starts with it.
+ * Returns 0 and sets *str (the caller releases it with flb_sds_destroy())
+ * on success; returns -1 on failure, in which case *str must not be used.
+ */
+int ne_utils_file_read_sds(const char *mount,
+                           const char *path,
+                           const char *join_a,
+                           const char *join_b,
+                           flb_sds_t *str)
+{
+    int fd;
+    int len;
+    int i;
+    flb_sds_t p;
+    ssize_t bytes;
+    char tmp[32];
+
+    /* Check the path starts with the mount point to prevent duplication. */
+    if (strncasecmp(path, mount, strlen(mount)) == 0 &&
+        path[strlen(mount)] == '/') {
+        mount = "";
+    }
+
+    /* Compose the final path */
+    p = flb_sds_create(mount);
+    if (!p) {
+        return -1;
+    }
+
+    len = strlen(path);
+    if (flb_sds_cat_safe(&p, path, len) < 0) {
+        flb_sds_destroy(p);
+        return -1;
+    }
+
+    if (join_a) {
+        if (flb_sds_cat_safe(&p, "/", 1) < 0) {
+            flb_sds_destroy(p);
+            return -1;
+        }
+        len = strlen(join_a);
+        if (flb_sds_cat_safe(&p, join_a, len) < 0) {
+            flb_sds_destroy(p);
+            return -1;
+        }
+    }
+
+    if (join_b) {
+        if (flb_sds_cat_safe(&p, "/", 1) < 0) {
+            flb_sds_destroy(p);
+            return -1;
+        }
+        len = strlen(join_b);
+        if (flb_sds_cat_safe(&p, join_b, len) < 0) {
+            flb_sds_destroy(p);
+            return -1;
+        }
+    }
+
+    fd = open(p, O_RDONLY);
+    if (fd == -1) {
+        flb_sds_destroy(p);
+        return -1;
+    }
+    flb_sds_destroy(p);
+
+    bytes = read(fd, tmp, sizeof(tmp));
+    if (bytes == -1) {
+        flb_errno();
+        close(fd);
+        return -1;
+    }
+    close(fd);
+
+    /* Strip trailing newline characters; i becomes -1 if nothing is left */
+    for (i = bytes - 1; i >= 0; i--) {
+        if (tmp[i] != '\n' && tmp[i] != '\r') {
+            break;
+        }
+    }
+
+    *str = flb_sds_create_len(tmp, i + 1);
+    if (*str == NULL) {
+        return -1;
+    }
+
+    return 0;
+}
+
 int ne_utils_path_scan(struct flb_ne *ctx, const char *mount, const char *path,
                        int expected, struct mk_list *list)
 {
diff --git a/plugins/in_node_exporter_metrics/ne_utils.h b/plugins/in_node_exporter_metrics/ne_utils.h
index 448293a033a..50d0937be3f 100644
--- a/plugins/in_node_exporter_metrics/ne_utils.h
+++ b/plugins/in_node_exporter_metrics/ne_utils.h
@@ -33,6 +33,12 @@ int ne_utils_file_read_uint64(const char *mount,
                               const char *join_a, const char *join_b,
                               uint64_t *out_val);
 
+int ne_utils_file_read_sds(const char *mount,
+                           const char *path,
+                           const char *join_a,
+                           const char *join_b,
+                           flb_sds_t *str);
+
 int ne_utils_file_read_lines(const char *mount, const char *path, struct mk_list *list);
 int ne_utils_path_scan(struct flb_ne *ctx, const char *mount, const char *path,
                        int expected, struct mk_list *list);