From 6d187c7c827493a7934c9533e6fa498409d8453b Mon Sep 17 00:00:00 2001 From: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com> Date: Sun, 29 Sep 2024 20:42:04 +0300 Subject: [PATCH 01/23] Change default pages per extent (#18623) --- src/database/engine/rrdengine.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/database/engine/rrdengine.h b/src/database/engine/rrdengine.h index 37ea92b8a9a415..691f0ea7a8b4d4 100644 --- a/src/database/engine/rrdengine.h +++ b/src/database/engine/rrdengine.h @@ -28,7 +28,7 @@ struct rrdengine_instance; struct rrdeng_cmd; #define MAX_PAGES_PER_EXTENT (109) /* TODO: can go higher only when journal supports bigger than 4KiB transactions */ -#define DEFAULT_PAGES_PER_EXTENT (64) +#define DEFAULT_PAGES_PER_EXTENT (109) #define RRDENG_FILE_NUMBER_SCAN_TMPL "%1u-%10u" #define RRDENG_FILE_NUMBER_PRINT_TMPL "%1.1u-%10.10u" From fbeee6b12268b9fea06bdae93463eec5deabc68f Mon Sep 17 00:00:00 2001 From: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com> Date: Sun, 29 Sep 2024 20:42:17 +0300 Subject: [PATCH 02/23] Misc mqtt related code cleanup (#18622) * Remove pthread locks / use spinlocks * Remove redundant checks as mallocz, callocz can't return NULL * Change logging More code cleanup * Change random number generation Set Origin to empty Use BCryptGenRandom * More cleanup Deduplicate base64_encode/decode * Address review comments --- CMakeLists.txt | 4 +- src/aclk/aclk.c | 40 +- src/aclk/aclk_otp.c | 42 +- src/aclk/aclk_tx_msgs.c | 2 +- src/aclk/aclk_util.c | 43 +- src/aclk/aclk_util.h | 11 +- src/aclk/https_client.c | 3 +- src/aclk/mqtt_websockets/mqtt_ng.c | 491 +++++++++------------ src/aclk/mqtt_websockets/mqtt_ng.h | 1 - src/aclk/mqtt_websockets/mqtt_wss_client.c | 316 ++++--------- src/aclk/mqtt_websockets/mqtt_wss_client.h | 47 +- src/aclk/mqtt_websockets/mqtt_wss_log.c | 126 ------ src/aclk/mqtt_websockets/mqtt_wss_log.h | 39 -- src/aclk/mqtt_websockets/ws_client.c | 247 +++++------ 
src/aclk/mqtt_websockets/ws_client.h | 11 +- src/libnetdata/c_rhash/c_rhash.c | 3 - src/libnetdata/libnetdata.c | 129 ++++-- src/libnetdata/libnetdata.h | 3 +- src/libnetdata/ringbuffer/ringbuffer.c | 3 - src/libnetdata/socket/security.h | 1 + 20 files changed, 546 insertions(+), 1016 deletions(-) delete mode 100644 src/aclk/mqtt_websockets/mqtt_wss_log.c delete mode 100644 src/aclk/mqtt_websockets/mqtt_wss_log.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 1847b738fd5301..da13eb46d809c3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1555,8 +1555,6 @@ endif() set(MQTT_WEBSOCKETS_FILES src/aclk/mqtt_websockets/mqtt_wss_client.c src/aclk/mqtt_websockets/mqtt_wss_client.h - src/aclk/mqtt_websockets/mqtt_wss_log.c - src/aclk/mqtt_websockets/mqtt_wss_log.h src/aclk/mqtt_websockets/ws_client.c src/aclk/mqtt_websockets/ws_client.h src/aclk/mqtt_websockets/mqtt_ng.c @@ -1721,7 +1719,7 @@ target_include_directories(libnetdata BEFORE PUBLIC ${CONFIG_H_DIR} ${CMAKE_SOUR target_link_libraries(libnetdata PUBLIC "$<$>:atomic>" "$<$,$>:pthread;rt>" - "$<$:kernel32;advapi32;winmm;rpcrt4>" + "$<$:kernel32;advapi32;winmm;rpcrt4;bcrypt>" "$<$:m>" "${SYSTEMD_LDFLAGS}") diff --git a/src/aclk/aclk.c b/src/aclk/aclk.c index b1136b78177906..41f26ded5904b9 100644 --- a/src/aclk/aclk.c +++ b/src/aclk/aclk.c @@ -226,30 +226,6 @@ static int wait_till_agent_claim_ready() return 1; } -void aclk_mqtt_wss_log_cb(mqtt_wss_log_type_t log_type, const char* str) -{ - switch(log_type) { - case MQTT_WSS_LOG_ERROR: - case MQTT_WSS_LOG_FATAL: - nd_log(NDLS_DAEMON, NDLP_ERR, "%s", str); - return; - - case MQTT_WSS_LOG_WARN: - nd_log(NDLS_DAEMON, NDLP_WARNING, "%s", str); - return; - - case MQTT_WSS_LOG_INFO: - nd_log(NDLS_DAEMON, NDLP_INFO, "%s", str); - return; - - case MQTT_WSS_LOG_DEBUG: - return; - - default: - nd_log(NDLS_DAEMON, NDLP_ERR, "Unknown log type from mqtt_wss"); - } -} - static void msg_callback(const char *topic, const void *msg, size_t msglen, int qos) { UNUSED(qos); @@ 
-362,7 +338,7 @@ static inline void mqtt_connected_actions(mqtt_wss_client client) aclk_rcvd_cloud_msgs = 0; aclk_connection_counter++; - aclk_topic_cache_iter_t iter = ACLK_TOPIC_CACHE_ITER_T_INITIALIZER; + size_t iter = 0; while ((topic = (char*)aclk_topic_cache_iterate(&iter)) != NULL) mqtt_wss_set_topic_alias(client, topic); @@ -768,7 +744,7 @@ static int aclk_attempt_to_connect(mqtt_wss_client client) */ void *aclk_main(void *ptr) { - struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + struct netdata_static_thread *static_thread = ptr; ACLK_PROXY_TYPE proxy_type; aclk_get_proxy(&proxy_type); @@ -783,7 +759,7 @@ void *aclk_main(void *ptr) if (wait_till_agent_claim_ready()) goto exit; - if (!(mqttwss_client = mqtt_wss_new("mqtt_wss", aclk_mqtt_wss_log_cb, msg_callback, puback_callback))) { + if (!((mqttwss_client = mqtt_wss_new(msg_callback, puback_callback)))) { netdata_log_error("Couldn't initialize MQTT_WSS network library"); goto exit; } @@ -1025,22 +1001,22 @@ char *aclk_state(void) } buffer_sprintf(wb, "Online: %s\nReconnect count: %d\nBanned By Cloud: %s\n", aclk_online() ? "Yes" : "No", aclk_connection_counter > 0 ? (aclk_connection_counter - 1) : 0, aclk_disable_runtime ? 
"Yes" : "No"); - if (last_conn_time_mqtt && (tmptr = localtime_r(&last_conn_time_mqtt, &tmbuf)) ) { + if (last_conn_time_mqtt && ((tmptr = localtime_r(&last_conn_time_mqtt, &tmbuf))) ) { char timebuf[26]; strftime(timebuf, 26, "%Y-%m-%d %H:%M:%S", tmptr); buffer_sprintf(wb, "Last Connection Time: %s\n", timebuf); } - if (last_conn_time_appl && (tmptr = localtime_r(&last_conn_time_appl, &tmbuf)) ) { + if (last_conn_time_appl && ((tmptr = localtime_r(&last_conn_time_appl, &tmbuf))) ) { char timebuf[26]; strftime(timebuf, 26, "%Y-%m-%d %H:%M:%S", tmptr); buffer_sprintf(wb, "Last Connection Time + %d PUBACKs received: %s\n", ACLK_PUBACKS_CONN_STABLE, timebuf); } - if (last_disconnect_time && (tmptr = localtime_r(&last_disconnect_time, &tmbuf)) ) { + if (last_disconnect_time && ((tmptr = localtime_r(&last_disconnect_time, &tmbuf))) ) { char timebuf[26]; strftime(timebuf, 26, "%Y-%m-%d %H:%M:%S", tmptr); buffer_sprintf(wb, "Last Disconnect Time: %s\n", timebuf); } - if (!aclk_connected && next_connection_attempt && (tmptr = localtime_r(&next_connection_attempt, &tmbuf)) ) { + if (!aclk_connected && next_connection_attempt && ((tmptr = localtime_r(&next_connection_attempt, &tmbuf))) ) { char timebuf[26]; strftime(timebuf, 26, "%Y-%m-%d %H:%M:%S", tmptr); buffer_sprintf(wb, "Next Connection Attempt At: %s\nLast Backoff: %.3f", timebuf, last_backoff_value); @@ -1107,7 +1083,7 @@ static void fill_alert_status_for_host_json(json_object *obj, RRDHOST *host) static json_object *timestamp_to_json(const time_t *t) { struct tm *tmptr, tmbuf; - if (*t && (tmptr = gmtime_r(t, &tmbuf)) ) { + if (*t && ((tmptr = gmtime_r(t, &tmbuf))) ) { char timebuf[26]; strftime(timebuf, 26, "%Y-%m-%d %H:%M:%S", tmptr); return json_object_new_string(timebuf); diff --git a/src/aclk/aclk_otp.c b/src/aclk/aclk_otp.c index d3aade76e178d6..b2b8ab5a4a0086 100644 --- a/src/aclk/aclk_otp.c +++ b/src/aclk/aclk_otp.c @@ -267,40 +267,8 @@ static int aclk_parse_otp_error(const char *json_str) { } #endif -#if 
defined(OPENSSL_VERSION_NUMBER) && OPENSSL_VERSION_NUMBER < OPENSSL_VERSION_110 -static EVP_ENCODE_CTX *EVP_ENCODE_CTX_new(void) -{ - EVP_ENCODE_CTX *ctx = OPENSSL_malloc(sizeof(*ctx)); - - if (ctx != NULL) { - memset(ctx, 0, sizeof(*ctx)); - } - return ctx; -} -static void EVP_ENCODE_CTX_free(EVP_ENCODE_CTX *ctx) -{ - OPENSSL_free(ctx); - return; -} -#endif - #define CHALLENGE_LEN 256 #define CHALLENGE_LEN_BASE64 344 -inline static int base64_decode_helper(unsigned char *out, int *outl, const unsigned char *in, int in_len) -{ - unsigned char remaining_data[CHALLENGE_LEN]; - EVP_ENCODE_CTX *ctx = EVP_ENCODE_CTX_new(); - EVP_DecodeInit(ctx); - EVP_DecodeUpdate(ctx, out, outl, in, in_len); - int remainder = 0; - EVP_DecodeFinal(ctx, remaining_data, &remainder); - EVP_ENCODE_CTX_free(ctx); - if (remainder) { - netdata_log_error("Unexpected data at EVP_DecodeFinal"); - return 1; - } - return 0; -} #define OTP_URL_PREFIX "/api/v1/auth/node/" int aclk_get_otp_challenge(url_t *target, const char *agent_id, unsigned char **challenge, int *challenge_bytes, bool *fallback_ipv4) @@ -347,7 +315,7 @@ int aclk_get_otp_challenge(url_t *target, const char *agent_id, unsigned char ** goto cleanup_json; } const char *challenge_base64; - if (!(challenge_base64 = json_object_get_string(challenge_json))) { + if (!((challenge_base64 = json_object_get_string(challenge_json)))) { netdata_log_error("Failed to extract challenge from JSON object"); goto cleanup_json; } @@ -356,8 +324,9 @@ int aclk_get_otp_challenge(url_t *target, const char *agent_id, unsigned char ** goto cleanup_json; } - *challenge = mallocz((CHALLENGE_LEN_BASE64 / 4) * 3); - base64_decode_helper(*challenge, challenge_bytes, (const unsigned char*)challenge_base64, strlen(challenge_base64)); + *challenge = mallocz(CHALLENGE_LEN); + *challenge_bytes = netdata_base64_decode(*challenge, (const unsigned char *) challenge_base64, CHALLENGE_LEN_BASE64); + if (*challenge_bytes != CHALLENGE_LEN) { netdata_log_error("Unexpected 
challenge length of %d instead of %d", *challenge_bytes, CHALLENGE_LEN); freez(*challenge); @@ -375,7 +344,6 @@ int aclk_get_otp_challenge(url_t *target, const char *agent_id, unsigned char ** int aclk_send_otp_response(const char *agent_id, const unsigned char *response, int response_bytes, url_t *target, struct auth_data *mqtt_auth, bool *fallback_ipv4) { - int len; int rc = 1; https_req_t req = HTTPS_REQ_T_INITIALIZER; https_req_response_t resp = HTTPS_REQ_RESPONSE_T_INITIALIZER; @@ -387,7 +355,7 @@ int aclk_send_otp_response(const char *agent_id, const unsigned char *response, unsigned char base64[CHALLENGE_LEN_BASE64 + 1]; memset(base64, 0, CHALLENGE_LEN_BASE64 + 1); - base64_encode_helper(base64, &len, response, response_bytes); + (void) netdata_base64_encode(base64, response, response_bytes); BUFFER *url = buffer_create(strlen(OTP_URL_PREFIX) + UUID_STR_LEN + 20, &netdata_buffers_statistics.buffers_aclk); BUFFER *resp_json = buffer_create(strlen(OTP_URL_PREFIX) + UUID_STR_LEN + 20, &netdata_buffers_statistics.buffers_aclk); diff --git a/src/aclk/aclk_tx_msgs.c b/src/aclk/aclk_tx_msgs.c index 8319e93654433b..2d256279e16db8 100644 --- a/src/aclk/aclk_tx_msgs.c +++ b/src/aclk/aclk_tx_msgs.c @@ -77,7 +77,7 @@ static short aclk_send_message_with_bin_payload(mqtt_wss_client client, json_obj int rc = mqtt_wss_publish5(client, (char*)topic, NULL, full_msg, &freez_aclk_publish5b, full_msg_len, MQTT_WSS_PUB_QOS1, &packet_id); - if (rc == MQTT_WSS_ERR_TOO_BIG_FOR_SERVER) + if (rc == MQTT_WSS_ERR_MSG_TOO_BIG) return HTTP_RESP_CONTENT_TOO_LONG; return 0; diff --git a/src/aclk/aclk_util.c b/src/aclk/aclk_util.c index 215782e507817c..16f57fe9aea325 100644 --- a/src/aclk/aclk_util.c +++ b/src/aclk/aclk_util.c @@ -309,7 +309,7 @@ const char *aclk_get_topic(enum aclk_topics topic) * having to resort to callbacks. 
*/ -const char *aclk_topic_cache_iterate(aclk_topic_cache_iter_t *iter) +const char *aclk_topic_cache_iterate(size_t *iter) { if (!aclk_topic_cache) { netdata_log_error("Topic cache not initialized when %s was called.", __FUNCTION__); @@ -434,44 +434,3 @@ void aclk_set_proxy(char **ohost, int *port, char **uname, char **pwd, enum mqtt freez(proxy); } - -#if defined(OPENSSL_VERSION_NUMBER) && OPENSSL_VERSION_NUMBER < OPENSSL_VERSION_110 -static EVP_ENCODE_CTX *EVP_ENCODE_CTX_new(void) -{ - EVP_ENCODE_CTX *ctx = OPENSSL_malloc(sizeof(*ctx)); - - if (ctx != NULL) { - memset(ctx, 0, sizeof(*ctx)); - } - return ctx; -} -static void EVP_ENCODE_CTX_free(EVP_ENCODE_CTX *ctx) -{ - OPENSSL_free(ctx); - return; -} -#endif - -int base64_encode_helper(unsigned char *out, int *outl, const unsigned char *in, int in_len) -{ - int len; - unsigned char *str = out; - EVP_ENCODE_CTX *ctx = EVP_ENCODE_CTX_new(); - EVP_EncodeInit(ctx); - EVP_EncodeUpdate(ctx, str, outl, in, in_len); - str += *outl; - EVP_EncodeFinal(ctx, str, &len); - *outl += len; - - str = out; - while(*str) { - if (*str != 0x0D && *str != 0x0A) - *out++ = *str++; - else - str++; - } - *out = 0; - - EVP_ENCODE_CTX_free(ctx); - return 0; -} diff --git a/src/aclk/aclk_util.h b/src/aclk/aclk_util.h index 3ab6f6f2edc380..24e17996431722 100644 --- a/src/aclk/aclk_util.h +++ b/src/aclk/aclk_util.h @@ -93,15 +93,10 @@ enum aclk_topics { ACLK_TOPICID_CTXS_UPDATED = 20 }; -typedef size_t aclk_topic_cache_iter_t; -#define ACLK_TOPIC_CACHE_ITER_T_INITIALIZER (0) - const char *aclk_get_topic(enum aclk_topics topic); -int aclk_generate_topic_cache(struct json_object *json); +int aclk_generate_topic_cache(json_object *json); void free_topic_cache(void); -const char *aclk_topic_cache_iterate(aclk_topic_cache_iter_t *iter); -// TODO -// aclk_topics_reload //when claim id changes +const char *aclk_topic_cache_iterate(size_t *iter); #ifdef ACLK_LOG_CONVERSATION_DIR extern volatile int aclk_conversation_log_counter; @@ -113,6 +108,4 @@ 
unsigned long int aclk_tbeb_delay(int reset, int base, unsigned long int min, un void aclk_set_proxy(char **ohost, int *port, char **uname, char **pwd, enum mqtt_wss_proxy_type *type); -int base64_encode_helper(unsigned char *out, int *outl, const unsigned char *in, int in_len); - #endif /* ACLK_UTIL_H */ diff --git a/src/aclk/https_client.c b/src/aclk/https_client.c index 5c99fad5724661..f144eaf15d5942 100644 --- a/src/aclk/https_client.c +++ b/src/aclk/https_client.c @@ -556,7 +556,7 @@ static int handle_http_request(https_req_ctx_t *ctx) { // we remove those but during encoding we need that space in the buffer creds_base64_len += (1+(creds_base64_len/64)) * strlen("\n"); char *creds_base64 = callocz(1, creds_base64_len + 1); - base64_encode_helper((unsigned char*)creds_base64, &creds_base64_len, (unsigned char*)creds_plain, creds_plain_len); + (void) netdata_base64_encode((unsigned char *)creds_base64, (unsigned char *)creds_plain, creds_plain_len); buffer_sprintf(hdr, "Proxy-Authorization: Basic %s\x0D\x0A", creds_base64); freez(creds_plain); } @@ -584,7 +584,6 @@ static int handle_http_request(https_req_ctx_t *ctx) { if (ctx->parse_ctx.chunked_response) freez(ctx->parse_ctx.chunked_response); rc = 4; - goto err_exit; } err_exit: diff --git a/src/aclk/mqtt_websockets/mqtt_ng.c b/src/aclk/mqtt_websockets/mqtt_ng.c index 96099aa687cd64..daf7931151bdec 100644 --- a/src/aclk/mqtt_websockets/mqtt_ng.c +++ b/src/aclk/mqtt_websockets/mqtt_ng.c @@ -8,16 +8,8 @@ #include "common_internal.h" #include "mqtt_constants.h" -#include "mqtt_wss_log.h" #include "mqtt_ng.h" -#define UNIT_LOG_PREFIX "mqtt_client: " -#define FATAL(fmt, ...) mws_fatal(client->log, UNIT_LOG_PREFIX fmt, ##__VA_ARGS__) -#define ERROR(fmt, ...) mws_error(client->log, UNIT_LOG_PREFIX fmt, ##__VA_ARGS__) -#define WARN(fmt, ...) mws_warn (client->log, UNIT_LOG_PREFIX fmt, ##__VA_ARGS__) -#define INFO(fmt, ...) mws_info (client->log, UNIT_LOG_PREFIX fmt, ##__VA_ARGS__) -#define DEBUG(fmt, ...) 
mws_debug(client->log, UNIT_LOG_PREFIX fmt, ##__VA_ARGS__) - #define SMALL_STRING_DONT_FRAGMENT_LIMIT 128 #define LOCK_HDR_BUFFER(buffer) spinlock_lock(&((buffer)->spinlock)) @@ -216,7 +208,7 @@ struct topic_aliases_data { c_rhash stoi_dict; uint32_t idx_max; uint32_t idx_assigned; - pthread_rwlock_t rwlock; + SPINLOCK spinlock; }; struct mqtt_ng_client { @@ -226,8 +218,6 @@ struct mqtt_ng_client { mqtt_msg_data connect_msg; - mqtt_wss_log_ctx_t log; - mqtt_ng_send_fnc_t send_fnc_ptr; void *user_ctx; @@ -245,7 +235,7 @@ struct mqtt_ng_client { unsigned int ping_pending:1; struct mqtt_ng_stats stats; - pthread_mutex_t stats_mutex; + SPINLOCK stats_spinlock; struct topic_aliases_data tx_topic_aliases; c_rhash rx_aliases; @@ -399,7 +389,7 @@ enum memory_mode { CALLER_RESPONSIBLE }; -static inline enum memory_mode ptr2memory_mode(void * ptr) { +static enum memory_mode ptr2memory_mode(void * ptr) { if (ptr == NULL) return MEMCPY; if (ptr == CALLER_RESPONSIBILITY) @@ -484,15 +474,8 @@ static void buffer_rebuild(struct header_buffer *buf) } while(frag); } -static void buffer_garbage_collect(struct header_buffer *buf, mqtt_wss_log_ctx_t log_ctx) +static void buffer_garbage_collect(struct header_buffer *buf) { -#if !defined(MQTT_DEBUG_VERBOSE) && !defined(ADDITIONAL_CHECKS) - (void) log_ctx; -#endif -#ifdef MQTT_DEBUG_VERBOSE - mws_debug(log_ctx, "Buffer Garbage Collection!"); -#endif - struct buffer_fragment *frag = BUFFER_FIRST_FRAG(buf); while (frag) { if (!frag_is_marked_for_gc(frag)) @@ -503,12 +486,8 @@ static void buffer_garbage_collect(struct header_buffer *buf, mqtt_wss_log_ctx_t frag = frag->next; } - if (frag == BUFFER_FIRST_FRAG(buf)) { -#ifdef MQTT_DEBUG_VERBOSE - mws_debug(log_ctx, "Buffer Garbage Collection! 
No Space Reclaimed!"); -#endif + if (frag == BUFFER_FIRST_FRAG(buf)) return; - } if (!frag) { buf->tail_frag = NULL; @@ -527,21 +506,17 @@ static void buffer_garbage_collect(struct header_buffer *buf, mqtt_wss_log_ctx_t buffer_rebuild(buf); } -static void transaction_buffer_garbage_collect(struct transaction_buffer *buf, mqtt_wss_log_ctx_t log_ctx) +static void transaction_buffer_garbage_collect(struct transaction_buffer *buf) { -#ifdef MQTT_DEBUG_VERBOSE - mws_debug(log_ctx, "Transaction Buffer Garbage Collection! %s", buf->sending_frag == NULL ? "NULL" : "in flight message"); -#endif - // Invalidate the cached sending fragment // as we will move data around if (buf->sending_frag != &ping_frag) buf->sending_frag = NULL; - buffer_garbage_collect(&buf->hdr_buffer, log_ctx); + buffer_garbage_collect(&buf->hdr_buffer); } -static int transaction_buffer_grow(struct transaction_buffer *buf, mqtt_wss_log_ctx_t log_ctx, float rate, size_t max) +static int transaction_buffer_grow(struct transaction_buffer *buf, float rate, size_t max) { if (buf->hdr_buffer.size >= max) return 0; @@ -557,29 +532,25 @@ static int transaction_buffer_grow(struct transaction_buffer *buf, mqtt_wss_log_ void *ret = reallocz(buf->hdr_buffer.data, buf->hdr_buffer.size); if (ret == NULL) { - mws_warn(log_ctx, "Buffer growth failed (realloc)"); + nd_log(NDLS_DAEMON, NDLP_WARNING, "Buffer growth failed (realloc)"); return 1; } - mws_debug(log_ctx, "Message metadata buffer was grown"); + nd_log(NDLS_DAEMON, NDLP_DEBUG, "Message metadata buffer was grown"); buf->hdr_buffer.data = ret; buffer_rebuild(&buf->hdr_buffer); return 0; } -inline static int transaction_buffer_init(struct transaction_buffer *to_init, size_t size) +inline static void transaction_buffer_init(struct transaction_buffer *to_init, size_t size) { spinlock_init(&to_init->spinlock); to_init->hdr_buffer.size = size; to_init->hdr_buffer.data = mallocz(size); - if (to_init->hdr_buffer.data == NULL) - return 1; - to_init->hdr_buffer.tail = 
to_init->hdr_buffer.data; to_init->hdr_buffer.tail_frag = NULL; - return 0; } static void transaction_buffer_destroy(struct transaction_buffer *to_init) @@ -620,54 +591,30 @@ void transaction_buffer_transaction_rollback(struct transaction_buffer *buf, str struct mqtt_ng_client *mqtt_ng_init(struct mqtt_ng_init *settings) { struct mqtt_ng_client *client = callocz(1, sizeof(struct mqtt_ng_client)); - if (client == NULL) - return NULL; - if (transaction_buffer_init(&client->main_buffer, HEADER_BUFFER_SIZE)) - goto err_free_client; + transaction_buffer_init(&client->main_buffer, HEADER_BUFFER_SIZE); client->rx_aliases = RX_ALIASES_INITIALIZE(); - if (client->rx_aliases == NULL) - goto err_free_trx_buf; - if (pthread_mutex_init(&client->stats_mutex, NULL)) - goto err_free_rx_alias; + spinlock_init(&client->stats_spinlock); + spinlock_init(&client->tx_topic_aliases.spinlock); client->tx_topic_aliases.stoi_dict = TX_ALIASES_INITIALIZE(); - if (client->tx_topic_aliases.stoi_dict == NULL) - goto err_free_stats_mutex; client->tx_topic_aliases.idx_max = UINT16_MAX; - if (pthread_rwlock_init(&client->tx_topic_aliases.rwlock, NULL)) - goto err_free_tx_alias; - // TODO just embed the struct into mqtt_ng_client client->parser.received_data = settings->data_in; client->send_fnc_ptr = settings->data_out_fnc; client->user_ctx = settings->user_ctx; - client->log = settings->log; - client->puback_callback = settings->puback_callback; client->connack_callback = settings->connack_callback; client->msg_callback = settings->msg_callback; return client; - -err_free_tx_alias: - c_rhash_destroy(client->tx_topic_aliases.stoi_dict); -err_free_stats_mutex: - pthread_mutex_destroy(&client->stats_mutex); -err_free_rx_alias: - c_rhash_destroy(client->rx_aliases); -err_free_trx_buf: - transaction_buffer_destroy(&client->main_buffer); -err_free_client: - freez(client); - return NULL; } -static inline uint8_t get_control_packet_type(uint8_t first_hdr_byte) +static uint8_t 
get_control_packet_type(uint8_t first_hdr_byte) { return first_hdr_byte >> 4; } @@ -699,33 +646,27 @@ static void mqtt_ng_destroy_tx_alias_hash(c_rhash hash) void mqtt_ng_destroy(struct mqtt_ng_client *client) { transaction_buffer_destroy(&client->main_buffer); - pthread_mutex_destroy(&client->stats_mutex); mqtt_ng_destroy_tx_alias_hash(client->tx_topic_aliases.stoi_dict); - pthread_rwlock_destroy(&client->tx_topic_aliases.rwlock); mqtt_ng_destroy_rx_alias_hash(client->rx_aliases); freez(client); } -int frag_set_external_data(mqtt_wss_log_ctx_t log, struct buffer_fragment *frag, void *data, size_t data_len, free_fnc_t data_free_fnc) +int frag_set_external_data(struct buffer_fragment *frag, void *data, size_t data_len, free_fnc_t data_free_fnc) { if (frag->len) { // TODO?: This could potentially be done in future if we set rule // external data always follows in buffer data // could help reduce fragmentation in some messages but // currently not worth it considering time is tight - mws_fatal(log, UNIT_LOG_PREFIX "INTERNAL ERROR: Cannot set external data to fragment already containing in buffer data!"); + nd_log(NDLS_DAEMON, NDLP_ERR, "INTERNAL ERROR: Cannot set external data to fragment already containing in buffer data!"); return 1; } switch (ptr2memory_mode(data_free_fnc)) { case MEMCPY: frag->data = mallocz(data_len); - if (frag->data == NULL) { - mws_error(log, UNIT_LOG_PREFIX "OOM while malloc @_optimized_add"); - return 1; - } memcpy(frag->data, data, data_len); break; case EXTERNAL_FREE_AFTER_USE: @@ -807,18 +748,18 @@ static size_t mqtt_ng_connect_size(struct mqtt_auth_properties *auth, #define PACK_2B_INT(buffer, integer, frag) { *(uint16_t *)WRITE_POS(frag) = htobe16((integer)); \ DATA_ADVANCE(buffer, sizeof(uint16_t), frag); } -static int _optimized_add(struct header_buffer *buf, mqtt_wss_log_ctx_t log_ctx, void *data, size_t data_len, free_fnc_t data_free_fnc, struct buffer_fragment **frag) +static int _optimized_add(struct header_buffer *buf, void 
*data, size_t data_len, free_fnc_t data_free_fnc, struct buffer_fragment **frag) { if (data_len > SMALL_STRING_DONT_FRAGMENT_LIMIT) { buffer_frag_flag_t flags = BUFFER_FRAG_DATA_EXTERNAL; if ((*frag)->flags & BUFFER_FRAG_GARBAGE_COLLECT_ON_SEND) flags |= BUFFER_FRAG_GARBAGE_COLLECT_ON_SEND; if( (*frag = buffer_new_frag(buf, flags)) == NULL ) { - mws_error(log_ctx, "Out of buffer space while generating the message"); + nd_log(NDLS_DAEMON, NDLP_ERR, "Out of buffer space while generating the message"); return 1; } - if (frag_set_external_data(log_ctx, *frag, data, data_len, data_free_fnc)) { - mws_error(log_ctx, "Error adding external data to newly created fragment"); + if (frag_set_external_data(*frag, data, data_len, data_free_fnc)) { + nd_log(NDLS_DAEMON, NDLP_ERR, "Error adding external data to newly created fragment"); return 1; } // we dont want to write to this fragment anymore @@ -833,31 +774,30 @@ static int _optimized_add(struct header_buffer *buf, mqtt_wss_log_ctx_t log_ctx, return 0; } -#define TRY_GENERATE_MESSAGE(generator_function, client, ...) \ - int rc = generator_function(&client->main_buffer, client->log, ##__VA_ARGS__); \ +#define TRY_GENERATE_MESSAGE(generator_function, ...) 
\ + int rc = generator_function(&client->main_buffer, ##__VA_ARGS__); \ if (rc == MQTT_NG_MSGGEN_BUFFER_OOM) { \ LOCK_HDR_BUFFER(&client->main_buffer); \ - transaction_buffer_garbage_collect((&client->main_buffer), client->log); \ + transaction_buffer_garbage_collect((&client->main_buffer)); \ UNLOCK_HDR_BUFFER(&client->main_buffer); \ - rc = generator_function(&client->main_buffer, client->log, ##__VA_ARGS__); \ + rc = generator_function(&client->main_buffer, ##__VA_ARGS__); \ if (rc == MQTT_NG_MSGGEN_BUFFER_OOM && client->max_mem_bytes) { \ LOCK_HDR_BUFFER(&client->main_buffer); \ - transaction_buffer_grow((&client->main_buffer), client->log, GROWTH_FACTOR, client->max_mem_bytes); \ + transaction_buffer_grow((&client->main_buffer),GROWTH_FACTOR, client->max_mem_bytes); \ UNLOCK_HDR_BUFFER(&client->main_buffer); \ - rc = generator_function(&client->main_buffer, client->log, ##__VA_ARGS__); \ + rc = generator_function(&client->main_buffer, ##__VA_ARGS__); \ } \ if (rc == MQTT_NG_MSGGEN_BUFFER_OOM) \ - mws_error(client->log, "%s failed to generate message due to insufficient buffer space (line %d)", __FUNCTION__, __LINE__); \ + nd_log(NDLS_DAEMON, NDLP_ERR, "%s failed to generate message due to insufficient buffer space (line %d)", __FUNCTION__, __LINE__); \ } \ if (rc == MQTT_NG_MSGGEN_OK) { \ - pthread_mutex_lock(&client->stats_mutex); \ + spinlock_lock(&client->stats_spinlock); \ client->stats.tx_messages_queued++; \ - pthread_mutex_unlock(&client->stats_mutex); \ + spinlock_unlock(&client->stats_spinlock); \ } \ return rc; mqtt_msg_data mqtt_ng_generate_connect(struct transaction_buffer *trx_buf, - mqtt_wss_log_ctx_t log_ctx, struct mqtt_auth_properties *auth, struct mqtt_lwt_properties *lwt, uint8_t clean_start, @@ -865,7 +805,7 @@ mqtt_msg_data mqtt_ng_generate_connect(struct transaction_buffer *trx_buf, { // Sanity Checks First (are given parameters correct and up to MQTT spec) if (!auth->client_id) { - mws_error(log_ctx, "ClientID must be set. 
[MQTT-3.1.3-3]"); + nd_log(NDLS_DAEMON, NDLP_ERR, "ClientID must be set. [MQTT-3.1.3-3]"); return NULL; } @@ -876,29 +816,29 @@ mqtt_msg_data mqtt_ng_generate_connect(struct transaction_buffer *trx_buf, // however server MUST allow ClientIDs between 1-23 bytes [MQTT-3.1.3-5] // so we will warn client server might not like this and he is using it // at his own risk! - mws_warn(log_ctx, "client_id provided is empty string. This might not be allowed by server [MQTT-3.1.3-6]"); + nd_log(NDLS_DAEMON, NDLP_WARNING, "client_id provided is empty string. This might not be allowed by server [MQTT-3.1.3-6]"); } if(len > MQTT_MAX_CLIENT_ID) { // [MQTT-3.1.3-5] server MUST allow client_id length 1-32 // server MAY allow longer client_id, if user provides longer client_id // warn them he is doing so at his own risk! - mws_warn(log_ctx, "client_id provided is longer than 23 bytes, server might not allow that [MQTT-3.1.3-5]"); + nd_log(NDLS_DAEMON, NDLP_WARNING, "client_id provided is longer than 23 bytes, server might not allow that [MQTT-3.1.3-5]"); } if (lwt) { if (lwt->will_message && lwt->will_message_size > 65535) { - mws_error(log_ctx, "Will message cannot be longer than 65535 bytes due to MQTT protocol limitations [MQTT-3.1.3-4] and [MQTT-1.5.6]"); + nd_log(NDLS_DAEMON, NDLP_ERR, "Will message cannot be longer than 65535 bytes due to MQTT protocol limitations [MQTT-3.1.3-4] and [MQTT-1.5.6]"); return NULL; } if (!lwt->will_topic) { //TODO topic given with strlen==0 ? 
check specs - mws_error(log_ctx, "If will message is given will topic must also be given [MQTT-3.1.3.3]"); + nd_log(NDLS_DAEMON, NDLP_ERR, "If will message is given will topic must also be given [MQTT-3.1.3.3]"); return NULL; } if (lwt->will_qos > MQTT_MAX_QOS) { // refer to [MQTT-3-1.2-12] - mws_error(log_ctx, "QOS for LWT message is bigger than max"); + nd_log(NDLS_DAEMON, NDLP_ERR, "QOS for LWT message is bigger than max"); return NULL; } } @@ -932,8 +872,10 @@ mqtt_msg_data mqtt_ng_generate_connect(struct transaction_buffer *trx_buf, *connect_flags = 0; if (auth->username) *connect_flags |= MQTT_CONNECT_FLAG_USERNAME; + if (auth->password) *connect_flags |= MQTT_CONNECT_FLAG_PASSWORD; + if (lwt) { *connect_flags |= MQTT_CONNECT_FLAG_LWT; *connect_flags |= lwt->will_qos << MQTT_CONNECT_FLAG_QOS_BITSHIFT; @@ -957,7 +899,7 @@ mqtt_msg_data mqtt_ng_generate_connect(struct transaction_buffer *trx_buf, // [MQTT-3.1.3.1] Client identifier CHECK_BYTES_AVAILABLE(&trx_buf->hdr_buffer, 2, goto fail_rollback); PACK_2B_INT(&trx_buf->hdr_buffer, strlen(auth->client_id), frag); - if (_optimized_add(&trx_buf->hdr_buffer, log_ctx, auth->client_id, strlen(auth->client_id), auth->client_id_free, &frag)) + if (_optimized_add(&trx_buf->hdr_buffer, auth->client_id, strlen(auth->client_id), auth->client_id_free, &frag)) goto fail_rollback; if (lwt != NULL) { @@ -971,7 +913,7 @@ mqtt_msg_data mqtt_ng_generate_connect(struct transaction_buffer *trx_buf, // Will Topic [MQTT-3.1.3.3] CHECK_BYTES_AVAILABLE(&trx_buf->hdr_buffer, 2, goto fail_rollback); PACK_2B_INT(&trx_buf->hdr_buffer, strlen(lwt->will_topic), frag); - if (_optimized_add(&trx_buf->hdr_buffer, log_ctx, lwt->will_topic, strlen(lwt->will_topic), lwt->will_topic_free, &frag)) + if (_optimized_add(&trx_buf->hdr_buffer, lwt->will_topic, strlen(lwt->will_topic), lwt->will_topic_free, &frag)) goto fail_rollback; // Will Payload [MQTT-3.1.3.4] @@ -979,7 +921,7 @@ mqtt_msg_data mqtt_ng_generate_connect(struct transaction_buffer 
*trx_buf, BUFFER_TRANSACTION_NEW_FRAG(&trx_buf->hdr_buffer, 0, frag, goto fail_rollback); CHECK_BYTES_AVAILABLE(&trx_buf->hdr_buffer, 2, goto fail_rollback); PACK_2B_INT(&trx_buf->hdr_buffer, lwt->will_message_size, frag); - if (_optimized_add(&trx_buf->hdr_buffer, log_ctx, lwt->will_message, lwt->will_message_size, lwt->will_topic_free, &frag)) + if (_optimized_add(&trx_buf->hdr_buffer, lwt->will_message, lwt->will_message_size, lwt->will_topic_free, &frag)) goto fail_rollback; } } @@ -989,7 +931,7 @@ mqtt_msg_data mqtt_ng_generate_connect(struct transaction_buffer *trx_buf, BUFFER_TRANSACTION_NEW_FRAG(&trx_buf->hdr_buffer, 0, frag, goto fail_rollback); CHECK_BYTES_AVAILABLE(&trx_buf->hdr_buffer, 2, goto fail_rollback); PACK_2B_INT(&trx_buf->hdr_buffer, strlen(auth->username), frag); - if (_optimized_add(&trx_buf->hdr_buffer, log_ctx, auth->username, strlen(auth->username), auth->username_free, &frag)) + if (_optimized_add(&trx_buf->hdr_buffer, auth->username, strlen(auth->username), auth->username_free, &frag)) goto fail_rollback; } @@ -998,7 +940,7 @@ mqtt_msg_data mqtt_ng_generate_connect(struct transaction_buffer *trx_buf, BUFFER_TRANSACTION_NEW_FRAG(&trx_buf->hdr_buffer, 0, frag, goto fail_rollback); CHECK_BYTES_AVAILABLE(&trx_buf->hdr_buffer, 2, goto fail_rollback); PACK_2B_INT(&trx_buf->hdr_buffer, strlen(auth->password), frag); - if (_optimized_add(&trx_buf->hdr_buffer, log_ctx, auth->password, strlen(auth->password), auth->password_free, &frag)) + if (_optimized_add(&trx_buf->hdr_buffer, auth->password, strlen(auth->password), auth->password_free, &frag)) goto fail_rollback; } trx_buf->hdr_buffer.tail_frag->flags |= BUFFER_FRAG_MQTT_PACKET_TAIL; @@ -1024,28 +966,23 @@ int mqtt_ng_connect(struct mqtt_ng_client *client, buffer_purge(&client->main_buffer.hdr_buffer); UNLOCK_HDR_BUFFER(&client->main_buffer); - pthread_rwlock_wrlock(&client->tx_topic_aliases.rwlock); + spinlock_lock(&client->tx_topic_aliases.spinlock); // according to MQTT spec topic aliases 
should not be persisted // even if clean session is true mqtt_ng_destroy_tx_alias_hash(client->tx_topic_aliases.stoi_dict); + client->tx_topic_aliases.stoi_dict = TX_ALIASES_INITIALIZE(); - if (client->tx_topic_aliases.stoi_dict == NULL) { - pthread_rwlock_unlock(&client->tx_topic_aliases.rwlock); - return 1; - } client->tx_topic_aliases.idx_assigned = 0; - pthread_rwlock_unlock(&client->tx_topic_aliases.rwlock); + spinlock_unlock(&client->tx_topic_aliases.spinlock); mqtt_ng_destroy_rx_alias_hash(client->rx_aliases); client->rx_aliases = RX_ALIASES_INITIALIZE(); - if (client->rx_aliases == NULL) - return 1; - client->connect_msg = mqtt_ng_generate_connect(&client->main_buffer, client->log, auth, lwt, clean_start, keep_alive); + client->connect_msg = mqtt_ng_generate_connect(&client->main_buffer, auth, lwt, clean_start, keep_alive); if (client->connect_msg == NULL) return 1; - pthread_mutex_lock(&client->stats_mutex); + spinlock_lock(&client->stats_spinlock); if (clean_start) client->stats.tx_messages_queued = 1; else @@ -1053,7 +990,7 @@ int mqtt_ng_connect(struct mqtt_ng_client *client, client->stats.tx_messages_sent = 0; client->stats.rx_messages_rcvd = 0; - pthread_mutex_unlock(&client->stats_mutex); + spinlock_unlock(&client->stats_spinlock); client->client_state = CONNECT_PENDING; return 0; @@ -1065,15 +1002,16 @@ uint16_t get_unused_packet_id() { return packet_id ? packet_id : ++packet_id; } -static inline size_t mqtt_ng_publish_size(const char *topic, - size_t msg_len, - uint16_t topic_id) +static size_t mqtt_ng_publish_size( + const char *topic, + size_t msg_len, + uint16_t topic_id) { - size_t retval = 2 /* Topic Name Length */ - + (topic == NULL ? 0 : strlen(topic)) - + 2 /* Packet identifier */ - + 1 /* Properties Length TODO for now fixed to 1 property */ - + msg_len; + size_t retval = 2 + + (topic == NULL ? 
0 : strlen(topic)) /* Topic Name Length */ + + 2 /* Packet identifier */ + + 1 /* Properties Length for now fixed to 1 property */ + + msg_len; if (topic_id) retval += 3; @@ -1082,7 +1020,6 @@ static inline size_t mqtt_ng_publish_size(const char *topic, } int mqtt_ng_generate_publish(struct transaction_buffer *trx_buf, - mqtt_wss_log_ctx_t log_ctx, char *topic, free_fnc_t topic_free, void *msg, @@ -1121,7 +1058,7 @@ int mqtt_ng_generate_publish(struct transaction_buffer *trx_buf, // [MQTT-3.3.2.1] PACK_2B_INT(&trx_buf->hdr_buffer, topic == NULL ? 0 : strlen(topic), frag); if (topic != NULL) { - if (_optimized_add(&trx_buf->hdr_buffer, log_ctx, topic, strlen(topic), topic_free, &frag)) + if (_optimized_add(&trx_buf->hdr_buffer, topic, strlen(topic), topic_free, &frag)) goto fail_rollback; BUFFER_TRANSACTION_NEW_FRAG(&trx_buf->hdr_buffer, 0, frag, goto fail_rollback); } @@ -1145,7 +1082,7 @@ int mqtt_ng_generate_publish(struct transaction_buffer *trx_buf, if( (frag = buffer_new_frag(&trx_buf->hdr_buffer, BUFFER_FRAG_DATA_EXTERNAL)) == NULL ) goto fail_rollback; - if (frag_set_external_data(log_ctx, frag, msg, msg_len, msg_free)) + if (frag_set_external_data(frag, msg, msg_len, msg_free)) goto fail_rollback; trx_buf->hdr_buffer.tail_frag->flags |= BUFFER_FRAG_MQTT_PACKET_TAIL; @@ -1169,9 +1106,9 @@ int mqtt_ng_publish(struct mqtt_ng_client *client, uint16_t *packet_id) { struct topic_alias_data *alias = NULL; - pthread_rwlock_rdlock(&client->tx_topic_aliases.rwlock); + spinlock_lock(&client->tx_topic_aliases.spinlock); c_rhash_get_ptr_by_str(client->tx_topic_aliases.stoi_dict, topic, (void**)&alias); - pthread_rwlock_unlock(&client->tx_topic_aliases.rwlock); + spinlock_unlock(&client->tx_topic_aliases.spinlock); uint16_t topic_id = 0; @@ -1185,14 +1122,14 @@ int mqtt_ng_publish(struct mqtt_ng_client *client, } if (client->max_msg_size && PUBLISH_SP_SIZE + mqtt_ng_publish_size(topic, msg_len, topic_id) > client->max_msg_size) { - mws_error(client->log, "Message too big 
for server: %zu", msg_len); + nd_log(NDLS_DAEMON, NDLP_ERR, "Message too big for server: %zu", msg_len); return MQTT_NG_MSGGEN_MSG_TOO_BIG; } - TRY_GENERATE_MESSAGE(mqtt_ng_generate_publish, client, topic, topic_free, msg, msg_free, msg_len, publish_flags, packet_id, topic_id); + TRY_GENERATE_MESSAGE(mqtt_ng_generate_publish, topic, topic_free, msg, msg_free, msg_len, publish_flags, packet_id, topic_id); } -static inline size_t mqtt_ng_subscribe_size(struct mqtt_sub *subs, size_t sub_count) +static size_t mqtt_ng_subscribe_size(struct mqtt_sub *subs, size_t sub_count) { size_t len = 2 /* Packet Identifier */ + 1 /* Properties Length TODO for now fixed 0 */; len += sub_count * (2 /* topic filter string length */ + 1 /* [MQTT-3.8.3.1] Subscription Options Byte */); @@ -1203,7 +1140,7 @@ static inline size_t mqtt_ng_subscribe_size(struct mqtt_sub *subs, size_t sub_co return len; } -int mqtt_ng_generate_subscribe(struct transaction_buffer *trx_buf, mqtt_wss_log_ctx_t log_ctx, struct mqtt_sub *subs, size_t sub_count) +int mqtt_ng_generate_subscribe(struct transaction_buffer *trx_buf, struct mqtt_sub *subs, size_t sub_count) { // >> START THE RODEO << transaction_buffer_transaction_start(trx_buf); @@ -1238,7 +1175,7 @@ int mqtt_ng_generate_subscribe(struct transaction_buffer *trx_buf, mqtt_wss_log_ for (size_t i = 0; i < sub_count; i++) { BUFFER_TRANSACTION_NEW_FRAG(&trx_buf->hdr_buffer, 0, frag, goto fail_rollback); PACK_2B_INT(&trx_buf->hdr_buffer, strlen(subs[i].topic), frag); - if (_optimized_add(&trx_buf->hdr_buffer, log_ctx, subs[i].topic, strlen(subs[i].topic), subs[i].topic_free, &frag)) + if (_optimized_add(&trx_buf->hdr_buffer, subs[i].topic, strlen(subs[i].topic), subs[i].topic_free, &frag)) goto fail_rollback; BUFFER_TRANSACTION_NEW_FRAG(&trx_buf->hdr_buffer, 0, frag, goto fail_rollback); *WRITE_POS(frag) = subs[i].options; @@ -1255,12 +1192,11 @@ int mqtt_ng_generate_subscribe(struct transaction_buffer *trx_buf, mqtt_wss_log_ int mqtt_ng_subscribe(struct 
mqtt_ng_client *client, struct mqtt_sub *subs, size_t sub_count) { - TRY_GENERATE_MESSAGE(mqtt_ng_generate_subscribe, client, subs, sub_count); + TRY_GENERATE_MESSAGE(mqtt_ng_generate_subscribe, subs, sub_count); } -int mqtt_ng_generate_disconnect(struct transaction_buffer *trx_buf, mqtt_wss_log_ctx_t log_ctx, uint8_t reason_code) +int mqtt_ng_generate_disconnect(struct transaction_buffer *trx_buf, uint8_t reason_code) { - (void) log_ctx; // >> START THE RODEO << transaction_buffer_transaction_start(trx_buf); @@ -1299,12 +1235,11 @@ int mqtt_ng_generate_disconnect(struct transaction_buffer *trx_buf, mqtt_wss_log int mqtt_ng_disconnect(struct mqtt_ng_client *client, uint8_t reason_code) { - TRY_GENERATE_MESSAGE(mqtt_ng_generate_disconnect, client, reason_code); + TRY_GENERATE_MESSAGE(mqtt_ng_generate_disconnect, reason_code); } -static int mqtt_generate_puback(struct transaction_buffer *trx_buf, mqtt_wss_log_ctx_t log_ctx, uint16_t packet_id, uint8_t reason_code) +static int mqtt_generate_puback(struct transaction_buffer *trx_buf, uint16_t packet_id, uint8_t reason_code) { - (void) log_ctx; // >> START THE RODEO << transaction_buffer_transaction_start(trx_buf); @@ -1344,7 +1279,7 @@ static int mqtt_generate_puback(struct transaction_buffer *trx_buf, mqtt_wss_log static int mqtt_ng_puback(struct mqtt_ng_client *client, uint16_t packet_id, uint8_t reason_code) { - TRY_GENERATE_MESSAGE(mqtt_generate_puback, client, packet_id, reason_code); + TRY_GENERATE_MESSAGE(mqtt_generate_puback, packet_id, reason_code); } int mqtt_ng_ping(struct mqtt_ng_client *client) @@ -1361,7 +1296,6 @@ int mqtt_ng_ping(struct mqtt_ng_client *client) #define MQTT_NG_CLIENT_PROTOCOL_ERROR -1 #define MQTT_NG_CLIENT_SERVER_RETURNED_ERROR -2 #define MQTT_NG_CLIENT_NOT_IMPL_YET -3 -#define MQTT_NG_CLIENT_OOM -4 #define MQTT_NG_CLIENT_INTERNAL_ERROR -5 #define BUF_READ_CHECK_AT_LEAST(buf, x) \ @@ -1370,10 +1304,10 @@ int mqtt_ng_ping(struct mqtt_ng_client *client) #define vbi_parser_reset_ctx(ctx) 
memset(ctx, 0, sizeof(struct mqtt_vbi_parser_ctx)) -static int vbi_parser_parse(struct mqtt_vbi_parser_ctx *ctx, rbuf_t data, mqtt_wss_log_ctx_t log) +static int vbi_parser_parse(struct mqtt_vbi_parser_ctx *ctx, rbuf_t data) { if (ctx->bytes > MQTT_VBI_MAXBYTES - 1) { - mws_error(log, "MQTT Variable Byte Integer can't be longer than %d bytes", MQTT_VBI_MAXBYTES); + nd_log(NDLS_DAEMON, NDLP_ERR, "MQTT Variable Byte Integer can't be longer than %d bytes", MQTT_VBI_MAXBYTES); return MQTT_NG_CLIENT_PROTOCOL_ERROR; } if (!ctx->bytes || ctx->data[ctx->bytes-1] & MQTT_VBI_CONTINUATION_FLAG) { @@ -1385,7 +1319,7 @@ static int vbi_parser_parse(struct mqtt_vbi_parser_ctx *ctx, rbuf_t data, mqtt_w } if (mqtt_vbi_to_uint32(ctx->data, &ctx->result)) { - mws_error(log, "MQTT Variable Byte Integer failed to be parsed."); + nd_log(NDLS_DAEMON, NDLP_ERR, "MQTT Variable Byte Integer failed to be parsed."); return MQTT_NG_CLIENT_PROTOCOL_ERROR; } @@ -1471,12 +1405,12 @@ struct mqtt_property *get_property_by_id(struct mqtt_property *props, uint8_t pr } // Parses [MQTT-2.2.2] -static int parse_properties_array(struct mqtt_properties_parser_ctx *ctx, rbuf_t data, mqtt_wss_log_ctx_t log) +static int parse_properties_array(struct mqtt_properties_parser_ctx *ctx, rbuf_t data) { int rc; switch (ctx->state) { case PROPERTIES_LENGTH: - rc = vbi_parser_parse(&ctx->vbi_parser_ctx, data, log); + rc = vbi_parser_parse(&ctx->vbi_parser_ctx, data); if (rc == MQTT_NG_CLIENT_PARSE_DONE) { ctx->properties_length = ctx->vbi_parser_ctx.result; ctx->bytes_consumed += ctx->vbi_parser_ctx.bytes; @@ -1525,7 +1459,7 @@ static int parse_properties_array(struct mqtt_properties_parser_ctx *ctx, rbuf_t ctx->state = PROPERTY_TYPE_STR_BIN_LEN; break; default: - mws_error(log, "Unsupported property type %d for property id %d.", (int)ctx->tail->type, (int)ctx->tail->id); + nd_log(NDLS_DAEMON, NDLP_ERR, "Unsupported property type %d for property id %d.", (int)ctx->tail->type, (int)ctx->tail->id); return 
MQTT_NG_CLIENT_PROTOCOL_ERROR; } break; @@ -1543,7 +1477,7 @@ static int parse_properties_array(struct mqtt_properties_parser_ctx *ctx, rbuf_t ctx->state = PROPERTY_TYPE_STR; break; default: - mws_error(log, "Unexpected datatype in PROPERTY_TYPE_STR_BIN_LEN %d", (int)ctx->tail->type); + nd_log(NDLS_DAEMON, NDLP_ERR, "Unexpected datatype in PROPERTY_TYPE_STR_BIN_LEN %d", (int)ctx->tail->type); return MQTT_NG_CLIENT_INTERNAL_ERROR; } break; @@ -1568,7 +1502,7 @@ static int parse_properties_array(struct mqtt_properties_parser_ctx *ctx, rbuf_t ctx->state = PROPERTY_NEXT; break; case PROPERTY_TYPE_VBI: - rc = vbi_parser_parse(&ctx->vbi_parser_ctx, data, log); + rc = vbi_parser_parse(&ctx->vbi_parser_ctx, data); if (rc == MQTT_NG_CLIENT_PARSE_DONE) { ctx->tail->data.uint32 = ctx->vbi_parser_ctx.result; ctx->bytes_consumed += ctx->vbi_parser_ctx.bytes; @@ -1618,9 +1552,9 @@ static int parse_connack_varhdr(struct mqtt_ng_client *client) mqtt_properties_parser_ctx_reset(&parser->properties_parser); break; case MQTT_PARSE_VARHDR_PROPS: - return parse_properties_array(&parser->properties_parser, parser->received_data, client->log); + return parse_properties_array(&parser->properties_parser, parser->received_data); default: - ERROR("invalid state for connack varhdr parser"); + nd_log(NDLS_DAEMON, NDLP_ERR, "invalid state for connack varhdr parser"); return MQTT_NG_CLIENT_INTERNAL_ERROR; } return MQTT_NG_CLIENT_OK_CALL_AGAIN; @@ -1644,9 +1578,9 @@ static int parse_disconnect_varhdr(struct mqtt_ng_client *client) mqtt_properties_parser_ctx_reset(&parser->properties_parser); break; case MQTT_PARSE_VARHDR_PROPS: - return parse_properties_array(&parser->properties_parser, parser->received_data, client->log); + return parse_properties_array(&parser->properties_parser, parser->received_data); default: - ERROR("invalid state for connack varhdr parser"); + nd_log(NDLS_DAEMON, NDLP_ERR, "invalid state for connack varhdr parser"); return MQTT_NG_CLIENT_INTERNAL_ERROR; } return 
MQTT_NG_CLIENT_OK_CALL_AGAIN; @@ -1682,9 +1616,9 @@ static int parse_puback_varhdr(struct mqtt_ng_client *client) mqtt_properties_parser_ctx_reset(&parser->properties_parser); /* FALLTHROUGH */ case MQTT_PARSE_VARHDR_PROPS: - return parse_properties_array(&parser->properties_parser, parser->received_data, client->log); + return parse_properties_array(&parser->properties_parser, parser->received_data); default: - ERROR("invalid state for puback varhdr parser"); + nd_log(NDLS_DAEMON, NDLP_ERR, "invalid state for puback varhdr parser"); return MQTT_NG_CLIENT_INTERNAL_ERROR; } return MQTT_NG_CLIENT_OK_CALL_AGAIN; @@ -1707,7 +1641,7 @@ static int parse_suback_varhdr(struct mqtt_ng_client *client) mqtt_properties_parser_ctx_reset(&parser->properties_parser); /* FALLTHROUGH */ case MQTT_PARSE_VARHDR_PROPS: - rc = parse_properties_array(&parser->properties_parser, parser->received_data, client->log); + rc = parse_properties_array(&parser->properties_parser, parser->received_data); if (rc != MQTT_NG_CLIENT_PARSE_DONE) return rc; parser->mqtt_parsed_len += parser->properties_parser.bytes_consumed; @@ -1728,7 +1662,7 @@ static int parse_suback_varhdr(struct mqtt_ng_client *client) return MQTT_NG_CLIENT_NEED_MORE_BYTES; default: - ERROR("invalid state for suback varhdr parser"); + nd_log(NDLS_DAEMON, NDLP_ERR, "invalid state for suback varhdr parser"); return MQTT_NG_CLIENT_INTERNAL_ERROR; } return MQTT_NG_CLIENT_OK_CALL_AGAIN; @@ -1752,8 +1686,6 @@ static int parse_publish_varhdr(struct mqtt_ng_client *client) break; } publish->topic = callocz(1, publish->topic_len + 1 /* add 0x00 */); - if (publish->topic == NULL) - return MQTT_NG_CLIENT_OOM; parser->varhdr_state = MQTT_PARSE_VARHDR_TOPICNAME; /* FALLTHROUGH */ case MQTT_PARSE_VARHDR_TOPICNAME: @@ -1779,7 +1711,7 @@ static int parse_publish_varhdr(struct mqtt_ng_client *client) parser->mqtt_parsed_len += 2; /* FALLTHROUGH */ case MQTT_PARSE_VARHDR_PROPS: - rc = parse_properties_array(&parser->properties_parser, 
parser->received_data, client->log); + rc = parse_properties_array(&parser->properties_parser, parser->received_data); if (rc != MQTT_NG_CLIENT_PARSE_DONE) return rc; parser->mqtt_parsed_len += parser->properties_parser.bytes_consumed; @@ -1789,7 +1721,7 @@ static int parse_publish_varhdr(struct mqtt_ng_client *client) if (parser->mqtt_fixed_hdr_remaining_length < parser->mqtt_parsed_len) { freez(publish->topic); publish->topic = NULL; - ERROR("Error parsing PUBLISH message"); + nd_log(NDLS_DAEMON, NDLP_ERR, "Error parsing PUBLISH message"); return MQTT_NG_CLIENT_PROTOCOL_ERROR; } publish->data_len = parser->mqtt_fixed_hdr_remaining_length - parser->mqtt_parsed_len; @@ -1800,18 +1732,12 @@ static int parse_publish_varhdr(struct mqtt_ng_client *client) BUF_READ_CHECK_AT_LEAST(parser->received_data, publish->data_len); publish->data = mallocz(publish->data_len); - if (publish->data == NULL) { - freez(publish->topic); - publish->topic = NULL; - return MQTT_NG_CLIENT_OOM; - } - rbuf_pop(parser->received_data, publish->data, publish->data_len); parser->mqtt_parsed_len += publish->data_len; return MQTT_NG_CLIENT_PARSE_DONE; default: - ERROR("invalid state for publish varhdr parser"); + nd_log(NDLS_DAEMON, NDLP_ERR, "invalid state for publish varhdr parser"); return MQTT_NG_CLIENT_INTERNAL_ERROR; } return MQTT_NG_CLIENT_OK_CALL_AGAIN; @@ -1831,7 +1757,7 @@ static int parse_data(struct mqtt_ng_client *client) parser->state = MQTT_PARSE_FIXED_HEADER_LEN; break; case MQTT_PARSE_FIXED_HEADER_LEN: - rc = vbi_parser_parse(&parser->vbi_parser, parser->received_data, client->log); + rc = vbi_parser_parse(&parser->vbi_parser, parser->received_data); if (rc == MQTT_NG_CLIENT_PARSE_DONE) { parser->mqtt_fixed_hdr_remaining_length = parser->vbi_parser.result; parser->state = MQTT_PARSE_VARIABLE_HEADER; @@ -1874,7 +1800,7 @@ static int parse_data(struct mqtt_ng_client *client) return rc; case MQTT_CPT_PINGRESP: if (parser->mqtt_fixed_hdr_remaining_length) { - ERROR ("PINGRESP has to be 
0 Remaining Length."); // [MQTT-3.13.1] + nd_log(NDLS_DAEMON, NDLP_ERR, "PINGRESP has to be 0 Remaining Length."); // [MQTT-3.13.1] return MQTT_NG_CLIENT_PROTOCOL_ERROR; } parser->state = MQTT_PARSE_MQTT_PACKET_DONE; @@ -1887,7 +1813,7 @@ static int parse_data(struct mqtt_ng_client *client) } return rc; default: - ERROR("Parsing Control Packet Type %" PRIu8 " not implemented yet.", get_control_packet_type(parser->mqtt_control_packet_type)); + nd_log(NDLS_DAEMON, NDLP_ERR, "Parsing Control Packet Type %" PRIu8 " not implemented yet.", get_control_packet_type(parser->mqtt_control_packet_type)); rbuf_bump_tail(parser->received_data, parser->mqtt_fixed_hdr_remaining_length); parser->state = MQTT_PARSE_MQTT_PACKET_DONE; return MQTT_NG_CLIENT_NOT_IMPL_YET; @@ -1950,7 +1876,7 @@ static int send_fragment(struct mqtt_ng_client *client) { if (bytes) processed = client->send_fnc_ptr(client->user_ctx, ptr, bytes); else - WARN("This fragment was fully sent already. This should not happen!"); + nd_log(NDLS_DAEMON, NDLP_WARNING, "This fragment was fully sent already. 
This should not happen!"); frag->sent += processed; if (frag->sent != frag->len) @@ -1958,11 +1884,11 @@ static int send_fragment(struct mqtt_ng_client *client) { if (frag->flags & BUFFER_FRAG_MQTT_PACKET_TAIL) { client->time_of_last_send = time(NULL); - pthread_mutex_lock(&client->stats_mutex); + spinlock_lock(&client->stats_spinlock); if (client->main_buffer.sending_frag != &ping_frag) client->stats.tx_messages_queued--; client->stats.tx_messages_sent++; - pthread_mutex_unlock(&client->stats_mutex); + spinlock_unlock(&client->stats_spinlock); client->main_buffer.sending_frag = NULL; return 1; } @@ -1986,7 +1912,7 @@ static void try_send_all(struct mqtt_ng_client *client) { } while(send_all_message_fragments(client) >= 0); } -static inline void mark_message_for_gc(struct buffer_fragment *frag) +static void mark_message_for_gc(struct buffer_fragment *frag) { while (frag) { frag->flags |= BUFFER_FRAG_GARBAGE_COLLECT; @@ -2004,7 +1930,7 @@ static int mark_packet_acked(struct mqtt_ng_client *client, uint16_t packet_id) while (frag) { if ( (frag->flags & BUFFER_FRAG_MQTT_PACKET_HEAD) && frag->packet_id == packet_id) { if (!frag->sent) { - ERROR("Received packet_id (%" PRIu16 ") belongs to MQTT packet which was not yet sent!", packet_id); + nd_log(NDLS_DAEMON, NDLP_ERR, "Received packet_id (%" PRIu16 ") belongs to MQTT packet which was not yet sent!", packet_id); UNLOCK_HDR_BUFFER(&client->main_buffer); return 1; } @@ -2014,7 +1940,7 @@ static int mark_packet_acked(struct mqtt_ng_client *client, uint16_t packet_id) } frag = frag->next; } - ERROR("Received packet_id (%" PRIu16 ") is unknown!", packet_id); + nd_log(NDLS_DAEMON, NDLP_ERR, "Received packet_id (%" PRIu16 ") is unknown!", packet_id); UNLOCK_HDR_BUFFER(&client->main_buffer); return 1; } @@ -2022,110 +1948,113 @@ static int mark_packet_acked(struct mqtt_ng_client *client, uint16_t packet_id) int handle_incoming_traffic(struct mqtt_ng_client *client) { int rc; + while ((rc = parse_data(client)) == 
MQTT_NG_CLIENT_OK_CALL_AGAIN) { + ; + } + if (rc != MQTT_NG_CLIENT_MQTT_PACKET_DONE) + return rc; + struct mqtt_publish *pub; - while( (rc = parse_data(client)) == MQTT_NG_CLIENT_OK_CALL_AGAIN ); - if ( rc == MQTT_NG_CLIENT_MQTT_PACKET_DONE ) { - struct mqtt_property *prop; -#ifdef MQTT_DEBUG_VERBOSE - DEBUG("MQTT Packet Parsed Successfully!"); -#endif - pthread_mutex_lock(&client->stats_mutex); - client->stats.rx_messages_rcvd++; - pthread_mutex_unlock(&client->stats_mutex); - - switch (get_control_packet_type(client->parser.mqtt_control_packet_type)) { - case MQTT_CPT_CONNACK: -#ifdef MQTT_DEBUG_VERBOSE - DEBUG("Received CONNACK"); -#endif - LOCK_HDR_BUFFER(&client->main_buffer); - mark_message_for_gc(client->connect_msg); - UNLOCK_HDR_BUFFER(&client->main_buffer); - client->connect_msg = NULL; - if (client->client_state != CONNECTING) { - ERROR("Received unexpected CONNACK"); - client->client_state = ERROR; - return MQTT_NG_CLIENT_PROTOCOL_ERROR; - } - if ((prop = get_property_by_id(client->parser.properties_parser.head, MQTT_PROP_MAX_PKT_SIZE)) != NULL) { - INFO("MQTT server limits message size to %" PRIu32, prop->data.uint32); - client->max_msg_size = prop->data.uint32; - } - if (client->connack_callback) - client->connack_callback(client->user_ctx, client->parser.mqtt_packet.connack.reason_code); - if (!client->parser.mqtt_packet.connack.reason_code) { - INFO("MQTT Connection Accepted By Server"); - client->client_state = CONNECTED; - break; - } + struct mqtt_property *prop; + spinlock_lock(&client->stats_spinlock); + client->stats.rx_messages_rcvd++; + spinlock_unlock(&client->stats_spinlock); + + uint8_t ctrl_packet_type = get_control_packet_type(client->parser.mqtt_control_packet_type); + switch (ctrl_packet_type) { + case MQTT_CPT_CONNACK: + LOCK_HDR_BUFFER(&client->main_buffer); + mark_message_for_gc(client->connect_msg); + UNLOCK_HDR_BUFFER(&client->main_buffer); + + client->connect_msg = NULL; + + if (client->client_state != CONNECTING) { + 
nd_log(NDLS_DAEMON, NDLP_ERR, "Received unexpected CONNACK"); client->client_state = ERROR; - return MQTT_NG_CLIENT_SERVER_RETURNED_ERROR; - case MQTT_CPT_PUBACK: -#ifdef MQTT_DEBUG_VERBOSE - DEBUG("Received PUBACK %" PRIu16, client->parser.mqtt_packet.puback.packet_id); -#endif - if (mark_packet_acked(client, client->parser.mqtt_packet.puback.packet_id)) - return MQTT_NG_CLIENT_PROTOCOL_ERROR; - if (client->puback_callback) - client->puback_callback(client->parser.mqtt_packet.puback.packet_id); - break; - case MQTT_CPT_PINGRESP: -#ifdef MQTT_DEBUG_VERBOSE - DEBUG("Received PINGRESP"); -#endif - break; - case MQTT_CPT_SUBACK: -#ifdef MQTT_DEBUG_VERBOSE - DEBUG("Received SUBACK %" PRIu16, client->parser.mqtt_packet.suback.packet_id); -#endif - if (mark_packet_acked(client, client->parser.mqtt_packet.suback.packet_id)) - return MQTT_NG_CLIENT_PROTOCOL_ERROR; + return MQTT_NG_CLIENT_PROTOCOL_ERROR; + } + + if ((prop = get_property_by_id(client->parser.properties_parser.head, MQTT_PROP_MAX_PKT_SIZE)) != NULL) { + nd_log(NDLS_DAEMON, NDLP_INFO, "MQTT server limits message size to %" PRIu32, prop->data.uint32); + client->max_msg_size = prop->data.uint32; + } + + if (client->connack_callback) + client->connack_callback(client->user_ctx, client->parser.mqtt_packet.connack.reason_code); + if (!client->parser.mqtt_packet.connack.reason_code) { + nd_log(NDLS_DAEMON, NDLP_INFO, "MQTT Connection Accepted By Server"); + client->client_state = CONNECTED; break; - case MQTT_CPT_PUBLISH: -#ifdef MQTT_DEBUG_VERBOSE - DEBUG("Recevied PUBLISH"); -#endif - pub = &client->parser.mqtt_packet.publish; - if (pub->qos > 1) { - freez(pub->topic); - freez(pub->data); - return MQTT_NG_CLIENT_NOT_IMPL_YET; - } - if ( pub->qos == 1 && (rc = mqtt_ng_puback(client, pub->packet_id, 0)) ) { - client->client_state = ERROR; - ERROR("Error generating PUBACK reply for PUBLISH"); - return rc; - } - if ( (prop = get_property_by_id(client->parser.properties_parser.head, MQTT_PROP_TOPIC_ALIAS)) != NULL ) { 
- // Topic Alias property was sent from server - void *topic_ptr; - if (!c_rhash_get_ptr_by_uint64(client->rx_aliases, prop->data.uint8, &topic_ptr)) { - if (pub->topic != NULL) { - ERROR("We do not yet support topic alias reassignment"); - return MQTT_NG_CLIENT_NOT_IMPL_YET; - } - pub->topic = topic_ptr; - } else { - if (pub->topic == NULL) { - ERROR("Topic alias with id %d unknown and topic not set by server!", prop->data.uint8); - return MQTT_NG_CLIENT_PROTOCOL_ERROR; - } - c_rhash_insert_uint64_ptr(client->rx_aliases, prop->data.uint8, pub->topic); + } + client->client_state = ERROR; + return MQTT_NG_CLIENT_SERVER_RETURNED_ERROR; + + case MQTT_CPT_PUBACK: + if (mark_packet_acked(client, client->parser.mqtt_packet.puback.packet_id)) + return MQTT_NG_CLIENT_PROTOCOL_ERROR; + if (client->puback_callback) + client->puback_callback(client->parser.mqtt_packet.puback.packet_id); + break; + + case MQTT_CPT_PINGRESP: + break; + + case MQTT_CPT_SUBACK: + if (mark_packet_acked(client, client->parser.mqtt_packet.suback.packet_id)) + return MQTT_NG_CLIENT_PROTOCOL_ERROR; + break; + + case MQTT_CPT_PUBLISH: + pub = &client->parser.mqtt_packet.publish; + + if (pub->qos > 1) { + freez(pub->topic); + freez(pub->data); + return MQTT_NG_CLIENT_NOT_IMPL_YET; + } + + if ( pub->qos == 1 && ((rc = mqtt_ng_puback(client, pub->packet_id, 0))) ) { + client->client_state = ERROR; + nd_log(NDLS_DAEMON, NDLP_ERR, "Error generating PUBACK reply for PUBLISH"); + return rc; + } + + if ( (prop = get_property_by_id(client->parser.properties_parser.head, MQTT_PROP_TOPIC_ALIAS)) != NULL ) { + // Topic Alias property was sent from server + void *topic_ptr; + if (!c_rhash_get_ptr_by_uint64(client->rx_aliases, prop->data.uint8, &topic_ptr)) { + if (pub->topic != NULL) { + nd_log(NDLS_DAEMON, NDLP_ERR, "We do not yet support topic alias reassignment"); + return MQTT_NG_CLIENT_NOT_IMPL_YET; } + pub->topic = topic_ptr; + } else { + if (pub->topic == NULL) { + nd_log(NDLS_DAEMON, NDLP_ERR, "Topic alias 
with id %d unknown and topic not set by server!", prop->data.uint8); + return MQTT_NG_CLIENT_PROTOCOL_ERROR; + } + c_rhash_insert_uint64_ptr(client->rx_aliases, prop->data.uint8, pub->topic); } - if (client->msg_callback) - client->msg_callback(pub->topic, pub->data, pub->data_len, pub->qos); - // in case we have property topic alias and we have topic we take over the string - // and add pointer to it into topic alias list - if (prop == NULL) - freez(pub->topic); - freez(pub->data); - return MQTT_NG_CLIENT_WANT_WRITE; - case MQTT_CPT_DISCONNECT: - INFO ("Got MQTT DISCONNECT control packet from server. Reason code: %d", (int)client->parser.mqtt_packet.disconnect.reason_code); - client->client_state = DISCONNECTED; - break; - } + } + + if (client->msg_callback) + client->msg_callback(pub->topic, pub->data, pub->data_len, pub->qos); + // in case we have property topic alias and we have topic we take over the string + // and add pointer to it into topic alias list + if (prop == NULL) + freez(pub->topic); + freez(pub->data); + return MQTT_NG_CLIENT_WANT_WRITE; + + case MQTT_CPT_DISCONNECT: + nd_log(NDLS_DAEMON, NDLP_INFO, "Got MQTT DISCONNECT control packet from server. 
Reason code: %d", (int)client->parser.mqtt_packet.disconnect.reason_code); + client->client_state = DISCONNECTED; + break; + + default: + nd_log(NDLS_DAEMON, NDLP_INFO, "Got unknown control packet %u from server", ctrl_packet_type); + break; } return rc; @@ -2173,9 +2102,9 @@ void mqtt_ng_set_max_mem(struct mqtt_ng_client *client, size_t bytes) void mqtt_ng_get_stats(struct mqtt_ng_client *client, struct mqtt_ng_stats *stats) { - pthread_mutex_lock(&client->stats_mutex); + spinlock_lock(&client->stats_spinlock); memcpy(stats, &client->stats, sizeof(struct mqtt_ng_stats)); - pthread_mutex_unlock(&client->stats_mutex); + spinlock_unlock(&client->stats_spinlock); stats->tx_bytes_queued = 0; stats->tx_buffer_reclaimable = 0; @@ -2198,11 +2127,11 @@ void mqtt_ng_get_stats(struct mqtt_ng_client *client, struct mqtt_ng_stats *stat int mqtt_ng_set_topic_alias(struct mqtt_ng_client *client, const char *topic) { uint16_t idx; - pthread_rwlock_wrlock(&client->tx_topic_aliases.rwlock); + spinlock_lock(&client->tx_topic_aliases.spinlock); if (client->tx_topic_aliases.idx_assigned >= client->tx_topic_aliases.idx_max) { - pthread_rwlock_unlock(&client->tx_topic_aliases.rwlock); - mws_error(client->log, "Tx topic alias indexes were exhausted (current version of the library doesn't support reassigning yet. Feel free to contribute."); + spinlock_unlock(&client->tx_topic_aliases.spinlock); + nd_log(NDLS_DAEMON, NDLP_ERR, "Tx topic alias indexes were exhausted (current version of the library doesn't support reassigning yet. Feel free to contribute."); return 0; //0 is not a valid topic alias } @@ -2211,8 +2140,8 @@ int mqtt_ng_set_topic_alias(struct mqtt_ng_client *client, const char *topic) // this is not a problem for library but might be helpful to warn user // as it might indicate bug in their program (but also might be expected) idx = alias->idx; - pthread_rwlock_unlock(&client->tx_topic_aliases.rwlock); - mws_debug(client->log, "%s topic \"%s\" already has alias set. 
Ignoring.", __FUNCTION__, topic); + spinlock_unlock(&client->tx_topic_aliases.spinlock); + nd_log(NDLS_DAEMON, NDLP_DEBUG, "%s topic \"%s\" already has alias set. Ignoring.", __FUNCTION__, topic); return idx; } @@ -2223,6 +2152,6 @@ int mqtt_ng_set_topic_alias(struct mqtt_ng_client *client, const char *topic) c_rhash_insert_str_ptr(client->tx_topic_aliases.stoi_dict, topic, (void*)alias); - pthread_rwlock_unlock(&client->tx_topic_aliases.rwlock); + spinlock_unlock(&client->tx_topic_aliases.spinlock); return idx; } diff --git a/src/aclk/mqtt_websockets/mqtt_ng.h b/src/aclk/mqtt_websockets/mqtt_ng.h index 8bd7434d58dd36..1661f540e2c795 100644 --- a/src/aclk/mqtt_websockets/mqtt_ng.h +++ b/src/aclk/mqtt_websockets/mqtt_ng.h @@ -67,7 +67,6 @@ int mqtt_ng_ping(struct mqtt_ng_client *client); typedef ssize_t (*mqtt_ng_send_fnc_t)(void *user_ctx, const void* buf, size_t len); struct mqtt_ng_init { - mqtt_wss_log_ctx_t log; rbuf_t data_in; mqtt_ng_send_fnc_t data_out_fnc; void *user_ctx; diff --git a/src/aclk/mqtt_websockets/mqtt_wss_client.c b/src/aclk/mqtt_websockets/mqtt_wss_client.c index 9b478b806a12b2..2b2c972bb7fe5d 100644 --- a/src/aclk/mqtt_websockets/mqtt_wss_client.c +++ b/src/aclk/mqtt_websockets/mqtt_wss_client.c @@ -57,6 +57,8 @@ char *util_openssl_ret_err(int err) return "SSL_ERROR_SYSCALL"; case SSL_ERROR_SSL: return "SSL_ERROR_SSL"; + default: + break; } return "UNKNOWN"; } @@ -64,8 +66,6 @@ char *util_openssl_ret_err(int err) struct mqtt_wss_client_struct { ws_client *ws_client; - mqtt_wss_log_ctx_t log; - // immediate connection (e.g. 
proxy server) char *host; int port; @@ -117,69 +117,49 @@ static void mws_connack_callback_ng(void *user_ctx, int code) switch(code) { case 0: client->mqtt_connected = 1; - return; + break; //TODO manual labor: all the CONNACK error codes with some nice error message default: - mws_error(client->log, "MQTT CONNACK returned error %d", code); - return; + nd_log(NDLS_DAEMON, NDLP_ERR, "MQTT CONNACK returned error %d", code); + break; } } static ssize_t mqtt_send_cb(void *user_ctx, const void* buf, size_t len) { mqtt_wss_client client = user_ctx; -#ifdef DEBUG_ULTRA_VERBOSE - mws_debug(client->log, "mqtt_pal_sendall(len=%d)", len); -#endif int ret = ws_client_send(client->ws_client, WS_OP_BINARY_FRAME, buf, len); - if (ret >= 0 && (size_t)ret != len) { -#ifdef DEBUG_ULTRA_VERBOSE - mws_debug(client->log, "Not complete message sent (Msg=%d,Sent=%d). Need to arm POLLOUT!", len, ret); -#endif + if (ret >= 0 && (size_t)ret != len) client->mqtt_didnt_finish_write = 1; - } return ret; } -mqtt_wss_client mqtt_wss_new(const char *log_prefix, - mqtt_wss_log_callback_t log_callback, - msg_callback_fnc_t msg_callback, - void (*puback_callback)(uint16_t packet_id)) +mqtt_wss_client mqtt_wss_new( + msg_callback_fnc_t msg_callback, + void (*puback_callback)(uint16_t packet_id)) { - mqtt_wss_log_ctx_t log; - - log = mqtt_wss_log_ctx_create(log_prefix, log_callback); - if(!log) - return NULL; - SSL_library_init(); SSL_load_error_strings(); mqtt_wss_client client = callocz(1, sizeof(struct mqtt_wss_client_struct)); - if (!client) { - mws_error(log, "OOM alocating mqtt_wss_client"); - goto fail; - } spinlock_init(&client->stat_lock); client->msg_callback = msg_callback; client->puback_callback = puback_callback; - client->ws_client = ws_client_new(0, &client->target_host, log); + client->ws_client = ws_client_new(0, &client->target_host); if (!client->ws_client) { - mws_error(log, "Error creating ws_client"); + nd_log(NDLS_DAEMON, NDLP_ERR, "Error creating ws_client"); goto fail_1; } - 
client->log = log; - #ifdef __APPLE__ if (pipe(client->write_notif_pipe)) { #else if (pipe2(client->write_notif_pipe, O_CLOEXEC /*| O_DIRECT*/)) { #endif - mws_error(log, "Couldn't create pipe"); + nd_log(NDLS_DAEMON, NDLP_ERR, "Couldn't create pipe"); goto fail_2; } @@ -189,7 +169,6 @@ mqtt_wss_client mqtt_wss_new(const char *log_prefix, client->poll_fds[POLLFD_SOCKET].events = POLLIN; struct mqtt_ng_init settings = { - .log = log, .data_in = client->ws_client->buf_to_mqtt, .data_out_fnc = &mqtt_send_cb, .user_ctx = client, @@ -197,22 +176,14 @@ mqtt_wss_client mqtt_wss_new(const char *log_prefix, .puback_callback = puback_callback, .msg_callback = msg_callback }; - if ( (client->mqtt = mqtt_ng_init(&settings)) == NULL ) { - mws_error(log, "Error initializing internal MQTT client"); - goto fail_3; - } + client->mqtt = mqtt_ng_init(&settings); return client; -fail_3: - close(client->write_notif_pipe[PIPE_WRITE_END]); - close(client->write_notif_pipe[PIPE_READ_END]); fail_2: ws_client_destroy(client->ws_client); fail_1: freez(client); -fail: - mqtt_wss_log_ctx_destroy(log); return NULL; } @@ -253,30 +224,25 @@ void mqtt_wss_destroy(mqtt_wss_client client) if (client->sockfd > 0) close(client->sockfd); - mqtt_wss_log_ctx_destroy(client->log); freez(client); } static int cert_verify_callback(int preverify_ok, X509_STORE_CTX *ctx) { - SSL *ssl; - X509 *err_cert; - mqtt_wss_client client; - int err = 0, depth; - char *err_str; + int err = 0; - ssl = X509_STORE_CTX_get_ex_data(ctx, SSL_get_ex_data_X509_STORE_CTX_idx()); - client = SSL_get_ex_data(ssl, 0); + SSL* ssl = X509_STORE_CTX_get_ex_data(ctx, SSL_get_ex_data_X509_STORE_CTX_idx()); + mqtt_wss_client client = SSL_get_ex_data(ssl, 0); // TODO handle depth as per https://www.openssl.org/docs/man1.0.2/man3/SSL_CTX_set_verify.html if (!preverify_ok) { err = X509_STORE_CTX_get_error(ctx); - depth = X509_STORE_CTX_get_error_depth(ctx); - err_cert = X509_STORE_CTX_get_current_cert(ctx); - err_str = 
X509_NAME_oneline(X509_get_subject_name(err_cert), NULL, 0); + int depth = X509_STORE_CTX_get_error_depth(ctx); + X509* err_cert = X509_STORE_CTX_get_current_cert(ctx); + char* err_str = X509_NAME_oneline(X509_get_subject_name(err_cert), NULL, 0); - mws_error(client->log, "verify error:num=%d:%s:depth=%d:%s", err, + nd_log(NDLS_DAEMON, NDLP_ERR, "verify error:num=%d:%s:depth=%d:%s", err, X509_verify_cert_error_string(err), depth, err_str); freez(err_str); @@ -286,7 +252,7 @@ static int cert_verify_callback(int preverify_ok, X509_STORE_CTX *ctx) client->ssl_flags & MQTT_WSS_SSL_ALLOW_SELF_SIGNED) { preverify_ok = 1; - mws_error(client->log, "Self Signed Certificate Accepted as the connection was " + nd_log(NDLS_DAEMON, NDLP_ERR, "Self Signed Certificate Accepted as the connection was " "requested with MQTT_WSS_SSL_ALLOW_SELF_SIGNED"); } @@ -300,16 +266,14 @@ static int cert_verify_callback(int preverify_ok, X509_STORE_CTX *ctx) #define HTTP_HDR_TERMINATOR "\x0D\x0A\x0D\x0A" #define HTTP_CODE_LEN 4 #define HTTP_REASON_MAX_LEN 512 -static int http_parse_reply(mqtt_wss_client client, rbuf_t buf) +static int http_parse_reply(rbuf_t buf) { - char *ptr; char http_code_s[4]; - int http_code; int idx; if (rbuf_memcmp_n(buf, PROXY_HTTP, strlen(PROXY_HTTP))) { if (rbuf_memcmp_n(buf, PROXY_HTTP10, strlen(PROXY_HTTP10))) { - mws_error(client->log, "http_proxy expected reply with \"" PROXY_HTTP "\" or \"" PROXY_HTTP10 "\""); + nd_log(NDLS_DAEMON, NDLP_ERR, "http_proxy expected reply with \"" PROXY_HTTP "\" or \"" PROXY_HTTP10 "\""); return 1; } } @@ -317,39 +281,37 @@ static int http_parse_reply(mqtt_wss_client client, rbuf_t buf) rbuf_bump_tail(buf, strlen(PROXY_HTTP)); if (!rbuf_pop(buf, http_code_s, 1) || http_code_s[0] != 0x20) { - mws_error(client->log, "http_proxy missing space after \"" PROXY_HTTP "\" or \"" PROXY_HTTP10 "\""); + nd_log(NDLS_DAEMON, NDLP_ERR, "http_proxy missing space after \"" PROXY_HTTP "\" or \"" PROXY_HTTP10 "\""); return 2; } if (!rbuf_pop(buf, 
http_code_s, HTTP_CODE_LEN)) { - mws_error(client->log, "http_proxy missing HTTP code"); + nd_log(NDLS_DAEMON, NDLP_ERR, "http_proxy missing HTTP code"); return 3; } for (int i = 0; i < HTTP_CODE_LEN - 1; i++) if (http_code_s[i] > 0x39 || http_code_s[i] < 0x30) { - mws_error(client->log, "http_proxy HTTP code non numeric"); + nd_log(NDLS_DAEMON, NDLP_ERR, "http_proxy HTTP code non numeric"); return 4; } http_code_s[HTTP_CODE_LEN - 1] = 0; - http_code = atoi(http_code_s); + int http_code = str2i(http_code_s); // TODO check if we ever have more headers here rbuf_find_bytes(buf, HTTP_ENDLINE, strlen(HTTP_ENDLINE), &idx); if (idx >= HTTP_REASON_MAX_LEN) { - mws_error(client->log, "http_proxy returned reason that is too long"); + nd_log(NDLS_DAEMON, NDLP_ERR, "http_proxy returned reason that is too long"); return 5; } if (http_code != 200) { - ptr = mallocz(idx + 1); - if (!ptr) - return 6; + char *ptr = mallocz(idx + 1); rbuf_pop(buf, ptr, idx); ptr[idx] = 0; - mws_error(client->log, "http_proxy returned error code %d \"%s\"", http_code, ptr); + nd_log(NDLS_DAEMON, NDLP_ERR, "http_proxy returned error code %d \"%s\"", http_code, ptr); freez(ptr); return 7; }/* else @@ -362,52 +324,11 @@ static int http_parse_reply(mqtt_wss_client client, rbuf_t buf) rbuf_bump_tail(buf, strlen(HTTP_HDR_TERMINATOR)); if (rbuf_bytes_available(buf)) { - mws_error(client->log, "http_proxy unexpected trailing bytes after end of HTTP hdr"); + nd_log(NDLS_DAEMON, NDLP_ERR, "http_proxy unexpected trailing bytes after end of HTTP hdr"); return 8; } - mws_debug(client->log, "http_proxy CONNECT succeeded"); - return 0; -} - -#if defined(OPENSSL_VERSION_NUMBER) && OPENSSL_VERSION_NUMBER < OPENSSL_VERSION_110 -static EVP_ENCODE_CTX *EVP_ENCODE_CTX_new(void) -{ - EVP_ENCODE_CTX *ctx = OPENSSL_malloc(sizeof(*ctx)); - - if (ctx != NULL) { - memset(ctx, 0, sizeof(*ctx)); - } - return ctx; -} -static void EVP_ENCODE_CTX_free(EVP_ENCODE_CTX *ctx) -{ - OPENSSL_free(ctx); - return; -} -#endif - -inline 
static int base64_encode_helper(unsigned char *out, int *outl, const unsigned char *in, int in_len) -{ - int len; - unsigned char *str = out; - EVP_ENCODE_CTX *ctx = EVP_ENCODE_CTX_new(); - EVP_EncodeInit(ctx); - EVP_EncodeUpdate(ctx, str, outl, in, in_len); - str += *outl; - EVP_EncodeFinal(ctx, str, &len); - *outl += len; - - str = out; - while(*str) { - if (*str != 0x0D && *str != 0x0A) - *out++ = *str++; - else - str++; - } - *out = 0; - - EVP_ENCODE_CTX_free(ctx); + nd_log(NDLS_DAEMON, NDLP_DEBUG, "http_proxy CONNECT succeeded"); return 0; } @@ -418,13 +339,12 @@ static int http_proxy_connect(mqtt_wss_client client) rbuf_t r_buf = rbuf_create(4096); if (!r_buf) return 1; - char *r_buf_ptr; size_t r_buf_linear_insert_capacity; poll_fd.fd = client->sockfd; poll_fd.events = POLLIN; - r_buf_ptr = rbuf_get_linear_insert_range(r_buf, &r_buf_linear_insert_capacity); + char *r_buf_ptr = rbuf_get_linear_insert_range(r_buf, &r_buf_linear_insert_capacity); snprintf(r_buf_ptr, r_buf_linear_insert_capacity,"%s %s:%d %s" HTTP_ENDLINE "Host: %s" HTTP_ENDLINE, PROXY_CONNECT, client->target_host, client->target_port, PROXY_HTTP, client->target_host); write(client->sockfd, r_buf_ptr, strlen(r_buf_ptr)); @@ -433,7 +353,7 @@ static int http_proxy_connect(mqtt_wss_client client) size_t creds_plain_len = strlen(client->proxy_uname) + strlen(client->proxy_passwd) + 2; char *creds_plain = mallocz(creds_plain_len); if (!creds_plain) { - mws_error(client->log, "OOM creds_plain"); + nd_log(NDLS_DAEMON, NDLP_ERR, "OOM creds_plain"); rc = 6; goto cleanup; } @@ -444,7 +364,7 @@ static int http_proxy_connect(mqtt_wss_client client) char *creds_base64 = mallocz(creds_base64_len + 1); if (!creds_base64) { freez(creds_plain); - mws_error(client->log, "OOM creds_base64"); + nd_log(NDLS_DAEMON, NDLP_ERR, "OOM creds_base64"); rc = 6; goto cleanup; } @@ -454,8 +374,7 @@ static int http_proxy_connect(mqtt_wss_client client) *ptr++ = ':'; strcpy(ptr, client->proxy_passwd); - int b64_len; - 
base64_encode_helper((unsigned char*)creds_base64, &b64_len, (unsigned char*)creds_plain, strlen(creds_plain)); + (void) netdata_base64_encode((unsigned char*)creds_base64, (unsigned char*)creds_plain, strlen(creds_plain)); freez(creds_plain); r_buf_ptr = rbuf_get_linear_insert_range(r_buf, &r_buf_linear_insert_capacity); @@ -470,13 +389,13 @@ static int http_proxy_connect(mqtt_wss_client client) // or timeout while ((rc = poll(&poll_fd, 1, 1000)) >= 0) { if (!rc) { - mws_error(client->log, "http_proxy timeout waiting reply from proxy server"); + nd_log(NDLS_DAEMON, NDLP_ERR, "http_proxy timeout waiting reply from proxy server"); rc = 2; goto cleanup; } r_buf_ptr = rbuf_get_linear_insert_range(r_buf, &r_buf_linear_insert_capacity); if (!r_buf_ptr) { - mws_error(client->log, "http_proxy read ring buffer full"); + nd_log(NDLS_DAEMON, NDLP_ERR, "http_proxy read ring buffer full"); rc = 3; goto cleanup; } @@ -484,20 +403,20 @@ static int http_proxy_connect(mqtt_wss_client client) if (errno == EWOULDBLOCK || errno == EAGAIN) { continue; } - mws_error(client->log, "http_proxy error reading from socket \"%s\"", strerror(errno)); + nd_log(NDLS_DAEMON, NDLP_ERR, "http_proxy error reading from socket \"%s\"", strerror(errno)); rc = 4; goto cleanup; } rbuf_bump_head(r_buf, rc); if (rbuf_find_bytes(r_buf, HTTP_HDR_TERMINATOR, strlen(HTTP_HDR_TERMINATOR), &rc)) { rc = 0; - if (http_parse_reply(client, r_buf)) + if (http_parse_reply(r_buf)) rc = 5; goto cleanup; } } - mws_error(client->log, "proxy negotiation poll error \"%s\"", strerror(errno)); + nd_log(NDLS_DAEMON, NDLP_ERR, "proxy negotiation poll error \"%s\"", strerror(errno)); rc = 5; cleanup: rbuf_free(r_buf); @@ -510,11 +429,11 @@ int mqtt_wss_connect( int port, struct mqtt_connect_params *mqtt_params, int ssl_flags, - struct mqtt_wss_proxy *proxy, + const struct mqtt_wss_proxy *proxy, bool *fallback_ipv4) { if (!mqtt_params) { - mws_error(client->log, "mqtt_params can't be null!"); + nd_log(NDLS_DAEMON, NDLP_ERR, 
"mqtt_params can't be null!"); return -1; } @@ -571,7 +490,7 @@ int mqtt_wss_connect( struct timeval timeout = { .tv_sec = 10, .tv_usec = 0 }; int fd = connect_to_this_ip46(IPPROTO_TCP, SOCK_STREAM, client->host, 0, port_str, &timeout, fallback_ipv4); if (fd < 0) { - mws_error(client->log, "Could not connect to remote endpoint \"%s\", port %d.\n", client->host, port); + nd_log(NDLS_DAEMON, NDLP_ERR, "Could not connect to remote endpoint \"%s\", port %d.\n", client->host, port); return -3; } @@ -586,12 +505,12 @@ int mqtt_wss_connect( int flag = 1; int result = setsockopt(client->sockfd, IPPROTO_TCP, TCP_NODELAY, &flag, sizeof(int)); if (result < 0) - mws_error(client->log, "Could not dissable NAGLE"); + nd_log(NDLS_DAEMON, NDLP_ERR, "Could not dissable NAGLE"); client->poll_fds[POLLFD_SOCKET].fd = client->sockfd; if (fcntl(client->sockfd, F_SETFL, fcntl(client->sockfd, F_GETFL, 0) | O_NONBLOCK) == -1) { - mws_error(client->log, "Error setting O_NONBLOCK to TCP socket. \"%s\"", strerror(errno)); + nd_log(NDLS_DAEMON, NDLP_ERR, "Error setting O_NONBLOCK to TCP socket. 
\"%s\"", strerror(errno)); return -8; } @@ -607,7 +526,7 @@ int mqtt_wss_connect( SSL_library_init(); #else if (OPENSSL_init_ssl(OPENSSL_INIT_LOAD_CONFIG, NULL) != 1) { - mws_error(client->log, "Failed to initialize SSL"); + nd_log(NDLS_DAEMON, NDLP_ERR, "Failed to initialize SSL"); return -1; }; #endif @@ -624,7 +543,7 @@ int mqtt_wss_connect( SSL_CTX_set_default_verify_paths(client->ssl_ctx); SSL_CTX_set_verify(client->ssl_ctx, SSL_VERIFY_PEER | SSL_VERIFY_CLIENT_ONCE, cert_verify_callback); } else - mws_error(client->log, "SSL Certificate checking completely disabled!!!"); + nd_log(NDLS_DAEMON, NDLP_ERR, "SSL Certificate checking completely disabled!!!"); #ifdef MQTT_WSS_DEBUG if(client->ssl_ctx_keylog_cb) @@ -634,7 +553,7 @@ int mqtt_wss_connect( client->ssl = SSL_new(client->ssl_ctx); if (!(client->ssl_flags & MQTT_WSS_SSL_DONT_CHECK_CERTS)) { if (!SSL_set_ex_data(client->ssl, 0, client)) { - mws_error(client->log, "Could not SSL_set_ex_data"); + nd_log(NDLS_DAEMON, NDLP_ERR, "Could not SSL_set_ex_data"); return -4; } } @@ -642,27 +561,27 @@ int mqtt_wss_connect( SSL_set_connect_state(client->ssl); if (!SSL_set_tlsext_host_name(client->ssl, client->target_host)) { - mws_error(client->log, "Error setting TLS SNI host"); + nd_log(NDLS_DAEMON, NDLP_ERR, "Error setting TLS SNI host"); return -7; } result = SSL_connect(client->ssl); if (result != -1 && result != 1) { - mws_error(client->log, "SSL could not connect"); + nd_log(NDLS_DAEMON, NDLP_ERR, "SSL could not connect"); return -5; } if (result == -1) { int ec = SSL_get_error(client->ssl, result); if (ec != SSL_ERROR_WANT_READ && ec != SSL_ERROR_WANT_WRITE) { - mws_error(client->log, "Failed to start SSL connection"); + nd_log(NDLS_DAEMON, NDLP_ERR, "Failed to start SSL connection"); return -6; } } client->mqtt_keepalive = (mqtt_params->keep_alive ? 
mqtt_params->keep_alive : 400); - mws_info(client->log, "Going to connect using internal MQTT 5 implementation"); + nd_log(NDLS_DAEMON, NDLP_INFO, "Going to connect using internal MQTT 5 implementation"); struct mqtt_auth_properties auth; auth.client_id = (char*)mqtt_params->clientid; auth.client_id_free = NULL; @@ -682,7 +601,7 @@ int mqtt_wss_connect( int ret = mqtt_ng_connect(client->mqtt, &auth, mqtt_params->will_msg ? &lwt : NULL, 1, client->mqtt_keepalive); if (ret) { - mws_error(client->log, "Error generating MQTT connect"); + nd_log(NDLS_DAEMON, NDLP_ERR, "Error generating MQTT connect"); return 1; } @@ -691,7 +610,7 @@ int mqtt_wss_connect( // wait till MQTT connection is established while (!client->mqtt_connected) { if(mqtt_wss_service(client, -1)) { - mws_error(client->log, "Error connecting to MQTT WSS server \"%s\", port %d.", host, port); + nd_log(NDLS_DAEMON, NDLP_ERR, "Error connecting to MQTT WSS server \"%s\", port %d.", host, port); return 2; } } @@ -704,14 +623,14 @@ int mqtt_wss_connect( #define NSEC_PER_MSEC 1000000ULL #define NSEC_PER_SEC 1000000000ULL -static inline uint64_t boottime_usec(mqtt_wss_client client) { +static uint64_t boottime_usec(void) { struct timespec ts; #if defined(__APPLE__) || defined(__FreeBSD__) if (clock_gettime(CLOCK_MONOTONIC, &ts) == -1) { #else if (clock_gettime(CLOCK_BOOTTIME, &ts) == -1) { #endif - mws_error(client->log, "clock_gettimte failed"); + nd_log(NDLS_DAEMON, NDLP_ERR, "clock_gettimte failed"); return 0; } return (uint64_t)ts.tv_sec * USEC_PER_SEC + (ts.tv_nsec % NSEC_PER_SEC) / NSEC_PER_USEC; @@ -720,7 +639,7 @@ static inline uint64_t boottime_usec(mqtt_wss_client client) { #define MWS_TIMED_OUT 1 #define MWS_ERROR 2 #define MWS_OK 0 -static inline const char *mqtt_wss_error_tos(int ec) +static const char *mqtt_wss_error_tos(int ec) { switch(ec) { case MWS_TIMED_OUT: @@ -733,13 +652,12 @@ static inline const char *mqtt_wss_error_tos(int ec) } -static inline int mqtt_wss_service_all(mqtt_wss_client 
client, int timeout_ms) +static int mqtt_wss_service_all(mqtt_wss_client client, int timeout_ms) { - uint64_t exit_by = boottime_usec(client) + (timeout_ms * NSEC_PER_MSEC); - uint64_t now; + uint64_t exit_by = boottime_usec() + (timeout_ms * NSEC_PER_MSEC); client->poll_fds[POLLFD_SOCKET].events |= POLLOUT; // TODO when entering mwtt_wss_service use out buffer size to arm POLLOUT while (rbuf_bytes_available(client->ws_client->buf_write)) { - now = boottime_usec(client); + const uint64_t now = boottime_usec(); if (now >= exit_by) return MWS_TIMED_OUT; if (mqtt_wss_service(client, exit_by - now)) @@ -750,15 +668,13 @@ static inline int mqtt_wss_service_all(mqtt_wss_client client, int timeout_ms) void mqtt_wss_disconnect(mqtt_wss_client client, int timeout_ms) { - int ret; - // block application from sending more MQTT messages client->mqtt_disconnecting = 1; // send whatever was left at the time of calling this function - ret = mqtt_wss_service_all(client, timeout_ms / 4); + int ret = mqtt_wss_service_all(client, timeout_ms / 4); if(ret) - mws_error(client->log, + nd_log(NDLS_DAEMON, NDLP_ERR, "Error while trying to send all remaining data in an attempt " "to gracefully disconnect! EC=%d Desc:\"%s\"", ret, @@ -770,7 +686,7 @@ void mqtt_wss_disconnect(mqtt_wss_client client, int timeout_ms) ret = mqtt_wss_service_all(client, timeout_ms / 4); if(ret) - mws_error(client->log, + nd_log(NDLS_DAEMON, NDLP_ERR, "Error while trying to send MQTT disconnect message in an attempt " "to gracefully disconnect! EC=%d Desc:\"%s\"", ret, @@ -783,7 +699,7 @@ void mqtt_wss_disconnect(mqtt_wss_client client, int timeout_ms) if(ret) { // Some MQTT/WSS servers will close socket on receipt of MQTT disconnect and // do not wait for WebSocket to be closed properly - mws_warn(client->log, + nd_log(NDLS_DAEMON, NDLP_WARNING, "Error while trying to send WebSocket disconnect message in an attempt " "to gracefully disconnect! 
EC=%d Desc:\"%s\".", ret, @@ -798,22 +714,19 @@ void mqtt_wss_disconnect(mqtt_wss_client client, int timeout_ms) client->sockfd = -1; } -static inline void mqtt_wss_wakeup(mqtt_wss_client client) +static void mqtt_wss_wakeup(mqtt_wss_client client) { -#ifdef DEBUG_ULTRA_VERBOSE - mws_debug(client->log, "mqtt_wss_wakup - forcing wake up of main loop"); -#endif write(client->write_notif_pipe[PIPE_WRITE_END], " ", 1); } #define THROWAWAY_BUF_SIZE 32 char throwaway[THROWAWAY_BUF_SIZE]; -static inline void util_clear_pipe(int fd) +static void util_clear_pipe(int fd) { (void)read(fd, throwaway, THROWAWAY_BUF_SIZE); } -static inline void set_socket_pollfds(mqtt_wss_client client, int ssl_ret) { +static void set_socket_pollfds(mqtt_wss_client client, int ssl_ret) { if (ssl_ret == SSL_ERROR_WANT_WRITE) client->poll_fds[POLLFD_SOCKET].events |= POLLOUT; if (ssl_ret == SSL_ERROR_WANT_READ) @@ -824,7 +737,7 @@ static int handle_mqtt_internal(mqtt_wss_client client) { int rc = mqtt_ng_sync(client->mqtt); if (rc) { - mws_error(client->log, "mqtt_ng_sync returned %d != 0", rc); + nd_log(NDLS_DAEMON, NDLP_ERR, "mqtt_ng_sync returned %d != 0", rc); client->mqtt_connected = 0; return 1; } @@ -832,7 +745,7 @@ static int handle_mqtt_internal(mqtt_wss_client client) } #define SEC_TO_MSEC 1000 -static inline long long int t_till_next_keepalive_ms(mqtt_wss_client client) +static long long int t_till_next_keepalive_ms(mqtt_wss_client client) { time_t last_send = mqtt_ng_last_send_time(client->mqtt); long long int next_mqtt_keep_alive = (last_send * SEC_TO_MSEC) @@ -841,10 +754,10 @@ static inline long long int t_till_next_keepalive_ms(mqtt_wss_client client) } #ifdef MQTT_WSS_CPUSTATS -static inline uint64_t mqtt_wss_now_usec(mqtt_wss_client client) { +static uint64_t mqtt_wss_now_usec(void) { struct timespec ts; if(clock_gettime(CLOCK_MONOTONIC, &ts) == -1) { - mws_error(client->log, "clock_gettime(CLOCK_MONOTONIC, &timespec) failed."); + nd_log(NDLS_DAEMON, NDLP_ERR,
"clock_gettime(CLOCK_MONOTONIC, &timespec) failed."); return 0; } return (uint64_t)ts.tv_sec * USEC_PER_SEC + (ts.tv_nsec % NSEC_PER_SEC) / NSEC_PER_USEC; @@ -859,61 +772,39 @@ int mqtt_wss_service(mqtt_wss_client client, int timeout_ms) int send_keepalive = 0; #ifdef MQTT_WSS_CPUSTATS - uint64_t t1,t2; - t1 = mqtt_wss_now_usec(client); -#endif - -#ifdef DEBUG_ULTRA_VERBOSE - mws_debug(client->log, ">>>>> mqtt_wss_service <<<<<"); - mws_debug(client->log, "Waiting for events: %s%s%s", - (client->poll_fds[POLLFD_SOCKET].events & POLLIN) ? "SOCKET_POLLIN " : "", - (client->poll_fds[POLLFD_SOCKET].events & POLLOUT) ? "SOCKET_POLLOUT " : "", - (client->poll_fds[POLLFD_PIPE].events & POLLIN) ? "PIPE_POLLIN" : "" ); + uint64_t t2; + uint64_t t1 = mqtt_wss_now_usec(); #endif // Check user requested TO doesn't interfere with MQTT keep alives long long int till_next_keep_alive = t_till_next_keepalive_ms(client); if (client->mqtt_connected && (timeout_ms < 0 || timeout_ms >= till_next_keep_alive)) { - #ifdef DEBUG_ULTRA_VERBOSE - mws_debug(client->log, "Shortening Timeout requested %d to %lld to ensure keep-alive can be sent", timeout_ms, till_next_keep_alive); - #endif timeout_ms = till_next_keep_alive; send_keepalive = 1; } #ifdef MQTT_WSS_CPUSTATS - t2 = mqtt_wss_now_usec(client); + t2 = mqtt_wss_now_usec(); client->stats.time_keepalive += t2 - t1; #endif if ((ret = poll(client->poll_fds, 2, timeout_ms >= 0 ? timeout_ms : -1)) < 0) { if (errno == EINTR) { - mws_warn(client->log, "poll interrupted by EINTR"); + nd_log(NDLS_DAEMON, NDLP_WARNING, "poll interrupted by EINTR"); return 0; } - mws_error(client->log, "poll error \"%s\"", strerror(errno)); + nd_log(NDLS_DAEMON, NDLP_ERR, "poll error \"%s\"", strerror(errno)); return -2; } -#ifdef DEBUG_ULTRA_VERBOSE - mws_debug(client->log, "Poll events happened: %s%s%s%s", - (client->poll_fds[POLLFD_SOCKET].revents & POLLIN) ? "SOCKET_POLLIN " : "", - (client->poll_fds[POLLFD_SOCKET].revents & POLLOUT) ?
"SOCKET_POLLOUT " : "", - (client->poll_fds[POLLFD_PIPE].revents & POLLIN) ? "PIPE_POLLIN " : "", - (!ret) ? "POLL_TIMEOUT" : ""); -#endif - #ifdef MQTT_WSS_CPUSTATS - t1 = mqtt_wss_now_usec(client); + t1 = mqtt_wss_now_usec(); #endif if (ret == 0) { if (send_keepalive) { // otherwise we shortened the timeout ourselves to take care of // MQTT keep alives -#ifdef DEBUG_ULTRA_VERBOSE - mws_debug(client->log, "Forcing MQTT Ping/keep-alive"); -#endif mqtt_ng_ping(client->mqtt); } else { // if poll timed out and user requested timeout was being used @@ -923,7 +814,7 @@ int mqtt_wss_service(mqtt_wss_client client, int timeout_ms) } #ifdef MQTT_WSS_CPUSTATS - t2 = mqtt_wss_now_usec(client); + t2 = mqtt_wss_now_usec(); client->stats.time_keepalive += t2 - t1; #endif @@ -931,9 +822,6 @@ int mqtt_wss_service(mqtt_wss_client client, int timeout_ms) if ((ptr = rbuf_get_linear_insert_range(client->ws_client->buf_read, &size))) { if((ret = SSL_read(client->ssl, ptr, size)) > 0) { -#ifdef DEBUG_ULTRA_VERBOSE - mws_debug(client->log, "SSL_Read: Read %d.", ret); -#endif spinlock_lock(&client->stat_lock); client->stats.bytes_rx += ret; spinlock_unlock(&client->stat_lock); @@ -941,22 +829,19 @@ int mqtt_wss_service(mqtt_wss_client client, int timeout_ms) } else { int errnobkp = errno; ret = SSL_get_error(client->ssl, ret); -#ifdef DEBUG_ULTRA_VERBOSE - mws_debug(client->log, "Read Err: %s", util_openssl_ret_err(ret)); -#endif set_socket_pollfds(client, ret); if (ret != SSL_ERROR_WANT_READ && ret != SSL_ERROR_WANT_WRITE) { - mws_error(client->log, "SSL_read error: %d %s", ret, util_openssl_ret_err(ret)); + nd_log(NDLS_DAEMON, NDLP_ERR, "SSL_read error: %d %s", ret, util_openssl_ret_err(ret)); if (ret == SSL_ERROR_SYSCALL) - mws_error(client->log, "SSL_read SYSCALL errno: %d %s", errnobkp, strerror(errnobkp)); + nd_log(NDLS_DAEMON, NDLP_ERR, "SSL_read SYSCALL errno: %d %s", errnobkp, strerror(errnobkp)); return MQTT_WSS_ERR_CONN_DROP; } } } #ifdef MQTT_WSS_CPUSTATS - t1 = 
mqtt_wss_now_usec(client); + t1 = mqtt_wss_now_usec(); client->stats.time_read_socket += t1 - t2; #endif @@ -964,18 +849,20 @@ int mqtt_wss_service(mqtt_wss_client client, int timeout_ms) switch(ret) { case WS_CLIENT_PROTOCOL_ERROR: return MQTT_WSS_ERR_PROTO_WS; + case WS_CLIENT_NEED_MORE_BYTES: -#ifdef DEBUG_ULTRA_VERBOSE - mws_debug(client->log, "WSCLIENT WANT READ"); -#endif client->poll_fds[POLLFD_SOCKET].events |= POLLIN; break; + case WS_CLIENT_CONNECTION_CLOSED: return MQTT_WSS_ERR_CONN_DROP; + + default: + return MQTT_WSS_ERR_PROTO_WS; } #ifdef MQTT_WSS_CPUSTATS - t2 = mqtt_wss_now_usec(client); + t2 = mqtt_wss_now_usec(); client->stats.time_process_websocket += t2 - t1; #endif @@ -990,18 +877,12 @@ int mqtt_wss_service(mqtt_wss_client client, int timeout_ms) } #ifdef MQTT_WSS_CPUSTATS - t1 = mqtt_wss_now_usec(client); + t1 = mqtt_wss_now_usec(); client->stats.time_process_mqtt += t1 - t2; #endif if ((ptr = rbuf_get_linear_read_range(client->ws_client->buf_write, &size))) { -#ifdef DEBUG_ULTRA_VERBOSE - mws_debug(client->log, "Have data to write to SSL"); -#endif if ((ret = SSL_write(client->ssl, ptr, size)) > 0) { -#ifdef DEBUG_ULTRA_VERBOSE - mws_debug(client->log, "SSL_Write: Written %d of avail %d.", ret, size); -#endif spinlock_lock(&client->stat_lock); client->stats.bytes_tx += ret; spinlock_unlock(&client->stat_lock); @@ -1009,15 +890,12 @@ int mqtt_wss_service(mqtt_wss_client client, int timeout_ms) } else { int errnobkp = errno; ret = SSL_get_error(client->ssl, ret); -#ifdef DEBUG_ULTRA_VERBOSE - mws_debug(client->log, "Write Err: %s", util_openssl_ret_err(ret)); -#endif set_socket_pollfds(client, ret); if (ret != SSL_ERROR_WANT_READ && ret != SSL_ERROR_WANT_WRITE) { - mws_error(client->log, "SSL_write error: %d %s", ret, util_openssl_ret_err(ret)); + nd_log(NDLS_DAEMON, NDLP_ERR, "SSL_write error: %d %s", ret, util_openssl_ret_err(ret)); if (ret == SSL_ERROR_SYSCALL) - mws_error(client->log, "SSL_write SYSCALL errno: %d %s", errnobkp, 
strerror(errnobkp)); + nd_log(NDLS_DAEMON, NDLP_ERR, "SSL_write SYSCALL errno: %d %s", errnobkp, strerror(errnobkp)); return MQTT_WSS_ERR_CONN_DROP; } } @@ -1027,7 +905,7 @@ int mqtt_wss_service(mqtt_wss_client client, int timeout_ms) util_clear_pipe(client->write_notif_pipe[PIPE_READ_END]); #ifdef MQTT_WSS_CPUSTATS - t2 = mqtt_wss_now_usec(client); + t2 = mqtt_wss_now_usec(); client->stats.time_write_socket += t2 - t1; #endif @@ -1044,12 +922,12 @@ int mqtt_wss_publish5(mqtt_wss_client client, uint16_t *packet_id) { if (client->mqtt_disconnecting) { - mws_error(client->log, "mqtt_wss is disconnecting can't publish"); + nd_log(NDLS_DAEMON, NDLP_ERR, "mqtt_wss is disconnecting can't publish"); return 1; } if (!client->mqtt_connected) { - mws_error(client->log, "MQTT is offline. Can't send message."); + nd_log(NDLS_DAEMON, NDLP_ERR, "MQTT is offline. Can't send message."); return 1; } uint8_t mqtt_flags = 0; @@ -1060,7 +938,7 @@ int mqtt_wss_publish5(mqtt_wss_client client, int rc = mqtt_ng_publish(client->mqtt, topic, topic_free, msg, msg_free, msg_len, mqtt_flags, packet_id); if (rc == MQTT_NG_MSGGEN_MSG_TOO_BIG) - return MQTT_WSS_ERR_TOO_BIG_FOR_SERVER; + return MQTT_WSS_ERR_MSG_TOO_BIG; mqtt_wss_wakeup(client); @@ -1071,12 +949,12 @@ int mqtt_wss_subscribe(mqtt_wss_client client, char *topic, int max_qos_level) { (void)max_qos_level; //TODO now hardcoded if (!client->mqtt_connected) { - mws_error(client->log, "MQTT is offline. Can't subscribe."); + nd_log(NDLS_DAEMON, NDLP_ERR, "MQTT is offline. 
Can't subscribe."); return 1; } if (client->mqtt_disconnecting) { - mws_error(client->log, "mqtt_wss is disconnecting can't subscribe"); + nd_log(NDLS_DAEMON, NDLP_ERR, "mqtt_wss is disconnecting can't subscribe"); return 1; } diff --git a/src/aclk/mqtt_websockets/mqtt_wss_client.h b/src/aclk/mqtt_websockets/mqtt_wss_client.h index 2f1c15954d86cc..2fd94075d6bbe5 100644 --- a/src/aclk/mqtt_websockets/mqtt_wss_client.h +++ b/src/aclk/mqtt_websockets/mqtt_wss_client.h @@ -3,49 +3,34 @@ #ifndef MQTT_WSS_CLIENT_H #define MQTT_WSS_CLIENT_H -#include "mqtt_wss_log.h" #include "common_public.h" -// All OK call me at your earliest convinience -#define MQTT_WSS_OK 0 -/* All OK, poll timeout you requested when calling mqtt_wss_service expired - you might want to know if timeout - * happened or we got some data or handle same as MQTT_WSS_OK - */ -#define MQTT_WSS_OK_TO 1 -// Connection was closed by remote -#define MQTT_WSS_ERR_CONN_DROP -1 -// Error in MQTT protocol (e.g. malformed packet) -#define MQTT_WSS_ERR_PROTO_MQTT -2 -// Error in WebSocket protocol (e.g. malformed packet) -#define MQTT_WSS_ERR_PROTO_WS -3 - -#define MQTT_WSS_ERR_TX_BUF_TOO_SMALL -4 -#define MQTT_WSS_ERR_RX_BUF_TOO_SMALL -5 - -#define MQTT_WSS_ERR_TOO_BIG_FOR_SERVER -6 -// if client was initialized with MQTT 3 but MQTT 5 feature -// was requested by user of library -#define MQTT_WSS_ERR_CANT_DO -8 + +#define MQTT_WSS_OK 0 // All OK call me at your earliest convinience +#define MQTT_WSS_OK_TO 1 // All OK, poll timeout you requested when calling mqtt_wss_service expired + //you might want to know if timeout + //happened or we got some data or handle same as MQTT_WSS_OK +#define MQTT_WSS_ERR_CONN_DROP -1 // Connection was closed by remote +#define MQTT_WSS_ERR_PROTO_MQTT -2 // Error in MQTT protocol (e.g. malformed packet) +#define MQTT_WSS_ERR_PROTO_WS -3 // Error in WebSocket protocol (e.g. 
malformed packet) +#define MQTT_WSS_ERR_MSG_TOO_BIG -6 // Message size too big for server +#define MQTT_WSS_ERR_CANT_DO -8 // if client was initialized with MQTT 3 but MQTT 5 feature + // was requested by user of library typedef struct mqtt_wss_client_struct *mqtt_wss_client; typedef void (*msg_callback_fnc_t)(const char *topic, const void *msg, size_t msglen, int qos); + /* Creates new instance of MQTT over WSS. Doesn't start connection. - * @param log_prefix this is prefix to be used when logging to discern between multiple - * mqtt_wss instances. Can be NULL. - * @param log_callback is function pointer to fnc to be called when mqtt_wss wants - * to log. This allows plugging this library into your own logging system/solution. - * If NULL STDOUT/STDERR will be used. * @param msg_callback is function pointer to function which will be called * when application level message arrives from broker (for subscribed topics). * Can be NULL if you are not interested about incoming messages. * @param puback_callback is function pointer to function to be called when QOS1 Publish * is acknowledged by server */ -mqtt_wss_client mqtt_wss_new(const char *log_prefix, - mqtt_wss_log_callback_t log_callback, - msg_callback_fnc_t msg_callback, - void (*puback_callback)(uint16_t packet_id)); +mqtt_wss_client mqtt_wss_new( + msg_callback_fnc_t msg_callback, + void (*puback_callback)(uint16_t packet_id)); void mqtt_wss_set_max_buf_size(mqtt_wss_client client, size_t size); @@ -71,7 +56,7 @@ int mqtt_wss_connect( int port, struct mqtt_connect_params *mqtt_params, int ssl_flags, - struct mqtt_wss_proxy *proxy, + const struct mqtt_wss_proxy *proxy, bool *fallback_ipv4); int mqtt_wss_service(mqtt_wss_client client, int timeout_ms); void mqtt_wss_disconnect(mqtt_wss_client client, int timeout_ms); diff --git a/src/aclk/mqtt_websockets/mqtt_wss_log.c b/src/aclk/mqtt_websockets/mqtt_wss_log.c deleted file mode 100644 index def2236af63937..00000000000000 --- 
a/src/aclk/mqtt_websockets/mqtt_wss_log.c +++ /dev/null @@ -1,126 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "libnetdata/libnetdata.h" - -#include "mqtt_wss_log.h" - -struct mqtt_wss_log_ctx { - mqtt_wss_log_callback_t extern_log_fnc; - char *ctx_prefix; - char *buffer; - char *buffer_w_ptr; - size_t buffer_bytes_avail; -}; - -#define LOG_BUFFER_SIZE 1024 * 4 -#define LOG_CTX_PREFIX_SEV_STR " : " -#define LOG_CTX_PREFIX_LIMIT 15 -#define LOG_CTX_PREFIX_LIMIT_STR (LOG_CTX_PREFIX_LIMIT - (2 + strlen(LOG_CTX_PREFIX_SEV_STR))) // with [] characters and affixed ' ' it is total 15 chars -#if (LOG_CTX_PREFIX_LIMIT * 10) > LOG_BUFFER_SIZE -#error "LOG_BUFFER_SIZE too small" -#endif -mqtt_wss_log_ctx_t mqtt_wss_log_ctx_create(const char *ctx_prefix, mqtt_wss_log_callback_t log_callback) -{ - mqtt_wss_log_ctx_t ctx = callocz(1, sizeof(struct mqtt_wss_log_ctx)); - if(!ctx) - return NULL; - - if(log_callback) { - ctx->extern_log_fnc = log_callback; - ctx->buffer = callocz(1, LOG_BUFFER_SIZE); - if(!ctx->buffer) - goto cleanup; - - ctx->buffer_w_ptr = ctx->buffer; - if(ctx_prefix) { - *(ctx->buffer_w_ptr++) = '['; - strncpy(ctx->buffer_w_ptr, ctx_prefix, LOG_CTX_PREFIX_LIMIT_STR); - ctx->buffer_w_ptr += strnlen(ctx_prefix, LOG_CTX_PREFIX_LIMIT_STR); - *(ctx->buffer_w_ptr++) = ']'; - } - strcpy(ctx->buffer_w_ptr, LOG_CTX_PREFIX_SEV_STR); - ctx->buffer_w_ptr += strlen(LOG_CTX_PREFIX_SEV_STR); - // no term '\0' -> calloc is used - - ctx->buffer_bytes_avail = LOG_BUFFER_SIZE - strlen(ctx->buffer); - - return ctx; - } - - if(ctx_prefix) { - ctx->ctx_prefix = strndup(ctx_prefix, LOG_CTX_PREFIX_LIMIT_STR); - if(!ctx->ctx_prefix) - goto cleanup; - } - - return ctx; - -cleanup: - freez(ctx); - return NULL; -} - -void mqtt_wss_log_ctx_destroy(mqtt_wss_log_ctx_t ctx) -{ - freez(ctx->ctx_prefix); - freez(ctx->buffer); - freez(ctx); -} - -static inline char severity_to_c(int severity) -{ - switch (severity) { - case MQTT_WSS_LOG_FATAL: - return 'F'; - case 
MQTT_WSS_LOG_ERROR: - return 'E'; - case MQTT_WSS_LOG_WARN: - return 'W'; - case MQTT_WSS_LOG_INFO: - return 'I'; - case MQTT_WSS_LOG_DEBUG: - return 'D'; - default: - return '?'; - } -} - -void mws_log(int severity, mqtt_wss_log_ctx_t ctx, const char *fmt, va_list args) -{ - size_t size; - - if(ctx->extern_log_fnc) { - size = vsnprintf(ctx->buffer_w_ptr, ctx->buffer_bytes_avail, fmt, args); - *(ctx->buffer_w_ptr - 3) = severity_to_c(severity); - - ctx->extern_log_fnc(severity, ctx->buffer); - - if(size >= ctx->buffer_bytes_avail) - mws_error(ctx, "Last message of this type was truncated! Consider what you log or increase LOG_BUFFER_SIZE if really needed."); - - return; - } - - if(ctx->ctx_prefix) - printf("[%s] ", ctx->ctx_prefix); - - printf("%c: ", severity_to_c(severity)); - - vprintf(fmt, args); - putchar('\n'); -} - -#define DEFINE_MWS_SEV_FNC(severity_fncname, severity) \ -void mws_ ## severity_fncname(mqtt_wss_log_ctx_t ctx, const char *fmt, ...) \ -{ \ - va_list args; \ - va_start(args, fmt); \ - mws_log(severity, ctx, fmt, args); \ - va_end(args); \ -} - -DEFINE_MWS_SEV_FNC(fatal, MQTT_WSS_LOG_FATAL) -DEFINE_MWS_SEV_FNC(error, MQTT_WSS_LOG_ERROR) -DEFINE_MWS_SEV_FNC(warn, MQTT_WSS_LOG_WARN ) -DEFINE_MWS_SEV_FNC(info, MQTT_WSS_LOG_INFO ) -DEFINE_MWS_SEV_FNC(debug, MQTT_WSS_LOG_DEBUG) diff --git a/src/aclk/mqtt_websockets/mqtt_wss_log.h b/src/aclk/mqtt_websockets/mqtt_wss_log.h deleted file mode 100644 index 6ae60d870b4b83..00000000000000 --- a/src/aclk/mqtt_websockets/mqtt_wss_log.h +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright: SPDX-License-Identifier: GPL-3.0-only - -#ifndef MQTT_WSS_LOG_H -#define MQTT_WSS_LOG_H - -typedef enum mqtt_wss_log_type { - MQTT_WSS_LOG_DEBUG = 0x01, - MQTT_WSS_LOG_INFO = 0x02, - MQTT_WSS_LOG_WARN = 0x03, - MQTT_WSS_LOG_ERROR = 0x81, - MQTT_WSS_LOG_FATAL = 0x88 -} mqtt_wss_log_type_t; - -typedef void (*mqtt_wss_log_callback_t)(mqtt_wss_log_type_t, const char*); - -typedef struct mqtt_wss_log_ctx *mqtt_wss_log_ctx_t; - -/** 
Creates logging context with optional prefix and optional callback - * @param ctx_prefix String to be prefixed to every log message. - * This is useful if multiple clients are instantiated to be able to - * know which one this message belongs to. Can be `NULL` for no prefix. - * @param log_callback Callback to be called instead of logging to - * `STDOUT` or `STDERR` (if debug enabled otherwise silent). Callback has to be - * pointer to function of `void function(mqtt_wss_log_type_t, const char*)` type. - * If `NULL` default will be used (silent or STDERR/STDOUT). - * @return mqtt_wss_log_ctx_t or `NULL` on error */ -mqtt_wss_log_ctx_t mqtt_wss_log_ctx_create(const char *ctx_prefix, mqtt_wss_log_callback_t log_callback); - -/** Destroys logging context and cleans up the memory - * @param ctx Context to destroy */ -void mqtt_wss_log_ctx_destroy(mqtt_wss_log_ctx_t ctx); - -void mws_fatal(mqtt_wss_log_ctx_t ctx, const char *fmt, ...); -void mws_error(mqtt_wss_log_ctx_t ctx, const char *fmt, ...); -void mws_warn (mqtt_wss_log_ctx_t ctx, const char *fmt, ...); -void mws_info (mqtt_wss_log_ctx_t ctx, const char *fmt, ...); -void mws_debug(mqtt_wss_log_ctx_t ctx, const char *fmt, ...); - -#endif /* MQTT_WSS_LOG_H */ diff --git a/src/aclk/mqtt_websockets/ws_client.c b/src/aclk/mqtt_websockets/ws_client.c index ed39967ce88a61..14c3f19b2fe05d 100644 --- a/src/aclk/mqtt_websockets/ws_client.c +++ b/src/aclk/mqtt_websockets/ws_client.c @@ -5,78 +5,54 @@ #include "ws_client.h" #include "common_internal.h" -#define UNIT_LOG_PREFIX "ws_client: " -#define FATAL(fmt, ...) mws_fatal(client->log, UNIT_LOG_PREFIX fmt, ##__VA_ARGS__) -#define ERROR(fmt, ...) mws_error(client->log, UNIT_LOG_PREFIX fmt, ##__VA_ARGS__) -#define WARN(fmt, ...) mws_warn (client->log, UNIT_LOG_PREFIX fmt, ##__VA_ARGS__) -#define INFO(fmt, ...) mws_info (client->log, UNIT_LOG_PREFIX fmt, ##__VA_ARGS__) -#define DEBUG(fmt, ...) 
mws_debug(client->log, UNIT_LOG_PREFIX fmt, ##__VA_ARGS__) +#ifdef OS_WINDOWS +#include <windows.h> +#include <bcrypt.h> // For BCryptGenRandom +#endif + +static uint32_t generate_random_32bit(void) { + uint32_t random_number = 0; + + if (RAND_bytes((unsigned char *)&random_number, sizeof(random_number)) != 1) { + nd_log(NDLS_DAEMON, NDLP_ERR, "Failed to generate a random uint32 mask"); + } + + return random_number; +} const char *websocket_upgrage_hdr = "GET /mqtt HTTP/1.1\x0D\x0A" "Host: %s\x0D\x0A" "Upgrade: websocket\x0D\x0A" "Connection: Upgrade\x0D\x0A" "Sec-WebSocket-Key: %s\x0D\x0A" - "Origin: http://example.com\x0D\x0A" + "Origin: \x0D\x0A" "Sec-WebSocket-Protocol: mqtt\x0D\x0A" "Sec-WebSocket-Version: 13\x0D\x0A\x0D\x0A"; const char *mqtt_protoid = "258EAFA5-E914-47DA-95CA-C5AB0DC85B11"; #define DEFAULT_RINGBUFFER_SIZE (1024*128) -#define ENTROPY_SOURCE "/dev/urandom" -ws_client *ws_client_new(size_t buf_size, char **host, mqtt_wss_log_ctx_t log) -{ - ws_client *client; +ws_client *ws_client_new(size_t buf_size, char **host) +{ if(!host) return NULL; - client = callocz(1, sizeof(ws_client)); - if (!client) - return NULL; - + ws_client *client = callocz(1, sizeof(ws_client)); client->host = host; - client->log = log; - client->buf_read = rbuf_create(buf_size ? buf_size : DEFAULT_RINGBUFFER_SIZE); - if (!client->buf_read) - goto cleanup; - client->buf_write = rbuf_create(buf_size ? buf_size : DEFAULT_RINGBUFFER_SIZE); - if (!client->buf_write) - goto cleanup_1; - client->buf_to_mqtt = rbuf_create(buf_size ? buf_size : DEFAULT_RINGBUFFER_SIZE); - if (!client->buf_to_mqtt) - goto cleanup_2; - - client->entropy_fd = open(ENTROPY_SOURCE, O_RDONLY | O_CLOEXEC); - if (client->entropy_fd < 1) { - ERROR("Error opening entropy source \"" ENTROPY_SOURCE "\". 
Reason: \"%s\"", strerror(errno)); - goto cleanup_3; - } return client; - -cleanup_3: - rbuf_free(client->buf_to_mqtt); -cleanup_2: - rbuf_free(client->buf_write); -cleanup_1: - rbuf_free(client->buf_read); -cleanup: - freez(client); - return NULL; } void ws_client_free_headers(ws_client *client) { struct http_header *ptr = client->hs.headers; - struct http_header *tmp; while (ptr) { - tmp = ptr; + struct http_header *tmp = ptr; ptr = ptr->next; freez(tmp); } @@ -91,7 +67,6 @@ void ws_client_destroy(ws_client *client) ws_client_free_headers(client); freez(client->hs.nonce_reply); freez(client->hs.http_reply_msg); - close(client->entropy_fd); rbuf_free(client->buf_read); rbuf_free(client->buf_write); rbuf_free(client->buf_to_mqtt); @@ -120,7 +95,7 @@ void ws_client_reset(ws_client *client) int ws_client_add_http_header(ws_client *client, struct http_header *hdr) { if (client->hs.hdr_count > MAX_HTTP_HDR_COUNT) { - ERROR("Too many HTTP response header fields"); + nd_log(NDLS_DAEMON, NDLP_ERR, "Too many HTTP response header fields"); return -1; } @@ -135,7 +110,7 @@ int ws_client_add_http_header(ws_client *client, struct http_header *hdr) return 0; } -int ws_client_want_write(ws_client *client) +int ws_client_want_write(const ws_client *client) { return rbuf_bytes_available(client->buf_write); } @@ -144,78 +119,92 @@ int ws_client_want_write(ws_client *client) #define TEMP_BUF_SIZE 4096 int ws_client_start_handshake(ws_client *client) { - nd_uuid_t nonce; + unsigned char nonce[WEBSOCKET_NONCE_SIZE]; char nonce_b64[256]; char second[TEMP_BUF_SIZE]; unsigned int md_len; - unsigned char *digest; + unsigned char digest[EVP_MAX_MD_SIZE]; // EVP_MAX_MD_SIZE ensures enough space EVP_MD_CTX *md_ctx; const EVP_MD *md; + int rc = 1; if(!client->host || !*client->host) { - ERROR("Hostname has not been set. We should not be able to come here!"); + nd_log(NDLS_DAEMON, NDLP_ERR, "Hostname has not been set. 
We should not be able to come here!"); return 1; } - uuid_generate_random(nonce); - EVP_EncodeBlock((unsigned char *)nonce_b64, (const unsigned char *)nonce, WEBSOCKET_NONCE_SIZE); - snprintf(second, TEMP_BUF_SIZE, websocket_upgrage_hdr, *client->host, nonce_b64); - - if(rbuf_bytes_free(client->buf_write) < strlen(second)) { - ERROR("Write buffer capacity too low."); + // Generate a random 16-byte nonce + if (!RAND_bytes(nonce, WEBSOCKET_NONCE_SIZE)) { + nd_log(NDLS_DAEMON, NDLP_ERR, "Failed to generate nonce"); return 1; } - rbuf_push(client->buf_write, second, strlen(second)); - client->state = WS_HANDSHAKE; - - //Calculating expected Sec-WebSocket-Accept reply - snprintf(second, TEMP_BUF_SIZE, "%s%s", nonce_b64, mqtt_protoid); - + // Initialize the digest context #if (OPENSSL_VERSION_NUMBER < OPENSSL_VERSION_110) md_ctx = EVP_MD_CTX_create(); #else md_ctx = EVP_MD_CTX_new(); #endif if (md_ctx == NULL) { - ERROR("Cant create EVP_MD Context"); + nd_log(NDLS_DAEMON, NDLP_ERR, "Can't create EVP_MD context"); return 1; } - md = EVP_get_digestbyname("sha1"); + md = EVP_sha1(); // Use SHA-1 for WebSocket handshake if (!md) { - ERROR("Unknown message digest"); - return 1; + nd_log(NDLS_DAEMON, NDLP_ERR, "Unknown message digest SHA-1"); + goto exit_with_error; } - if ((digest = (unsigned char *)OPENSSL_malloc(EVP_MD_size(EVP_sha256()))) == NULL) { - ERROR("Cant alloc digest"); - return 1; + (void) netdata_base64_encode((unsigned char *) nonce_b64, nonce, WEBSOCKET_NONCE_SIZE); + + // Format and push the upgrade header to the write buffer + size_t bytes = snprintf(second, TEMP_BUF_SIZE, websocket_upgrage_hdr, *client->host, nonce_b64); + if(rbuf_bytes_free(client->buf_write) < bytes) { + nd_log(NDLS_DAEMON, NDLP_ERR, "Write buffer capacity too low."); + goto exit_with_error; + } + rbuf_push(client->buf_write, second, bytes); + + client->state = WS_HANDSHAKE; + + // Create the expected Sec-WebSocket-Accept value + bytes = snprintf(second, TEMP_BUF_SIZE, "%s%s", nonce_b64, 
mqtt_protoid); + + if (!EVP_DigestInit_ex(md_ctx, md, NULL)) { + nd_log(NDLS_DAEMON, NDLP_ERR, "Failed to initialize digest context"); + goto exit_with_error; + } + + if (!EVP_DigestUpdate(md_ctx, second, bytes)) { + nd_log(NDLS_DAEMON, NDLP_ERR, "Failed to update digest"); + goto exit_with_error; } - EVP_DigestInit_ex(md_ctx, md, NULL); - EVP_DigestUpdate(md_ctx, second, strlen(second)); - EVP_DigestFinal_ex(md_ctx, digest, &md_len); + if (!EVP_DigestFinal_ex(md_ctx, digest, &md_len)) { + nd_log(NDLS_DAEMON, NDLP_ERR, "Failed to finalize digest"); + goto exit_with_error; + } - EVP_EncodeBlock((unsigned char *)nonce_b64, digest, (int) md_len); + (void) netdata_base64_encode((unsigned char *) nonce_b64, digest, md_len); freez(client->hs.nonce_reply); client->hs.nonce_reply = strdupz(nonce_b64); + rc = 0; - OPENSSL_free(digest); - +exit_with_error: #if (OPENSSL_VERSION_NUMBER < OPENSSL_VERSION_110) EVP_MD_CTX_destroy(md_ctx); #else EVP_MD_CTX_free(md_ctx); #endif - return 0; + return rc; } #define BUF_READ_MEMCMP_CONST(const, err) \ if (rbuf_memcmp_n(client->buf_read, const, strlen(const))) { \ - ERROR(err); \ + nd_log(NDLS_DAEMON, NDLP_ERR, err); \ rbuf_flush(client->buf_read); \ return WS_CLIENT_PROTOCOL_ERROR; \ } @@ -241,7 +230,7 @@ int ws_client_start_handshake(ws_client *client) #define HTTP_HDR_LINE_CHECK_LIMIT(x) \ if ((x) >= MAX_HTTP_LINE_LENGTH) { \ - ERROR("HTTP line received is too long. Maximum is %d", MAX_HTTP_LINE_LENGTH); \ + nd_log(NDLS_DAEMON, NDLP_ERR, "HTTP line received is too long. 
Maximum is %d", MAX_HTTP_LINE_LENGTH); \ return WS_CLIENT_PROTOCOL_ERROR; \ } @@ -264,13 +253,13 @@ int ws_client_parse_handshake_resp(ws_client *client) BUF_READ_CHECK_AT_LEAST(HTTP_SC_LENGTH); // "XXX " http return code rbuf_pop(client->buf_read, buf, HTTP_SC_LENGTH); if (buf[HTTP_SC_LENGTH - 1] != 0x20) { - ERROR("HTTP status code received is not terminated by space (0x20)"); + nd_log(NDLS_DAEMON, NDLP_ERR, "HTTP status code received is not terminated by space (0x20)"); return WS_CLIENT_PROTOCOL_ERROR; } buf[HTTP_SC_LENGTH - 1] = 0; client->hs.http_code = atoi(buf); if (client->hs.http_code < 100 || client->hs.http_code >= 600) { - ERROR("HTTP status code received not in valid range 100-600"); + nd_log(NDLS_DAEMON, NDLP_ERR, "HTTP status code received not in valid range 100-600"); return WS_CLIENT_PROTOCOL_ERROR; } client->hs.hdr_state = WS_HDR_ENDLINE; @@ -309,16 +298,16 @@ int ws_client_parse_handshake_resp(ws_client *client) ptr = rbuf_find_bytes(client->buf_read, HTTP_HDR_SEPARATOR, strlen(HTTP_HDR_SEPARATOR), &idx_sep); if (!ptr || idx_sep > idx_crlf) { - ERROR("Expected HTTP hdr field key/value separator \": \" before endline in non empty HTTP header line"); + nd_log(NDLS_DAEMON, NDLP_ERR, "Expected HTTP hdr field key/value separator \": \" before endline in non empty HTTP header line"); return WS_CLIENT_PROTOCOL_ERROR; } if (idx_crlf == idx_sep + (int)strlen(HTTP_HDR_SEPARATOR)) { - ERROR("HTTP Header value cannot be empty"); + nd_log(NDLS_DAEMON, NDLP_ERR, "HTTP Header value cannot be empty"); return WS_CLIENT_PROTOCOL_ERROR; } if (idx_sep > HTTP_HEADER_NAME_MAX_LEN) { - ERROR("HTTP header too long (%d)", idx_sep); + nd_log(NDLS_DAEMON, NDLP_ERR, "HTTP header too long (%d)", idx_sep); return WS_CLIENT_PROTOCOL_ERROR; } @@ -326,23 +315,21 @@ int ws_client_parse_handshake_resp(ws_client *client) hdr->key = ((char*)hdr) + sizeof(struct http_header); hdr->value = hdr->key + idx_sep + 1; - bytes = rbuf_pop(client->buf_read, hdr->key, idx_sep); + 
rbuf_pop(client->buf_read, hdr->key, idx_sep); rbuf_bump_tail(client->buf_read, strlen(HTTP_HDR_SEPARATOR)); - bytes = rbuf_pop(client->buf_read, hdr->value, idx_crlf - idx_sep - strlen(HTTP_HDR_SEPARATOR)); + rbuf_pop(client->buf_read, hdr->value, idx_crlf - idx_sep - strlen(HTTP_HDR_SEPARATOR)); rbuf_bump_tail(client->buf_read, strlen(WS_HTTP_NEWLINE)); for (int i = 0; hdr->key[i]; i++) hdr->key[i] = tolower(hdr->key[i]); -// DEBUG("HTTP header \"%s\" received. Value \"%s\"", hdr->key, hdr->value); - if (ws_client_add_http_header(client, hdr)) return WS_CLIENT_PROTOCOL_ERROR; if (!strcmp(hdr->key, WS_CONN_ACCEPT)) { if (strcmp(client->hs.nonce_reply, hdr->value)) { - ERROR("Received NONCE \"%s\" does not match expected nonce of \"%s\"", hdr->value, client->hs.nonce_reply); + nd_log(NDLS_DAEMON, NDLP_ERR, "Received NONCE \"%s\" does not match expected nonce of \"%s\"", hdr->value, client->hs.nonce_reply); return WS_CLIENT_PROTOCOL_ERROR; } client->hs.nonce_matched = 1; @@ -352,21 +339,21 @@ int ws_client_parse_handshake_resp(ws_client *client) case WS_HDR_PARSE_DONE: if (!client->hs.nonce_matched) { - ERROR("Missing " WS_CONN_ACCEPT " header"); + nd_log(NDLS_DAEMON, NDLP_ERR, "Missing " WS_CONN_ACCEPT " header"); return WS_CLIENT_PROTOCOL_ERROR; } if (client->hs.http_code != 101) { - ERROR("HTTP return code not 101. Received %d with msg \"%s\".", client->hs.http_code, client->hs.http_reply_msg); + nd_log(NDLS_DAEMON, NDLP_ERR, "HTTP return code not 101. 
Received %d with msg \"%s\".", client->hs.http_code, client->hs.http_reply_msg); return WS_CLIENT_PROTOCOL_ERROR; } client->state = WS_ESTABLISHED; client->hs.hdr_state = WS_HDR_ALL_DONE; - INFO("Websocket Connection Accepted By Server"); + nd_log(NDLS_DAEMON, NDLP_INFO, "Websocket Connection Accepted By Server"); return WS_CLIENT_PARSING_DONE; case WS_HDR_ALL_DONE: - FATAL("This is error we should never come here!"); + nd_log(NDLS_DAEMON, NDLP_CRIT, "This is error we should never come here!"); return WS_CLIENT_PROTOCOL_ERROR; } return 0; @@ -376,7 +363,7 @@ int ws_client_parse_handshake_resp(ws_client *client) #define WS_FINAL_FRAG BYTE_MSB #define WS_PAYLOAD_MASKED BYTE_MSB -static inline size_t get_ws_hdr_size(size_t payload_size) +static size_t get_ws_hdr_size(size_t payload_size) { size_t hdr_len = 2 + 4 /*mask*/; if(payload_size > 125) @@ -387,7 +374,7 @@ static inline size_t get_ws_hdr_size(size_t payload_size) } #define MAX_POSSIBLE_HDR_LEN 14 -int ws_client_send(ws_client *client, enum websocket_opcode frame_type, const char *data, size_t size) +int ws_client_send(const ws_client *client, enum websocket_opcode frame_type, const char *data, size_t size) { // TODO maybe? implement fragmenting, it is not necessary though // as both tested MQTT brokers have no reuirement of one MQTT envelope @@ -395,24 +382,16 @@ int ws_client_send(ws_client *client, enum websocket_opcode frame_type, const ch // one big MQTT message as single fragmented WebSocket envelope char hdr[MAX_POSSIBLE_HDR_LEN]; char *ptr = hdr; - char *mask; int size_written = 0; size_t j = 0; size_t w_buff_free = rbuf_bytes_free(client->buf_write); size_t hdr_len = get_ws_hdr_size(size); - if (w_buff_free < hdr_len * 2) { -#ifdef DEBUG_ULTRA_VERBOSE - DEBUG("Write buffer full. 
Can't write requested %d size.", size); -#endif + if (w_buff_free < hdr_len * 2) return 0; - } if (w_buff_free < (hdr_len + size)) { -#ifdef DEBUG_ULTRA_VERBOSE - DEBUG("Can't write whole MQTT packet of %d bytes into the buffer. Will do partial send of %d.", size, w_buff_free - hdr_len); -#endif size = w_buff_free - hdr_len; hdr_len = get_ws_hdr_size(size); // the actual needed header size might decrease if we cut number of bytes @@ -438,12 +417,14 @@ int ws_client_send(ws_client *client, enum websocket_opcode frame_type, const ch ptr += sizeof(be); } else *ptr++ |= size; - - mask = ptr; - if (read(client->entropy_fd, mask, sizeof(uint32_t)) < (ssize_t)sizeof(uint32_t)) { - ERROR("Unable to get mask from \"" ENTROPY_SOURCE "\""); + + char *mask = ptr; + uint32_t mask32 = generate_random_32bit(); + if (!mask32) { + nd_log(NDLS_DAEMON, NDLP_ERR, "Unable to get mask to XOR websocket payload"); return -2; } + memcpy(mask, &mask32, sizeof(mask32)); rbuf_push(client->buf_write, hdr, hdr_len); @@ -469,7 +450,7 @@ int ws_client_send(ws_client *client, enum websocket_opcode frame_type, const ch return size_written; } -static int check_opcode(ws_client *client,enum websocket_opcode oc) +static int check_opcode(enum websocket_opcode oc) { switch(oc) { case WS_OP_BINARY_FRAME: @@ -477,34 +458,34 @@ static int check_opcode(ws_client *client,enum websocket_opcode oc) case WS_OP_PING: return 0; case WS_OP_CONTINUATION_FRAME: - FATAL("WS_OP_CONTINUATION_FRAME NOT IMPLEMENTED YET!!!!"); + nd_log(NDLS_DAEMON, NDLP_ERR, "WS_OP_CONTINUATION_FRAME NOT IMPLEMENTED YET!!!!"); return 0; case WS_OP_TEXT_FRAME: - FATAL("WS_OP_TEXT_FRAME NOT IMPLEMENTED YET!!!!"); + nd_log(NDLS_DAEMON, NDLP_ERR, "WS_OP_TEXT_FRAME NOT IMPLEMENTED YET!!!!"); return 0; case WS_OP_PONG: - FATAL("WS_OP_PONG NOT IMPLEMENTED YET!!!!"); + nd_log(NDLS_DAEMON, NDLP_ERR, "WS_OP_PONG NOT IMPLEMENTED YET!!!!"); return 0; default: return WS_CLIENT_PROTOCOL_ERROR; } } -static inline void 
ws_client_rx_post_hdr_state(ws_client *client) +static void ws_client_rx_post_hdr_state(ws_client *client) { switch(client->rx.opcode) { case WS_OP_BINARY_FRAME: client->rx.parse_state = WS_PAYLOAD_DATA; - return; + break; case WS_OP_CONNECTION_CLOSE: client->rx.parse_state = WS_PAYLOAD_CONNECTION_CLOSE; - return; + break; case WS_OP_PING: client->rx.parse_state = WS_PAYLOAD_PING_REQ_PAYLOAD; - return; + break; default: client->rx.parse_state = WS_PAYLOAD_SKIP_UNKNOWN_PAYLOAD; - return; + break; } } @@ -520,15 +501,15 @@ int ws_client_process_rx_ws(ws_client *client) client->rx.opcode = buf[0] & (char)~BYTE_MSB; if (!(buf[0] & (char)~WS_FINAL_FRAG)) { - ERROR("Not supporting fragmented messages yet!"); + nd_log(NDLS_DAEMON, NDLP_ERR, "Not supporting fragmented messages yet!"); return WS_CLIENT_PROTOCOL_ERROR; } - if (check_opcode(client, client->rx.opcode) == WS_CLIENT_PROTOCOL_ERROR) + if (check_opcode(client->rx.opcode) == WS_CLIENT_PROTOCOL_ERROR) return WS_CLIENT_PROTOCOL_ERROR; if (buf[1] & (char)WS_PAYLOAD_MASKED) { - ERROR("Mask is not allowed in Server->Client Websocket direction."); + nd_log(NDLS_DAEMON, NDLP_ERR, "Mask is not allowed in Server->Client Websocket direction."); return WS_CLIENT_PROTOCOL_ERROR; } @@ -563,12 +544,8 @@ int ws_client_process_rx_ws(ws_client *client) if (!rbuf_bytes_available(client->buf_read)) return WS_CLIENT_NEED_MORE_BYTES; char *insert = rbuf_get_linear_insert_range(client->buf_to_mqtt, &size); - if (!insert) { -#ifdef DEBUG_ULTRA_VERBOSE - DEBUG("BUFFER TOO FULL. Avail %d req %d", (int)size, (int)remaining); -#endif + if (!insert) return WS_CLIENT_BUFFER_FULL; - } size = (size > remaining) ? 
remaining : size; size = rbuf_pop(client->buf_read, insert, size); rbuf_bump_head(client->buf_to_mqtt, size); @@ -582,11 +559,11 @@ int ws_client_process_rx_ws(ws_client *client) // b) 2byte reason code // c) 2byte reason code followed by message if (client->rx.payload_length == 1) { - ERROR("WebScoket CONNECTION_CLOSE can't have payload of size 1"); + nd_log(NDLS_DAEMON, NDLP_ERR, "WebScoket CONNECTION_CLOSE can't have payload of size 1"); return WS_CLIENT_PROTOCOL_ERROR; } if (!client->rx.payload_length) { - INFO("WebSocket server closed the connection without giving reason."); + nd_log(NDLS_DAEMON, NDLP_INFO, "WebSocket server closed the connection without giving reason."); client->rx.parse_state = WS_PACKET_DONE; break; } @@ -600,7 +577,7 @@ int ws_client_process_rx_ws(ws_client *client) client->rx.payload_processed += sizeof(uint16_t); if(client->rx.payload_processed == client->rx.payload_length) { - INFO("WebSocket server closed the connection with EC=%d. Without message.", + nd_log(NDLS_DAEMON, NDLP_INFO, "WebSocket server closed the connection with EC=%d. 
Without message.", client->rx.specific_data.op_close.ec); client->rx.parse_state = WS_PACKET_DONE; break; @@ -619,7 +596,7 @@ int ws_client_process_rx_ws(ws_client *client) client->rx.payload_length - client->rx.payload_processed); } client->rx.specific_data.op_close.reason[client->rx.payload_length] = 0; - INFO("WebSocket server closed the connection with EC=%d and reason \"%s\"", + nd_log(NDLS_DAEMON, NDLP_INFO, "WebSocket server closed the connection with EC=%d and reason \"%s\"", client->rx.specific_data.op_close.ec, client->rx.specific_data.op_close.reason); freez(client->rx.specific_data.op_close.reason); @@ -628,14 +605,14 @@ int ws_client_process_rx_ws(ws_client *client) break; case WS_PAYLOAD_SKIP_UNKNOWN_PAYLOAD: BUF_READ_CHECK_AT_LEAST(client->rx.payload_length); - WARN("Skipping Websocket Packet of unsupported/unknown type"); + nd_log(NDLS_DAEMON, NDLP_WARNING, "Skipping Websocket Packet of unsupported/unknown type"); if (client->rx.payload_length) rbuf_bump_tail(client->buf_read, client->rx.payload_length); client->rx.parse_state = WS_PACKET_DONE; return WS_CLIENT_PARSING_DONE; case WS_PAYLOAD_PING_REQ_PAYLOAD: if (client->rx.payload_length > rbuf_get_capacity(client->buf_read) / 2) { - ERROR("Ping arrived with payload which is too big!"); + nd_log(NDLS_DAEMON, NDLP_ERR, "Ping arrived with payload which is too big!"); return WS_CLIENT_INTERNAL_ERROR; } BUF_READ_CHECK_AT_LEAST(client->rx.payload_length); @@ -645,7 +622,7 @@ int ws_client_process_rx_ws(ws_client *client) // then attempt to send as soon as buffer space clears up size = ws_client_send(client, WS_OP_PONG, client->rx.specific_data.ping_msg, client->rx.payload_length); if (size != client->rx.payload_length) { - ERROR("Unable to send the PONG as one packet back. Closing connection."); + nd_log(NDLS_DAEMON, NDLP_ERR, "Unable to send the PONG as one packet back. 
Closing connection."); return WS_CLIENT_PROTOCOL_ERROR; } client->rx.parse_state = WS_PACKET_DONE; @@ -657,7 +634,7 @@ int ws_client_process_rx_ws(ws_client *client) return WS_CLIENT_CONNECTION_CLOSED; return WS_CLIENT_PARSING_DONE; default: - FATAL("Unknown parse state"); + nd_log(NDLS_DAEMON, NDLP_ERR, "Unknown parse state"); return WS_CLIENT_INTERNAL_ERROR; } return 0; @@ -690,6 +667,8 @@ int ws_client_process(ws_client *client) case WS_CLIENT_CONNECTION_CLOSED: client->state = WS_CONN_CLOSED_GRACEFUL; break; + default: + break; } // if ret == 0 we can continue parsing // if ret == WS_CLIENT_PARSING_DONE we processed @@ -698,13 +677,13 @@ int ws_client_process(ws_client *client) } while (!ret || ret == WS_CLIENT_PARSING_DONE); break; case WS_ERROR: - ERROR("ws_client is in error state. Restart the connection!"); + nd_log(NDLS_DAEMON, NDLP_ERR, "ws_client is in error state. Restart the connection!"); return WS_CLIENT_PROTOCOL_ERROR; case WS_CONN_CLOSED_GRACEFUL: - ERROR("Connection has been gracefully closed. Calling this is useless (and probably bug) until you reconnect again."); + nd_log(NDLS_DAEMON, NDLP_ERR, "Connection has been gracefully closed. Calling this is useless (and probably bug) until you reconnect again."); return WS_CLIENT_CONNECTION_CLOSED; default: - FATAL("Unknown connection state! Probably memory corruption."); + nd_log(NDLS_DAEMON, NDLP_CRIT, "Unknown connection state! 
Probably memory corruption."); return WS_CLIENT_INTERNAL_ERROR; } return ret; diff --git a/src/aclk/mqtt_websockets/ws_client.h b/src/aclk/mqtt_websockets/ws_client.h index 75e780697b802e..67e5835a257025 100644 --- a/src/aclk/mqtt_websockets/ws_client.h +++ b/src/aclk/mqtt_websockets/ws_client.h @@ -3,8 +3,6 @@ #ifndef WS_CLIENT_H #define WS_CLIENT_H -#include "mqtt_wss_log.h" - #define WS_CLIENT_NEED_MORE_BYTES 0x10 #define WS_CLIENT_PARSING_DONE 0x11 #define WS_CLIENT_CONNECTION_CLOSED 0x12 @@ -94,23 +92,20 @@ typedef struct websocket_client { // memory usage and remove one more memcpy buf_read->buf_to_mqtt rbuf_t buf_to_mqtt; // RAW data for MQTT lib - int entropy_fd; - // careful host is borrowed, don't free char **host; - mqtt_wss_log_ctx_t log; } ws_client; -ws_client *ws_client_new(size_t buf_size, char **host, mqtt_wss_log_ctx_t log); +ws_client *ws_client_new(size_t buf_size, char **host); void ws_client_destroy(ws_client *client); void ws_client_reset(ws_client *client); int ws_client_start_handshake(ws_client *client); -int ws_client_want_write(ws_client *client); +int ws_client_want_write(const ws_client *client); int ws_client_process(ws_client *client); -int ws_client_send(ws_client *client, enum websocket_opcode frame_type, const char *data, size_t size); +int ws_client_send(const ws_client *client, enum websocket_opcode frame_type, const char *data, size_t size); #endif /* WS_CLIENT_H */ diff --git a/src/libnetdata/c_rhash/c_rhash.c b/src/libnetdata/c_rhash/c_rhash.c index 0ab25d5d427321..ec2c061a28240e 100644 --- a/src/libnetdata/c_rhash/c_rhash.c +++ b/src/libnetdata/c_rhash/c_rhash.c @@ -8,9 +8,6 @@ c_rhash c_rhash_new(size_t bin_count) { bin_count = 1000; c_rhash hash = callocz(1, sizeof(struct c_rhash_s) + (bin_count * sizeof(struct bin_ll*)) ); - if (hash == NULL) - return NULL; - hash->bin_count = bin_count; hash->bins = (c_rhash_bin *)((char*)hash + sizeof(struct c_rhash_s)); diff --git a/src/libnetdata/libnetdata.c 
b/src/libnetdata/libnetdata.c index 17dbd85428e4f5..84aa6e339f4f67 100644 --- a/src/libnetdata/libnetdata.c +++ b/src/libnetdata/libnetdata.c @@ -1570,52 +1570,93 @@ bool rrdr_relative_window_to_absolute_query(time_t *after, time_t *before, time_ return (absolute_period_requested != 1); } -int netdata_base64_decode(const char *encoded, char *decoded, size_t decoded_size) { - static const unsigned char base64_table[256] = { - ['A'] = 0, ['B'] = 1, ['C'] = 2, ['D'] = 3, ['E'] = 4, ['F'] = 5, ['G'] = 6, ['H'] = 7, - ['I'] = 8, ['J'] = 9, ['K'] = 10, ['L'] = 11, ['M'] = 12, ['N'] = 13, ['O'] = 14, ['P'] = 15, - ['Q'] = 16, ['R'] = 17, ['S'] = 18, ['T'] = 19, ['U'] = 20, ['V'] = 21, ['W'] = 22, ['X'] = 23, - ['Y'] = 24, ['Z'] = 25, ['a'] = 26, ['b'] = 27, ['c'] = 28, ['d'] = 29, ['e'] = 30, ['f'] = 31, - ['g'] = 32, ['h'] = 33, ['i'] = 34, ['j'] = 35, ['k'] = 36, ['l'] = 37, ['m'] = 38, ['n'] = 39, - ['o'] = 40, ['p'] = 41, ['q'] = 42, ['r'] = 43, ['s'] = 44, ['t'] = 45, ['u'] = 46, ['v'] = 47, - ['w'] = 48, ['x'] = 49, ['y'] = 50, ['z'] = 51, ['0'] = 52, ['1'] = 53, ['2'] = 54, ['3'] = 55, - ['4'] = 56, ['5'] = 57, ['6'] = 58, ['7'] = 59, ['8'] = 60, ['9'] = 61, ['+'] = 62, ['/'] = 63, - [0 ... '+' - 1] = 255, - ['+' + 1 ... '/' - 1] = 255, - ['9' + 1 ... 'A' - 1] = 255, - ['Z' + 1 ... 'a' - 1] = 255, - ['z' + 1 ... 
255] = 255 - }; - size_t count = 0; - unsigned int tmp = 0; - int i, bit; - - if (decoded_size < 1) - return 0; // Buffer size must be at least 1 for null termination - - for (i = 0, bit = 0; encoded[i]; i++) { - unsigned char value = base64_table[(unsigned char)encoded[i]]; - if (value > 63) - return -1; // Invalid character in input - - tmp = tmp << 6 | value; - if (++bit == 4) { - if (count + 3 >= decoded_size) break; // Stop decoding if buffer is full - decoded[count++] = (tmp >> 16) & 0xFF; - decoded[count++] = (tmp >> 8) & 0xFF; - decoded[count++] = tmp & 0xFF; - tmp = 0; - bit = 0; - } - } +#if defined(OPENSSL_VERSION_NUMBER) && OPENSSL_VERSION_NUMBER < OPENSSL_VERSION_110 +static inline EVP_ENCODE_CTX *EVP_ENCODE_CTX_new(void) +{ + EVP_ENCODE_CTX *ctx = OPENSSL_malloc(sizeof(*ctx)); - if (bit > 0 && count + 1 < decoded_size) { - tmp <<= 6 * (4 - bit); - if (bit > 2 && count + 1 < decoded_size) decoded[count++] = (tmp >> 16) & 0xFF; - if (bit > 3 && count + 1 < decoded_size) decoded[count++] = (tmp >> 8) & 0xFF; + if (ctx != NULL) { + memset(ctx, 0, sizeof(*ctx)); } + return ctx; +} + +static void EVP_ENCODE_CTX_free(EVP_ENCODE_CTX *ctx) +{ + OPENSSL_free(ctx); +} +#endif - decoded[count] = '\0'; // Null terminate the output string - return count; +int netdata_base64_decode(unsigned char *out, const unsigned char *in, const int in_len) +{ + int outl; + unsigned char remaining_data[256]; + + EVP_ENCODE_CTX *ctx = EVP_ENCODE_CTX_new(); + EVP_DecodeInit(ctx); + EVP_DecodeUpdate(ctx, out, &outl, in, in_len); + int remainder = 0; + EVP_DecodeFinal(ctx, remaining_data, &remainder); + EVP_ENCODE_CTX_free(ctx); + if (remainder) + return -1; + + return outl; } + +int netdata_base64_encode(unsigned char *encoded, const unsigned char *input, size_t input_size) +{ + return EVP_EncodeBlock(encoded, input, input_size); +} + +// Keep internal implementation +// int netdata_base64_decode_internal(const char *encoded, char *decoded, size_t decoded_size) { +// static const 
unsigned char base64_table[256] = { +// ['A'] = 0, ['B'] = 1, ['C'] = 2, ['D'] = 3, ['E'] = 4, ['F'] = 5, ['G'] = 6, ['H'] = 7, +// ['I'] = 8, ['J'] = 9, ['K'] = 10, ['L'] = 11, ['M'] = 12, ['N'] = 13, ['O'] = 14, ['P'] = 15, +// ['Q'] = 16, ['R'] = 17, ['S'] = 18, ['T'] = 19, ['U'] = 20, ['V'] = 21, ['W'] = 22, ['X'] = 23, +// ['Y'] = 24, ['Z'] = 25, ['a'] = 26, ['b'] = 27, ['c'] = 28, ['d'] = 29, ['e'] = 30, ['f'] = 31, +// ['g'] = 32, ['h'] = 33, ['i'] = 34, ['j'] = 35, ['k'] = 36, ['l'] = 37, ['m'] = 38, ['n'] = 39, +// ['o'] = 40, ['p'] = 41, ['q'] = 42, ['r'] = 43, ['s'] = 44, ['t'] = 45, ['u'] = 46, ['v'] = 47, +// ['w'] = 48, ['x'] = 49, ['y'] = 50, ['z'] = 51, ['0'] = 52, ['1'] = 53, ['2'] = 54, ['3'] = 55, +// ['4'] = 56, ['5'] = 57, ['6'] = 58, ['7'] = 59, ['8'] = 60, ['9'] = 61, ['+'] = 62, ['/'] = 63, +// [0 ... '+' - 1] = 255, +// ['+' + 1 ... '/' - 1] = 255, +// ['9' + 1 ... 'A' - 1] = 255, +// ['Z' + 1 ... 'a' - 1] = 255, +// ['z' + 1 ... 255] = 255 +// }; +// +// size_t count = 0; +// unsigned int tmp = 0; +// int i, bit; +// +// if (decoded_size < 1) +// return 0; // Buffer size must be at least 1 for null termination +// +// for (i = 0, bit = 0; encoded[i]; i++) { +// unsigned char value = base64_table[(unsigned char)encoded[i]]; +// if (value > 63) +// return -1; // Invalid character in input +// +// tmp = tmp << 6 | value; +// if (++bit == 4) { +// if (count + 3 >= decoded_size) break; // Stop decoding if buffer is full +// decoded[count++] = (tmp >> 16) & 0xFF; +// decoded[count++] = (tmp >> 8) & 0xFF; +// decoded[count++] = tmp & 0xFF; +// tmp = 0; +// bit = 0; +// } +// } +// +// if (bit > 0 && count + 1 < decoded_size) { +// tmp <<= 6 * (4 - bit); +// if (bit > 2 && count + 1 < decoded_size) decoded[count++] = (tmp >> 16) & 0xFF; +// if (bit > 3 && count + 1 < decoded_size) decoded[count++] = (tmp >> 8) & 0xFF; +// } +// +// decoded[count] = '\0'; // Null terminate the output string +// return count; +// } diff --git 
a/src/libnetdata/libnetdata.h b/src/libnetdata/libnetdata.h index 0963d63df68dee..d27d33e5a88959 100644 --- a/src/libnetdata/libnetdata.h +++ b/src/libnetdata/libnetdata.h @@ -642,7 +642,8 @@ extern bool unittest_running; bool rrdr_relative_window_to_absolute(time_t *after, time_t *before, time_t now); bool rrdr_relative_window_to_absolute_query(time_t *after, time_t *before, time_t *now_ptr, bool unittest); -int netdata_base64_decode(const char *encoded, char *decoded, size_t decoded_size); +int netdata_base64_decode(unsigned char *out, const unsigned char *in, int in_len); +int netdata_base64_encode(unsigned char *encoded, const unsigned char *input, size_t input_size); static inline void freez_charp(char **p) { freez(*p); diff --git a/src/libnetdata/ringbuffer/ringbuffer.c b/src/libnetdata/ringbuffer/ringbuffer.c index 5a3523962e1d8e..b30b3c39a2a349 100644 --- a/src/libnetdata/ringbuffer/ringbuffer.c +++ b/src/libnetdata/ringbuffer/ringbuffer.c @@ -6,9 +6,6 @@ rbuf_t rbuf_create(size_t size) { rbuf_t buffer = mallocz(sizeof(struct rbuf) + size); - if (!buffer) - return NULL; - memset(buffer, 0, sizeof(struct rbuf)); buffer->data = ((char*)buffer) + sizeof(struct rbuf); diff --git a/src/libnetdata/socket/security.h b/src/libnetdata/socket/security.h index c5c4d79c56beef..4e0b113cf58b07 100644 --- a/src/libnetdata/socket/security.h +++ b/src/libnetdata/socket/security.h @@ -19,6 +19,7 @@ typedef enum __attribute__((packed)) { #define OPENSSL_VERSION_300 0x30000000L # include +# include # include # include # include From 3c0ff6203e035d27c187fb315f8e5c514b37a325 Mon Sep 17 00:00:00 2001 From: netdatabot Date: Mon, 30 Sep 2024 00:20:14 +0000 Subject: [PATCH 03/23] [ci skip] Update changelog and version for nightly build: v1.99.0-210-nightly. 
--- CHANGELOG.md | 3 ++- packaging/version | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5432282b85a22b..b887dcafbf026f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,8 @@ - go.d/postgres: fix checkpoints query for postgres 17 [\#18629](https://github.com/netdata/netdata/pull/18629) ([ilyam8](https://github.com/ilyam8)) - go.d/ceph: fix leftovers after \#18582 [\#18628](https://github.com/netdata/netdata/pull/18628) ([ilyam8](https://github.com/ilyam8)) - Regenerate integrations.js [\#18627](https://github.com/netdata/netdata/pull/18627) ([netdatabot](https://github.com/netdatabot)) +- Change default pages per extent [\#18623](https://github.com/netdata/netdata/pull/18623) ([stelfrag](https://github.com/stelfrag)) +- Misc mqtt related code cleanup [\#18622](https://github.com/netdata/netdata/pull/18622) ([stelfrag](https://github.com/stelfrag)) - Revert "Add ceph commands to ndsudo" [\#18620](https://github.com/netdata/netdata/pull/18620) ([ilyam8](https://github.com/ilyam8)) - go.d/hddtemp: connect and read [\#18619](https://github.com/netdata/netdata/pull/18619) ([ilyam8](https://github.com/ilyam8)) - go.d/uwsgi: don't write just connect and read [\#18618](https://github.com/netdata/netdata/pull/18618) ([ilyam8](https://github.com/ilyam8)) @@ -419,7 +421,6 @@ - go.d postgres github.com/jackc/pgx/v5 [\#18062](https://github.com/netdata/netdata/pull/18062) ([ilyam8](https://github.com/ilyam8)) - fix prometeus export: missing comma before "instance" label [\#18061](https://github.com/netdata/netdata/pull/18061) ([ilyam8](https://github.com/ilyam8)) - go.d vsphere add update\_every ui:help [\#18060](https://github.com/netdata/netdata/pull/18060) ([ilyam8](https://github.com/ilyam8)) -- restructure go.d [\#18058](https://github.com/netdata/netdata/pull/18058) ([ilyam8](https://github.com/ilyam8)) ## [v1.46.3](https://github.com/netdata/netdata/tree/v1.46.3) (2024-07-23) diff --git a/packaging/version 
b/packaging/version index 2e41f95fa2f6bd..3da8b6ab413d46 100644 --- a/packaging/version +++ b/packaging/version @@ -1 +1 @@ -v1.99.0-207-nightly +v1.99.0-210-nightly From ee2e1e0a88839c7535518561a97259446940de17 Mon Sep 17 00:00:00 2001 From: Fotis Voutsas Date: Mon, 30 Sep 2024 14:41:19 +0300 Subject: [PATCH 04/23] Remove Python OpenLDAP implementation (#18626) --- CMakeLists.txt | 2 - .../python.d.plugin/openldap/README.md | 1 - .../openldap/integrations/openldap.md | 249 ------------------ .../python.d.plugin/openldap/metadata.yaml | 225 ---------------- .../openldap/openldap.chart.py | 216 --------------- .../python.d.plugin/openldap/openldap.conf | 75 ------ src/collectors/python.d.plugin/python.d.conf | 2 +- 7 files changed, 1 insertion(+), 769 deletions(-) delete mode 120000 src/collectors/python.d.plugin/openldap/README.md delete mode 100644 src/collectors/python.d.plugin/openldap/integrations/openldap.md delete mode 100644 src/collectors/python.d.plugin/openldap/metadata.yaml delete mode 100644 src/collectors/python.d.plugin/openldap/openldap.chart.py delete mode 100644 src/collectors/python.d.plugin/openldap/openldap.conf diff --git a/CMakeLists.txt b/CMakeLists.txt index da13eb46d809c3..4f37add255e990 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2937,7 +2937,6 @@ if(ENABLE_PLUGIN_PYTHON) src/collectors/python.d.plugin/am2320/am2320.conf src/collectors/python.d.plugin/go_expvar/go_expvar.conf src/collectors/python.d.plugin/haproxy/haproxy.conf - src/collectors/python.d.plugin/openldap/openldap.conf src/collectors/python.d.plugin/oracledb/oracledb.conf src/collectors/python.d.plugin/pandas/pandas.conf src/collectors/python.d.plugin/spigotmc/spigotmc.conf @@ -2950,7 +2949,6 @@ if(ENABLE_PLUGIN_PYTHON) src/collectors/python.d.plugin/am2320/am2320.chart.py src/collectors/python.d.plugin/go_expvar/go_expvar.chart.py src/collectors/python.d.plugin/haproxy/haproxy.chart.py - src/collectors/python.d.plugin/openldap/openldap.chart.py 
src/collectors/python.d.plugin/oracledb/oracledb.chart.py src/collectors/python.d.plugin/pandas/pandas.chart.py src/collectors/python.d.plugin/spigotmc/spigotmc.chart.py diff --git a/src/collectors/python.d.plugin/openldap/README.md b/src/collectors/python.d.plugin/openldap/README.md deleted file mode 120000 index 45f36b9b92ea6b..00000000000000 --- a/src/collectors/python.d.plugin/openldap/README.md +++ /dev/null @@ -1 +0,0 @@ -integrations/openldap.md \ No newline at end of file diff --git a/src/collectors/python.d.plugin/openldap/integrations/openldap.md b/src/collectors/python.d.plugin/openldap/integrations/openldap.md deleted file mode 100644 index 3f363343a747fc..00000000000000 --- a/src/collectors/python.d.plugin/openldap/integrations/openldap.md +++ /dev/null @@ -1,249 +0,0 @@ - - -# OpenLDAP - - - - - -Plugin: python.d.plugin -Module: openldap - - - -## Overview - -This collector monitors OpenLDAP metrics about connections, operations, referrals and more. - -Statistics are taken from the monitoring interface of a openLDAP (slapd) server - - -This collector is supported on all platforms. - -This collector only supports collecting metrics from a single instance of this integration. - - -### Default Behavior - -#### Auto-Detection - -This collector doesn't work until all the prerequisites are checked. - - -#### Limits - -The default configuration for this integration does not impose any limits on data collection. - -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. - - -## Metrics - -Metrics grouped by *scope*. - -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per OpenLDAP instance - -These metrics refer to the entire monitored application. - -This scope has no labels. 
- -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| openldap.total_connections | connections | connections/s | -| openldap.traffic_stats | sent | KiB/s | -| openldap.operations_status | completed, initiated | ops/s | -| openldap.referrals | sent | referrals/s | -| openldap.entries | sent | entries/s | -| openldap.ldap_operations | bind, search, unbind, add, delete, modify, compare | ops/s | -| openldap.waiters | write, read | waiters/s | - - - -## Alerts - -There are no alerts configured by default for this integration. - - -## Setup - -### Prerequisites - -#### Configure the openLDAP server to expose metrics to monitor it. - -Follow instructions from https://www.openldap.org/doc/admin24/monitoringslapd.html to activate monitoring interface. - - -#### Install python-ldap module - -Install python ldap module - -1. From pip package manager - -```bash -pip install ldap -``` - -2. With apt package manager (in most deb based distros) - - -```bash -apt-get install python-ldap -``` - - -3. With yum package manager (in most rpm based distros) - - -```bash -yum install python-ldap -``` - - -#### Insert credentials for Netdata to access openLDAP server - -Use the `ldappasswd` utility to set a password for the username you will use. - - - -### Configuration - -#### File - -The configuration file name for this integration is `python.d/openldap.conf`. - - -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). - -```bash -cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata -sudo ./edit-config python.d/openldap.conf -``` -#### Options - -There are 2 sections: - -* Global variables -* One or more JOBS that can define multiple different instances to monitor. - -The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. 
- -Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - -Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - - -
Config options - -| Name | Description | Default | Required | -|:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | no | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | -| username | The bind user with right to access monitor statistics | | yes | -| password | The password for the binded user | | yes | -| server | The listening address of the LDAP server. In case of TLS, use the hostname which the certificate is published for. | | yes | -| port | The listening port of the LDAP server. Change to 636 port in case of TLS connection. | 389 | yes | -| use_tls | Make True if a TLS connection is used over ldaps:// | no | no | -| use_start_tls | Make True if a TLS connection is used over ldap:// | no | no | -| cert_check | False if you want to ignore certificate check | True | yes | -| timeout | Seconds to timeout if no connection exist | | yes | - -
- -#### Examples - -##### Basic - -A basic example configuration. - -```yaml -username: "cn=admin" -password: "pass" -server: "localhost" -port: "389" -check_cert: True -timeout: 1 - -``` - - -## Troubleshooting - -### Debug Mode - - -To troubleshoot issues with the `openldap` collector, run the `python.d.plugin` with the debug option enabled. The output -should give you clues as to why the collector isn't working. - -- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on - your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. - - ```bash - cd /usr/libexec/netdata/plugins.d/ - ``` - -- Switch to the `netdata` user. - - ```bash - sudo -u netdata -s - ``` - -- Run the `python.d.plugin` to debug the collector: - - ```bash - ./python.d.plugin openldap debug trace - ``` - -### Getting Logs - -If you're encountering problems with the `openldap` collector, follow these steps to retrieve logs and identify potential issues: - -- **Run the command** specific to your system (systemd, non-systemd, or Docker container). -- **Examine the output** for any warnings or error messages that might indicate issues. These messages should provide clues about the root cause of the problem. - -#### System with systemd - -Use the following command to view logs generated since the last Netdata service restart: - -```bash -journalctl _SYSTEMD_INVOCATION_ID="$(systemctl show --value --property=InvocationID netdata)" --namespace=netdata --grep openldap -``` - -#### System without systemd - -Locate the collector log file, typically at `/var/log/netdata/collector.log`, and use `grep` to filter for collector's name: - -```bash -grep openldap /var/log/netdata/collector.log -``` - -**Note**: This method shows logs from all restarts. Focus on the **latest entries** for troubleshooting current issues. 
- -#### Docker Container - -If your Netdata runs in a Docker container named "netdata" (replace if different), use this command: - -```bash -docker logs netdata 2>&1 | grep openldap -``` - - diff --git a/src/collectors/python.d.plugin/openldap/metadata.yaml b/src/collectors/python.d.plugin/openldap/metadata.yaml deleted file mode 100644 index 3826b22c72c7cb..00000000000000 --- a/src/collectors/python.d.plugin/openldap/metadata.yaml +++ /dev/null @@ -1,225 +0,0 @@ -plugin_name: python.d.plugin -modules: - - meta: - plugin_name: python.d.plugin - module_name: openldap - monitored_instance: - name: OpenLDAP - link: "https://www.openldap.org/" - categories: - - data-collection.authentication-and-authorization - icon_filename: "statsd.png" - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: "" - keywords: - - openldap - - RBAC - - Directory access - most_popular: false - overview: - data_collection: - metrics_description: "This collector monitors OpenLDAP metrics about connections, operations, referrals and more." - method_description: | - Statistics are taken from the monitoring interface of a openLDAP (slapd) server - supported_platforms: - include: [] - exclude: [] - multi_instance: false - additional_permissions: - description: "" - default_behavior: - auto_detection: - description: | - This collector doesn't work until all the prerequisites are checked. - limits: - description: "" - performance_impact: - description: "" - setup: - prerequisites: - list: - - title: Configure the openLDAP server to expose metrics to monitor it. - description: | - Follow instructions from https://www.openldap.org/doc/admin24/monitoringslapd.html to activate monitoring interface. - - title: Install python-ldap module - description: | - Install python ldap module - - 1. From pip package manager - - ```bash - pip install ldap - ``` - - 2. 
With apt package manager (in most deb based distros) - - - ```bash - apt-get install python-ldap - ``` - - - 3. With yum package manager (in most rpm based distros) - - - ```bash - yum install python-ldap - ``` - - title: Insert credentials for Netdata to access openLDAP server - description: | - Use the `ldappasswd` utility to set a password for the username you will use. - configuration: - file: - name: "python.d/openldap.conf" - options: - description: | - There are 2 sections: - - * Global variables - * One or more JOBS that can define multiple different instances to monitor. - - The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - - Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - - Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - folding: - title: "Config options" - enabled: true - list: - - name: update_every - description: Sets the default data collection frequency. - default_value: 5 - required: false - - name: priority - description: Controls the order of charts at the netdata dashboard. - default_value: 60000 - required: false - - name: autodetection_retry - description: Sets the job re-check interval in seconds. - default_value: 0 - required: false - - name: penalty - description: Indicates whether to apply penalty to update_every in case of failures. - default_value: yes - required: false - - name: name - description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. 
- default_value: "" - required: false - - name: username - description: The bind user with right to access monitor statistics - default_value: "" - required: true - - name: password - description: The password for the binded user - default_value: "" - required: true - - name: server - description: The listening address of the LDAP server. In case of TLS, use the hostname which the certificate is published for. - default_value: "" - required: true - - name: port - description: The listening port of the LDAP server. Change to 636 port in case of TLS connection. - default_value: "389" - required: true - - name: use_tls - description: Make True if a TLS connection is used over ldaps:// - default_value: False - required: false - - name: use_start_tls - description: Make True if a TLS connection is used over ldap:// - default_value: False - required: false - - name: cert_check - description: False if you want to ignore certificate check - default_value: "True" - required: true - - name: timeout - description: Seconds to timeout if no connection exist - default_value: "" - required: true - examples: - folding: - enabled: true - title: "Config" - list: - - name: Basic - description: A basic example configuration. - folding: - enabled: false - config: | - username: "cn=admin" - password: "pass" - server: "localhost" - port: "389" - check_cert: True - timeout: 1 - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "These metrics refer to the entire monitored application." 
- labels: [] - metrics: - - name: openldap.total_connections - description: Total Connections - unit: "connections/s" - chart_type: line - dimensions: - - name: connections - - name: openldap.traffic_stats - description: Traffic - unit: "KiB/s" - chart_type: line - dimensions: - - name: sent - - name: openldap.operations_status - description: Operations Status - unit: "ops/s" - chart_type: line - dimensions: - - name: completed - - name: initiated - - name: openldap.referrals - description: Referrals - unit: "referrals/s" - chart_type: line - dimensions: - - name: sent - - name: openldap.entries - description: Entries - unit: "entries/s" - chart_type: line - dimensions: - - name: sent - - name: openldap.ldap_operations - description: Operations - unit: "ops/s" - chart_type: line - dimensions: - - name: bind - - name: search - - name: unbind - - name: add - - name: delete - - name: modify - - name: compare - - name: openldap.waiters - description: Waiters - unit: "waiters/s" - chart_type: line - dimensions: - - name: write - - name: read diff --git a/src/collectors/python.d.plugin/openldap/openldap.chart.py b/src/collectors/python.d.plugin/openldap/openldap.chart.py deleted file mode 100644 index aba143954bca3d..00000000000000 --- a/src/collectors/python.d.plugin/openldap/openldap.chart.py +++ /dev/null @@ -1,216 +0,0 @@ -# -*- coding: utf-8 -*- -# Description: openldap netdata python.d module -# Author: Manolis Kartsonakis (ekartsonakis) -# SPDX-License-Identifier: GPL-3.0+ - -try: - import ldap - - HAS_LDAP = True -except ImportError: - HAS_LDAP = False - -from bases.FrameworkServices.SimpleService import SimpleService - -DEFAULT_SERVER = 'localhost' -DEFAULT_PORT = '389' -DEFAULT_TLS = False -DEFAULT_CERT_CHECK = True -DEFAULT_TIMEOUT = 1 -DEFAULT_START_TLS = False - -ORDER = [ - 'total_connections', - 'bytes_sent', - 'operations', - 'referrals_sent', - 'entries_sent', - 'ldap_operations', - 'waiters' -] - -CHARTS = { - 'total_connections': { - 'options': [None, 
'Total Connections', 'connections/s', 'ldap', 'openldap.total_connections', 'line'], - 'lines': [ - ['total_connections', 'connections', 'incremental'] - ] - }, - 'bytes_sent': { - 'options': [None, 'Traffic', 'KiB/s', 'ldap', 'openldap.traffic_stats', 'line'], - 'lines': [ - ['bytes_sent', 'sent', 'incremental', 1, 1024] - ] - }, - 'operations': { - 'options': [None, 'Operations Status', 'ops/s', 'ldap', 'openldap.operations_status', 'line'], - 'lines': [ - ['completed_operations', 'completed', 'incremental'], - ['initiated_operations', 'initiated', 'incremental'] - ] - }, - 'referrals_sent': { - 'options': [None, 'Referrals', 'referrals/s', 'ldap', 'openldap.referrals', 'line'], - 'lines': [ - ['referrals_sent', 'sent', 'incremental'] - ] - }, - 'entries_sent': { - 'options': [None, 'Entries', 'entries/s', 'ldap', 'openldap.entries', 'line'], - 'lines': [ - ['entries_sent', 'sent', 'incremental'] - ] - }, - 'ldap_operations': { - 'options': [None, 'Operations', 'ops/s', 'ldap', 'openldap.ldap_operations', 'line'], - 'lines': [ - ['bind_operations', 'bind', 'incremental'], - ['search_operations', 'search', 'incremental'], - ['unbind_operations', 'unbind', 'incremental'], - ['add_operations', 'add', 'incremental'], - ['delete_operations', 'delete', 'incremental'], - ['modify_operations', 'modify', 'incremental'], - ['compare_operations', 'compare', 'incremental'] - ] - }, - 'waiters': { - 'options': [None, 'Waiters', 'waiters/s', 'ldap', 'openldap.waiters', 'line'], - 'lines': [ - ['write_waiters', 'write', 'incremental'], - ['read_waiters', 'read', 'incremental'] - ] - }, -} - -# Stuff to gather - make tuples of DN dn and attrib to get -SEARCH_LIST = { - 'total_connections': ( - 'cn=Total,cn=Connections,cn=Monitor', 'monitorCounter', - ), - 'bytes_sent': ( - 'cn=Bytes,cn=Statistics,cn=Monitor', 'monitorCounter', - ), - 'completed_operations': ( - 'cn=Operations,cn=Monitor', 'monitorOpCompleted', - ), - 'initiated_operations': ( - 'cn=Operations,cn=Monitor', 
'monitorOpInitiated', - ), - 'referrals_sent': ( - 'cn=Referrals,cn=Statistics,cn=Monitor', 'monitorCounter', - ), - 'entries_sent': ( - 'cn=Entries,cn=Statistics,cn=Monitor', 'monitorCounter', - ), - 'bind_operations': ( - 'cn=Bind,cn=Operations,cn=Monitor', 'monitorOpCompleted', - ), - 'unbind_operations': ( - 'cn=Unbind,cn=Operations,cn=Monitor', 'monitorOpCompleted', - ), - 'add_operations': ( - 'cn=Add,cn=Operations,cn=Monitor', 'monitorOpInitiated', - ), - 'delete_operations': ( - 'cn=Delete,cn=Operations,cn=Monitor', 'monitorOpCompleted', - ), - 'modify_operations': ( - 'cn=Modify,cn=Operations,cn=Monitor', 'monitorOpCompleted', - ), - 'compare_operations': ( - 'cn=Compare,cn=Operations,cn=Monitor', 'monitorOpCompleted', - ), - 'search_operations': ( - 'cn=Search,cn=Operations,cn=Monitor', 'monitorOpCompleted', - ), - 'write_waiters': ( - 'cn=Write,cn=Waiters,cn=Monitor', 'monitorCounter', - ), - 'read_waiters': ( - 'cn=Read,cn=Waiters,cn=Monitor', 'monitorCounter', - ), -} - - -class Service(SimpleService): - def __init__(self, configuration=None, name=None): - SimpleService.__init__(self, configuration=configuration, name=name) - self.order = ORDER - self.definitions = CHARTS - self.server = configuration.get('server', DEFAULT_SERVER) - self.port = configuration.get('port', DEFAULT_PORT) - self.username = configuration.get('username') - self.password = configuration.get('password') - self.timeout = configuration.get('timeout', DEFAULT_TIMEOUT) - self.use_tls = configuration.get('use_tls', DEFAULT_TLS) - self.cert_check = configuration.get('cert_check', DEFAULT_CERT_CHECK) - self.use_start_tls = configuration.get('use_start_tls', DEFAULT_START_TLS) - self.alive = False - self.conn = None - - def disconnect(self): - if self.conn: - self.conn.unbind() - self.conn = None - self.alive = False - - def connect(self): - try: - if self.use_tls: - self.conn = ldap.initialize('ldaps://%s:%s' % (self.server, self.port)) - else: - self.conn = 
ldap.initialize('ldap://%s:%s' % (self.server, self.port)) - self.conn.set_option(ldap.OPT_NETWORK_TIMEOUT, self.timeout) - if (self.use_tls or self.use_start_tls) and not self.cert_check: - self.conn.set_option(ldap.OPT_X_TLS_REQUIRE_CERT, ldap.OPT_X_TLS_NEVER) - if self.use_start_tls or self.use_tls: - self.conn.set_option(ldap.OPT_X_TLS_NEWCTX, 0) - if self.use_start_tls: - self.conn.protocol_version = ldap.VERSION3 - self.conn.start_tls_s() - if self.username and self.password: - self.conn.simple_bind(self.username, self.password) - except ldap.LDAPError as error: - self.error(error) - return False - - self.alive = True - return True - - def reconnect(self): - self.disconnect() - return self.connect() - - def check(self): - if not HAS_LDAP: - self.error("'python-ldap' package is needed") - return None - - return self.connect() and self.get_data() - - def get_data(self): - if not self.alive and not self.reconnect(): - return None - - data = dict() - for key in SEARCH_LIST: - dn = SEARCH_LIST[key][0] - attr = SEARCH_LIST[key][1] - try: - num = self.conn.search(dn, ldap.SCOPE_BASE, 'objectClass=*', [attr, ]) - result_type, result_data = self.conn.result(num, 1) - except ldap.LDAPError as error: - self.error("Empty result. Check bind username/password. Message: ", error) - self.alive = False - return None - - if result_type != 101: - continue - - try: - data[key] = int(list(result_data[0][1].values())[0][0]) - except (ValueError, IndexError) as error: - self.debug(error) - continue - - return data diff --git a/src/collectors/python.d.plugin/openldap/openldap.conf b/src/collectors/python.d.plugin/openldap/openldap.conf deleted file mode 100644 index 5fd99a5257fc78..00000000000000 --- a/src/collectors/python.d.plugin/openldap/openldap.conf +++ /dev/null @@ -1,75 +0,0 @@ -# netdata python.d.plugin configuration for openldap -# -# This file is in YaML format. 
Generally the format is: -# -# name: value -# -# There are 2 sections: -# - global variables -# - one or more JOBS -# -# JOBS allow you to collect values from multiple sources. -# Each source will have its own set of charts. -# -# JOB parameters have to be indented (using spaces only, example below). - -# ---------------------------------------------------------------------- -# Global Variables -# These variables set the defaults for all JOBs, however each JOB -# may define its own, overriding the defaults. - -# update_every sets the default data collection frequency. -# If unset, the python.d.plugin default is used. -# postfix is slow, so once every 10 seconds -update_every: 10 - -# priority controls the order of charts at the netdata dashboard. -# Lower numbers move the charts towards the top of the page. -# If unset, the default for python.d.plugin is used. -# priority: 60000 - -# penalty indicates whether to apply penalty to update_every in case of failures. -# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. -# penalty: yes - -# autodetection_retry sets the job re-check interval in seconds. -# The job is not deleted if check fails. -# Attempts to start the job are made once every autodetection_retry. -# This feature is disabled by default. -# autodetection_retry: 0 - -# ---------------------------------------------------------------------- -# JOBS (data collection sources) -# -# The default JOBS share the same *name*. JOBS with the same name -# are mutually exclusive. Only one of them will be allowed running at -# any time. This allows autodetection to try several alternatives and -# pick the one that works. -# -# Any number of jobs is supported. -# -# All python.d.plugin JOBS (for all its modules) support a set of -# predefined parameters. 
These are: -# -# job_name: -# name: myname # the JOB's name as it will appear at the -# # dashboard (by default is the job_name) -# # JOBs sharing a name are mutually exclusive -# update_every: 1 # the JOB's data collection frequency -# priority: 60000 # the JOB's order on the dashboard -# penalty: yes # the JOB's penalty -# autodetection_retry: 0 # the JOB's re-check interval in seconds -# -# ---------------------------------------------------------------------- -# OPENLDAP EXTRA PARAMETERS - -# Set here your LDAP connection settings - -#username : "cn=admin,dc=example,dc=com" # The bind user with right to access monitor statistics -#password : "yourpass" # The password for the binded user -#server : 'localhost' # The listening address of the LDAP server. In case of TLS, use the hostname which the certificate is published for. -#port : 389 # The listening port of the LDAP server. Change to 636 port in case of TLS connection -#use_tls : False # Make True if a TLS connection is used over ldaps:// -#use_start_tls: False # Make True if a TLS connection is used over ldap:// -#cert_check : True # False if you want to ignore certificate check -#timeout : 1 # Seconds to timeout if no connection exi diff --git a/src/collectors/python.d.plugin/python.d.conf b/src/collectors/python.d.plugin/python.d.conf index d575d3d8bdccb2..26a941817e2ca7 100644 --- a/src/collectors/python.d.plugin/python.d.conf +++ b/src/collectors/python.d.plugin/python.d.conf @@ -29,7 +29,6 @@ gc_interval: 300 # this is just an example go_expvar: no # haproxy: yes -# openldap: yes # oracledb: yes # pandas: yes # retroshare: yes @@ -65,6 +64,7 @@ mysql: no # Removed (replaced with go.d/mysql). nginx: no # Removed (replaced with go.d/nginx). nsd: no # Removed (replaced with go.d/nsd). nvidia_smi: no # Removed (replaced with go.d/nvidia_smi). +openldap: no # Removed (replaced with go.d/openldap). postfix: no # Removed (replaced with go.d/postfix). postgres: no # Removed (replaced with go.d/postgres). 
proxysql: no # Removed (replaced with go.d/proxysql). From 2f12797f93bed79b446c944ce82e2309076935a1 Mon Sep 17 00:00:00 2001 From: vkalintiris Date: Mon, 30 Sep 2024 15:48:09 +0300 Subject: [PATCH 05/23] Update file names. (#18638) --- CMakeLists.txt | 16 ++++++++-------- .../common-contexts/common-contexts.h | 18 +++++++++--------- .../common-contexts/{disk.io.h => disk-io.h} | 0 .../{mem.available.h => mem-available.h} | 0 .../{mem.pgfaults.h => mem-pgfaults.h} | 0 .../common-contexts/{mem.swap.h => mem-swap.h} | 0 ...system.interrupts.h => system-interrupts.h} | 0 .../{system.io.h => system-io.h} | 0 .../{system.ipc.h => system-ipc.h} | 0 .../{system.processes.h => system-processes.h} | 0 .../{system.ram.h => system-ram.h} | 0 11 files changed, 17 insertions(+), 17 deletions(-) rename src/collectors/common-contexts/{disk.io.h => disk-io.h} (100%) rename src/collectors/common-contexts/{mem.available.h => mem-available.h} (100%) rename src/collectors/common-contexts/{mem.pgfaults.h => mem-pgfaults.h} (100%) rename src/collectors/common-contexts/{mem.swap.h => mem-swap.h} (100%) rename src/collectors/common-contexts/{system.interrupts.h => system-interrupts.h} (100%) rename src/collectors/common-contexts/{system.io.h => system-io.h} (100%) rename src/collectors/common-contexts/{system.ipc.h => system-ipc.h} (100%) rename src/collectors/common-contexts/{system.processes.h => system-processes.h} (100%) rename src/collectors/common-contexts/{system.ram.h => system-ram.h} (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4f37add255e990..e4bb053c8760a1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1125,14 +1125,14 @@ endif() set(INTERNAL_COLLECTORS_FILES src/collectors/common-contexts/common-contexts.h - src/collectors/common-contexts/disk.io.h - src/collectors/common-contexts/system.io.h - src/collectors/common-contexts/system.interrupts.h - src/collectors/common-contexts/system.processes.h - src/collectors/common-contexts/system.ram.h - 
src/collectors/common-contexts/mem.swap.h - src/collectors/common-contexts/mem.pgfaults.h - src/collectors/common-contexts/mem.available.h + src/collectors/common-contexts/disk-io.h + src/collectors/common-contexts/system-io.h + src/collectors/common-contexts/system-interrupts.h + src/collectors/common-contexts/system-processes.h + src/collectors/common-contexts/system-ram.h + src/collectors/common-contexts/mem-swap.h + src/collectors/common-contexts/mem-pgfaults.h + src/collectors/common-contexts/mem-available.h ) set(PLUGINSD_PLUGIN_FILES diff --git a/src/collectors/common-contexts/common-contexts.h b/src/collectors/common-contexts/common-contexts.h index 1938230dc0b03b..ba30c2b329c96e 100644 --- a/src/collectors/common-contexts/common-contexts.h +++ b/src/collectors/common-contexts/common-contexts.h @@ -18,14 +18,14 @@ typedef void (*instance_labels_cb_t)(RRDSET *st, void *data); -#include "system.io.h" -#include "system.ram.h" -#include "system.interrupts.h" -#include "system.processes.h" -#include "system.ipc.h" -#include "mem.swap.h" -#include "mem.pgfaults.h" -#include "mem.available.h" -#include "disk.io.h" +#include "system-io.h" +#include "system-ram.h" +#include "system-interrupts.h" +#include "system-processes.h" +#include "system-ipc.h" +#include "mem-swap.h" +#include "mem-pgfaults.h" +#include "mem-available.h" +#include "disk-io.h" #endif //NETDATA_COMMON_CONTEXTS_H diff --git a/src/collectors/common-contexts/disk.io.h b/src/collectors/common-contexts/disk-io.h similarity index 100% rename from src/collectors/common-contexts/disk.io.h rename to src/collectors/common-contexts/disk-io.h diff --git a/src/collectors/common-contexts/mem.available.h b/src/collectors/common-contexts/mem-available.h similarity index 100% rename from src/collectors/common-contexts/mem.available.h rename to src/collectors/common-contexts/mem-available.h diff --git a/src/collectors/common-contexts/mem.pgfaults.h b/src/collectors/common-contexts/mem-pgfaults.h similarity index 
100% rename from src/collectors/common-contexts/mem.pgfaults.h rename to src/collectors/common-contexts/mem-pgfaults.h diff --git a/src/collectors/common-contexts/mem.swap.h b/src/collectors/common-contexts/mem-swap.h similarity index 100% rename from src/collectors/common-contexts/mem.swap.h rename to src/collectors/common-contexts/mem-swap.h diff --git a/src/collectors/common-contexts/system.interrupts.h b/src/collectors/common-contexts/system-interrupts.h similarity index 100% rename from src/collectors/common-contexts/system.interrupts.h rename to src/collectors/common-contexts/system-interrupts.h diff --git a/src/collectors/common-contexts/system.io.h b/src/collectors/common-contexts/system-io.h similarity index 100% rename from src/collectors/common-contexts/system.io.h rename to src/collectors/common-contexts/system-io.h diff --git a/src/collectors/common-contexts/system.ipc.h b/src/collectors/common-contexts/system-ipc.h similarity index 100% rename from src/collectors/common-contexts/system.ipc.h rename to src/collectors/common-contexts/system-ipc.h diff --git a/src/collectors/common-contexts/system.processes.h b/src/collectors/common-contexts/system-processes.h similarity index 100% rename from src/collectors/common-contexts/system.processes.h rename to src/collectors/common-contexts/system-processes.h diff --git a/src/collectors/common-contexts/system.ram.h b/src/collectors/common-contexts/system-ram.h similarity index 100% rename from src/collectors/common-contexts/system.ram.h rename to src/collectors/common-contexts/system-ram.h From 7d4f9c58d5d82f5aad9fad12265b2dad6be5875c Mon Sep 17 00:00:00 2001 From: vkalintiris Date: Mon, 30 Sep 2024 16:14:26 +0300 Subject: [PATCH 06/23] Move plugins.d directory outside of collectors (#18637) * Move plugins.d out of collectors It's being used by streaming as well. * Move ndsudo and local_listeners back to collectors. 
--- .github/labeler.yml | 12 ++++---- CMakeLists.txt | 28 +++++++++---------- .../python-collector.md | 2 +- docs/diagrams/netdata-overview.xml | 2 +- docs/glossary.md | 2 +- docs/top-monitoring-netdata-functions.md | 2 +- packaging/installer/methods/macos.md | 2 +- src/collectors/README.md | 2 +- src/collectors/charts.d.plugin/README.md | 2 +- .../integrations/go_applications_expvar.md | 4 +-- .../python.d.plugin/go_expvar/metadata.yaml | 4 +-- .../{plugins.d => utils}/local_listeners.c | 0 src/collectors/{plugins.d => utils}/ndsudo.c | 0 src/go/pkg/netdataapi/api.go | 2 +- .../discoverer/netlisteners/netlisteners.go | 2 +- src/go/plugin/go.d/agent/module/charts.go | 6 ++-- .../plugin/go.d/docs/how-to-write-a-module.md | 6 ++-- src/{collectors => }/plugins.d/README.md | 2 +- .../plugins.d/functions-table.md | 0 .../plugins.d/gperf-config.txt | 0 .../plugins.d/gperf-hashtable.h | 0 src/{collectors => }/plugins.d/plugins_d.c | 0 src/{collectors => }/plugins.d/plugins_d.h | 0 .../plugins.d/pluginsd_dyncfg.c | 0 .../plugins.d/pluginsd_dyncfg.h | 0 .../plugins.d/pluginsd_functions.c | 0 .../plugins.d/pluginsd_functions.h | 0 .../plugins.d/pluginsd_internals.c | 0 .../plugins.d/pluginsd_internals.h | 0 .../plugins.d/pluginsd_parser.c | 0 .../plugins.d/pluginsd_parser.h | 0 .../plugins.d/pluginsd_replication.c | 0 .../plugins.d/pluginsd_replication.h | 0 src/streaming/protocol/command-claimed_id.c | 2 +- src/streaming/protocol/command-nodeid.c | 2 +- src/streaming/receiver.c | 2 +- src/streaming/stream_path.c | 2 +- 37 files changed, 44 insertions(+), 44 deletions(-) rename src/collectors/{plugins.d => utils}/local_listeners.c (100%) rename src/collectors/{plugins.d => utils}/ndsudo.c (100%) rename src/{collectors => }/plugins.d/README.md (99%) rename src/{collectors => }/plugins.d/functions-table.md (100%) rename src/{collectors => }/plugins.d/gperf-config.txt (100%) rename src/{collectors => }/plugins.d/gperf-hashtable.h (100%) rename src/{collectors => 
}/plugins.d/plugins_d.c (100%) rename src/{collectors => }/plugins.d/plugins_d.h (100%) rename src/{collectors => }/plugins.d/pluginsd_dyncfg.c (100%) rename src/{collectors => }/plugins.d/pluginsd_dyncfg.h (100%) rename src/{collectors => }/plugins.d/pluginsd_functions.c (100%) rename src/{collectors => }/plugins.d/pluginsd_functions.h (100%) rename src/{collectors => }/plugins.d/pluginsd_internals.c (100%) rename src/{collectors => }/plugins.d/pluginsd_internals.h (100%) rename src/{collectors => }/plugins.d/pluginsd_parser.c (100%) rename src/{collectors => }/plugins.d/pluginsd_parser.h (100%) rename src/{collectors => }/plugins.d/pluginsd_replication.c (100%) rename src/{collectors => }/plugins.d/pluginsd_replication.h (100%) diff --git a/.github/labeler.yml b/.github/labeler.yml index 36d18e74e8355c..fe3e3b39fcc769 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -77,12 +77,6 @@ area/collectors: - src/collectors/** - src/go/** -collectors/plugins.d: - - any: - - changed-files: - - any-glob-to-any-file: - - src/collectors/plugins.d/** - collectors/apps: - any: - changed-files: @@ -289,3 +283,9 @@ area/web: - changed-files: - any-glob-to-any-file: - src/web/** + +area/plugins.d: + - any: + - changed-files: + - any-glob-to-any-file: + - src/plugins.d/** diff --git a/CMakeLists.txt b/CMakeLists.txt index e4bb053c8760a1..e6a11fcfa62075 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1136,18 +1136,18 @@ set(INTERNAL_COLLECTORS_FILES ) set(PLUGINSD_PLUGIN_FILES - src/collectors/plugins.d/plugins_d.c - src/collectors/plugins.d/plugins_d.h - src/collectors/plugins.d/pluginsd_dyncfg.c - src/collectors/plugins.d/pluginsd_dyncfg.h - src/collectors/plugins.d/pluginsd_functions.c - src/collectors/plugins.d/pluginsd_functions.h - src/collectors/plugins.d/pluginsd_internals.c - src/collectors/plugins.d/pluginsd_internals.h - src/collectors/plugins.d/pluginsd_parser.c - src/collectors/plugins.d/pluginsd_parser.h - 
src/collectors/plugins.d/pluginsd_replication.c - src/collectors/plugins.d/pluginsd_replication.h + src/plugins.d/plugins_d.c + src/plugins.d/plugins_d.h + src/plugins.d/pluginsd_dyncfg.c + src/plugins.d/pluginsd_dyncfg.h + src/plugins.d/pluginsd_functions.c + src/plugins.d/pluginsd_functions.h + src/plugins.d/pluginsd_internals.c + src/plugins.d/pluginsd_internals.h + src/plugins.d/pluginsd_parser.c + src/plugins.d/pluginsd_parser.h + src/plugins.d/pluginsd_replication.c + src/plugins.d/pluginsd_replication.h ) set(RRD_PLUGIN_FILES @@ -2068,7 +2068,7 @@ if(ENABLE_PLUGIN_CUPS) endif() if(NEED_NDSUDO) - set(NDSUDO_FILES src/collectors/plugins.d/ndsudo.c) + set(NDSUDO_FILES src/collectors/utils/ndsudo.c) add_executable(ndsudo ${NDSUDO_FILES}) @@ -2212,7 +2212,7 @@ endif() if(ENABLE_PLUGIN_LOCAL_LISTENERS) set(LOCAL_LISTENERS_FILES - src/collectors/plugins.d/local_listeners.c + src/collectors/utils/local_listeners.c src/libnetdata/maps/local-sockets.h ) diff --git a/docs/developer-and-contributor-corner/python-collector.md b/docs/developer-and-contributor-corner/python-collector.md index 0b7aa96a65f7eb..f53ed3f5259476 100644 --- a/docs/developer-and-contributor-corner/python-collector.md +++ b/docs/developer-and-contributor-corner/python-collector.md @@ -73,7 +73,7 @@ The basic elements of a Netdata collector are: - `get_data()`: The basic function of the plugin which will return to Netdata the correct values. **Note**: All names are better explained in the -[External Plugins Documentation](/src/collectors/plugins.d/README.md). +[External Plugins Documentation](/src/plugins.d/README.md). Parameters like `priority` and `update_every` mentioned in that documentation are handled by the `python.d.plugin`, not by each collection module. 
diff --git a/docs/diagrams/netdata-overview.xml b/docs/diagrams/netdata-overview.xml index 16c967e6ed3408..2967f915c29189 100644 --- a/docs/diagrams/netdata-overview.xml +++ b/docs/diagrams/netdata-overview.xml @@ -78,7 +78,7 @@ - + diff --git a/docs/glossary.md b/docs/glossary.md index bcada6030fd173..87cf01f3bb2203 100644 --- a/docs/glossary.md +++ b/docs/glossary.md @@ -53,7 +53,7 @@ Use the alphabatized list below to find the answer to your single-term questions ## E -- [**External Plugins**](/src/collectors/plugins.d/README.md): These gather metrics from external processes, such as a webserver or database, and run as independent processes that communicate with the Netdata daemon via pipes. +- [**External Plugins**](/src/plugins.d/README.md): These gather metrics from external processes, such as a webserver or database, and run as independent processes that communicate with the Netdata daemon via pipes. ## F diff --git a/docs/top-monitoring-netdata-functions.md b/docs/top-monitoring-netdata-functions.md index ee76d40ff7bc89..a9caea781337e0 100644 --- a/docs/top-monitoring-netdata-functions.md +++ b/docs/top-monitoring-netdata-functions.md @@ -7,7 +7,7 @@ executed on the node/host where the function is made available. Collectors besides the metric collection, storing, and/or streaming work are capable of executing specific routines on request. These routines will bring additional information to help you troubleshoot or even trigger some action to happen on the node itself. -For more details please check out documentation on how we use our internal collector to get this from the first collector that exposes functions - [plugins.d](/src/collectors/plugins.d/README.md#function). +For more details please check out documentation on how we use our internal collector to get this from the first collector that exposes functions - [plugins.d](/src/plugins.d/README.md#function). 
## Prerequisites diff --git a/packaging/installer/methods/macos.md b/packaging/installer/methods/macos.md index 31aaebf980f774..dc736693662118 100644 --- a/packaging/installer/methods/macos.md +++ b/packaging/installer/methods/macos.md @@ -9,7 +9,7 @@ learn_rel_path: "Installation/Install on specific environments" # Install Netdata on macOS Netdata works on macOS, albeit with some limitations. -The number of charts displaying system metrics is limited, but you can use any of Netdata's [external plugins](/src/collectors/plugins.d/README.md) to monitor any services you might have installed on your macOS system. +The number of charts displaying system metrics is limited, but you can use any of Netdata's [external plugins](/src/plugins.d/README.md) to monitor any services you might have installed on your macOS system. You could also use a macOS system as the parent node in a [streaming configuration](/src/streaming/README.md). You can install Netdata in one of the three following ways: diff --git a/src/collectors/README.md b/src/collectors/README.md index 0fd5983b7a0a07..2404b1bb1e6f58 100644 --- a/src/collectors/README.md +++ b/src/collectors/README.md @@ -45,7 +45,7 @@ specifics of what a given collector does. - **External** plugins organize collectors that gather metrics from external processes, such as a MySQL database or Nginx web server. They can be written in any language, and the `netdata` daemon spawns them as long-running independent processes. They communicate with the daemon via pipes. All external plugins are managed by - [plugins.d](/src/collectors/plugins.d/README.md), which provides additional management options. + [plugins.d](/src/plugins.d/README.md), which provides additional management options. - **Orchestrators** are external plugins that run and manage one or more modules. They run as independent processes. The Go orchestrator is in active development. 
diff --git a/src/collectors/charts.d.plugin/README.md b/src/collectors/charts.d.plugin/README.md index 3558985db55552..d6cd07bcbb0c2d 100644 --- a/src/collectors/charts.d.plugin/README.md +++ b/src/collectors/charts.d.plugin/README.md @@ -7,7 +7,7 @@ 3. It communicates with Netdata via a unidirectional pipe (sending data to the `netdata` daemon) 4. Supports any number of data collection **modules** -To better understand the guidelines and the API behind our External plugins, please have a look at the [Introduction to External plugins](/src/collectors/plugins.d/README.md) prior to reading this page. +To better understand the guidelines and the API behind our External plugins, please have a look at the [Introduction to External plugins](/src/plugins.d/README.md) prior to reading this page. `charts.d.plugin` has been designed so that the actual script that will do data collection will be permanently in diff --git a/src/collectors/python.d.plugin/go_expvar/integrations/go_applications_expvar.md b/src/collectors/python.d.plugin/go_expvar/integrations/go_applications_expvar.md index 8f086765eefb2c..9f0ff24920640f 100644 --- a/src/collectors/python.d.plugin/go_expvar/integrations/go_applications_expvar.md +++ b/src/collectors/python.d.plugin/go_expvar/integrations/go_applications_expvar.md @@ -231,7 +231,7 @@ See [this issue](https://github.com/netdata/netdata/pull/1902#issuecomment-28449 Please see these two links to the official Netdata documentation for more information about the values: -- [External plugins - charts](/src/collectors/plugins.d/README.md#chart) +- [External plugins - charts](/src/plugins.d/README.md#chart) - [Chart variables](/src/collectors/python.d.plugin/README.md#global-variables-order-and-chart) **Line definitions** @@ -255,7 +255,7 @@ hidden: False ``` Please see the following link for more information about the options and their default values: -[External plugins - dimensions](/src/collectors/plugins.d/README.md#dimension) +[External plugins - 
dimensions](/src/plugins.d/README.md#dimension) Apart from top-level expvars, this plugin can also parse expvars stored in a multi-level map; All dicts in the resulting JSON document are then flattened to one level. diff --git a/src/collectors/python.d.plugin/go_expvar/metadata.yaml b/src/collectors/python.d.plugin/go_expvar/metadata.yaml index aa45968ff6840d..9fd47f8283d6fd 100644 --- a/src/collectors/python.d.plugin/go_expvar/metadata.yaml +++ b/src/collectors/python.d.plugin/go_expvar/metadata.yaml @@ -200,7 +200,7 @@ modules: Please see these two links to the official Netdata documentation for more information about the values: - - [External plugins - charts](/src/collectors/plugins.d/README.md#chart) + - [External plugins - charts](/src/plugins.d/README.md#chart) - [Chart variables](/src/collectors/python.d.plugin/README.md#global-variables-order-and-chart) **Line definitions** @@ -224,7 +224,7 @@ modules: ``` Please see the following link for more information about the options and their default values: - [External plugins - dimensions](/src/collectors/plugins.d/README.md#dimension) + [External plugins - dimensions](/src/plugins.d/README.md#dimension) Apart from top-level expvars, this plugin can also parse expvars stored in a multi-level map; All dicts in the resulting JSON document are then flattened to one level. 
diff --git a/src/collectors/plugins.d/local_listeners.c b/src/collectors/utils/local_listeners.c similarity index 100% rename from src/collectors/plugins.d/local_listeners.c rename to src/collectors/utils/local_listeners.c diff --git a/src/collectors/plugins.d/ndsudo.c b/src/collectors/utils/ndsudo.c similarity index 100% rename from src/collectors/plugins.d/ndsudo.c rename to src/collectors/utils/ndsudo.c diff --git a/src/go/pkg/netdataapi/api.go b/src/go/pkg/netdataapi/api.go index 4f2b7a9b580647..4f3faefc846975 100644 --- a/src/go/pkg/netdataapi/api.go +++ b/src/go/pkg/netdataapi/api.go @@ -11,7 +11,7 @@ import ( type ( // API implements Netdata external plugins API. - // https://learn.netdata.cloud/docs/agent/collectors/plugins.d#the-output-of-the-plugin + // https://learn.netdata.cloud/docs/agent/plugins.d#the-output-of-the-plugin API struct { io.Writer } diff --git a/src/go/plugin/go.d/agent/discovery/sd/discoverer/netlisteners/netlisteners.go b/src/go/plugin/go.d/agent/discovery/sd/discoverer/netlisteners/netlisteners.go index 6f536c49e16b7b..dee6c40c01313a 100644 --- a/src/go/plugin/go.d/agent/discovery/sd/discoverer/netlisteners/netlisteners.go +++ b/src/go/plugin/go.d/agent/discovery/sd/discoverer/netlisteners/netlisteners.go @@ -294,7 +294,7 @@ func (e *localListenersExec) discover(ctx context.Context) ([]byte, error) { defer cancel() // TCPv4/6 and UPDv4 sockets in LISTEN state - // https://github.com/netdata/netdata/blob/master/src/collectors/plugins.d/local_listeners.c + // https://github.com/netdata/netdata/blob/master/src/collectors/utils/local_listeners.c args := []string{ "no-udp6", "no-local", diff --git a/src/go/plugin/go.d/agent/module/charts.go b/src/go/plugin/go.d/agent/module/charts.go index 3f5f97563b325a..1b266e0b6454ad 100644 --- a/src/go/plugin/go.d/agent/module/charts.go +++ b/src/go/plugin/go.d/agent/module/charts.go @@ -78,7 +78,7 @@ type ( } // Chart represents a chart. 
- // For the full description please visit https://docs.netdata.cloud/collectors/plugins.d/#chart + // For the full description please visit https://docs.netdata.cloud/plugins.d/#chart Chart struct { // typeID is the unique identification of the chart, if not specified, // the orchestrator will use job full name + chart ID as typeID (default behaviour). @@ -128,7 +128,7 @@ type ( } // Dim represents a chart dimension. - // For detailed description please visit https://docs.netdata.cloud/collectors/plugins.d/#dimension. + // For detailed description please visit https://docs.netdata.cloud/plugins.d/#dimension. Dim struct { ID string Name string @@ -141,7 +141,7 @@ type ( } // Var represents a chart variable. - // For detailed description please visit https://docs.netdata.cloud/collectors/plugins.d/#variable + // For detailed description please visit https://docs.netdata.cloud/plugins.d/#variable Var struct { ID string Name string diff --git a/src/go/plugin/go.d/docs/how-to-write-a-module.md b/src/go/plugin/go.d/docs/how-to-write-a-module.md index bf7d3bc6d54baf..b314175fb5a0ed 100644 --- a/src/go/plugin/go.d/docs/how-to-write-a-module.md +++ b/src/go/plugin/go.d/docs/how-to-write-a-module.md @@ -48,7 +48,7 @@ The steps are: developed collector. It will be placed into the `bin` directory (e.g `go.d.plugin/bin`) - Run it in the debug mode `bin/godplugin -d -m `. This will output the `STDOUT` of the collector, the same output that is sent to the Netdata Agent and is transformed into charts. You can read more about this collector API in - our [documentation](/src/collectors/plugins.d/README.md#external-plugins-api). + our [documentation](/src/plugins.d/README.md#external-plugins-api). - If you want to test the collector with the actual Netdata Agent, you need to replace the `go.d.plugin` binary that exists in the Netdata Agent installation directory with the one you just compiled. Once you restart the Netdata Agent, it will detect and run it, creating all the charts. 
It is advised not to remove the default `go.d.plugin` binary, but simply rename it to `go.d.plugin.old` so that the Agent doesn't run it, but you can easily rename it back once you are done. @@ -119,7 +119,7 @@ func (e *Example) Check() bool { ### Charts method :exclamation: Netdata module -produces [`charts`](/src/collectors/plugins.d/README.md#chart), not +produces [`charts`](/src/plugins.d/README.md#chart), not raw metrics. Use [`agent/module`](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/agent/module/charts.go) @@ -127,7 +127,7 @@ package to create them, it contains charts and dimensions structs. - `Charts` returns - the [charts](/src/collectors/plugins.d/README.md#chart) (`*module.Charts`). + the [charts](/src/plugins.d/README.md#chart) (`*module.Charts`). - Called after `Check` and only if `Check` returned `true`. - If it returns `nil`, the job will be disabled - :warning: Make sure not to share returned value between module instances (jobs). diff --git a/src/collectors/plugins.d/README.md b/src/plugins.d/README.md similarity index 99% rename from src/collectors/plugins.d/README.md rename to src/plugins.d/README.md index 6b53dbed65219f..8ef0caeff5afa7 100644 --- a/src/collectors/plugins.d/README.md +++ b/src/plugins.d/README.md @@ -1,6 +1,6 @@ -# Application monitoring (apps.plugin) +# Applications monitoring (apps.plugin) -`apps.plugin` breaks down system resource usage to **processes**, **users** and **user groups**. -It is enabled by default on every Netdata installation. +`apps.plugin` monitors the resources utilization of all processes running. -To achieve this task, it iterates through the whole process tree, collecting resource usage information -for every process found running. 
+## Process Aggregation and Grouping -Since Netdata needs to present this information in charts and track them through time, -instead of presenting a `top` like list, `apps.plugin` uses a pre-defined list of **process groups** -to which it assigns all running processes. This list is customizable via `apps_groups.conf`, and Netdata -ships with a good default for most cases (to edit it on your system run `/etc/netdata/edit-config apps_groups.conf`). +`apps.plugin` aggregates processes in three distinct ways to provide a more insightful +breakdown of resource utilization: -So, `apps.plugin` builds a process tree (much like `ps fax` does in Linux), and groups + - **Tree** or **Category**: Grouped by their position in the process tree. + This is customizable and allows aggregation by process managers and individual + processes of interest. It also allows renaming processes for presentation purposes. + + - **User**: Grouped by the effective user (UID) under which the processes run. + + - **Group**: Grouped by the effective group (GID) under which the processes run. + +## Short-Lived Process Handling + +`apps.plugin` accounts for resource utilization of both running and exited processes, +capturing the impact of processes that spawn short-lived subprocesses, such as shell +scripts that fork hundreds or thousands of times per second. So, although processes +may spawn short-lived sub-processes, `apps.plugin` will aggregate their resource +utilization, providing a holistic view of how resources are shared among the processes. + +## Charts sections + +To provide more valuable insights, apps.plugin aggregates individual processes in several ways. +Each type of aggregation is presented as a different section on the dashboard. + +### Custom Process Groups (Apps) + +In this section, apps.plugin summarizes the resources consumed by all processes, grouped based +on the groups provided in `/etc/netdata/apps_groups.conf`.
You can edit this file using our [`edit-config`](/docs/netdata-agent/configuration/README.md) script. + +For this section, `apps.plugin` builds a process tree (much like `ps fax` does in Linux), and groups processes together (evaluating both child and parent processes) so that the result is always a list with a predefined set of members (of course, only process groups found running are reported). > If you find that `apps.plugin` categorizes standard applications as `other`, we would be > glad to accept pull requests improving the defaults shipped with Netdata in `apps_groups.conf`. -Unlike traditional process monitoring tools (like `top`), `apps.plugin` is able to account the resource -utilization of exit processes. Their utilization is accounted at their currently running parents. -So, `apps.plugin` is perfectly able to measure the resources used by shell scripts and other processes -that fork/spawn other short-lived processes hundreds of times per second. +### By User (Users) + +In this section, apps.plugin summarizes the resources consumed by all processes, grouped by the +effective user under which each process runs. + +### By User Group (Groups) + +In this section, apps.plugin summarizes the resources consumed by all processes, grouped by the +effective user group under which each process runs. ## Charts @@ -82,7 +109,7 @@ The above are reported: `apps.plugin` is a complex piece of software and has a lot of work to do We are proud that `apps.plugin` is a lot faster compared to any other similar tool, while collecting a lot more information for the processes, however the fact is that -this plugin requires more CPU resources than the `netdata` daemon itself. +this plugin may require more CPU resources than the `netdata` daemon itself. Under Linux, for each process running, `apps.plugin` reads several `/proc` files per process.
Doing this work per-second, especially on hosts with several thousands @@ -103,7 +130,7 @@ its CPU resources will be cut in half, and data collection will be once every 2 ## Configuration -The configuration file is `/etc/netdata/apps_groups.conf`. To edit it on your system, run `/etc/netdata/edit-config apps_groups.conf`. +The configuration file is `/etc/netdata/apps_groups.conf`. You can edit this file using our [`edit-config`](/docs/netdata-agent/configuration/README.md) script. The configuration file works accepts multiple lines, each having this format: @@ -381,14 +408,14 @@ the process tree of `sshd`, **including the exited children**. > `apps.plugin` does not use these mechanisms. The process grouping made by `apps.plugin` works > on any Linux, `systemd` based or not. -#### a more technical description of how Netdata works +#### a more technical description of how apps.plugin works -Netdata reads `/proc/<pid>/stat` for all processes, once per second and extracts `utime` and +Apps.plugin reads `/proc/<pid>/stat` for all processes, once per second and extracts `utime` and `stime` (user and system cpu utilization), much like all the console tools do. -But it also extracts `cutime` and `cstime` that account the user and system time of the exit children of each process. -By keeping a map in memory of the whole process tree, it is capable of assigning the right time to every process, taking -into account all its exited children. +But it also extracts `cutime` and `cstime` that account the user and system time of the exit +children of each process. By keeping a map in memory of the whole process tree, it is capable of +assigning the right time to every process, taking into account all its exited children. It is tricky, since a process may be running for 1 hour and once it exits, its parent should not receive the whole 1 hour of cpu time in just 1 second - you have to subtract the cpu time that has @@ -397,6 +424,4 @@ been reported for it prior to this iteration.
It is even trickier, because walking through the entire process tree takes some time itself. So, if you sum the CPU utilization of all processes, you might have more CPU time than the reported total cpu time of the system. Netdata solves this, by adapting the per process cpu utilization to -the total of the system. [Netdata adds charts that document this normalization](https://london.my-netdata.io/default.html#menu_netdata_submenu_apps_plugin). - - +the total of the system. [Apps.plugin adds charts that document this normalization](https://london.my-netdata.io/default.html#menu_netdata_submenu_apps_plugin). diff --git a/src/collectors/apps.plugin/apps_aggregations.c b/src/collectors/apps.plugin/apps_aggregations.c new file mode 100644 index 00000000000000..8c7ebb68997ad3 --- /dev/null +++ b/src/collectors/apps.plugin/apps_aggregations.c @@ -0,0 +1,237 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "apps_plugin.h" + +// ---------------------------------------------------------------------------- +// update statistics on the targets + +static size_t zero_all_targets(struct target *root) { + struct target *w; + size_t count = 0; + + for (w = root; w ; w = w->next) { + count++; + + for(size_t f = 0; f < PDF_MAX ;f++) + w->values[f] = 0; + + w->uptime_min = 0; + w->uptime_max = 0; + +#if (PROCESSES_HAVE_FDS == 1) + // zero file counters + if(w->target_fds) { + memset(w->target_fds, 0, sizeof(int) * w->target_fds_size); + w->openfds.files = 0; + w->openfds.pipes = 0; + w->openfds.sockets = 0; + w->openfds.inotifies = 0; + w->openfds.eventfds = 0; + w->openfds.timerfds = 0; + w->openfds.signalfds = 0; + w->openfds.eventpolls = 0; + w->openfds.other = 0; + + w->max_open_files_percent = 0.0; + } +#endif + + if(unlikely(w->root_pid)) { + struct pid_on_target *pid_on_target = w->root_pid; + + while(pid_on_target) { + struct pid_on_target *pid_on_target_to_free = pid_on_target; + pid_on_target = pid_on_target->next; + freez(pid_on_target_to_free); + } + + 
w->root_pid = NULL; + } + } + + return count; +} + +static inline void aggregate_pid_on_target(struct target *w, struct pid_stat *p, struct target *o __maybe_unused) { + if(unlikely(!p->updated)) { + // the process is not running + return; + } + + if(unlikely(!w)) { + netdata_log_error("pid %d %s was left without a target!", p->pid, pid_stat_comm(p)); + return; + } + +#if (PROCESSES_HAVE_FDS == 1) && (PROCESSES_HAVE_PID_LIMITS == 1) + if(p->openfds_limits_percent > w->max_open_files_percent) + w->max_open_files_percent = p->openfds_limits_percent; +#endif + + for(size_t f = 0; f < PDF_MAX ;f++) + w->values[f] += p->values[f]; + + if(!w->uptime_min || p->values[PDF_UPTIME] < w->uptime_min) w->uptime_min = p->values[PDF_UPTIME]; + if(!w->uptime_max || w->uptime_max < p->values[PDF_UPTIME]) w->uptime_max = p->values[PDF_UPTIME]; + + if(unlikely(debug_enabled || w->debug_enabled)) { + struct pid_on_target *pid_on_target = mallocz(sizeof(struct pid_on_target)); + pid_on_target->pid = p->pid; + pid_on_target->next = w->root_pid; + w->root_pid = pid_on_target; + } +} + +static inline void cleanup_exited_pids(void) { + struct pid_stat *p = NULL; + + for(p = root_of_pids(); p ;) { + if(!p->updated && (!p->keep || p->keeploops > 0)) { + if(unlikely(debug_enabled && (p->keep || p->keeploops))) + debug_log(" > CLEANUP cannot keep exited process %d (%s) anymore - removing it.", p->pid, pid_stat_comm(p)); + +#if (PROCESSES_HAVE_FDS == 1) + for(size_t c = 0; c < p->fds_size; c++) + if(p->fds[c].fd > 0) { + file_descriptor_not_used(p->fds[c].fd); + clear_pid_fd(&p->fds[c]); + } +#endif + + const pid_t r = p->pid; + p = p->next; + del_pid_entry(r); + } + else { + if(unlikely(p->keep)) p->keeploops++; + p->keep = false; + p = p->next; + } + } +} + +static struct target *get_app_group_target_for_pid(struct pid_stat *p) { + targets_assignment_counter++; + + for(struct target *w = apps_groups_root_target; w ; w = w->next) { + if(w->type != TARGET_TYPE_APP_GROUP) continue; + + // find 
it - 4 cases: + // 1. the target is not a pattern + // 2. the target has the prefix + // 3. the target has the suffix + // 4. the target is something inside cmdline + + if(unlikely(( (!w->starts_with && !w->ends_with && w->compare == p->comm) + || (w->starts_with && !w->ends_with && string_starts_with_string(p->comm, w->compare)) + || (!w->starts_with && w->ends_with && string_ends_with_string(p->comm, w->compare)) + || (proc_pid_cmdline_is_needed && w->starts_with && w->ends_with && strstr(pid_stat_cmdline(p), string2str(w->compare))) + ))) { + + p->matched_by_config = true; + if(w->target) return w->target; + else return w; + } + } + + return NULL; +} + +static void assign_a_target_to_all_processes(void) { + // assign targets from app_groups.conf + for(struct pid_stat *p = root_of_pids(); p ; p = p->next) { + if(!p->target) + p->target = get_app_group_target_for_pid(p); + } + + // assign targets from their parents, if they have + for(struct pid_stat *p = root_of_pids(); p ; p = p->next) { + if(!p->target) { + for(struct pid_stat *pp = p->parent ; pp ; pp = pp->parent) { + if(pp->target) { + if(pp->matched_by_config) { + // we are only interested about app_groups.conf matches + p->target = pp->target; + } + break; + } + } + + if(!p->target) { + // there is no target, get it from the tree + p->target = get_tree_target(p); + } + } + + fatal_assert(p->target != NULL); + } +} + +void aggregate_processes_to_targets(void) { + assign_a_target_to_all_processes(); + apps_groups_targets_count = zero_all_targets(apps_groups_root_target); + +#if (PROCESSES_HAVE_UID == 1) + zero_all_targets(users_root_target); +#endif +#if (PROCESSES_HAVE_GID == 1) + zero_all_targets(groups_root_target); +#endif + + // this has to be done, before the cleanup + struct target *w = NULL, *o = NULL; + + // concentrate everything on the targets + for(struct pid_stat *p = root_of_pids(); p ; p = p->next) { + + // -------------------------------------------------------------------- + // apps_groups 
and tree target + + aggregate_pid_on_target(p->target, p, NULL); + + + // -------------------------------------------------------------------- + // user target + +#if (PROCESSES_HAVE_UID == 1) + o = p->uid_target; + if(likely(p->uid_target && p->uid_target->uid == p->uid)) + w = p->uid_target; + else { + if(unlikely(debug_enabled && p->uid_target)) + debug_log("pid %d (%s) switched user from %u (%s) to %u.", p->pid, pid_stat_comm(p), p->uid_target->uid, p->uid_target->name, p->uid); + + w = p->uid_target = get_uid_target(p->uid); + } + + aggregate_pid_on_target(w, p, o); +#endif + + // -------------------------------------------------------------------- + // user group target + +#if (PROCESSES_HAVE_GID == 1) + o = p->gid_target; + if(likely(p->gid_target && p->gid_target->gid == p->gid)) + w = p->gid_target; + else { + if(unlikely(debug_enabled && p->gid_target)) + debug_log("pid %d (%s) switched group from %u (%s) to %u.", p->pid, pid_stat_comm(p), p->gid_target->gid, p->gid_target->name, p->gid); + + w = p->gid_target = get_gid_target(p->gid); + } + + aggregate_pid_on_target(w, p, o); +#endif + + // -------------------------------------------------------------------- + // aggregate all file descriptors + +#if (PROCESSES_HAVE_FDS == 1) + if(enable_file_charts) + aggregate_pid_fds_on_targets(p); +#endif + } + + cleanup_exited_pids(); +} diff --git a/src/collectors/apps.plugin/apps_functions.c b/src/collectors/apps.plugin/apps_functions.c index cacc0469d20c77..58173f4b95014e 100644 --- a/src/collectors/apps.plugin/apps_functions.c +++ b/src/collectors/apps.plugin/apps_functions.c @@ -24,24 +24,35 @@ static void apps_plugin_function_processes_help(const char *transaction) { " category:NAME\n" " Shows only processes that are assigned the category `NAME` in apps_groups.conf\n" "\n" + " parent:NAME\n" + " Shows only processes that are aggregated under parent `NAME`\n" + "\n" +#if (PROCESSES_HAVE_UID == 1) " user:NAME\n" " Shows only processes that are running as user 
name `NAME`.\n" "\n" +#endif +#if (PROCESSES_HAVE_GID == 1) " group:NAME\n" " Shows only processes that are running as group name `NAME`.\n" "\n" +#endif " process:NAME\n" " Shows only processes that their Command is `NAME` or their parent's Command is `NAME`.\n" "\n" " pid:NUMBER\n" " Shows only processes that their PID is `NUMBER` or their parent's PID is `NUMBER`\n" "\n" +#if (PROCESSES_HAVE_UID == 1) " uid:NUMBER\n" " Shows only processes that their UID is `NUMBER`\n" "\n" +#endif +#if (PROCESSES_HAVE_GID == 1) " gid:NUMBER\n" " Shows only processes that their GID is `NUMBER`\n" "\n" +#endif "Filters can be combined. Each filter can be given only one time.\n" ); @@ -72,21 +83,20 @@ void function_processes(const char *transaction, char *function, struct pid_stat *p; bool show_cmdline = http_access_user_has_enough_access_level_for_endpoint( - access, - HTTP_ACCESS_SIGNED_ID | HTTP_ACCESS_SAME_SPACE | HTTP_ACCESS_SENSITIVE_DATA | - HTTP_ACCESS_VIEW_AGENT_CONFIG) || enable_function_cmdline; + access, HTTP_ACCESS_SIGNED_ID | HTTP_ACCESS_SAME_SPACE | HTTP_ACCESS_SENSITIVE_DATA | HTTP_ACCESS_VIEW_AGENT_CONFIG) || enable_function_cmdline; char *words[PLUGINSD_MAX_WORDS] = { NULL }; size_t num_words = quoted_strings_splitter_pluginsd(function, words, PLUGINSD_MAX_WORDS); - struct target *category = NULL, *user = NULL, *group = NULL; + struct target *category = NULL, *user = NULL, *group = NULL; (void)category; (void)user; (void)group; const char *process_name = NULL; pid_t pid = 0; - uid_t uid = 0; - gid_t gid = 0; + uid_t uid = 0; (void)uid; + gid_t gid = 0; (void)gid; bool info = false; bool filter_pid = false, filter_uid = false, filter_gid = false; + (void)filter_uid; (void)filter_gid; for(int i = 1; i < PLUGINSD_MAX_WORDS ;i++) { const char *keyword = get_word(words, num_words, i); @@ -100,6 +110,7 @@ void function_processes(const char *transaction, char *function, return; } } +#if (PROCESSES_HAVE_UID == 1) else if(!user && strncmp(keyword, PROCESS_FILTER_USER, 
strlen(PROCESS_FILTER_USER)) == 0) { user = find_target_by_name(users_root_target, &keyword[strlen(PROCESS_FILTER_USER)]); if(!user) { @@ -108,6 +119,8 @@ void function_processes(const char *transaction, char *function, return; } } +#endif +#if (PROCESSES_HAVE_GID == 1) else if(strncmp(keyword, PROCESS_FILTER_GROUP, strlen(PROCESS_FILTER_GROUP)) == 0) { group = find_target_by_name(groups_root_target, &keyword[strlen(PROCESS_FILTER_GROUP)]); if(!group) { @@ -116,6 +129,7 @@ void function_processes(const char *transaction, char *function, return; } } +#endif else if(!process_name && strncmp(keyword, PROCESS_FILTER_PROCESS, strlen(PROCESS_FILTER_PROCESS)) == 0) { process_name = &keyword[strlen(PROCESS_FILTER_PROCESS)]; } @@ -123,14 +137,18 @@ void function_processes(const char *transaction, char *function, pid = str2i(&keyword[strlen(PROCESS_FILTER_PID)]); filter_pid = true; } +#if (PROCESSES_HAVE_UID == 1) else if(!uid && strncmp(keyword, PROCESS_FILTER_UID, strlen(PROCESS_FILTER_UID)) == 0) { uid = str2i(&keyword[strlen(PROCESS_FILTER_UID)]); filter_uid = true; } +#endif +#if (PROCESSES_HAVE_GID == 1) else if(!gid && strncmp(keyword, PROCESS_FILTER_GID, strlen(PROCESS_FILTER_GID)) == 0) { gid = str2i(&keyword[strlen(PROCESS_FILTER_GID)]); filter_gid = true; } +#endif else if(strcmp(keyword, "help") == 0) { apps_plugin_function_processes_help(transaction); return; @@ -140,10 +158,6 @@ void function_processes(const char *transaction, char *function, } } - unsigned int cpu_divisor = time_factor * RATES_DETAIL / 100; - unsigned int memory_divisor = 1024; - unsigned int io_divisor = 1024 * RATES_DETAIL; - BUFFER *wb = buffer_create(4096, NULL); buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_MINIFY); buffer_json_member_add_uint64(wb, "status", HTTP_RESP_OK); @@ -156,38 +170,71 @@ void function_processes(const char *transaction, char *function, if(info) goto close_and_send; + uint64_t cpu_divisor = NSEC_PER_SEC / 100; + unsigned int memory_divisor = 
1024 * 1024; + unsigned int io_divisor = 1024 * RATES_DETAIL; + + uint64_t total_memory_bytes = OS_FUNCTION(apps_os_get_total_memory)(); + NETDATA_DOUBLE - UserCPU_max = 0.0 + UserCPU_max = 0.0 , SysCPU_max = 0.0 +#if (PROCESSES_HAVE_CPU_GUEST_TIME == 1) , GuestCPU_max = 0.0 +#endif +#if (PROCESSES_HAVE_CPU_CHILDREN_TIME == 1) , CUserCPU_max = 0.0 , CSysCPU_max = 0.0 +#if (PROCESSES_HAVE_CPU_GUEST_TIME == 1) , CGuestCPU_max = 0.0 +#endif +#endif , CPU_max = 0.0 , VMSize_max = 0.0 , RSS_max = 0.0 +#if (PROCESSES_HAVE_VMSHARED == 1) , Shared_max = 0.0 +#endif , Swap_max = 0.0 , Memory_max = 0.0 +#if (PROCESSES_HAVE_FDS == 1) && (PROCESSES_HAVE_PID_LIMITS == 1) , FDsLimitPercent_max = 0.0 +#endif ; unsigned long long Processes_max = 0 , Threads_max = 0 +#if (PROCESSES_HAVE_VOLCTX == 1) , VoluntaryCtxtSwitches_max = 0 +#endif +#if (PROCESSES_HAVE_NVOLCTX == 1) , NonVoluntaryCtxtSwitches_max = 0 +#endif , Uptime_max = 0 , MinFlt_max = 0 - , CMinFlt_max = 0 - , TMinFlt_max = 0 +#if (PROCESSES_HAVE_MAJFLT == 1) , MajFlt_max = 0 +#endif +#if (PROCESSES_HAVE_CHILDREN_FLTS == 1) + , CMinFlt_max = 0 , CMajFlt_max = 0 + , TMinFlt_max = 0 , TMajFlt_max = 0 +#endif +#if (PROCESSES_HAVE_LOGICAL_IO == 1) + , LReads_max = 0 + , LWrites_max = 0 +#endif +#if (PROCESSES_HAVE_PHYSICAL_IO == 1) , PReads_max = 0 , PWrites_max = 0 - , RCalls_max = 0 - , WCalls_max = 0 +#endif +#if (PROCESSES_HAVE_IO_CALLS == 1) + , ROps_max = 0 + , WOps_max = 0 +#endif +#if (PROCESSES_HAVE_FDS == 1) , Files_max = 0 , Pipes_max = 0 , Sockets_max = 0 @@ -198,40 +245,47 @@ void function_processes(const char *transaction, char *function, , EvPollFDs_max = 0 , OtherFDs_max = 0 , FDs_max = 0 +#endif +#if (PROCESSES_HAVE_HANDLES == 1) + , Handles_max = 0 +#endif ; -#if !defined(__FreeBSD__) && !defined(__APPLE__) - unsigned long long - LReads_max = 0 - , LWrites_max = 0 - ; -#endif // !__FreeBSD__ !__APPLE_ + netdata_mutex_lock(&apps_and_stdout_mutex); int rows= 0; - for(p = root_of_pids; p ; p = p->next) { + 
for(p = root_of_pids(); p ; p = p->next) { if(!p->updated) continue; if(category && p->target != category) continue; - if(user && p->user_target != user) +#if (PROCESSES_HAVE_UID == 1) + if(user && p->uid_target != user) continue; +#endif - if(group && p->group_target != group) +#if (PROCESSES_HAVE_GID == 1) + if(group && p->gid_target != group) continue; +#endif - if(process_name && ((strcmp(p->comm, process_name) != 0 && !p->parent) || (p->parent && strcmp(p->comm, process_name) != 0 && strcmp(p->parent->comm, process_name) != 0))) + if(process_name && ((strcmp(pid_stat_comm(p), process_name) != 0 && !p->parent) || (p->parent && strcmp(pid_stat_comm(p), process_name) != 0 && strcmp(pid_stat_comm(p->parent), process_name) != 0))) continue; if(filter_pid && p->pid != pid && p->ppid != pid) continue; +#if (PROCESSES_HAVE_UID == 1) if(filter_uid && p->uid != uid) continue; +#endif +#if (PROCESSES_HAVE_GID == 1) if(filter_gid && p->gid != gid) continue; +#endif rows++; @@ -244,80 +298,126 @@ void function_processes(const char *transaction, char *function, buffer_json_add_array_item_uint64(wb, p->pid); // cmd - buffer_json_add_array_item_string(wb, p->comm); + buffer_json_add_array_item_string(wb, string2str(p->comm)); + +#if (PROCESSES_HAVE_COMM_AND_NAME == 1) + // name + buffer_json_add_array_item_string(wb, string2str(p->name ? p->name : p->comm)); +#endif // cmdline if (show_cmdline) { - buffer_json_add_array_item_string(wb, (p->cmdline && *p->cmdline) ? p->cmdline : p->comm); + buffer_json_add_array_item_string(wb, (string_strlen(p->cmdline)) ? pid_stat_cmdline(p) : pid_stat_comm(p)); } // ppid buffer_json_add_array_item_uint64(wb, p->ppid); // category - buffer_json_add_array_item_string(wb, p->target ? p->target->name : "-"); + buffer_json_add_array_item_string(wb, p->target ? string2str(p->target->name) : "-"); +#if (PROCESSES_HAVE_UID == 1) // user - buffer_json_add_array_item_string(wb, p->user_target ? 
p->user_target->name : "-"); + buffer_json_add_array_item_string(wb, p->uid_target ? string2str(p->uid_target->name) : "-"); // uid buffer_json_add_array_item_uint64(wb, p->uid); +#endif +#if (PROCESSES_HAVE_GID == 1) // group - buffer_json_add_array_item_string(wb, p->group_target ? p->group_target->name : "-"); + buffer_json_add_array_item_string(wb, p->gid_target ? string2str(p->gid_target->name) : "-"); // gid buffer_json_add_array_item_uint64(wb, p->gid); +#endif // CPU utilization % - add_value_field_ndd_with_max(wb, CPU, (NETDATA_DOUBLE)(p->utime + p->stime + p->gtime + p->cutime + p->cstime + p->cgtime) / cpu_divisor); - add_value_field_ndd_with_max(wb, UserCPU, (NETDATA_DOUBLE)(p->utime) / cpu_divisor); - add_value_field_ndd_with_max(wb, SysCPU, (NETDATA_DOUBLE)(p->stime) / cpu_divisor); - add_value_field_ndd_with_max(wb, GuestCPU, (NETDATA_DOUBLE)(p->gtime) / cpu_divisor); - add_value_field_ndd_with_max(wb, CUserCPU, (NETDATA_DOUBLE)(p->cutime) / cpu_divisor); - add_value_field_ndd_with_max(wb, CSysCPU, (NETDATA_DOUBLE)(p->cstime) / cpu_divisor); - add_value_field_ndd_with_max(wb, CGuestCPU, (NETDATA_DOUBLE)(p->cgtime) / cpu_divisor); + kernel_uint_t total_cpu = p->values[PDF_UTIME] + p->values[PDF_STIME]; - add_value_field_llu_with_max(wb, VoluntaryCtxtSwitches, p->status_voluntary_ctxt_switches / RATES_DETAIL); - add_value_field_llu_with_max(wb, NonVoluntaryCtxtSwitches, p->status_nonvoluntary_ctxt_switches / RATES_DETAIL); +#if (PROCESSES_HAVE_CPU_GUEST_TIME) + total_cpu += p->values[PDF_GTIME]; +#endif +#if (PROCESSES_HAVE_CPU_CHILDREN_TIME) + total_cpu += p->values[PDF_CUTIME] + p->values[PDF_CSTIME]; +#if (PROCESSES_HAVE_CPU_GUEST_TIME) + total_cpu += p->values[PDF_CGTIME]; +#endif +#endif + add_value_field_ndd_with_max(wb, CPU, (NETDATA_DOUBLE)(total_cpu) / cpu_divisor); + add_value_field_ndd_with_max(wb, UserCPU, (NETDATA_DOUBLE)(p->values[PDF_UTIME]) / cpu_divisor); + add_value_field_ndd_with_max(wb, SysCPU, (NETDATA_DOUBLE)(p->values[PDF_STIME]) 
/ cpu_divisor); +#if (PROCESSES_HAVE_CPU_GUEST_TIME) + add_value_field_ndd_with_max(wb, GuestCPU, (NETDATA_DOUBLE)(p->values[PDF_GTIME]) / cpu_divisor); +#endif +#if (PROCESSES_HAVE_CPU_CHILDREN_TIME) + add_value_field_ndd_with_max(wb, CUserCPU, (NETDATA_DOUBLE)(p->values[PDF_CUTIME]) / cpu_divisor); + add_value_field_ndd_with_max(wb, CSysCPU, (NETDATA_DOUBLE)(p->values[PDF_CSTIME]) / cpu_divisor); +#if (PROCESSES_HAVE_CPU_GUEST_TIME) + add_value_field_ndd_with_max(wb, CGuestCPU, (NETDATA_DOUBLE)(p->values[PDF_CGTIME]) / cpu_divisor); +#endif +#endif + +#if (PROCESSES_HAVE_VOLCTX == 1) + add_value_field_llu_with_max(wb, VoluntaryCtxtSwitches, p->values[PDF_VOLCTX] / RATES_DETAIL); +#endif +#if (PROCESSES_HAVE_NVOLCTX == 1) + add_value_field_llu_with_max(wb, NonVoluntaryCtxtSwitches, p->values[PDF_NVOLCTX] / RATES_DETAIL); +#endif // memory MiB - if(MemTotal) - add_value_field_ndd_with_max(wb, Memory, (NETDATA_DOUBLE)p->status_vmrss * 100.0 / (NETDATA_DOUBLE)MemTotal); + if(total_memory_bytes) + add_value_field_ndd_with_max(wb, Memory, (NETDATA_DOUBLE)p->values[PDF_VMRSS] * 100.0 / (NETDATA_DOUBLE)total_memory_bytes); + + add_value_field_ndd_with_max(wb, RSS, (NETDATA_DOUBLE)p->values[PDF_VMRSS] / memory_divisor); - add_value_field_ndd_with_max(wb, RSS, (NETDATA_DOUBLE)p->status_vmrss / memory_divisor); - add_value_field_ndd_with_max(wb, Shared, (NETDATA_DOUBLE)p->status_vmshared / memory_divisor); -#if !defined(__APPLE__) - add_value_field_ndd_with_max(wb, VMSize, (NETDATA_DOUBLE)p->status_vmsize / memory_divisor); +#if (PROCESSES_HAVE_VMSHARED == 1) + add_value_field_ndd_with_max(wb, Shared, (NETDATA_DOUBLE)p->values[PDF_VMSHARED] / memory_divisor); #endif - add_value_field_ndd_with_max(wb, Swap, (NETDATA_DOUBLE)p->status_vmswap / memory_divisor); + add_value_field_ndd_with_max(wb, VMSize, (NETDATA_DOUBLE)p->values[PDF_VMSIZE] / memory_divisor); +#if (PROCESSES_HAVE_VMSWAP == 1) + add_value_field_ndd_with_max(wb, Swap, (NETDATA_DOUBLE)p->values[PDF_VMSWAP] / 
memory_divisor); +#endif + +#if (PROCESSES_HAVE_PHYSICAL_IO == 1) // Physical I/O - add_value_field_llu_with_max(wb, PReads, p->io_storage_bytes_read / io_divisor); - add_value_field_llu_with_max(wb, PWrites, p->io_storage_bytes_written / io_divisor); + add_value_field_llu_with_max(wb, PReads, p->values[PDF_PREAD] / io_divisor); + add_value_field_llu_with_max(wb, PWrites, p->values[PDF_PWRITE] / io_divisor); +#endif +#if (PROCESSES_HAVE_LOGICAL_IO == 1) // Logical I/O -#if !defined(__FreeBSD__) && !defined(__APPLE__) - add_value_field_llu_with_max(wb, LReads, p->io_logical_bytes_read / io_divisor); - add_value_field_llu_with_max(wb, LWrites, p->io_logical_bytes_written / io_divisor); + add_value_field_llu_with_max(wb, LReads, p->values[PDF_LREAD] / io_divisor); + add_value_field_llu_with_max(wb, LWrites, p->values[PDF_LWRITE] / io_divisor); #endif +#if (PROCESSES_HAVE_IO_CALLS == 1) // I/O calls - add_value_field_llu_with_max(wb, RCalls, p->io_read_calls / RATES_DETAIL); - add_value_field_llu_with_max(wb, WCalls, p->io_write_calls / RATES_DETAIL); + add_value_field_llu_with_max(wb, ROps, p->values[PDF_OREAD] / RATES_DETAIL); + add_value_field_llu_with_max(wb, WOps, p->values[PDF_OWRITE] / RATES_DETAIL); +#endif // minor page faults - add_value_field_llu_with_max(wb, MinFlt, p->minflt / RATES_DETAIL); - add_value_field_llu_with_max(wb, CMinFlt, p->cminflt / RATES_DETAIL); - add_value_field_llu_with_max(wb, TMinFlt, (p->minflt + p->cminflt) / RATES_DETAIL); + add_value_field_llu_with_max(wb, MinFlt, p->values[PDF_MINFLT] / RATES_DETAIL); +#if (PROCESSES_HAVE_MAJFLT == 1) // major page faults - add_value_field_llu_with_max(wb, MajFlt, p->majflt / RATES_DETAIL); - add_value_field_llu_with_max(wb, CMajFlt, p->cmajflt / RATES_DETAIL); - add_value_field_llu_with_max(wb, TMajFlt, (p->majflt + p->cmajflt) / RATES_DETAIL); + add_value_field_llu_with_max(wb, MajFlt, p->values[PDF_MAJFLT] / RATES_DETAIL); +#endif + +#if (PROCESSES_HAVE_CHILDREN_FLTS == 1) + 
add_value_field_llu_with_max(wb, CMinFlt, p->values[PDF_CMINFLT] / RATES_DETAIL); + add_value_field_llu_with_max(wb, CMajFlt, p->values[PDF_CMAJFLT] / RATES_DETAIL); + add_value_field_llu_with_max(wb, TMinFlt, (p->values[PDF_MINFLT] + p->values[PDF_CMINFLT]) / RATES_DETAIL); + add_value_field_llu_with_max(wb, TMajFlt, (p->values[PDF_MAJFLT] + p->values[PDF_CMAJFLT]) / RATES_DETAIL); +#endif +#if (PROCESSES_HAVE_FDS == 1) // open file descriptors +#if (PROCESSES_HAVE_PID_LIMITS == 1) add_value_field_ndd_with_max(wb, FDsLimitPercent, p->openfds_limits_percent); +#endif add_value_field_llu_with_max(wb, FDs, pid_openfds_sum(p)); add_value_field_llu_with_max(wb, Files, p->openfds.files); add_value_field_llu_with_max(wb, Pipes, p->openfds.pipes); @@ -328,12 +428,16 @@ void function_processes(const char *transaction, char *function, add_value_field_llu_with_max(wb, SigFDs, p->openfds.signalfds); add_value_field_llu_with_max(wb, EvPollFDs, p->openfds.eventpolls); add_value_field_llu_with_max(wb, OtherFDs, p->openfds.other); +#endif +#if (PROCESSES_HAVE_HANDLES == 1) + add_value_field_llu_with_max(wb, Handles, p->values[PDF_HANDLES]); +#endif // processes, threads, uptime - add_value_field_llu_with_max(wb, Processes, p->children_count); - add_value_field_llu_with_max(wb, Threads, p->num_threads); - add_value_field_llu_with_max(wb, Uptime, p->uptime); + add_value_field_llu_with_max(wb, Processes, p->values[PDF_PROCESSES]); + add_value_field_llu_with_max(wb, Threads, p->values[PDF_THREADS]); + add_value_field_llu_with_max(wb, Uptime, p->values[PDF_UPTIME]); buffer_json_array_close(wb); // for each pid } @@ -360,6 +464,14 @@ void function_processes(const char *transaction, char *function, RRDF_FIELD_FILTER_MULTISELECT, RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_STICKY, NULL); +#if (PROCESSES_HAVE_COMM_AND_NAME == 1) + buffer_rrdf_table_add_field(wb, field_id++, "Name", "Process Friendly Name", RRDF_FIELD_TYPE_STRING, + RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, 0, 
NULL, NAN, + RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT, + RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_STICKY, NULL); +#endif + if (show_cmdline) { buffer_rrdf_table_add_field(wb, field_id++, "CmdLine", "Command Line", RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, 0, @@ -373,12 +485,15 @@ void function_processes(const char *transaction, char *function, NAN, RRDF_FIELD_SORT_ASCENDING, "PID", RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, RRDF_FIELD_OPTS_NONE, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "Category", "Category (apps_groups.conf)", RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_STICKY, NULL); + +#if (PROCESSES_HAVE_UID == 1) buffer_rrdf_table_add_field(wb, field_id++, "User", "User Owner", RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT, @@ -389,6 +504,9 @@ void function_processes(const char *transaction, char *function, RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, RRDF_FIELD_OPTS_NONE, NULL); +#endif + +#if (PROCESSES_HAVE_GID == 1) buffer_rrdf_table_add_field(wb, field_id++, "Group", "Group Owner", RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT, @@ -399,6 +517,7 @@ void function_processes(const char *transaction, char *function, RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, RRDF_FIELD_OPTS_NONE, NULL); +#endif // CPU utilization buffer_rrdf_table_add_field(wb, field_id++, "CPU", "Total CPU Time (100% = 1 core)", @@ -416,11 +535,14 @@ void function_processes(const char *transaction, char *function, 
RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 2, "%", SysCPU_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, RRDF_FIELD_OPTS_NONE, NULL); +#if (PROCESSES_HAVE_CPU_GUEST_TIME == 1) buffer_rrdf_table_add_field(wb, field_id++, "GuestCPU", "Guest CPU Time (100% = 1 core)", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 2, "%", GuestCPU_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, RRDF_FIELD_OPTS_NONE, NULL); +#endif +#if (PROCESSES_HAVE_CPU_CHILDREN_TIME == 1) buffer_rrdf_table_add_field(wb, field_id++, "CUserCPU", "Children User CPU Time (100% = 1 core)", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 2, "%", CUserCPU_max, RRDF_FIELD_SORT_DESCENDING, NULL, @@ -431,26 +553,33 @@ void function_processes(const char *transaction, char *function, RRDF_FIELD_TRANSFORM_NUMBER, 2, "%", CSysCPU_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, RRDF_FIELD_OPTS_NONE, NULL); +#if (PROCESSES_HAVE_CPU_GUEST_TIME == 1) buffer_rrdf_table_add_field(wb, field_id++, "CGuestCPU", "Children Guest CPU Time (100% = 1 core)", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 2, "%", CGuestCPU_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, RRDF_FIELD_OPTS_NONE, NULL); +#endif +#endif +#if (PROCESSES_HAVE_VOLCTX == 1) // CPU context switches buffer_rrdf_table_add_field(wb, field_id++, "vCtxSwitch", "Voluntary Context Switches", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 2, "switches/s", VoluntaryCtxtSwitches_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, RRDF_FIELD_OPTS_NONE, NULL); +#endif +#if (PROCESSES_HAVE_NVOLCTX == 1) buffer_rrdf_table_add_field(wb, field_id++, "iCtxSwitch", "Involuntary Context Switches", 
RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 2, "switches/s", NonVoluntaryCtxtSwitches_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, RRDF_FIELD_OPTS_NONE, NULL); +#endif // memory - if (MemTotal) + if (total_memory_bytes) buffer_rrdf_table_add_field(wb, field_id++, "Memory", "Memory Percentage", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 2, "%", 100.0, RRDF_FIELD_SORT_DESCENDING, NULL, @@ -463,25 +592,30 @@ void function_processes(const char *transaction, char *function, 2, "MiB", RSS_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, RRDF_FIELD_OPTS_VISIBLE, NULL); +#if (PROCESSES_HAVE_VMSHARED == 1) buffer_rrdf_table_add_field(wb, field_id++, "Shared", "Shared Pages", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 2, "MiB", Shared_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, RRDF_FIELD_OPTS_VISIBLE, NULL); -#if !defined(__APPLE__) +#endif + buffer_rrdf_table_add_field(wb, field_id++, "Virtual", "Virtual Memory Size", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 2, "MiB", VMSize_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, RRDF_FIELD_OPTS_VISIBLE, NULL); -#endif + +#if (PROCESSES_HAVE_VMSWAP == 1) buffer_rrdf_table_add_field(wb, field_id++, "Swap", "Swap Memory", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 2, "MiB", Swap_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, RRDF_FIELD_OPTS_NONE, NULL); +#endif +#if (PROCESSES_HAVE_PHYSICAL_IO == 1) // Physical I/O buffer_rrdf_table_add_field(wb, field_id++, "PReads", "Physical I/O Reads", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, @@ -493,33 +627,41 @@ void 
function_processes(const char *transaction, char *function, RRDF_FIELD_TRANSFORM_NUMBER, 2, "KiB/s", PWrites_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, RRDF_FIELD_OPTS_VISIBLE, NULL); +#endif +#if (PROCESSES_HAVE_LOGICAL_IO == 1) +#if (PROCESSES_HAVE_PHYSICAL_IO == 1) + RRDF_FIELD_OPTIONS logical_io_options = RRDF_FIELD_OPTS_NONE; +#else + RRDF_FIELD_OPTIONS logical_io_options = RRDF_FIELD_OPTS_VISIBLE; +#endif // Logical I/O -#if !defined(__FreeBSD__) && !defined(__APPLE__) buffer_rrdf_table_add_field(wb, field_id++, "LReads", "Logical I/O Reads", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 2, "KiB/s", LReads_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, - RRDF_FIELD_OPTS_NONE, NULL); + logical_io_options, NULL); buffer_rrdf_table_add_field(wb, field_id++, "LWrites", "Logical I/O Writes", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 2, "KiB/s", LWrites_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, - RRDF_FIELD_OPTS_NONE, NULL); + logical_io_options, NULL); #endif +#if (PROCESSES_HAVE_IO_CALLS == 1) // I/O calls - buffer_rrdf_table_add_field(wb, field_id++, "RCalls", "I/O Read Calls", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, + buffer_rrdf_table_add_field(wb, field_id++, "ROps", "I/O Read Operations", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 2, - "calls/s", RCalls_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, + "ops/s", ROps_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, RRDF_FIELD_OPTS_NONE, NULL); - buffer_rrdf_table_add_field(wb, field_id++, "WCalls", "I/O Write Calls", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, + buffer_rrdf_table_add_field(wb, field_id++, "WOps", "I/O Write Operations", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, 
RRDF_FIELD_TRANSFORM_NUMBER, 2, - "calls/s", WCalls_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, + "ops/s", WOps_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, RRDF_FIELD_OPTS_NONE, NULL); +#endif // minor page faults buffer_rrdf_table_add_field(wb, field_id++, "MinFlt", "Minor Page Faults/s", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, @@ -528,18 +670,8 @@ void function_processes(const char *transaction, char *function, 2, "pgflts/s", MinFlt_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, RRDF_FIELD_OPTS_NONE, NULL); - buffer_rrdf_table_add_field(wb, field_id++, "CMinFlt", "Children Minor Page Faults/s", - RRDF_FIELD_TYPE_BAR_WITH_INTEGER, - RRDF_FIELD_VISUAL_BAR, - RRDF_FIELD_TRANSFORM_NUMBER, 2, "pgflts/s", CMinFlt_max, RRDF_FIELD_SORT_DESCENDING, - NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, - RRDF_FIELD_OPTS_NONE, NULL); - buffer_rrdf_table_add_field(wb, field_id++, "TMinFlt", "Total Minor Page Faults/s", - RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, - RRDF_FIELD_TRANSFORM_NUMBER, 2, "pgflts/s", TMinFlt_max, RRDF_FIELD_SORT_DESCENDING, - NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, - RRDF_FIELD_OPTS_NONE, NULL); +#if (PROCESSES_HAVE_MAJFLT == 1) // major page faults buffer_rrdf_table_add_field(wb, field_id++, "MajFlt", "Major Page Faults/s", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, @@ -547,24 +679,42 @@ void function_processes(const char *transaction, char *function, 2, "pgflts/s", MajFlt_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, RRDF_FIELD_OPTS_NONE, NULL); +#endif + +#if (PROCESSES_HAVE_CHILDREN_FLTS == 1) + buffer_rrdf_table_add_field(wb, field_id++, "CMinFlt", "Children Minor Page Faults/s", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, + RRDF_FIELD_VISUAL_BAR, + RRDF_FIELD_TRANSFORM_NUMBER, 2, "pgflts/s", CMinFlt_max, RRDF_FIELD_SORT_DESCENDING, + NULL, RRDF_FIELD_SUMMARY_SUM, 
RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); buffer_rrdf_table_add_field(wb, field_id++, "CMajFlt", "Children Major Page Faults/s", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 2, "pgflts/s", CMajFlt_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, RRDF_FIELD_OPTS_NONE, NULL); + buffer_rrdf_table_add_field(wb, field_id++, "TMinFlt", "Total Minor Page Faults/s", + RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, + RRDF_FIELD_TRANSFORM_NUMBER, 2, "pgflts/s", TMinFlt_max, RRDF_FIELD_SORT_DESCENDING, + NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); buffer_rrdf_table_add_field(wb, field_id++, "TMajFlt", "Total Major Page Faults/s", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 2, "pgflts/s", TMajFlt_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, RRDF_FIELD_OPTS_NONE, NULL); +#endif +#if (PROCESSES_HAVE_FDS == 1) // open file descriptors +#if (PROCESSES_HAVE_PID_LIMITS == 1) buffer_rrdf_table_add_field(wb, field_id++, "FDsLimitPercent", "Percentage of Open Descriptors vs Limits", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 2, "%", FDsLimitPercent_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_MAX, RRDF_FIELD_FILTER_RANGE, RRDF_FIELD_OPTS_NONE, NULL); +#endif buffer_rrdf_table_add_field(wb, field_id++, "FDs", "All Open File Descriptors", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 0, "fds", FDs_max, RRDF_FIELD_SORT_DESCENDING, NULL, @@ -617,6 +767,16 @@ void function_processes(const char *transaction, char *function, RRDF_FIELD_TRANSFORM_NUMBER, 0, "fds", OtherFDs_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, RRDF_FIELD_OPTS_NONE, NULL); +#endif + +#if (PROCESSES_HAVE_HANDLES == 1) + buffer_rrdf_table_add_field(wb, 
field_id++, "Handles", "Open Handles", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, + RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 0, + "handles", + Handles_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM, + RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, NULL); +#endif // processes, threads, uptime buffer_rrdf_table_add_field(wb, field_id++, "Processes", "Processes", RRDF_FIELD_TYPE_BAR_WITH_INTEGER, @@ -650,27 +810,39 @@ void function_processes(const char *transaction, char *function, { buffer_json_add_array_item_string(wb, "UserCPU"); buffer_json_add_array_item_string(wb, "SysCPU"); +#if (PROCESSES_HAVE_CPU_GUEST_TIME == 1) buffer_json_add_array_item_string(wb, "GuestCPU"); +#endif +#if (PROCESSES_HAVE_CPU_CHILDREN_TIME == 1) buffer_json_add_array_item_string(wb, "CUserCPU"); buffer_json_add_array_item_string(wb, "CSysCPU"); +#if (PROCESSES_HAVE_CPU_GUEST_TIME == 1) buffer_json_add_array_item_string(wb, "CGuestCPU"); +#endif +#endif } buffer_json_array_close(wb); } buffer_json_object_close(wb); +#if (PROCESSES_HAVE_VOLCTX == 1) || (PROCESSES_HAVE_NVOLCTX == 1) buffer_json_member_add_object(wb, "CPUCtxSwitches"); { buffer_json_member_add_string(wb, "name", "CPU Context Switches"); buffer_json_member_add_string(wb, "type", "stacked-bar"); buffer_json_member_add_array(wb, "columns"); { +#if (PROCESSES_HAVE_VOLCTX == 1) buffer_json_add_array_item_string(wb, "vCtxSwitch"); +#endif +#if (PROCESSES_HAVE_NVOLCTX == 1) buffer_json_add_array_item_string(wb, "iCtxSwitch"); +#endif } buffer_json_array_close(wb); } buffer_json_object_close(wb); +#endif // Memory chart buffer_json_member_add_object(wb, "Memory"); @@ -688,7 +860,7 @@ void function_processes(const char *transaction, char *function, } buffer_json_object_close(wb); - if(MemTotal) { + if(total_memory_bytes) { // Memory chart buffer_json_member_add_object(wb, "MemoryPercent"); { @@ -703,7 +875,7 @@ void function_processes(const char *transaction, char *function, buffer_json_object_close(wb); } -#if 
!defined(__FreeBSD__) && !defined(__APPLE__) +#if (PROCESSES_HAVE_LOGICAL_IO == 1) || (PROCESSES_HAVE_PHYSICAL_IO == 1) // I/O Reads chart buffer_json_member_add_object(wb, "Reads"); { @@ -711,8 +883,12 @@ void function_processes(const char *transaction, char *function, buffer_json_member_add_string(wb, "type", "stacked-bar"); buffer_json_member_add_array(wb, "columns"); { +#if (PROCESSES_HAVE_LOGICAL_IO == 1) buffer_json_add_array_item_string(wb, "LReads"); +#endif +#if (PROCESSES_HAVE_PHYSICAL_IO == 1) buffer_json_add_array_item_string(wb, "PReads"); +#endif } buffer_json_array_close(wb); } @@ -725,13 +901,19 @@ void function_processes(const char *transaction, char *function, buffer_json_member_add_string(wb, "type", "stacked-bar"); buffer_json_member_add_array(wb, "columns"); { +#if (PROCESSES_HAVE_LOGICAL_IO == 1) buffer_json_add_array_item_string(wb, "LWrites"); +#endif +#if (PROCESSES_HAVE_PHYSICAL_IO == 1) buffer_json_add_array_item_string(wb, "PWrites"); +#endif } buffer_json_array_close(wb); } buffer_json_object_close(wb); +#endif +#if (PROCESSES_HAVE_LOGICAL_IO == 1) // Logical I/O chart buffer_json_member_add_object(wb, "LogicalIO"); { @@ -747,6 +929,7 @@ void function_processes(const char *transaction, char *function, buffer_json_object_close(wb); #endif +#if (PROCESSES_HAVE_PHYSICAL_IO == 1) // Physical I/O chart buffer_json_member_add_object(wb, "PhysicalIO"); { @@ -760,7 +943,9 @@ void function_processes(const char *transaction, char *function, buffer_json_array_close(wb); } buffer_json_object_close(wb); +#endif +#if (PROCESSES_HAVE_IO_CALLS == 1) // I/O Calls chart buffer_json_member_add_object(wb, "IOCalls"); { @@ -768,12 +953,13 @@ void function_processes(const char *transaction, char *function, buffer_json_member_add_string(wb, "type", "stacked-bar"); buffer_json_member_add_array(wb, "columns"); { - buffer_json_add_array_item_string(wb, "RCalls"); + buffer_json_add_array_item_string(wb, "ROps"); buffer_json_add_array_item_string(wb, "WCalls"); } 
buffer_json_array_close(wb); } buffer_json_object_close(wb); +#endif // Minor Page Faults chart buffer_json_member_add_object(wb, "MinFlt"); @@ -893,6 +1079,7 @@ void function_processes(const char *transaction, char *function, } buffer_json_object_close(wb); +#if (PROCESSES_HAVE_UID == 1) // group by User buffer_json_member_add_object(wb, "User"); { @@ -905,7 +1092,9 @@ void function_processes(const char *transaction, char *function, buffer_json_array_close(wb); } buffer_json_object_close(wb); +#endif +#if (PROCESSES_HAVE_GID == 1) // group by Group buffer_json_member_add_object(wb, "Group"); { @@ -918,9 +1107,12 @@ void function_processes(const char *transaction, char *function, buffer_json_array_close(wb); } buffer_json_object_close(wb); +#endif } buffer_json_object_close(wb); // group_by + netdata_mutex_unlock(&apps_and_stdout_mutex); + close_and_send: buffer_json_member_add_time_t(wb, "expires", now_s + update_every); buffer_json_finalize(wb); diff --git a/src/collectors/apps.plugin/apps_groups.conf b/src/collectors/apps.plugin/apps_groups.conf index d5bb9448422eda..df01c4b4589e25 100644 --- a/src/collectors/apps.plugin/apps_groups.conf +++ b/src/collectors/apps.plugin/apps_groups.conf @@ -1,90 +1,41 @@ -# -# apps.plugin process grouping -# -# The apps.plugin displays charts with information about the processes running. -# This config allows grouping processes together, so that several processes -# will be reported as one. -# -# Only groups in this file are reported. All other processes will be reported -# as 'other'. -# -# For each process given, its whole process tree will be grouped, not just -# the process matched. The plugin will include both parents and childs. -# -# The format is: -# -# group: process1 process2 process3 ... -# -# Each group can be given multiple times, to add more processes to it. 
-# -# The process names are the ones returned by: -# -# - ps -e or /proc/PID/stat -# - in case of substring mode (see below): /proc/PID/cmdline -# -# To add process names with spaces, enclose them in quotes (single or double) -# example: 'Plex Media Serv' "my other process". -# -# Note that spaces are not supported for process groups. Use a dash "-" instead. -# example-process-group: process1 process2 -# -# Wildcard support: -# You can add an asterisk (*) at the beginning and/or the end of a process: -# -# *name suffix mode: will search for processes ending with 'name' -# (/proc/PID/stat) -# -# name* prefix mode: will search for processes beginning with 'name' -# (/proc/PID/stat) -# -# *name* substring mode: will search for 'name' in the whole command line -# (/proc/PID/cmdline) -# -# If you enter even just one *name* (substring), apps.plugin will process -# /proc/PID/cmdline for all processes, just once (when they are first seen). -# -# To add processes with single quotes, enclose them in double quotes -# example: "process with this ' single quote" -# -# To add processes with double quotes, enclose them in single quotes: -# example: 'process with this " double quote' -# -# If a group or process name starts with a -, the dimension will be hidden -# (cpu chart only). -# -# If a process starts with a +, debugging will be enabled for it -# (debugging produces a lot of output - do not enable it in production systems) -# -# You can add any number of groups you like. Only the ones found running will -# affect the charts generated. However, producing charts with hundreds of -# dimensions may slow down your web browser. -# -# The order of the entries in this list is important: the first that matches -# a process is used, so put important ones at the top. Processes not matched -# by any row, will inherit it from their parents or children. 
-# -# The order also controls the order of the dimensions on the generated charts -# (although applications started after apps.plugin is started, will be appended -# to the existing list of dimensions the netdata daemon maintains). +## +## apps.plugin process grouping +## +## Documentation at: +## https://github.com/netdata/netdata/blob/master/src/collectors/apps.plugin/README.md +## +## The list of process managers can be configured here (uncomment and edit): -# ----------------------------------------------------------------------------- -# NETDATA processes accounting +## Linux +#managers: init systemd containerd-shim dumb-init gnome-shell docker-init -# netdata main process -netdata: netdata +## FreeBSD +#managers: init + +## MacOS +#managers: launchd + +## Windows +#managers: System services wininit + +## ----------------------------------------------------------------------------- +## Processes of interest -# netdata known plugins -# plugins not defined here will be accumulated in netdata, above -apps.plugin: apps.plugin -freeipmi.plugin: freeipmi.plugin -nfacct.plugin: nfacct.plugin -cups.plugin: cups.plugin -xenstat.plugin: xenstat.plugin -perf.plugin: perf.plugin +## NETDATA processes accounting +netdata: netdata +## netdata known plugins +## plugins not defined here will be accumulated into netdata, above +apps.plugin: *apps.plugin* +freeipmi.plugin: *freeipmi.plugin* +nfacct.plugin: *nfacct.plugin* +cups.plugin: *cups.plugin* +xenstat.plugin: *xenstat.plugin* +perf.plugin: *perf.plugin* charts.d.plugin: *charts.d.plugin* python.d.plugin: *python.d.plugin* systemd-journal.plugin: *systemd-journal.plugin* network-viewer.plugin: *network-viewer.plugin* +windows-events.plugin: *windows-events.plugin* tc-qos-helper: *tc-qos-helper.sh* fping: fping ioping: ioping @@ -93,262 +44,79 @@ slabinfo.plugin: *slabinfo.plugin* ebpf.plugin: *ebpf.plugin* debugfs.plugin: *debugfs.plugin* -# agent-service-discovery +## agent-service-discovery agent_sd: agent_sd -# 
----------------------------------------------------------------------------- -# authentication/authorization related servers - -auth: radius* openldap* ldap* slapd authelia sssd saslauthd polkitd gssproxy -fail2ban: fail2ban* - -# ----------------------------------------------------------------------------- -# web/ftp servers +## ----------------------------------------------------------------------------- -httpd: apache* httpd nginx* lighttpd hiawatha caddy h2o -proxy: squid* c-icap squidGuard varnish* -php: php* lsphp* -ftpd: proftpd in.tftpd vsftpd -uwsgi: uwsgi unicorn: *unicorn* puma: *puma* - -# ----------------------------------------------------------------------------- -# database servers - -sql: mysqld* mariad* postgres* postmaster* oracle_* ora_* sqlservr -nosql: mongod redis* valkey* memcached *couchdb* -timedb: prometheus *carbon-cache.py* *carbon-aggregator.py* *graphite/manage.py* *net.opentsdb.tools.TSDMain* influxd* - -clickhouse: clickhouse-serv* clickhouse-cli* clckhouse-watch - -# ----------------------------------------------------------------------------- -# email servers - -mta: amavis* zmstat-* zmdiaglog zmmailboxdmgr opendkim postfwd2 smtp* lmtp* sendmail postfix master pickup qmgr showq tlsmgr postscreen oqmgr msmtp* nullmailer* -mda: dovecot *imapd *pop3d *popd - -# ----------------------------------------------------------------------------- -# network, routing, VPN - -ppp: ppp* -vpn: openvpn pptp* cjdroute gvpe tincd wireguard tailscaled -wifi: hostapd wpa_supplicant -routing: ospfd* ospf6d* bgpd bfdd fabricd isisd eigrpd sharpd staticd ripd ripngd pimd pbrd nhrpd ldpd zebra vrrpd vtysh bird* -modem: ModemManager -netmanager: NetworkManager nm* systemd-networkd networkctl netplan connmand wicked* avahi-autoipd networkd-dispatcher -firewall: firewalld ufw nft -tor: tor -bluetooth: bluetooth bluetoothd bluez bluedevil obexd - -# ----------------------------------------------------------------------------- -# high availability and 
balancers - +couchdb: *couchdb* +graphite: *carbon-cache.py* *carbon-aggregator.py* *graphite/manage.py* +opentsdb: *net.opentsdb.tools.TSDMain* +imapd: *imapd +pop3d: *pop3d +popd: *popd camo: *camo* -balancer: ipvs_* haproxy -ha: corosync hs_logd ha_logd stonithd pacemakerd lrmd crmd keepalived ucarp* - -# ----------------------------------------------------------------------------- -# telephony - -pbx: asterisk safe_asterisk *vicidial* -sip: opensips* stund - -# ----------------------------------------------------------------------------- -# chat - -chat: irssi *vines* *prosody* murmurd - -# ----------------------------------------------------------------------------- -# monitoring - -logs: ulogd* syslog* rsyslog* logrotate *systemd-journal* rotatelogs sysklogd metalog -nms: snmpd vnstatd smokeping zabbix* munin* mon openhpid tailon nrpe -monit: monit -splunk: splunkd +vicidial: *vicidial* +vines: *vines* +prosody: *prosody* azure: mdsd *waagent* *omiserver* *omiagent* hv_kvp_daemon hv_vss_daemon *auoms* *omsagent* datadog: *datadog* -edgedelta: edgedelta newrelic: newrelic* google-agent: *google_guest_agent* *google_osconfig_agent* -nvidia-smi: nvidia-smi -intel_gpu_top: intel_gpu_top -htop: htop -watchdog: watchdog -telegraf: telegraf -grafana: grafana* - -# ----------------------------------------------------------------------------- -# storage, file systems and file servers - ceph: ceph-* ceph_* radosgw* rbd-* cephfs-* osdmaptool crushtool samba: smbd nmbd winbindd ctdbd ctdb-* ctdb_* nfs: rpcbind rpc.* nfs* zfs: spl_* z_* txg_* zil_* arc_* l2arc* -btrfs: btrfs* iscsi: iscsid iscsi_eh afp: netatalk afpd cnid_dbd cnid_metad -ntfs-3g: ntfs-3g - -# ----------------------------------------------------------------------------- -# kubernetes - -kubelet: kubelet -kube-dns: kube-dns -kube-proxy: kube-proxy -metrics-server: metrics-server -heapster: heapster - -# ----------------------------------------------------------------------------- -# AWS - aws-s3: '*aws s3*' 
s3cmd s5cmd -aws: aws - -# ----------------------------------------------------------------------------- -# virtualization platform - proxmox-ve: pve* spiceproxy - -# ----------------------------------------------------------------------------- -# containers & virtual machines - -containers: lxc* docker* balena* containerd -VMs: vbox* VBox* qemu* kvm* libvirt: virtlogd virtqemud virtstoraged virtnetworkd virtlockd virtinterfaced libvirt: virtnodedevd virtproxyd virtsecretd libvirtd guest-agent: qemu-ga spice-vdagent cloud-init* - -# ----------------------------------------------------------------------------- -# ssh servers and clients - -ssh: ssh* scp sftp* dropbear - -# ----------------------------------------------------------------------------- -# print servers and clients - -print: cups* lpd lpq - -# ----------------------------------------------------------------------------- -# time servers and clients - -time: ntp* systemd-timesyn* chronyd ptp* - -# ----------------------------------------------------------------------------- -# dhcp servers and clients - dhcp: *dhcp* dhclient -# ----------------------------------------------------------------------------- -# name servers and clients - -dns: named unbound nsd pdns_server knotd gdnsd yadifad dnsmasq *systemd-resolve* pihole* avahi-daemon avahi-dnsconfd -dnsdist: dnsdist - -# ----------------------------------------------------------------------------- -# installation / compilation / debugging - build: cc1 cc1plus as gcc* cppcheck ld make cmake automake autoconf autoreconf build: cargo rustc bazel buck git gdb valgrind* rpmbuild dpkg-buildpackage - -# ----------------------------------------------------------------------------- -# package management - packagemanager: apt* dpkg* dselect dnf yum rpm zypp* yast* pacman xbps* swupd* emerge* packagemanager: packagekitd pkgin pkg apk snapd slackpkg slapt-get - -# ----------------------------------------------------------------------------- -# antivirus - 
-antivirus: clam* *clam imunify360* - -# ----------------------------------------------------------------------------- -# torrent clients - -torrents: *deluge* transmission* *SickBeard* *CouchPotato* *rtorrent* - -# ----------------------------------------------------------------------------- -# backup servers and clients - +clam: clam* *clam backup: rsync lsyncd bacula* borg rclone - -# ----------------------------------------------------------------------------- -# cron - cron: cron* atd anacron *systemd-cron* incrond - -# ----------------------------------------------------------------------------- -# UPS - ups: upsmon upsd */nut/* apcupsd - -# ----------------------------------------------------------------------------- -# media players, servers, clients - -media: mplayer vlc xine mediatomb omxplayer* kodi* xbmc* mediacenter eventlircd -media: mpd minidlnad mt-daapd Plex* jellyfin squeeze* jackett Ombi -media: strawberry* clementine* - audio: pulse* pipewire wireplumber jack* -# ----------------------------------------------------------------------------- -# java applications +rabbitmq: *rabbitmq* +sidekiq: *sidekiq* +erlang: beam.smp + +## ----------------------------------------------------------------------------- +## java applications hdfsdatanode: *org.apache.hadoop.hdfs.server.datanode.DataNode* hdfsnamenode: *org.apache.hadoop.hdfs.server.namenode.NameNode* hdfsjournalnode: *org.apache.hadoop.hdfs.qjournal.server.JournalNode* hdfszkfc: *org.apache.hadoop.hdfs.tools.DFSZKFailoverController* - yarnnode: *org.apache.hadoop.yarn.server.nodemanager.NodeManager* yarnmgr: *org.apache.hadoop.yarn.server.resourcemanager.ResourceManager* yarnproxy: *org.apache.hadoop.yarn.server.webproxy.WebAppProxyServer* - sparkworker: *org.apache.spark.deploy.worker.Worker* sparkmaster: *org.apache.spark.deploy.master.Master* - hbaseregion: *org.apache.hadoop.hbase.regionserver.HRegionServer* hbaserest: *org.apache.hadoop.hbase.rest.RESTServer* hbasethrift: 
*org.apache.hadoop.hbase.thrift.ThriftServer* hbasemaster: *org.apache.hadoop.hbase.master.HMaster* - zookeeper: *org.apache.zookeeper.server.quorum.QuorumPeerMain* - hive2: *org.apache.hive.service.server.HiveServer2* hivemetastore: *org.apache.hadoop.hive.metastore.HiveMetaStore* - solr: *solr.install.dir* - airflow: *airflow* +kafka: *kafka.Kafka* -# ----------------------------------------------------------------------------- -# GUI - -X: X Xorg xinit xdm Xwayland xsettingsd touchegg -wayland: swaylock swayidle waypipe wayvnc -kde: *kdeinit* kdm sddm plasmashell startplasma-* kwin* kwallet* krunner kactivitymanager* -gnome: gnome-* gdm gconf* mutter -mate: mate-* msd-* marco* -cinnamon: cinnamon* muffin -xfce: xfwm4 xfdesktop xfce* Thunar xfsettingsd xfconf* -lxde: lxde* startlxde lxdm lxappearance* lxlauncher* lxpanel* lxsession* lxsettings* -lxqt: lxqt* startlxqt -enlightenment: entrance enlightenment* -i3: i3* -awesome: awesome awesome-client -dwm: dwm.* -sway: sway -weston: weston -cage: cage -wayfire: wayfire -gui: lightdm colord seatd greetd gkrellm slim qingy dconf* *gvfs gvfs* -gui: '*systemd --user*' xdg-* at-spi-* - -webbrowser: *chrome-sandbox* *google-chrome* *chromium* *firefox* vivaldi* opera* epiphany chrome* -webbrowser: lynx elinks w3m w3mmee links -mua: evolution-* thunderbird* mutt neomutt pine mailx alpine - -# ----------------------------------------------------------------------------- -# Kernel / System +## ----------------------------------------------------------------------------- +## Kernel / System ksmd: ksmd khugepaged: khugepaged @@ -356,87 +124,5 @@ kdamond: kdamond kswapd: kswapd zswap: zswap kcompactd: kcompactd - -system: systemd* udisks* udevd* *udevd ipv6_addrconf dbus-* rtkit* -system: mdadm acpid uuidd upowerd elogind* eudev mdev lvmpolld dmeventd -system: accounts-daemon rngd haveged rasdaemon irqbalance start-stop-daemon -system: supervise-daemon openrc* init runit runsvdir runsv auditd lsmd -system: abrt* nscd 
rtkit-daemon gpg-agent usbguard* boltd geoclue - -kernel: kworker kthreadd kauditd lockd khelper kdevtmpfs khungtaskd rpciod -kernel: fsnotify_mark kthrotld deferwq scsi_* kdmflush oom_reaper kdevtempfs -kernel: ksoftirqd - -# ----------------------------------------------------------------------------- -# inetd - -inetd: inetd xinetd - -# ----------------------------------------------------------------------------- -# other application servers - -nginxunit: unitd - -typesense: typesense-serve - -i2pd: i2pd - -rethinkdb: rethinkdb - -beanstalkd: beanstalkd - -rspamd: rspamd - -consul: consul - -kafka: *kafka.Kafka* - -rabbitmq: *rabbitmq* - -sidekiq: *sidekiq* -java: java -ipfs: ipfs -erlang: beam.smp - -node: node -factorio: factorio - -p4: p4* - -git-services: gitea gitlab-runner - -freeswitch: freeswitch* - -# -------- web3 / blockchains ---------- - -go-ethereum: geth* -nethermind-ethereum: nethermind* -besu-ethereum: besu* -openEthereum: openethereum* -urbit: urbit* -bitcoin-node: *bitcoind* lnd* -filecoin: lotus* lotus-miner* lotus-worker* -solana: solana* -web3: *hardhat* *ganache* *truffle* *brownie* *waffle* -terra: terra* mantle* - -# ----------------------------------------------------------------------------- -# chaos engineering tools - -stress: stress stress-ng* -gremlin: gremlin* - -# ----------------------------------------------------------------------------- -# load testing tools - -locust: locust - -# ----------------------------------------------------------------------------- -# data science and machine learning tools - -jupyter: jupyter* - -# ----------------------------------------------------------------------------- -# File synchronization tools - -filesync: dropbox syncthing +ipvs: ipvs_* +btrfs: btrfs* diff --git a/src/collectors/apps.plugin/apps_incremental_collection.c b/src/collectors/apps.plugin/apps_incremental_collection.c new file mode 100644 index 00000000000000..18aa90f30cacde --- /dev/null +++ 
b/src/collectors/apps.plugin/apps_incremental_collection.c @@ -0,0 +1,188 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "apps_plugin.h" + +#if (INCREMENTAL_DATA_COLLECTION == 1) +bool managed_log(struct pid_stat *p, PID_LOG log, bool status) { + if(unlikely(!status)) { + // netdata_log_error("command failed log %u, errno %d", log, errno); + + if(unlikely(debug_enabled || errno != ENOENT)) { + if(unlikely(debug_enabled || !(p->log_thrown & log))) { + p->log_thrown |= log; + switch(log) { + case PID_LOG_IO: +#if !defined(OS_LINUX) + netdata_log_error("Cannot fetch process %d I/O info (command '%s')", p->pid, pid_stat_comm(p)); +#else + netdata_log_error("Cannot process %s/proc/%d/io (command '%s')", netdata_configured_host_prefix, p->pid, pid_stat_comm(p)); +#endif + break; + + case PID_LOG_STATUS: +#if !defined(OS_LINUX) + netdata_log_error("Cannot fetch process %d status info (command '%s')", p->pid, pid_stat_comm(p)); +#else + netdata_log_error("Cannot process %s/proc/%d/status (command '%s')", netdata_configured_host_prefix, p->pid, pid_stat_comm(p)); +#endif + break; + + case PID_LOG_CMDLINE: +#if !defined(OS_LINUX) + netdata_log_error("Cannot fetch process %d command line (command '%s')", p->pid, pid_stat_comm(p)); +#else + netdata_log_error("Cannot process %s/proc/%d/cmdline (command '%s')", netdata_configured_host_prefix, p->pid, pid_stat_comm(p)); +#endif + break; + + case PID_LOG_FDS: +#if !defined(OS_LINUX) + netdata_log_error("Cannot fetch process %d files (command '%s')", p->pid, pid_stat_comm(p)); +#else + netdata_log_error("Cannot process entries in %s/proc/%d/fd (command '%s')", netdata_configured_host_prefix, p->pid, pid_stat_comm(p)); +#endif + break; + + case PID_LOG_LIMITS: +#if !defined(OS_LINUX) + ; +#else + netdata_log_error("Cannot process %s/proc/%d/limits (command '%s')", netdata_configured_host_prefix, p->pid, pid_stat_comm(p)); +#endif + + case PID_LOG_STAT: + break; + + default: + netdata_log_error("unhandled error for pid 
%d, command '%s'", p->pid, pid_stat_comm(p)); + break; + } + } + } + errno_clear(); + } + else if(unlikely(p->log_thrown & log)) { + // netdata_log_error("unsetting log %u on pid %d", log, p->pid); + p->log_thrown &= ~log; + } + + return status; +} + +static inline bool incrementally_read_pid_stat(struct pid_stat *p, void *ptr) { + p->last_stat_collected_usec = p->stat_collected_usec; + p->stat_collected_usec = now_monotonic_usec(); + calls_counter++; + + if(!OS_FUNCTION(apps_os_read_pid_stat)(p, ptr)) + return 0; + + return 1; +} + +static inline int incrementally_read_pid_io(struct pid_stat *p, void *ptr) { + p->last_io_collected_usec = p->io_collected_usec; + p->io_collected_usec = now_monotonic_usec(); + calls_counter++; + + bool ret = OS_FUNCTION(apps_os_read_pid_io)(p, ptr); + + return ret ? 1 : 0; +} + +// -------------------------------------------------------------------------------------------------------------------- + +int incrementally_collect_data_for_pid_stat(struct pid_stat *p, void *ptr) { + if(unlikely(p->read)) return 0; + + pid_collection_started(p); + + // -------------------------------------------------------------------- + // /proc/<pid>/stat + + if(unlikely(!managed_log(p, PID_LOG_STAT, incrementally_read_pid_stat(p, ptr)))) { + // there is no reason to proceed if we cannot get its status + pid_collection_failed(p); + return 0; + } + + // check its parent pid + if(unlikely(p->ppid < INIT_PID)) + p->ppid = 0; + + // -------------------------------------------------------------------- + // /proc/<pid>/io + + managed_log(p, PID_LOG_IO, incrementally_read_pid_io(p, ptr)); + + // -------------------------------------------------------------------- + // /proc/<pid>/status + + if(unlikely(!managed_log(p, PID_LOG_STATUS, OS_FUNCTION(apps_os_read_pid_status)(p, ptr)))) { + // there is no reason to proceed if we cannot get its status + pid_collection_failed(p); + return 0; + } + + // -------------------------------------------------------------------- + //
/proc/<pid>/fd + +#if (PROCESSES_HAVE_FDS == 1) + if(enable_file_charts) { + managed_log(p, PID_LOG_FDS, read_pid_file_descriptors(p, ptr)); +#if (PROCESSES_HAVE_PID_LIMITS == 1) + managed_log(p, PID_LOG_LIMITS, OS_FUNCTION(apps_os_read_pid_limits)(p, ptr)); +#endif + } +#endif + + // -------------------------------------------------------------------- + // done! + +#if defined(NETDATA_INTERNAL_CHECKS) && (ALL_PIDS_ARE_READ_INSTANTLY == 0) + struct pid_stat *pp = p->parent; + if(unlikely(include_exited_childs && pp && !pp->read)) + nd_log(NDLS_COLLECTORS, NDLP_WARNING, + "Read process %d (%s) sortlisted %"PRIu32", but its parent %d (%s) sortlisted %"PRIu32", is not read", + p->pid, pid_stat_comm(p), p->sortlist, pp->pid, pid_stat_comm(pp), pp->sortlist); +#endif + + pid_collection_completed(p); + + return 1; +} + +int incrementally_collect_data_for_pid(pid_t pid, void *ptr) { + if(unlikely(pid < INIT_PID)) { + netdata_log_error("Invalid pid %d read (expected >= %d). Ignoring process.", pid, INIT_PID); + return 0; + } + + struct pid_stat *p = get_or_allocate_pid_entry(pid); + if(unlikely(!p)) return 0; + + return incrementally_collect_data_for_pid_stat(p, ptr); +} +#endif + +// -------------------------------------------------------------------------------------------------------------------- + +#if (PROCESSES_HAVE_CMDLINE == 1) +int read_proc_pid_cmdline(struct pid_stat *p) { + static char cmdline[MAX_CMDLINE]; + + if(unlikely(!OS_FUNCTION(apps_os_get_pid_cmdline)(p, cmdline, sizeof(cmdline)))) + goto cleanup; + + string_freez(p->cmdline); + p->cmdline = string_strdupz(cmdline); + + return 1; + +cleanup: + // copy the command to the command line + string_freez(p->cmdline); + p->cmdline = string_dup(p->comm); + return 0; +} +#endif diff --git a/src/collectors/apps.plugin/apps_os_freebsd.c b/src/collectors/apps.plugin/apps_os_freebsd.c new file mode 100644 index 00000000000000..bec6d0c4d9e2ce --- /dev/null +++ b/src/collectors/apps.plugin/apps_os_freebsd.c @@ -0,0 +1,365
@@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "apps_plugin.h" + +#if defined(OS_FREEBSD) + +usec_t system_current_time_ut; +long global_block_size = 512; + +static long get_fs_block_size(void) { + struct statvfs vfs; + static long block_size = 0; + + if (block_size == 0) { + if (statvfs("/", &vfs) == 0) { + block_size = vfs.f_frsize ? vfs.f_frsize : vfs.f_bsize; + } else { + // If statvfs fails, fall back to the typical block size + block_size = 512; + } + } + + return block_size; +} + +void apps_os_init_freebsd(void) { + global_block_size = get_fs_block_size(); +} + +static inline void get_current_time(void) { + struct timeval current_time; + gettimeofday(&current_time, NULL); + system_current_time_ut = timeval_usec(&current_time); +} + +uint64_t apps_os_get_total_memory_freebsd(void) { + uint64_t ret = 0; + + int mib[2] = {CTL_HW, HW_PHYSMEM}; + size_t size = sizeof(ret); + if (sysctl(mib, 2, &ret, &size, NULL, 0) == -1) { + netdata_log_error("Failed to get total memory using sysctl"); + return 0; + } + + return ret; +} + +bool apps_os_read_pid_fds_freebsd(struct pid_stat *p, void *ptr) { + int mib[4]; + size_t size; + struct kinfo_file *fds; + static char *fdsbuf; + char *bfdsbuf, *efdsbuf; + char fdsname[FILENAME_MAX + 1]; +#define SHM_FORMAT_LEN 31 // format: 21 + size: 10 + char shm_name[FILENAME_MAX - SHM_FORMAT_LEN + 1]; + + // we make all pid fds negative, so that + // we can detect unused file descriptors + // at the end, to free them + make_all_pid_fds_negative(p); + + mib[0] = CTL_KERN; + mib[1] = KERN_PROC; + mib[2] = KERN_PROC_FILEDESC; + mib[3] = p->pid; + + if (unlikely(sysctl(mib, 4, NULL, &size, NULL, 0))) { + netdata_log_error("sysctl error: Can't get file descriptors data size for pid %d", p->pid); + return false; + } + if (likely(size > 0)) + fdsbuf = reallocz(fdsbuf, size); + if (unlikely(sysctl(mib, 4, fdsbuf, &size, NULL, 0))) { + netdata_log_error("sysctl error: Can't get file descriptors data for pid %d", p->pid); + return false; + } +
+ bfdsbuf = fdsbuf; + efdsbuf = fdsbuf + size; + while (bfdsbuf < efdsbuf) { + fds = (struct kinfo_file *)(uintptr_t)bfdsbuf; + if (unlikely(fds->kf_structsize == 0)) + break; + + // do not process file descriptors for current working directory, root directory, + // jail directory, ktrace vnode, text vnode and controlling terminal + if (unlikely(fds->kf_fd < 0)) { + bfdsbuf += fds->kf_structsize; + continue; + } + + // get file descriptors array index + size_t fdid = fds->kf_fd; + + // check if the fds array is small + if (unlikely(fdid >= p->fds_size)) { + // it is small, extend it + + debug_log("extending fd memory slots for %s from %d to %d", pid_stat_comm(p), p->fds_size, fdid + MAX_SPARE_FDS); + + p->fds = reallocz(p->fds, (fdid + MAX_SPARE_FDS) * sizeof(struct pid_fd)); + + // and initialize it + init_pid_fds(p, p->fds_size, (fdid + MAX_SPARE_FDS) - p->fds_size); + p->fds_size = fdid + MAX_SPARE_FDS; + } + + if (unlikely(p->fds[fdid].fd == 0)) { + // we don't know this fd, get it + + switch (fds->kf_type) { + case KF_TYPE_FIFO: + case KF_TYPE_VNODE: + if (unlikely(!fds->kf_path[0])) { + sprintf(fdsname, "other: inode: %lu", fds->kf_un.kf_file.kf_file_fileid); + break; + } + sprintf(fdsname, "%s", fds->kf_path); + break; + case KF_TYPE_SOCKET: + switch (fds->kf_sock_domain) { + case AF_INET: + case AF_INET6: +#if __FreeBSD_version < 1400074 + if (fds->kf_sock_protocol == IPPROTO_TCP) + sprintf(fdsname, "socket: %d %lx", fds->kf_sock_protocol, fds->kf_un.kf_sock.kf_sock_inpcb); + else +#endif + sprintf(fdsname, "socket: %d %lx", fds->kf_sock_protocol, fds->kf_un.kf_sock.kf_sock_pcb); + break; + case AF_UNIX: + /* print address of pcb and connected pcb */ + sprintf(fdsname, "socket: %lx %lx", fds->kf_un.kf_sock.kf_sock_pcb, fds->kf_un.kf_sock.kf_sock_unpconn); + break; + default: + /* print protocol number and socket address */ +#if __FreeBSD_version < 1200031 + sprintf(fdsname, "socket: other: %d %s %s", fds->kf_sock_protocol, fds->kf_sa_local.__ss_pad1, 
fds->kf_sa_local.__ss_pad2); +#else + sprintf(fdsname, "socket: other: %d %s %s", fds->kf_sock_protocol, fds->kf_un.kf_sock.kf_sa_local.__ss_pad1, fds->kf_un.kf_sock.kf_sa_local.__ss_pad2); +#endif + } + break; + case KF_TYPE_PIPE: + sprintf(fdsname, "pipe: %lu %lu", fds->kf_un.kf_pipe.kf_pipe_addr, fds->kf_un.kf_pipe.kf_pipe_peer); + break; + case KF_TYPE_PTS: +#if __FreeBSD_version < 1200031 + sprintf(fdsname, "other: pts: %u", fds->kf_un.kf_pts.kf_pts_dev); +#else + sprintf(fdsname, "other: pts: %lu", fds->kf_un.kf_pts.kf_pts_dev); +#endif + break; + case KF_TYPE_SHM: + strncpyz(shm_name, fds->kf_path, FILENAME_MAX - SHM_FORMAT_LEN); + sprintf(fdsname, "other: shm: %s size: %lu", shm_name, fds->kf_un.kf_file.kf_file_size); + break; + case KF_TYPE_SEM: + sprintf(fdsname, "other: sem: %u", fds->kf_un.kf_sem.kf_sem_value); + break; + default: + sprintf(fdsname, "other: pid: %d fd: %d", fds->kf_un.kf_proc.kf_pid, fds->kf_fd); + } + + // if another process already has this, we will get + // the same id + p->fds[fdid].fd = file_descriptor_find_or_add(fdsname, 0); + } + + // else make it positive again, we need it + // of course, the actual file may have changed + + else + p->fds[fdid].fd = -p->fds[fdid].fd; + + bfdsbuf += fds->kf_structsize; + } + + return true; +} + +bool apps_os_get_pid_cmdline_freebsd(struct pid_stat *p, char *cmdline, size_t bytes) { + size_t i, b = bytes - 1; + int mib[4]; + + mib[0] = CTL_KERN; + mib[1] = KERN_PROC; + mib[2] = KERN_PROC_ARGS; + mib[3] = p->pid; + if (unlikely(sysctl(mib, 4, cmdline, &b, NULL, 0))) + return false; + + cmdline[b] = '\0'; + for(i = 0; i < b ; i++) + if(unlikely(!cmdline[i])) cmdline[i] = ' '; + + return true; +} + +bool apps_os_read_pid_io_freebsd(struct pid_stat *p, void *ptr) { + struct kinfo_proc *proc_info = (struct kinfo_proc *)ptr; + + pid_incremental_rate(io, PDF_LREAD, proc_info->ki_rusage.ru_inblock * global_block_size); + pid_incremental_rate(io, PDF_LWRITE, proc_info->ki_rusage.ru_oublock * 
global_block_size); + + return true; +} + +bool apps_os_read_pid_limits_freebsd(struct pid_stat *p __maybe_unused, void *ptr __maybe_unused) { + return false; +} + +bool apps_os_read_pid_status_freebsd(struct pid_stat *p, void *ptr) { + struct kinfo_proc *proc_info = (struct kinfo_proc *)ptr; + + p->uid = proc_info->ki_uid; + p->gid = proc_info->ki_groups[0]; + p->values[PDF_VMSIZE] = proc_info->ki_size; + p->values[PDF_VMRSS] = proc_info->ki_rssize * pagesize; + // TODO: what about shared and swap memory on FreeBSD? + return true; +} + +//bool apps_os_read_global_cpu_utilization_freebsd(void) { +// static kernel_uint_t utime_raw = 0, stime_raw = 0, ntime_raw = 0; +// static usec_t collected_usec = 0, last_collected_usec = 0; +// long cp_time[CPUSTATES]; +// +// if (unlikely(CPUSTATES != 5)) { +// goto cleanup; +// } else { +// static int mib[2] = {0, 0}; +// +// if (unlikely(GETSYSCTL_SIMPLE("kern.cp_time", mib, cp_time))) { +// goto cleanup; +// } +// } +// +// last_collected_usec = collected_usec; +// collected_usec = now_monotonic_usec(); +// +// calls_counter++; +// +// // temporary - it is added global_ntime; +// kernel_uint_t global_ntime = 0; +// +// incremental_rate(global_utime, utime_raw, cp_time[0], collected_usec, last_collected_usec, (NSEC_PER_SEC / system_hz)); +// incremental_rate(global_ntime, ntime_raw, cp_time[1], collected_usec, last_collected_usec, (NSEC_PER_SEC / system_hz)); +// incremental_rate(global_stime, stime_raw, cp_time[2], collected_usec, last_collected_usec, (NSEC_PER_SEC / system_hz)); +// +// global_utime += global_ntime; +// +// if(unlikely(global_iterations_counter == 1)) { +// global_utime = 0; +// global_stime = 0; +// global_gtime = 0; +// } +// +// return 1; +// +//cleanup: +// global_utime = 0; +// global_stime = 0; +// global_gtime = 0; +// return 0; +//} + +bool apps_os_read_pid_stat_freebsd(struct pid_stat *p, void *ptr) { + struct kinfo_proc *proc_info = (struct kinfo_proc *)ptr; + if (unlikely(proc_info->ki_tdflags & 
TDF_IDLETD)) + goto cleanup; + + char *comm = proc_info->ki_comm; + p->ppid = proc_info->ki_ppid; + + update_pid_comm(p, comm); + + pid_incremental_rate(stat, PDF_MINFLT, (kernel_uint_t)proc_info->ki_rusage.ru_minflt); + pid_incremental_rate(stat, PDF_CMINFLT, (kernel_uint_t)proc_info->ki_rusage_ch.ru_minflt); + pid_incremental_rate(stat, PDF_MAJFLT, (kernel_uint_t)proc_info->ki_rusage.ru_majflt); + pid_incremental_rate(stat, PDF_CMAJFLT, (kernel_uint_t)proc_info->ki_rusage_ch.ru_majflt); + pid_incremental_rate(stat, PDF_UTIME, (kernel_uint_t)proc_info->ki_rusage.ru_utime.tv_sec * NSEC_PER_SEC + proc_info->ki_rusage.ru_utime.tv_usec * NSEC_PER_USEC); + pid_incremental_rate(stat, PDF_STIME, (kernel_uint_t)proc_info->ki_rusage.ru_stime.tv_sec * NSEC_PER_SEC + proc_info->ki_rusage.ru_stime.tv_usec * NSEC_PER_USEC); + pid_incremental_rate(stat, PDF_CUTIME, (kernel_uint_t)proc_info->ki_rusage_ch.ru_utime.tv_sec * NSEC_PER_SEC + proc_info->ki_rusage_ch.ru_utime.tv_usec * NSEC_PER_USEC); + pid_incremental_rate(stat, PDF_CSTIME, (kernel_uint_t)proc_info->ki_rusage_ch.ru_stime.tv_sec * NSEC_PER_SEC + proc_info->ki_rusage_ch.ru_stime.tv_usec * NSEC_PER_USEC); + + p->values[PDF_THREADS] = proc_info->ki_numthreads; + + usec_t started_ut = timeval_usec(&proc_info->ki_start); + p->values[PDF_UPTIME] = (system_current_time_ut > started_ut) ? 
(system_current_time_ut - started_ut) / USEC_PER_SEC : 0; + + if(unlikely(debug_enabled || (p->target && p->target->debug_enabled))) + debug_log_int("READ PROC/PID/STAT: %s/proc/%d/stat, process: '%s' on target '%s' (dt=%llu) VALUES: utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", cutime=" KERNEL_UINT_FORMAT ", cstime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", cminflt=" KERNEL_UINT_FORMAT ", cmajflt=" KERNEL_UINT_FORMAT ", threads=%d", + netdata_configured_host_prefix, p->pid, pid_stat_comm(p), (p->target)?string2str(p->target->name):"UNSET", + p->stat_collected_usec - p->last_stat_collected_usec, + p->values[PDF_UTIME], + p->values[PDF_STIME], + p->values[PDF_CUTIME], + p->values[PDF_CSTIME], + p->values[PDF_MINFLT], + p->values[PDF_MAJFLT], + p->values[PDF_CMINFLT], + p->values[PDF_CMAJFLT], + p->values[PDF_THREADS]); + + return true; + +cleanup: + return false; +} + +bool apps_os_collect_all_pids_freebsd(void) { + // Mark all processes as unread before collecting new data + struct pid_stat *p = NULL; + int i, procnum; + + static size_t procbase_size = 0; + static struct kinfo_proc *procbase = NULL; + + size_t new_procbase_size; + + int mib[3] = { CTL_KERN, KERN_PROC, KERN_PROC_PROC }; + if (unlikely(sysctl(mib, 3, NULL, &new_procbase_size, NULL, 0))) { + netdata_log_error("sysctl error: Can't get processes data size"); + return false; + } + + // give it some air for processes that may be started + // during this little time. 
+ new_procbase_size += 100 * sizeof(struct kinfo_proc); + + // increase the buffer if needed + if(new_procbase_size > procbase_size) { + procbase_size = new_procbase_size; + procbase = reallocz(procbase, procbase_size); + } + + // sysctl() gets from new_procbase_size the buffer size + // and also returns to it the amount of data filled in + new_procbase_size = procbase_size; + + // get the processes from the system + if (unlikely(sysctl(mib, 3, procbase, &new_procbase_size, NULL, 0))) { + netdata_log_error("sysctl error: Can't get processes data"); + return false; + } + + // based on the amount of data filled in + // calculate the number of processes we got + procnum = new_procbase_size / sizeof(struct kinfo_proc); + + get_current_time(); + + for (i = 0 ; i < procnum ; ++i) { + pid_t pid = procbase[i].ki_pid; + if (pid <= 0) continue; + incrementally_collect_data_for_pid(pid, &procbase[i]); + } + + return true; +} + +#endif diff --git a/src/collectors/apps.plugin/apps_os_linux.c b/src/collectors/apps.plugin/apps_os_linux.c new file mode 100644 index 00000000000000..bda64c1a6c52da --- /dev/null +++ b/src/collectors/apps.plugin/apps_os_linux.c @@ -0,0 +1,771 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "apps_plugin.h" + +#if defined(OS_LINUX) + +#define MAX_PROC_PID_LIMITS 8192 +#define PROC_PID_LIMITS_MAX_OPEN_FILES_KEY "\nMax open files " + +int max_fds_cache_seconds = 60; +kernel_uint_t system_uptime_secs; + +void apps_os_init_linux(void) { + ; +} + +// -------------------------------------------------------------------------------------------------------------------- +// /proc/pid/fd + +struct arl_callback_ptr { + struct pid_stat *p; + procfile *ff; + size_t line; +}; + +bool apps_os_read_pid_fds_linux(struct pid_stat *p, void *ptr __maybe_unused) { + if(unlikely(!p->fds_dirname)) { + char dirname[FILENAME_MAX+1]; + snprintfz(dirname, FILENAME_MAX, "%s/proc/%d/fd", netdata_configured_host_prefix, p->pid); + p->fds_dirname = strdupz(dirname); + } + 
+ DIR *fds = opendir(p->fds_dirname); + if(unlikely(!fds)) return false; + + struct dirent *de; + char linkname[FILENAME_MAX + 1]; + + // we make all pid fds negative, so that + // we can detect unused file descriptors + // at the end, to free them + make_all_pid_fds_negative(p); + + while((de = readdir(fds))) { + // we need only files with numeric names + + if(unlikely(de->d_name[0] < '0' || de->d_name[0] > '9')) + continue; + + // get its number + int fdid = (int) str2l(de->d_name); + if(unlikely(fdid < 0)) continue; + + // check if the fds array is small + if(unlikely((size_t)fdid >= p->fds_size)) { + // it is small, extend it + + debug_log("extending fd memory slots for %s from %d to %d" + , pid_stat_comm(p) + , p->fds_size + , fdid + MAX_SPARE_FDS + ); + + p->fds = reallocz(p->fds, (fdid + MAX_SPARE_FDS) * sizeof(struct pid_fd)); + + // and initialize it + init_pid_fds(p, p->fds_size, (fdid + MAX_SPARE_FDS) - p->fds_size); + p->fds_size = (size_t)fdid + MAX_SPARE_FDS; + } + + if(unlikely(p->fds[fdid].fd < 0 && de->d_ino != p->fds[fdid].inode)) { + // inodes do not match, clear the previous entry + inodes_changed_counter++; + file_descriptor_not_used(-p->fds[fdid].fd); + clear_pid_fd(&p->fds[fdid]); + } + + if(p->fds[fdid].fd < 0 && p->fds[fdid].cache_iterations_counter > 0) { + p->fds[fdid].fd = -p->fds[fdid].fd; + p->fds[fdid].cache_iterations_counter--; + continue; + } + + if(unlikely(!p->fds[fdid].filename)) { + filenames_allocated_counter++; + char fdname[FILENAME_MAX + 1]; + snprintfz(fdname, FILENAME_MAX, "%s/proc/%d/fd/%s", netdata_configured_host_prefix, p->pid, de->d_name); + p->fds[fdid].filename = strdupz(fdname); + } + + file_counter++; + ssize_t l = readlink(p->fds[fdid].filename, linkname, FILENAME_MAX); + if(unlikely(l == -1)) { + // cannot read the link + + if(debug_enabled || (p->target && p->target->debug_enabled)) + netdata_log_error("Cannot read link %s", p->fds[fdid].filename); + + if(unlikely(p->fds[fdid].fd < 0)) { + 
file_descriptor_not_used(-p->fds[fdid].fd); + clear_pid_fd(&p->fds[fdid]); + } + + continue; + } + else + linkname[l] = '\0'; + + uint32_t link_hash = simple_hash(linkname); + + if(unlikely(p->fds[fdid].fd < 0 && p->fds[fdid].link_hash != link_hash)) { + // the link changed + links_changed_counter++; + file_descriptor_not_used(-p->fds[fdid].fd); + clear_pid_fd(&p->fds[fdid]); + } + + if(unlikely(p->fds[fdid].fd == 0)) { + // we don't know this fd, get it + + // if another process already has this, we will get + // the same id + p->fds[fdid].fd = (int)file_descriptor_find_or_add(linkname, link_hash); + p->fds[fdid].inode = de->d_ino; + p->fds[fdid].link_hash = link_hash; + } + else { + // else make it positive again, we need it + p->fds[fdid].fd = -p->fds[fdid].fd; + } + + // caching control + // without this we read all the files on every iteration + if(max_fds_cache_seconds > 0) { + size_t spread = ((size_t)max_fds_cache_seconds > 10) ? 10 : (size_t)max_fds_cache_seconds; + + // cache it for a few iterations + size_t max = ((size_t) max_fds_cache_seconds + (fdid % spread)) / (size_t) update_every; + p->fds[fdid].cache_iterations_reset++; + + if(unlikely(p->fds[fdid].cache_iterations_reset % spread == (size_t) fdid % spread)) + p->fds[fdid].cache_iterations_reset++; + + if(unlikely((fdid <= 2 && p->fds[fdid].cache_iterations_reset > 5) || + p->fds[fdid].cache_iterations_reset > max)) { + // for stdin, stdout, stderr (fdid <= 2) we have checked a few times, or if it goes above the max, goto max + p->fds[fdid].cache_iterations_reset = max; + } + + p->fds[fdid].cache_iterations_counter = p->fds[fdid].cache_iterations_reset; + } + } + + closedir(fds); + + return true; +} + +// -------------------------------------------------------------------------------------------------------------------- +// /proc/meminfo + +uint64_t apps_os_get_total_memory_linux(void) { + uint64_t ret = 0; + + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/proc/meminfo", 
netdata_configured_host_prefix); + + procfile *ff = procfile_open(filename, ": \t", PROCFILE_FLAG_DEFAULT); + if(!ff) + return ret; + + ff = procfile_readall(ff); + if(!ff) + return ret; + + size_t line, lines = procfile_lines(ff); + + for(line = 0; line < lines ;line++) { + size_t words = procfile_linewords(ff, line); + if(words == 3 && strcmp(procfile_lineword(ff, line, 0), "MemTotal") == 0 && strcmp(procfile_lineword(ff, line, 2), "kB") == 0) { + ret = str2ull(procfile_lineword(ff, line, 1), NULL) * 1024; + break; + } + } + + procfile_close(ff); + + return ret; +} + +// -------------------------------------------------------------------------------------------------------------------- +// /proc/pid/cmdline + +bool apps_os_get_pid_cmdline_linux(struct pid_stat *p, char *cmdline, size_t bytes) { + if(unlikely(!p->cmdline_filename)) { + char filename[FILENAME_MAX]; + snprintfz(filename, FILENAME_MAX, "%s/proc/%d/cmdline", netdata_configured_host_prefix, p->pid); + p->cmdline_filename = strdupz(filename); + } + + int fd = open(p->cmdline_filename, procfile_open_flags, 0666); + if(unlikely(fd == -1)) + return false; + + ssize_t i, b = read(fd, cmdline, bytes - 1); + close(fd); + + if(unlikely(b < 0)) + return false; + + cmdline[b] = '\0'; + for(i = 0; i < b ; i++) + if(unlikely(!cmdline[i])) cmdline[i] = ' '; + + // remove trailing spaces + while(b > 0 && cmdline[b - 1] == ' ') + cmdline[--b] = '\0'; + + return true; +} + +// -------------------------------------------------------------------------------------------------------------------- +// /proc/pid/io + +bool apps_os_read_pid_io_linux(struct pid_stat *p, void *ptr __maybe_unused) { + static procfile *ff = NULL; + + if(unlikely(!p->io_filename)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/proc/%d/io", netdata_configured_host_prefix, p->pid); + p->io_filename = strdupz(filename); + } + + // open the file + ff = procfile_reopen(ff, p->io_filename, NULL, 
PROCFILE_FLAG_NO_ERROR_ON_FILE_IO); + if(unlikely(!ff)) goto cleanup; + + ff = procfile_readall(ff); + if(unlikely(!ff)) goto cleanup; + + pid_incremental_rate(io, PDF_LREAD, str2kernel_uint_t(procfile_lineword(ff, 0, 1))); + pid_incremental_rate(io, PDF_LWRITE, str2kernel_uint_t(procfile_lineword(ff, 1, 1))); + pid_incremental_rate(io, PDF_OREAD, str2kernel_uint_t(procfile_lineword(ff, 2, 1))); + pid_incremental_rate(io, PDF_OWRITE, str2kernel_uint_t(procfile_lineword(ff, 3, 1))); + pid_incremental_rate(io, PDF_PREAD, str2kernel_uint_t(procfile_lineword(ff, 4, 1))); + pid_incremental_rate(io, PDF_PWRITE, str2kernel_uint_t(procfile_lineword(ff, 5, 1))); + + return true; + +cleanup: + return false; +} + +// -------------------------------------------------------------------------------------------------------------------- +// /proc/pid/limits + +static inline kernel_uint_t get_proc_pid_limits_limit(char *buf, const char *key, size_t key_len, kernel_uint_t def) { + char *line = strstr(buf, key); + if(!line) + return def; + + char *v = &line[key_len]; + while(isspace((uint8_t)*v)) v++; + + if(strcmp(v, "unlimited") == 0) + return 0; + + return str2ull(v, NULL); +} + +bool apps_os_read_pid_limits_linux(struct pid_stat *p, void *ptr __maybe_unused) { + static char proc_pid_limits_buffer[MAX_PROC_PID_LIMITS + 1]; + bool ret = false; + bool read_limits = false; + + errno_clear(); + proc_pid_limits_buffer[0] = '\0'; + + kernel_uint_t all_fds = pid_openfds_sum(p); + if(all_fds < p->limits.max_open_files / 2 && p->io_collected_usec > p->last_limits_collected_usec && p->io_collected_usec - p->last_limits_collected_usec <= 60 * USEC_PER_SEC) { + // too frequent, we want to collect limits once per minute + ret = true; + goto cleanup; + } + + if(unlikely(!p->limits_filename)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/proc/%d/limits", netdata_configured_host_prefix, p->pid); + p->limits_filename = strdupz(filename); + } + + int fd = 
open(p->limits_filename, procfile_open_flags, 0666); + if(unlikely(fd == -1)) goto cleanup; + + ssize_t bytes = read(fd, proc_pid_limits_buffer, MAX_PROC_PID_LIMITS); + close(fd); + + if(bytes <= 0) + goto cleanup; + + // make it '\0' terminated + if(bytes < MAX_PROC_PID_LIMITS) + proc_pid_limits_buffer[bytes] = '\0'; + else + proc_pid_limits_buffer[MAX_PROC_PID_LIMITS - 1] = '\0'; + + p->limits.max_open_files = get_proc_pid_limits_limit(proc_pid_limits_buffer, PROC_PID_LIMITS_MAX_OPEN_FILES_KEY, sizeof(PROC_PID_LIMITS_MAX_OPEN_FILES_KEY) - 1, 0); + if(p->limits.max_open_files == 1) { + // it seems a bug in the kernel or something similar + // it sets max open files to 1 but the number of files + // the process has open are more than 1... + // https://github.com/netdata/netdata/issues/15443 + p->limits.max_open_files = 0; + ret = true; + goto cleanup; + } + + p->last_limits_collected_usec = p->io_collected_usec; + read_limits = true; + + ret = true; + +cleanup: + if(p->limits.max_open_files) + p->openfds_limits_percent = (NETDATA_DOUBLE)all_fds * 100.0 / (NETDATA_DOUBLE)p->limits.max_open_files; + else + p->openfds_limits_percent = 0.0; + + if(p->openfds_limits_percent > 100.0) { + if(!(p->log_thrown & PID_LOG_LIMITS_DETAIL)) { + char *line; + + if(!read_limits) { + proc_pid_limits_buffer[0] = '\0'; + line = "NOT READ"; + } + else { + line = strstr(proc_pid_limits_buffer, PROC_PID_LIMITS_MAX_OPEN_FILES_KEY); + if (line) { + line++; // skip the initial newline + + char *end = strchr(line, '\n'); + if (end) + *end = '\0'; + } + } + + netdata_log_info( + "FDS_LIMITS: PID %d (%s) is using " + "%0.2f %% of its fds limits, " + "open fds = %"PRIu64 "(" + "files = %"PRIu64 ", " + "pipes = %"PRIu64 ", " + "sockets = %"PRIu64", " + "inotifies = %"PRIu64", " + "eventfds = %"PRIu64", " + "timerfds = %"PRIu64", " + "signalfds = %"PRIu64", " + "eventpolls = %"PRIu64" " + "other = %"PRIu64" " + "), open fds limit = %"PRIu64", " + "%s, " + "original line [%s]", + p->pid, 
pid_stat_comm(p), p->openfds_limits_percent, all_fds, + p->openfds.files, + p->openfds.pipes, + p->openfds.sockets, + p->openfds.inotifies, + p->openfds.eventfds, + p->openfds.timerfds, + p->openfds.signalfds, + p->openfds.eventpolls, + p->openfds.other, + p->limits.max_open_files, + read_limits ? "and we have read the limits AFTER counting the fds" + : "but we have read the limits BEFORE counting the fds", + line); + + p->log_thrown |= PID_LOG_LIMITS_DETAIL; + } + } + else + p->log_thrown &= ~PID_LOG_LIMITS_DETAIL; + + return ret; +} + +// -------------------------------------------------------------------------------------------------------------------- +// /proc/pid/status + +void arl_callback_status_uid(const char *name, uint32_t hash, const char *value, void *dst) { + (void)name; (void)hash; (void)value; + struct arl_callback_ptr *aptr = (struct arl_callback_ptr *)dst; + if(unlikely(procfile_linewords(aptr->ff, aptr->line) < 5)) return; + + //const char *real_uid = procfile_lineword(aptr->ff, aptr->line, 1); + const char *effective_uid = procfile_lineword(aptr->ff, aptr->line, 2); + //const char *saved_uid = procfile_lineword(aptr->ff, aptr->line, 3); + //const char *filesystem_uid = procfile_lineword(aptr->ff, aptr->line, 4); + + if(likely(effective_uid && *effective_uid)) + aptr->p->uid = (uid_t)str2l(effective_uid); +} + +void arl_callback_status_gid(const char *name, uint32_t hash, const char *value, void *dst) { + (void)name; (void)hash; (void)value; + struct arl_callback_ptr *aptr = (struct arl_callback_ptr *)dst; + if(unlikely(procfile_linewords(aptr->ff, aptr->line) < 5)) return; + + //const char *real_gid = procfile_lineword(aptr->ff, aptr->line, 1); + const char *effective_gid = procfile_lineword(aptr->ff, aptr->line, 2); + //const char *saved_gid = procfile_lineword(aptr->ff, aptr->line, 3); + //const char *filesystem_gid = procfile_lineword(aptr->ff, aptr->line, 4); + + if(likely(effective_gid && *effective_gid)) + aptr->p->gid = 
(gid_t)str2l(effective_gid); +} + +void arl_callback_status_vmsize(const char *name, uint32_t hash, const char *value, void *dst) { + (void)name; (void)hash; (void)value; + struct arl_callback_ptr *aptr = (struct arl_callback_ptr *)dst; + if(unlikely(procfile_linewords(aptr->ff, aptr->line) < 3)) return; + + aptr->p->values[PDF_VMSIZE] = str2kernel_uint_t(procfile_lineword(aptr->ff, aptr->line, 1)) * 1024; +} + +void arl_callback_status_vmswap(const char *name, uint32_t hash, const char *value, void *dst) { + (void)name; (void)hash; (void)value; + struct arl_callback_ptr *aptr = (struct arl_callback_ptr *)dst; + if(unlikely(procfile_linewords(aptr->ff, aptr->line) < 3)) return; + + aptr->p->values[PDF_VMSWAP] = str2kernel_uint_t(procfile_lineword(aptr->ff, aptr->line, 1)) * 1024; +} + +void arl_callback_status_vmrss(const char *name, uint32_t hash, const char *value, void *dst) { + (void)name; (void)hash; (void)value; + struct arl_callback_ptr *aptr = (struct arl_callback_ptr *)dst; + if(unlikely(procfile_linewords(aptr->ff, aptr->line) < 3)) return; + + aptr->p->values[PDF_VMRSS] = str2kernel_uint_t(procfile_lineword(aptr->ff, aptr->line, 1)) * 1024; +} + +void arl_callback_status_rssfile(const char *name, uint32_t hash, const char *value, void *dst) { + (void)name; (void)hash; (void)value; + struct arl_callback_ptr *aptr = (struct arl_callback_ptr *)dst; + if(unlikely(procfile_linewords(aptr->ff, aptr->line) < 3)) return; + + aptr->p->values[PDF_RSSFILE] = str2kernel_uint_t(procfile_lineword(aptr->ff, aptr->line, 1)) * 1024; +} + +void arl_callback_status_rssshmem(const char *name, uint32_t hash, const char *value, void *dst) { + (void)name; (void)hash; (void)value; + struct arl_callback_ptr *aptr = (struct arl_callback_ptr *)dst; + if(unlikely(procfile_linewords(aptr->ff, aptr->line) < 3)) return; + + aptr->p->values[PDF_RSSSHMEM] = str2kernel_uint_t(procfile_lineword(aptr->ff, aptr->line, 1)) * 1024; +} + +void arl_callback_status_voluntary_ctxt_switches(const 
char *name, uint32_t hash, const char *value, void *dst) { + (void)name; (void)hash; (void)value; + struct arl_callback_ptr *aptr = (struct arl_callback_ptr *)dst; + if(unlikely(procfile_linewords(aptr->ff, aptr->line) < 2)) return; + + struct pid_stat *p = aptr->p; + pid_incremental_rate(stat, PDF_VOLCTX, str2kernel_uint_t(procfile_lineword(aptr->ff, aptr->line, 1))); +} + +void arl_callback_status_nonvoluntary_ctxt_switches(const char *name, uint32_t hash, const char *value, void *dst) { + (void)name; (void)hash; (void)value; + struct arl_callback_ptr *aptr = (struct arl_callback_ptr *)dst; + if(unlikely(procfile_linewords(aptr->ff, aptr->line) < 2)) return; + + struct pid_stat *p = aptr->p; + pid_incremental_rate(stat, PDF_NVOLCTX, str2kernel_uint_t(procfile_lineword(aptr->ff, aptr->line, 1))); +} + +bool apps_os_read_pid_status_linux(struct pid_stat *p, void *ptr __maybe_unused) { + static struct arl_callback_ptr arl_ptr; + static procfile *ff = NULL; + + if(unlikely(!p->status_arl)) { + p->status_arl = arl_create("/proc/pid/status", NULL, 60); + arl_expect_custom(p->status_arl, "Uid", arl_callback_status_uid, &arl_ptr); + arl_expect_custom(p->status_arl, "Gid", arl_callback_status_gid, &arl_ptr); + arl_expect_custom(p->status_arl, "VmSize", arl_callback_status_vmsize, &arl_ptr); + arl_expect_custom(p->status_arl, "VmRSS", arl_callback_status_vmrss, &arl_ptr); + arl_expect_custom(p->status_arl, "RssFile", arl_callback_status_rssfile, &arl_ptr); + arl_expect_custom(p->status_arl, "RssShmem", arl_callback_status_rssshmem, &arl_ptr); + arl_expect_custom(p->status_arl, "VmSwap", arl_callback_status_vmswap, &arl_ptr); + arl_expect_custom(p->status_arl, "voluntary_ctxt_switches", arl_callback_status_voluntary_ctxt_switches, &arl_ptr); + arl_expect_custom(p->status_arl, "nonvoluntary_ctxt_switches", arl_callback_status_nonvoluntary_ctxt_switches, &arl_ptr); + } + + if(unlikely(!p->status_filename)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, 
FILENAME_MAX, "%s/proc/%d/status", netdata_configured_host_prefix, p->pid); + p->status_filename = strdupz(filename); + } + + ff = procfile_reopen(ff, p->status_filename, (!ff)?" \t:,-()/":NULL, PROCFILE_FLAG_NO_ERROR_ON_FILE_IO); + if(unlikely(!ff)) return false; + + ff = procfile_readall(ff); + if(unlikely(!ff)) return false; + + calls_counter++; + + // let ARL use this pid + arl_ptr.p = p; + arl_ptr.ff = ff; + + size_t lines = procfile_lines(ff), l; + arl_begin(p->status_arl); + + for(l = 0; l < lines ;l++) { + // debug_log("CHECK: line %zu of %zu, key '%s' = '%s'", l, lines, procfile_lineword(ff, l, 0), procfile_lineword(ff, l, 1)); + arl_ptr.line = l; + if(unlikely(arl_check(p->status_arl, + procfile_lineword(ff, l, 0), + procfile_lineword(ff, l, 1)))) break; + } + + p->values[PDF_VMSHARED] = p->values[PDF_RSSFILE] + p->values[PDF_RSSSHMEM]; + return true; +} + +// -------------------------------------------------------------------------------------------------------------------- +// global CPU utilization + +bool apps_os_read_global_cpu_utilization_linux(void) { + static char filename[FILENAME_MAX + 1] = ""; + static procfile *ff = NULL; + static kernel_uint_t utime_raw = 0, stime_raw = 0, gtime_raw = 0, gntime_raw = 0, ntime_raw = 0; + static usec_t collected_usec = 0, last_collected_usec = 0; + + if(unlikely(!ff)) { + snprintfz(filename, FILENAME_MAX, "%s/proc/stat", netdata_configured_host_prefix); + ff = procfile_open(filename, " \t:", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) goto cleanup; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) goto cleanup; + + last_collected_usec = collected_usec; + collected_usec = now_monotonic_usec(); + + calls_counter++; + + // temporary - it is added global_ntime; + kernel_uint_t global_ntime = 0; + + incremental_rate(global_utime, utime_raw, str2kernel_uint_t(procfile_lineword(ff, 0, 1)), collected_usec, last_collected_usec, CPU_TO_NANOSECONDCORES); + incremental_rate(global_ntime, ntime_raw, 
str2kernel_uint_t(procfile_lineword(ff, 0, 2)), collected_usec, last_collected_usec, CPU_TO_NANOSECONDCORES); + incremental_rate(global_stime, stime_raw, str2kernel_uint_t(procfile_lineword(ff, 0, 3)), collected_usec, last_collected_usec, CPU_TO_NANOSECONDCORES); + incremental_rate(global_gtime, gtime_raw, str2kernel_uint_t(procfile_lineword(ff, 0, 10)), collected_usec, last_collected_usec, CPU_TO_NANOSECONDCORES); + + global_utime += global_ntime; + + if(enable_guest_charts) { + // temporary - it is added global_ntime; + kernel_uint_t global_gntime = 0; + + // guest nice time, on guest time + incremental_rate(global_gntime, gntime_raw, str2kernel_uint_t(procfile_lineword(ff, 0, 11)), collected_usec, last_collected_usec, 1); + + global_gtime += global_gntime; + + // remove guest time from user time + global_utime -= (global_utime > global_gtime) ? global_gtime : global_utime; + } + + if(unlikely(global_iterations_counter == 1)) { + global_utime = 0; + global_stime = 0; + global_gtime = 0; + } + + return true; + +cleanup: + global_utime = 0; + global_stime = 0; + global_gtime = 0; + return false; +} + +// -------------------------------------------------------------------------------------------------------------------- +// /proc/pid/stat + +static inline void update_proc_state_count(char proc_stt) { + switch (proc_stt) { + case 'S': + proc_state_count[PROC_STATUS_SLEEPING] += 1; + break; + case 'R': + proc_state_count[PROC_STATUS_RUNNING] += 1; + break; + case 'D': + proc_state_count[PROC_STATUS_SLEEPING_D] += 1; + break; + case 'Z': + proc_state_count[PROC_STATUS_ZOMBIE] += 1; + break; + case 'T': + proc_state_count[PROC_STATUS_STOPPED] += 1; + break; + default: + break; + } +} + +bool apps_os_read_pid_stat_linux(struct pid_stat *p, void *ptr __maybe_unused) { + static procfile *ff = NULL; + + if(unlikely(!p->stat_filename)) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/proc/%d/stat", netdata_configured_host_prefix, p->pid); + 
p->stat_filename = strdupz(filename); + } + + bool set_quotes = (!ff) ? true : false; + + ff = procfile_reopen(ff, p->stat_filename, NULL, PROCFILE_FLAG_NO_ERROR_ON_FILE_IO); + if(unlikely(!ff)) goto cleanup; + + // if(set_quotes) procfile_set_quotes(ff, "()"); + if(unlikely(set_quotes)) + procfile_set_open_close(ff, "(", ")"); + + ff = procfile_readall(ff); + if(unlikely(!ff)) goto cleanup; + + // p->pid = str2pid_t(procfile_lineword(ff, 0, 0)); + char *comm = procfile_lineword(ff, 0, 1); + p->state = *(procfile_lineword(ff, 0, 2)); + p->ppid = (int32_t)str2pid_t(procfile_lineword(ff, 0, 3)); + // p->pgrp = (int32_t)str2pid_t(procfile_lineword(ff, 0, 4)); + // p->session = (int32_t)str2pid_t(procfile_lineword(ff, 0, 5)); + // p->tty_nr = (int32_t)str2pid_t(procfile_lineword(ff, 0, 6)); + // p->tpgid = (int32_t)str2pid_t(procfile_lineword(ff, 0, 7)); + // p->flags = str2uint64_t(procfile_lineword(ff, 0, 8)); + + update_pid_comm(p, comm); + + pid_incremental_rate(stat, PDF_MINFLT, str2kernel_uint_t(procfile_lineword(ff, 0, 9))); + pid_incremental_rate(stat, PDF_CMINFLT, str2kernel_uint_t(procfile_lineword(ff, 0, 10))); + pid_incremental_rate(stat, PDF_MAJFLT, str2kernel_uint_t(procfile_lineword(ff, 0, 11))); + pid_incremental_rate(stat, PDF_CMAJFLT, str2kernel_uint_t(procfile_lineword(ff, 0, 12))); + pid_incremental_cpu(stat, PDF_UTIME, str2kernel_uint_t(procfile_lineword(ff, 0, 13))); + pid_incremental_cpu(stat, PDF_STIME, str2kernel_uint_t(procfile_lineword(ff, 0, 14))); + pid_incremental_cpu(stat, PDF_CUTIME, str2kernel_uint_t(procfile_lineword(ff, 0, 15))); + pid_incremental_cpu(stat, PDF_CSTIME, str2kernel_uint_t(procfile_lineword(ff, 0, 16))); + // p->priority = str2kernel_uint_t(procfile_lineword(ff, 0, 17)); + // p->nice = str2kernel_uint_t(procfile_lineword(ff, 0, 18)); + p->values[PDF_THREADS] = (int32_t) str2uint32_t(procfile_lineword(ff, 0, 19), NULL); + // p->itrealvalue = str2kernel_uint_t(procfile_lineword(ff, 0, 20)); + kernel_uint_t 
collected_starttime = str2kernel_uint_t(procfile_lineword(ff, 0, 21)) / system_hz; + p->values[PDF_UPTIME] = (system_uptime_secs > collected_starttime)?(system_uptime_secs - collected_starttime):0; + // p->vsize = str2kernel_uint_t(procfile_lineword(ff, 0, 22)); + // p->rss = str2kernel_uint_t(procfile_lineword(ff, 0, 23)); + // p->rsslim = str2kernel_uint_t(procfile_lineword(ff, 0, 24)); + // p->starcode = str2kernel_uint_t(procfile_lineword(ff, 0, 25)); + // p->endcode = str2kernel_uint_t(procfile_lineword(ff, 0, 26)); + // p->startstack = str2kernel_uint_t(procfile_lineword(ff, 0, 27)); + // p->kstkesp = str2kernel_uint_t(procfile_lineword(ff, 0, 28)); + // p->kstkeip = str2kernel_uint_t(procfile_lineword(ff, 0, 29)); + // p->signal = str2kernel_uint_t(procfile_lineword(ff, 0, 30)); + // p->blocked = str2kernel_uint_t(procfile_lineword(ff, 0, 31)); + // p->sigignore = str2kernel_uint_t(procfile_lineword(ff, 0, 32)); + // p->sigcatch = str2kernel_uint_t(procfile_lineword(ff, 0, 33)); + // p->wchan = str2kernel_uint_t(procfile_lineword(ff, 0, 34)); + // p->nswap = str2kernel_uint_t(procfile_lineword(ff, 0, 35)); + // p->cnswap = str2kernel_uint_t(procfile_lineword(ff, 0, 36)); + // p->exit_signal = str2kernel_uint_t(procfile_lineword(ff, 0, 37)); + // p->processor = str2kernel_uint_t(procfile_lineword(ff, 0, 38)); + // p->rt_priority = str2kernel_uint_t(procfile_lineword(ff, 0, 39)); + // p->policy = str2kernel_uint_t(procfile_lineword(ff, 0, 40)); + // p->delayacct_blkio_ticks = str2kernel_uint_t(procfile_lineword(ff, 0, 41)); + + if(enable_guest_charts) { + pid_incremental_cpu(stat, PDF_GTIME, str2kernel_uint_t(procfile_lineword(ff, 0, 42))); + pid_incremental_cpu(stat, PDF_CGTIME, str2kernel_uint_t(procfile_lineword(ff, 0, 43))); + + if (show_guest_time || p->values[PDF_GTIME] || p->values[PDF_CGTIME]) { + p->values[PDF_UTIME] -= (p->values[PDF_UTIME] >= p->values[PDF_GTIME]) ? 
p->values[PDF_GTIME] : p->values[PDF_UTIME]; + p->values[PDF_CUTIME] -= (p->values[PDF_CUTIME] >= p->values[PDF_CGTIME]) ? p->values[PDF_CGTIME] : p->values[PDF_CUTIME]; + show_guest_time = true; + } + } + + if(unlikely(debug_enabled || (p->target && p->target->debug_enabled))) + debug_log_int("READ PROC/PID/STAT: %s/proc/%d/stat, process: '%s' on target '%s' (dt=%llu) VALUES: utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", cutime=" KERNEL_UINT_FORMAT ", cstime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", cminflt=" KERNEL_UINT_FORMAT ", cmajflt=" KERNEL_UINT_FORMAT ", threads=" KERNEL_UINT_FORMAT, + netdata_configured_host_prefix, p->pid, pid_stat_comm(p), (p->target)?string2str(p->target->name):"UNSET", p->stat_collected_usec - p->last_stat_collected_usec, + p->values[PDF_UTIME], + p->values[PDF_STIME], + p->values[PDF_CUTIME], + p->values[PDF_CSTIME], + p->values[PDF_MINFLT], + p->values[PDF_MAJFLT], + p->values[PDF_CMINFLT], + p->values[PDF_CMAJFLT], + p->values[PDF_THREADS]); + + update_proc_state_count(p->state); + return true; + +cleanup: + return false; +} + +// ---------------------------------------------------------------------------- + +// 1. read all files in /proc +// 2. for each numeric directory: +// i. read /proc/pid/stat +// ii. read /proc/pid/status +// iii. read /proc/pid/io (requires root access) +// iv. read the entries in directory /proc/pid/fd (requires root access) +// for each entry: +// a. find or create a struct file_descriptor +// b. 
cleanup any old/unused file_descriptors + +// after all these, some pids may be linked to targets, while others may not + +// in case of errors, only 1 every 1000 errors is printed +// to avoid filling up all disk space +// if debug is enabled, all errors are printed + +bool apps_os_collect_all_pids_linux(void) { +#if (PROCESSES_HAVE_STATE == 1) + // clear process state counter + memset(proc_state_count, 0, sizeof proc_state_count); +#endif + + // preload the parents and then their children + collect_parents_before_children(); + + static char uptime_filename[FILENAME_MAX + 1] = ""; + if(*uptime_filename == '\0') + snprintfz(uptime_filename, FILENAME_MAX, "%s/proc/uptime", netdata_configured_host_prefix); + + system_uptime_secs = (kernel_uint_t)(uptime_msec(uptime_filename) / MSEC_PER_SEC); + + char dirname[FILENAME_MAX + 1]; + + snprintfz(dirname, FILENAME_MAX, "%s/proc", netdata_configured_host_prefix); + DIR *dir = opendir(dirname); + if(!dir) return false; + + struct dirent *de = NULL; + + while((de = readdir(dir))) { + char *endptr = de->d_name; + + if(unlikely(de->d_type != DT_DIR || de->d_name[0] < '0' || de->d_name[0] > '9')) + continue; + + pid_t pid = (pid_t) strtoul(de->d_name, &endptr, 10); + + // make sure we read a valid number + if(unlikely(endptr == de->d_name || *endptr != '\0')) + continue; + + incrementally_collect_data_for_pid(pid, NULL); + } + closedir(dir); + + return true; +} +#endif diff --git a/src/collectors/apps.plugin/apps_os_macos.c b/src/collectors/apps.plugin/apps_os_macos.c new file mode 100644 index 00000000000000..746153ba5a6b4d --- /dev/null +++ b/src/collectors/apps.plugin/apps_os_macos.c @@ -0,0 +1,334 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "apps_plugin.h" + +#if defined(OS_MACOS) + +usec_t system_current_time_ut; +mach_timebase_info_data_t mach_info; + +void apps_os_init_macos(void) { + mach_timebase_info(&mach_info); +} + +uint64_t apps_os_get_total_memory_macos(void) { + uint64_t ret = 0; + int mib[2] = 
{CTL_HW, HW_MEMSIZE}; + size_t size = sizeof(ret); + if (sysctl(mib, 2, &ret, &size, NULL, 0) == -1) { + netdata_log_error("Failed to get total memory using sysctl"); + return 0; + } + + return ret; +} + +bool apps_os_read_pid_fds_macos(struct pid_stat *p, void *ptr __maybe_unused) { + static struct proc_fdinfo *fds = NULL; + static int fdsCapacity = 0; + + int bufferSize = proc_pidinfo(p->pid, PROC_PIDLISTFDS, 0, NULL, 0); + if (bufferSize <= 0) { + netdata_log_error("Failed to get the size of file descriptors for PID %d", p->pid); + return false; + } + + // Resize buffer if necessary + if (bufferSize > fdsCapacity) { + if(fds) + freez(fds); + + fds = mallocz(bufferSize); + fdsCapacity = bufferSize; + } + + int num_fds = proc_pidinfo(p->pid, PROC_PIDLISTFDS, 0, fds, bufferSize) / PROC_PIDLISTFD_SIZE; + if (num_fds <= 0) { + netdata_log_error("Failed to get the file descriptors for PID %d", p->pid); + return false; + } + + for (int i = 0; i < num_fds; i++) { + switch (fds[i].proc_fdtype) { + case PROX_FDTYPE_VNODE: { + struct vnode_fdinfowithpath vi; + if (proc_pidfdinfo(p->pid, fds[i].proc_fd, PROC_PIDFDVNODEPATHINFO, &vi, sizeof(vi)) > 0) + p->openfds.files++; + else + p->openfds.other++; + + break; + } + case PROX_FDTYPE_SOCKET: { + p->openfds.sockets++; + break; + } + case PROX_FDTYPE_PIPE: { + p->openfds.pipes++; + break; + } + + default: + p->openfds.other++; + break; + } + } + + return true; +} + +bool apps_os_get_pid_cmdline_macos(struct pid_stat *p, char *cmdline, size_t maxBytes) { + int mib[3] = {CTL_KERN, KERN_PROCARGS2, p->pid}; + static char *args = NULL; + static size_t size = 0; + + size_t new_size; + if (sysctl(mib, 3, NULL, &new_size, NULL, 0) == -1) { + return false; + } + + if (new_size > size) { + if (args) + freez(args); + + args = (char *)mallocz(new_size); + size = new_size; + } + + memset(cmdline, 0, new_size < maxBytes ? 
new_size : maxBytes); + + size_t used_size = size; + if (sysctl(mib, 3, args, &used_size, NULL, 0) == -1) + return false; + + int argc; + memcpy(&argc, args, sizeof(argc)); + char *ptr = args + sizeof(argc); + used_size -= sizeof(argc); + + // Skip the executable path + while (*ptr && used_size > 0) { + ptr++; + used_size--; + } + + // Copy only the arguments to the cmdline buffer, skipping the environment variables + size_t i = 0, copied_args = 0; + bool inArg = false; + for (; used_size > 0 && i < maxBytes - 1 && copied_args < argc; --used_size, ++ptr) { + if (*ptr == '\0') { + if (inArg) { + cmdline[i++] = ' '; // Replace nulls between arguments with spaces + inArg = false; + copied_args++; + } + } else { + cmdline[i++] = *ptr; + inArg = true; + } + } + + if (i > 0 && cmdline[i - 1] == ' ') + i--; // Remove the trailing space if present + + cmdline[i] = '\0'; // Null-terminate the string + + return true; +} + +bool apps_os_read_pid_io_macos(struct pid_stat *p, void *ptr) { + struct pid_info *pi = ptr; + + // On MacOS, the proc_pid_rusage provides disk_io_statistics which includes io bytes read and written + // but does not provide the same level of detail as Linux, like separating logical and physical I/O bytes. 
+ pid_incremental_rate(io, PDF_LREAD, pi->rusageinfo.ri_diskio_bytesread); + pid_incremental_rate(io, PDF_LWRITE, pi->rusageinfo.ri_diskio_byteswritten); + + return true; +} + +bool apps_os_read_pid_limits_macos(struct pid_stat *p __maybe_unused, void *ptr __maybe_unused) { + return false; +} + +bool apps_os_read_pid_status_macos(struct pid_stat *p, void *ptr) { + struct pid_info *pi = ptr; + + p->uid = pi->bsdinfo.pbi_uid; + p->gid = pi->bsdinfo.pbi_gid; + p->values[PDF_VMSIZE] = pi->taskinfo.pti_virtual_size; + p->values[PDF_VMRSS] = pi->taskinfo.pti_resident_size; + // p->values[PDF_VMSWAP] = rusageinfo.ri_swapins + rusageinfo.ri_swapouts; // This is not directly available, consider an alternative representation + p->values[PDF_VOLCTX] = pi->taskinfo.pti_csw; + // p->values[PDF_NVOLCTX] = taskinfo.pti_nivcsw; + + return true; +} + +static inline void get_current_time(void) { + struct timeval current_time; + gettimeofday(&current_time, NULL); + system_current_time_ut = timeval_usec(&current_time); +} + +// bool apps_os_read_global_cpu_utilization_macos(void) { +// static kernel_uint_t utime_raw = 0, stime_raw = 0, ntime_raw = 0; +// static usec_t collected_usec = 0, last_collected_usec = 0; +// +// host_cpu_load_info_data_t cpuinfo; +// mach_msg_type_number_t count = HOST_CPU_LOAD_INFO_COUNT; +// +// if (host_statistics(mach_host_self(), HOST_CPU_LOAD_INFO, (host_info_t)&cpuinfo, &count) != KERN_SUCCESS) { +// // Handle error +// goto cleanup; +// } +// +// last_collected_usec = collected_usec; +// collected_usec = now_monotonic_usec(); +// +// calls_counter++; +// +// // Convert ticks to time +// // Note: MacOS does not separate nice time from user time in the CPU stats, so you might need to adjust this logic +// kernel_uint_t global_ntime = 0; // Assuming you want to keep track of nice time separately +// +// incremental_rate(global_utime, utime_raw, cpuinfo.cpu_ticks[CPU_STATE_USER] + cpuinfo.cpu_ticks[CPU_STATE_NICE], collected_usec, last_collected_usec, 
CPU_TO_NANOSECONDCORES); +// incremental_rate(global_ntime, ntime_raw, cpuinfo.cpu_ticks[CPU_STATE_NICE], collected_usec, last_collected_usec, CPU_TO_NANOSECONDCORES); +// incremental_rate(global_stime, stime_raw, cpuinfo.cpu_ticks[CPU_STATE_SYSTEM], collected_usec, last_collected_usec, CPU_TO_NANOSECONDCORES); +// +// global_utime += global_ntime; +// +// if(unlikely(global_iterations_counter == 1)) { +// global_utime = 0; +// global_stime = 0; +// global_gtime = 0; +// } +// +// return 1; +// +// cleanup: +// global_utime = 0; +// global_stime = 0; +// global_gtime = 0; +// return 0; +// } + +bool apps_os_read_pid_stat_macos(struct pid_stat *p, void *ptr) { + struct pid_info *pi = ptr; + + p->ppid = pi->proc.kp_eproc.e_ppid; + + // Update command name and target if changed + char comm[PROC_PIDPATHINFO_MAXSIZE]; + int ret = proc_name(p->pid, comm, sizeof(comm)); + if (ret <= 0) + strncpyz(comm, "unknown", sizeof(comm) - 1); + + update_pid_comm(p, comm); + + kernel_uint_t userCPU = (pi->taskinfo.pti_total_user * mach_info.numer) / mach_info.denom; + kernel_uint_t systemCPU = (pi->taskinfo.pti_total_system * mach_info.numer) / mach_info.denom; + + // Map the values from taskinfo to the pid_stat structure + pid_incremental_rate(stat, PDF_MINFLT, pi->taskinfo.pti_faults); + pid_incremental_rate(stat, PDF_MAJFLT, pi->taskinfo.pti_pageins); + pid_incremental_cpu(stat, PDF_UTIME, userCPU); + pid_incremental_cpu(stat, PDF_STIME, systemCPU); + p->values[PDF_THREADS] = pi->taskinfo.pti_threadnum; + + usec_t started_ut = timeval_usec(&pi->proc.kp_proc.p_starttime); + p->values[PDF_UPTIME] = (system_current_time_ut > started_ut) ? (system_current_time_ut - started_ut) / USEC_PER_SEC : 0; + + // Note: Some values such as guest time, cutime, cstime, etc., are not directly available in MacOS. + // You might need to approximate or leave them unset depending on your needs. 
+ if(unlikely(debug_enabled || (p->target && p->target->debug_enabled))) { + debug_log_int("READ PROC/PID/STAT for MacOS: process: '%s' on target '%s' VALUES: utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", threads=" KERNEL_UINT_FORMAT, + pid_stat_comm(p), (p->target) ? string2str(p->target->name) : "UNSET", + p->values[PDF_UTIME], + p->values[PDF_STIME], + p->values[PDF_MINFLT], + p->values[PDF_MAJFLT], + p->values[PDF_THREADS]); + } + + // MacOS doesn't have a direct concept of process state like Linux, + // so updating process state count might need a different approach. + + return true; +} + +bool apps_os_collect_all_pids_macos(void) { + // Mark all processes as unread before collecting new data + struct pid_stat *p; + static pid_t *pids = NULL; + static int allocatedProcessCount = 0; + + // Get the number of processes + int numberOfProcesses = proc_listpids(PROC_ALL_PIDS, 0, NULL, 0); + if (numberOfProcesses <= 0) { + netdata_log_error("Failed to retrieve the process count"); + return false; + } + + // Allocate or reallocate space to hold all the process IDs if necessary + if (numberOfProcesses > allocatedProcessCount) { + // Allocate additional space to avoid frequent reallocations + allocatedProcessCount = numberOfProcesses + 100; + pids = reallocz(pids, allocatedProcessCount * sizeof(pid_t)); + } + + // this is required, otherwise the PIDs become totally random + memset(pids, 0, allocatedProcessCount * sizeof(pid_t)); + + // get the list of PIDs; proc_listpids() returns the number of bytes written, so convert it to a count + numberOfProcesses = proc_listpids(PROC_ALL_PIDS, 0, pids, allocatedProcessCount * sizeof(pid_t)) / (int)sizeof(pid_t); + if (numberOfProcesses <= 0) { + netdata_log_error("Failed to retrieve the process IDs"); + return false; + } + + get_current_time(); + + // Collect data for each process + for (int i = 0; i < numberOfProcesses; ++i) { + pid_t pid = pids[i]; + if (pid <= 0) continue; + + struct pid_info pi = { 0 }; + + int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PID, pid}; + 
+ size_t procSize = sizeof(pi.proc); + if(sysctl(mib, 4, &pi.proc, &procSize, NULL, 0) == -1) { + netdata_log_error("Failed to get proc for PID %d", pid); + continue; + } + if(procSize == 0) // no such process + continue; + + int st = proc_pidinfo(pid, PROC_PIDTASKINFO, 0, &pi.taskinfo, sizeof(pi.taskinfo)); + if (st <= 0) { + netdata_log_error("Failed to get task info for PID %d", pid); + continue; + } + + st = proc_pidinfo(pid, PROC_PIDTBSDINFO, 0, &pi.bsdinfo, sizeof(pi.bsdinfo)); + if (st <= 0) { + netdata_log_error("Failed to get BSD info for PID %d", pid); + continue; + } + + st = proc_pid_rusage(pid, RUSAGE_INFO_V4, (rusage_info_t *)&pi.rusageinfo); + if (st < 0) { + netdata_log_error("Failed to get resource usage info for PID %d", pid); + continue; + } + + incrementally_collect_data_for_pid(pid, &pi); + } + + return true; +} + +#endif diff --git a/src/collectors/apps.plugin/apps_os_windows.c b/src/collectors/apps.plugin/apps_os_windows.c new file mode 100644 index 00000000000000..65784682ee358e --- /dev/null +++ b/src/collectors/apps.plugin/apps_os_windows.c @@ -0,0 +1,831 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "apps_plugin.h" +/* +{ + "SystemName": "WIN11", + "NumObjectTypes": 1, + "LittleEndian": 1, + "Version": 1, + "Revision": 1, + "DefaultObject": 238, + "PerfFreq": 10000000, + "PerfTime": 9242655165203, + "PerfTime100nSec": 133716612800215149, + "SystemTime": { + "Year": 2024, + "Month": 9, + "DayOfWeek": 2, + "Day": 24, + "Hour": 14, + "Minute": 21, + "Second": 20, + "Milliseconds": 21 + }, + "Objects": [ + { + "NameId": 230, + "Name": "Process", + "HelpId": 231, + "Help": "The Process performance object consists of counters that monitor running application program and system processes. 
All the threads in a process share the same address space and have access to the same data.", + "NumInstances": 274, + "NumCounters": 28, + "PerfTime": 133716612800215149, + "PerfFreq": 10000000, + "CodePage": 0, + "DefaultCounter": 0, + "DetailLevel": "Novice (100)", + "Instances": [ + { + "Instance": "Idle", + "UniqueID": -1, + "Labels": [ + { + "key": "Process", + "value": "Idle" + } + ], + "Counters": [ + { + "Counter": "% Processor Time", + "Value": { + "data": 106881107812500, + "time": 133716612800215149, + "type": 542180608, + "multi": 0, + "frequency": 0 + }, + "Help": "% Processor Time is the percentage of elapsed time that all of process threads used the processor to execution instructions. An instruction is the basic unit of execution in a computer, a thread is the object that executes instructions, and a process is the object created when a program is run. Code executed to handle some hardware interrupts and trap conditions are included in this count.", + "Type": "PERF_100NSEC_TIMER", + "Algorithm": "100 * (data1 - data0) / (time1 - time0)", + "Description": "64-bit Timer in 100 nsec units. Display delta divided by delta time. Display suffix: \"%\"" + }, + { + "Counter": "% User Time", + "Value": { + "data": 0, + "time": 133716612800215149, + "type": 542180608, + "multi": 0, + "frequency": 0 + }, + "Help": "% User Time is the percentage of elapsed time that the process threads spent executing code in user mode. Applications, environment subsystems, and integral subsystems execute in user mode. Code executing in user mode cannot damage the integrity of the Windows executive, kernel, and device drivers. Unlike some early operating systems, Windows uses process boundaries for subsystem protection in addition to the traditional protection of user and privileged modes. 
Some work done by Windows on behalf of the application might appear in other subsystem processes in addition to the privileged time in the process.", + "Type": "PERF_100NSEC_TIMER", + "Algorithm": "100 * (data1 - data0) / (time1 - time0)", + "Description": "64-bit Timer in 100 nsec units. Display delta divided by delta time. Display suffix: \"%\"" + }, + { + "Counter": "% Privileged Time", + "Value": { + "data": 106881107812500, + "time": 133716612800215149, + "type": 542180608, + "multi": 0, + "frequency": 0 + }, + "Help": "% Privileged Time is the percentage of elapsed time that the process threads spent executing code in privileged mode. When a Windows system service is called, the service will often run in privileged mode to gain access to system-private data. Such data is protected from access by threads executing in user mode. Calls to the system can be explicit or implicit, such as page faults or interrupts. Unlike some early operating systems, Windows uses process boundaries for subsystem protection in addition to the traditional protection of user and privileged modes. Some work done by Windows on behalf of the application might appear in other subsystem processes in addition to the privileged time in the process.", + "Type": "PERF_100NSEC_TIMER", + "Algorithm": "100 * (data1 - data0) / (time1 - time0)", + "Description": "64-bit Timer in 100 nsec units. Display delta divided by delta time. Display suffix: \"%\"" + }, + { + "Counter": "Virtual Bytes Peak", + "Value": { + "data": 8192, + "time": 0, + "type": 65792, + "multi": 0, + "frequency": 0 + }, + "Help": "Virtual Bytes Peak is the maximum size, in bytes, of virtual address space the process has used at any one time. Use of virtual address space does not necessarily imply corresponding use of either disk or main memory pages. 
However, virtual space is finite, and the process might limit its ability to load libraries.", + "Type": "PERF_COUNTER_LARGE_RAWCOUNT", + "Algorithm": "data0", + "Description": "A counter which should not be time averaged on display (such as an error counter on a serial line). Display as is. No Display Suffix." + }, + { + "Counter": "Virtual Bytes", + "Value": { + "data": 8192, + "time": 0, + "type": 65792, + "multi": 0, + "frequency": 0 + }, + "Help": "Virtual Bytes is the current size, in bytes, of the virtual address space the process is using. Use of virtual address space does not necessarily imply corresponding use of either disk or main memory pages. Virtual space is finite, and the process can limit its ability to load libraries.", + "Type": "PERF_COUNTER_LARGE_RAWCOUNT", + "Algorithm": "data0", + "Description": "A counter which should not be time averaged on display (such as an error counter on a serial line). Display as is. No Display Suffix." + }, + { + "Counter": "Page Faults/sec", + "Value": { + "data": 9, + "time": 9242655165203, + "type": 272696320, + "multi": 0, + "frequency": 10000000 + }, + "Help": "Page Faults/sec is the rate at which page faults by the threads executing in this process are occurring. A page fault occurs when a thread refers to a virtual memory page that is not in its working set in main memory. This may not cause the page to be fetched from disk if it is on the standby list and hence already in main memory, or if it is in use by another process with whom the page is shared.", + "Type": "PERF_COUNTER_COUNTER", + "Algorithm": "(data1 - data0) / ((time1 - time0) / frequency)", + "Description": "32-bit Counter. Divide delta by delta time. Display suffix: \"/sec\"" + }, + { + "Counter": "Working Set Peak", + "Value": { + "data": 8192, + "time": 0, + "type": 65792, + "multi": 0, + "frequency": 0 + }, + "Help": "Working Set Peak is the maximum size, in bytes, of the Working Set of this process at any point in time. 
The Working Set is the set of memory pages touched recently by the threads in the process. If free memory in the computer is above a threshold, pages are left in the Working Set of a process even if they are not in use. When free memory falls below a threshold, pages are trimmed from Working Sets. If they are needed they will then be soft-faulted back into the Working Set before they leave main memory.", + "Type": "PERF_COUNTER_LARGE_RAWCOUNT", + "Algorithm": "data0", + "Description": "A counter which should not be time averaged on display (such as an error counter on a serial line). Display as is. No Display Suffix." + }, + { + "Counter": "Working Set", + "Value": { + "data": 8192, + "time": 0, + "type": 65792, + "multi": 0, + "frequency": 0 + }, + "Help": "Working Set is the current size, in bytes, of the Working Set of this process. The Working Set is the set of memory pages touched recently by the threads in the process. If free memory in the computer is above a threshold, pages are left in the Working Set of a process even if they are not in use. When free memory falls below a threshold, pages are trimmed from Working Sets. If they are needed they will then be soft-faulted back into the Working Set before leaving main memory.", + "Type": "PERF_COUNTER_LARGE_RAWCOUNT", + "Algorithm": "data0", + "Description": "A counter which should not be time averaged on display (such as an error counter on a serial line). Display as is. No Display Suffix." + }, + { + "Counter": "Page File Bytes Peak", + "Value": { + "data": 61440, + "time": 0, + "type": 65792, + "multi": 0, + "frequency": 0 + }, + "Help": "Page File Bytes Peak is the maximum amount of virtual memory, in bytes, that this process has reserved for use in the paging file(s). Paging files are used to store pages of memory used by the process that are not contained in other files. Paging files are shared by all processes, and the lack of space in paging files can prevent other processes from allocating memory. 
If there is no paging file, this counter reflects the maximum amount of virtual memory that the process has reserved for use in physical memory.", + "Type": "PERF_COUNTER_LARGE_RAWCOUNT", + "Algorithm": "data0", + "Description": "A counter which should not be time averaged on display (such as an error counter on a serial line). Display as is. No Display Suffix." + }, + { + "Counter": "Page File Bytes", + "Value": { + "data": 61440, + "time": 0, + "type": 65792, + "multi": 0, + "frequency": 0 + }, + "Help": "Page File Bytes is the current amount of virtual memory, in bytes, that this process has reserved for use in the paging file(s). Paging files are used to store pages of memory used by the process that are not contained in other files. Paging files are shared by all processes, and the lack of space in paging files can prevent other processes from allocating memory. If there is no paging file, this counter reflects the current amount of virtual memory that the process has reserved for use in physical memory.", + "Type": "PERF_COUNTER_LARGE_RAWCOUNT", + "Algorithm": "data0", + "Description": "A counter which should not be time averaged on display (such as an error counter on a serial line). Display as is. No Display Suffix." + }, + { + "Counter": "Private Bytes", + "Value": { + "data": 61440, + "time": 0, + "type": 65792, + "multi": 0, + "frequency": 0 + }, + "Help": "Private Bytes is the current size, in bytes, of memory that this process has allocated that cannot be shared with other processes.", + "Type": "PERF_COUNTER_LARGE_RAWCOUNT", + "Algorithm": "data0", + "Description": "A counter which should not be time averaged on display (such as an error counter on a serial line). Display as is. No Display Suffix." + }, + { + "Counter": "Thread Count", + "Value": { + "data": 24, + "time": 0, + "type": 65536, + "multi": 0, + "frequency": 0 + }, + "Help": "The number of threads currently active in this process. 
An instruction is the basic unit of execution in a processor, and a thread is the object that executes instructions. Every running process has at least one thread.", + "Type": "PERF_COUNTER_RAWCOUNT", + "Algorithm": "data0", + "Description": "A counter which should not be time averaged on display (such as an error counter on a serial line). Display as is. No Display Suffix." + }, + { + "Counter": "Priority Base", + "Value": { + "data": 0, + "time": 0, + "type": 65536, + "multi": 0, + "frequency": 0 + }, + "Help": "The current base priority of this process. Threads within a process can raise and lower their own base priority relative to the process' base priority.", + "Type": "PERF_COUNTER_RAWCOUNT", + "Algorithm": "data0", + "Description": "A counter which should not be time averaged on display (such as an error counter on a serial line). Display as is. No Display Suffix." + }, + { + "Counter": "Elapsed Time", + "Value": { + "data": 133707369666486855, + "time": 133716612800215149, + "type": 807666944, + "multi": 0, + "frequency": 10000000 + }, + "Help": "The total elapsed time, in seconds, that this process has been running.", + "Type": "PERF_ELAPSED_TIME", + "Algorithm": "(time0 - data0) / frequency0", + "Description": "The data collected in this counter is actually the start time of the item being measured. For display, this data is subtracted from the sample time to yield the elapsed time as the difference between the two. In the definition below, the PerfTime field of the Object contains the sample time as indicated by the PERF_OBJECT_TIMER bit and the difference is scaled by the PerfFreq of the Object to convert the time units into seconds." + }, + { + "Counter": "ID Process", + "Value": { + "data": 0, + "time": 0, + "type": 65536, + "multi": 0, + "frequency": 0 + }, + "Help": "ID Process is the unique identifier of this process. 
ID Process numbers are reused, so they only identify a process for the lifetime of that process.", + "Type": "PERF_COUNTER_RAWCOUNT", + "Algorithm": "data0", + "Description": "A counter which should not be time averaged on display (such as an error counter on a serial line). Display as is. No Display Suffix." + }, + { + "Counter": "Creating Process ID", + "Value": { + "data": 0, + "time": 0, + "type": 65536, + "multi": 0, + "frequency": 0 + }, + "Help": "The Creating Process ID value is the Process ID of the process that created the process. The creating process may have terminated, so this value may no longer identify a running process.", + "Type": "PERF_COUNTER_RAWCOUNT", + "Algorithm": "data0", + "Description": "A counter which should not be time averaged on display (such as an error counter on a serial line). Display as is. No Display Suffix." + }, + { + "Counter": "Pool Paged Bytes", + "Value": { + "data": 0, + "time": 0, + "type": 65536, + "multi": 0, + "frequency": 0 + }, + "Help": "Pool Paged Bytes is the size, in bytes, of the paged pool, an area of the system virtual memory that is used for objects that can be written to disk when they are not being used. Memory\\\\Pool Paged Bytes is calculated differently than Process\\\\Pool Paged Bytes, so it might not equal Process(_Total)\\\\Pool Paged Bytes. This counter displays the last observed value only; it is not an average.", + "Type": "PERF_COUNTER_RAWCOUNT", + "Algorithm": "data0", + "Description": "A counter which should not be time averaged on display (such as an error counter on a serial line). Display as is. No Display Suffix." + }, + { + "Counter": "Pool Nonpaged Bytes", + "Value": { + "data": 272, + "time": 0, + "type": 65536, + "multi": 0, + "frequency": 0 + }, + "Help": "Pool Nonpaged Bytes is the size, in bytes, of the nonpaged pool, an area of the system virtual memory that is used for objects that cannot be written to disk, but must remain in physical memory as long as they are allocated. 
Memory\\\\Pool Nonpaged Bytes is calculated differently than Process\\\\Pool Nonpaged Bytes, so it might not equal Process(_Total)\\\\Pool Nonpaged Bytes. This counter displays the last observed value only; it is not an average.", + "Type": "PERF_COUNTER_RAWCOUNT", + "Algorithm": "data0", + "Description": "A counter which should not be time averaged on display (such as an error counter on a serial line). Display as is. No Display Suffix." + }, + { + "Counter": "Handle Count", + "Value": { + "data": 0, + "time": 0, + "type": 65536, + "multi": 0, + "frequency": 0 + }, + "Help": "The total number of handles currently open by this process. This number is equal to the sum of the handles currently open by each thread in this process.", + "Type": "PERF_COUNTER_RAWCOUNT", + "Algorithm": "data0", + "Description": "A counter which should not be time averaged on display (such as an error counter on a serial line). Display as is. No Display Suffix." + }, + { + "Counter": "IO Read Operations/sec", + "Value": { + "data": 0, + "time": 9242655165203, + "type": 272696576, + "multi": 0, + "frequency": 10000000 + }, + "Help": "The rate at which the process is issuing read I/O operations. This counter counts all I/O activity generated by the process to include file, network and device I/Os.", + "Type": "PERF_COUNTER_BULK_COUNT", + "Algorithm": "(data1 - data0) / ((time1 - time0) / frequency)", + "Description": "64-bit Counter. Divide delta by delta time. Display Suffix: \"/sec\"" + }, + { + "Counter": "IO Write Operations/sec", + "Value": { + "data": 0, + "time": 9242655165203, + "type": 272696576, + "multi": 0, + "frequency": 10000000 + }, + "Help": "The rate at which the process is issuing write I/O operations. This counter counts all I/O activity generated by the process to include file, network and device I/Os.", + "Type": "PERF_COUNTER_BULK_COUNT", + "Algorithm": "(data1 - data0) / ((time1 - time0) / frequency)", + "Description": "64-bit Counter. Divide delta by delta time. 
Display Suffix: \"/sec\"" + }, + { + "Counter": "IO Data Operations/sec", + "Value": { + "data": 0, + "time": 9242655165203, + "type": 272696576, + "multi": 0, + "frequency": 10000000 + }, + "Help": "The rate at which the process is issuing read and write I/O operations. This counter counts all I/O activity generated by the process to include file, network and device I/Os.", + "Type": "PERF_COUNTER_BULK_COUNT", + "Algorithm": "(data1 - data0) / ((time1 - time0) / frequency)", + "Description": "64-bit Counter. Divide delta by delta time. Display Suffix: \"/sec\"" + }, + { + "Counter": "IO Other Operations/sec", + "Value": { + "data": 0, + "time": 9242655165203, + "type": 272696576, + "multi": 0, + "frequency": 10000000 + }, + "Help": "The rate at which the process is issuing I/O operations that are neither read nor write operations (for example, a control function). This counter counts all I/O activity generated by the process to include file, network and device I/Os.", + "Type": "PERF_COUNTER_BULK_COUNT", + "Algorithm": "(data1 - data0) / ((time1 - time0) / frequency)", + "Description": "64-bit Counter. Divide delta by delta time. Display Suffix: \"/sec\"" + }, + { + "Counter": "IO Read Bytes/sec", + "Value": { + "data": 0, + "time": 9242655165203, + "type": 272696576, + "multi": 0, + "frequency": 10000000 + }, + "Help": "The rate at which the process is reading bytes from I/O operations. This counter counts all I/O activity generated by the process to include file, network and device I/Os.", + "Type": "PERF_COUNTER_BULK_COUNT", + "Algorithm": "(data1 - data0) / ((time1 - time0) / frequency)", + "Description": "64-bit Counter. Divide delta by delta time. Display Suffix: \"/sec\"" + }, + { + "Counter": "IO Write Bytes/sec", + "Value": { + "data": 0, + "time": 9242655165203, + "type": 272696576, + "multi": 0, + "frequency": 10000000 + }, + "Help": "The rate at which the process is writing bytes to I/O operations. 
This counter counts all I/O activity generated by the process to include file, network and device I/Os.", + "Type": "PERF_COUNTER_BULK_COUNT", + "Algorithm": "(data1 - data0) / ((time1 - time0) / frequency)", + "Description": "64-bit Counter. Divide delta by delta time. Display Suffix: \"/sec\"" + }, + { + "Counter": "IO Data Bytes/sec", + "Value": { + "data": 0, + "time": 9242655165203, + "type": 272696576, + "multi": 0, + "frequency": 10000000 + }, + "Help": "The rate at which the process is reading and writing bytes in I/O operations. This counter counts all I/O activity generated by the process to include file, network and device I/Os.", + "Type": "PERF_COUNTER_BULK_COUNT", + "Algorithm": "(data1 - data0) / ((time1 - time0) / frequency)", + "Description": "64-bit Counter. Divide delta by delta time. Display Suffix: \"/sec\"" + }, + { + "Counter": "IO Other Bytes/sec", + "Value": { + "data": 0, + "time": 9242655165203, + "type": 272696576, + "multi": 0, + "frequency": 10000000 + }, + "Help": "The rate at which the process is issuing bytes to I/O operations that do not involve data such as control operations. This counter counts all I/O activity generated by the process to include file, network and device I/Os.", + "Type": "PERF_COUNTER_BULK_COUNT", + "Algorithm": "(data1 - data0) / ((time1 - time0) / frequency)", + "Description": "64-bit Counter. Divide delta by delta time. Display Suffix: \"/sec\"" + }, + { + "Counter": "Working Set - Private", + "Value": { + "data": 8192, + "time": 0, + "type": 65792, + "multi": 0, + "frequency": 0 + }, + "Help": "Working Set - Private displays the size of the working set, in bytes, that is use for this process only and not shared nor sharable by other processes.", + "Type": "PERF_COUNTER_LARGE_RAWCOUNT", + "Algorithm": "data0", + "Description": "A counter which should not be time averaged on display (such as an error counter on a serial line). Display as is. No Display Suffix." 
+ } + ] + }, + */ + + +#if defined(OS_WINDOWS) + +#include +#include +#include +#include +#include
+
+// State kept while walking the instances of the perflib "Process" object.
+struct perflib_data {
+    PERF_DATA_BLOCK *pDataBlock;
+    PERF_OBJECT_TYPE *pObjectType;
+    PERF_INSTANCE_DEFINITION *pi;
+    DWORD pid;
+};
+
+// Enable SE_DEBUG_NAME on the token of the current process.
+//
+// Returns TRUE only when the privilege has actually been enabled.
+// The token handle is closed on every path after it has been opened.
+BOOL EnableDebugPrivilege() {
+    HANDLE hToken;
+
+    if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hToken))
+        return FALSE;
+
+    BOOL enabled = FALSE;
+    LUID luid;
+
+    if (LookupPrivilegeValue(NULL, SE_DEBUG_NAME, &luid)) {
+        TOKEN_PRIVILEGES tkp;
+        tkp.PrivilegeCount = 1;
+        tkp.Privileges[0].Luid = luid;
+        tkp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
+
+        // AdjustTokenPrivileges() returns non-zero even when the privilege
+        // was not assigned; in that case GetLastError() is ERROR_NOT_ALL_ASSIGNED.
+        if (AdjustTokenPrivileges(hToken, FALSE, &tkp, sizeof(tkp), NULL, NULL) &&
+            GetLastError() == ERROR_SUCCESS)
+            enabled = TRUE;
+    }
+
+    CloseHandle(hToken);
+
+    return enabled;
+}
+
+// One-time initialization: load the perflib names registry and try to enable
+// the debug privilege. A failure to enable it is only logged as a warning.
+void apps_os_init_windows(void) {
+    PerflibNamesRegistryInitialize();
+
+    if(!EnableDebugPrivilege())
+        nd_log(NDLS_COLLECTORS, NDLP_WARNING, "Failed to enable debug privilege");
+}
+
+// Returns the total physical memory as reported by GlobalMemoryStatusEx()
+// (ullTotalPhys), or 0 when the call fails.
+uint64_t apps_os_get_total_memory_windows(void) {
+    MEMORYSTATUSEX memStat = { 0 };
+    memStat.dwLength = sizeof(memStat);
+
+    if (!GlobalMemoryStatusEx(&memStat)) {
+        netdata_log_error("GlobalMemoryStatusEx() failed.");
+        return 0;
+    }
+
+    return memStat.ullTotalPhys;
+}
+
+// Per-thread scratch buffer for wide strings, shared by
+// GetProcessFriendlyName() and GetAllProcessesInfo().
+static __thread wchar_t unicode[PATH_MAX];
+
+// Convert wide string to UTF-8
+// Returns a new STRING, or NULL when the conversion fails or the result
+// (including its terminator) does not fit in PATH_MAX bytes.
+static STRING *wchar_to_string(WCHAR *s) {
+    static __thread char utf8[PATH_MAX];
+    static __thread int utf8_size = sizeof(utf8);
+
+    // first call: find the size needed, including the terminator
+    int len = WideCharToMultiByte(CP_UTF8, 0, s, -1, NULL, 0, NULL, NULL);
+    if (len <= 0 || len >= utf8_size)
+        return NULL;
+
+    WideCharToMultiByte(CP_UTF8, 0, s, -1, utf8, utf8_size, NULL, NULL);
+    return string_strdupz(utf8);
+}
+
+// Read the "FileDescription" of the executable at 'path' from its version
+// resource, using the fixed translation 040904B0 (language 0x0409, code
+// page 0x04B0).
+//
+// Returns a new STRING, or NULL when the file has no version information,
+// the information does not fit in the 1 MiB buffer, or the description is
+// missing, empty or too long.
+//
+// 'path' may be the global 'unicode' buffer: it is overwritten only after
+// the file has been queried.
+STRING *GetProcessFriendlyName(WCHAR *path) {
+    static __thread uint8_t void_buf[1024 * 1024];
+
+    DWORD handle;
+    DWORD size = GetFileVersionInfoSizeW(path, &handle);
+    if (size == 0 || size > sizeof(void_buf))
+        return NULL;
+
+    if (GetFileVersionInfoW(path, handle, size, void_buf)) {
+        LPWSTR value = NULL;
+        UINT len = 0;
+        DWORD unicode_size = sizeof(unicode) / sizeof(*unicode);
+        if (VerQueryValueW(void_buf, L"\\StringFileInfo\\040904B0\\FileDescription", (LPVOID*)&value, &len) &&
+            len > 0 && len < unicode_size) {
+            wcsncpy(unicode, value, unicode_size - 1);
+            unicode[unicode_size - 1] = L'\0';
+            return wchar_to_string(unicode);
+        }
+    }
+
+    return NULL;
+}
+
+// Walk a toolhelp snapshot of all the processes and, for every process not
+// examined before (got_info is false), set its:
+//  - ppid, from the snapshot (this is updated on every call);
+//  - comm, from the executable name of the snapshot, only when the pid
+//    has not been initialized yet;
+//  - cmdline, with the full path of the process image;
+//  - name, with the file description of the image, when available.
+//
+// Processes that cannot be opened keep whatever they had.
+void GetAllProcessesInfo(void) {
+    calls_counter++;
+
+    HANDLE hSnapshot = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0);
+    if (hSnapshot == INVALID_HANDLE_VALUE) return;
+
+    PROCESSENTRY32W pe32;
+    pe32.dwSize = sizeof(PROCESSENTRY32W);
+
+    if (!Process32FirstW(hSnapshot, &pe32)) {
+        CloseHandle(hSnapshot);
+        return;
+    }
+
+    do {
+        struct pid_stat *p = get_or_allocate_pid_entry((pid_t)pe32.th32ProcessID);
+        p->ppid = (pid_t)pe32.th32ParentProcessID;
+        if(p->got_info) continue;
+        p->got_info = true;
+
+        if(!p->initialized) {
+            string_freez(p->comm);
+            p->comm = wchar_to_string(pe32.szExeFile);
+            p->assigned_to_target = false;
+        }
+
+        HANDLE hProcess = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, p->pid);
+        if (hProcess == NULL) continue;
+
+        STRING *full_path = NULL;
+        STRING *friendly_name = NULL;
+
+        DWORD unicode_size = sizeof(unicode) / sizeof(*unicode);
+        if(QueryFullProcessImageNameW(hProcess, 0, unicode, &unicode_size)) {
+            // convert the path first: GetProcessFriendlyName() reuses 'unicode'
+            full_path = wchar_to_string(unicode);
+            friendly_name = GetProcessFriendlyName(unicode);
+        }
+
+        CloseHandle(hProcess);
+
+        if(full_path) {
+            string_freez(p->cmdline);
+            p->cmdline = full_path;
+        }
+
+        if(friendly_name) {
+            string_freez(p->name);
+            p->name = friendly_name;
+            // the name changed, so the target has to be assigned again
+            p->assigned_to_target = false;
+        }
+    } while (Process32NextW(hSnapshot, &pe32));
+
+    CloseHandle(hSnapshot);
+}
+
+static inline kernel_uint_t perflib_cpu_utilization(COUNTER_DATA *d) {
+    internal_fatal(d->current.CounterType != PERF_100NSEC_TIMER,
+                   "Wrong timer type");
+
+    ULONGLONG data1 = d->current.Data;
+    ULONGLONG data0 = d->previous.Data;
+    LONGLONG time1 = d->current.Time;
+    LONGLONG time0 = 
d->previous.Time;
+
+    /*
+     * The Windows documentation provides the formula for percentage:
+     *
+     *           100 * (data1 - data0) / (time1 - time0)
+     *
+     * To get a fraction (0.0 to 1.0) instead of a percentage, we
+     * simply remove the 100 multiplier:
+     *
+     *           (data1 - data0) / (time1 - time0)
+     *
+     * This fraction represents the portion of a single CPU core used
+     * over the time period. Multiplying this fraction by NSEC_PER_SEC
+     * converts it to nanosecond-cores:
+     *
+     *           NSEC_PER_SEC * (data1 - data0) / (time1 - time0)
+     */
+
+    LONGLONG dt = time1 - time0;
+
+    // no time passed, or the counter went backwards
+    // (the unsigned subtraction below would wrap to a huge value)
+    if(dt <= 0 || data1 < data0)
+        return 0;
+
+    return NSEC_PER_SEC * (data1 - data0) / dt;
+}
+
+// Difference of the counter between the two last samples, multiplied by
+// RATES_DETAIL and divided by the difference of their timestamps.
+// Returns 0 when no time passed or the counter went backwards.
+static inline kernel_uint_t perflib_rate(COUNTER_DATA *d) {
+    ULONGLONG data1 = d->current.Data;
+    ULONGLONG data0 = d->previous.Data;
+    LONGLONG time1 = d->current.Time;
+    LONGLONG time0 = d->previous.Time;
+
+    LONGLONG dt = (time1 - time0);
+
+    // same protection as in perflib_cpu_utilization()
+    if(dt <= 0 || data1 < data0)
+        return 0;
+
+    return (RATES_DETAIL * (data1 - data0)) / dt;
+}
+
+// The current value of a raw counter, as is.
+static inline kernel_uint_t perflib_value(COUNTER_DATA *d) {
+    internal_fatal(d->current.CounterType != PERF_COUNTER_LARGE_RAWCOUNT &&
+                   d->current.CounterType != PERF_COUNTER_RAWCOUNT,
+                   "Wrong gauge type");
+
+    return d->current.Data;
+}
+
+// Elapsed time of a PERF_ELAPSED_TIME counter: (time - data) / frequency.
+// Returns 0 when the frequency is not positive or the data is after the
+// sample time, instead of dividing by zero or wrapping.
+static inline kernel_uint_t perflib_elapsed(COUNTER_DATA *d) {
+    ULONGLONG data1 = d->current.Data;
+    LONGLONG time1 = d->current.Time;
+    LONGLONG freq1 = d->current.Frequency;
+
+    internal_fatal(d->current.CounterType != PERF_ELAPSED_TIME || !freq1,
+                   "Wrong gauge type");
+
+    if(freq1 <= 0 || time1 < 0 || (ULONGLONG)time1 < data1)
+        return 0;
+
+    return ((ULONGLONG)time1 - data1) / (ULONGLONG)freq1;
+}
+
+// Collect the data of all the processes from the perflib "Process" object.
+//
+// For every instance with a non-zero "ID Process":
+//  - on first sight, the pid is initialized: comm is derived from the
+//    instance name (without the "_<pid>" and ".exe" suffixes), ppid is read
+//    from "Creating Process ID" and the perflib counter keys are set;
+//  - all the counters with a key are fetched; if any of them is missing
+//    the collection for this pid is marked as failed;
+//  - otherwise p->values[] are computed and the collection is completed.
+//
+// When new pids have been added, GetAllProcessesInfo() is called to get
+// their details, and pids without a target get one assigned
+// (only when USE_APPS_GROUPS_CONF is 1).
+//
+// Returns false when the perflib data are not available, true otherwise.
+bool apps_os_collect_all_pids_windows(void) {
+    calls_counter++;
+
+    struct perflib_data d = { 0 };
+    d.pDataBlock = perflibGetPerformanceData(RegistryFindIDByName("Process"));
+    if(!d.pDataBlock) return false;
+
+    d.pObjectType = perflibFindObjectTypeByName(d.pDataBlock, "Process");
+    if(!d.pObjectType) {
+        perflibFreePerformanceData();
+        return false;
+    }
+
+    // we need these outside the loop to avoid searching by name all the time
+    // (our perflib library caches the id inside the COUNTER_DATA).
+    COUNTER_DATA processId = {.key = "ID Process"};
+
+    d.pi = NULL;
+    size_t added = 0;
+    for(LONG i = 0; i < d.pObjectType->NumInstances; i++) {
+        d.pi = perflibForEachInstance(d.pDataBlock, d.pObjectType, d.pi);
+        if (!d.pi) break;
+
+        // without the pid we cannot attribute this instance to a process
+        // (processId would still hold the value of a previous instance)
+        if (!perflibGetInstanceCounter(d.pDataBlock, d.pObjectType, d.pi, &processId))
+            continue;
+
+        d.pid = (DWORD) processId.current.Data;
+        if (d.pid == 0) continue; // 0 = Idle (this takes all the spare resources)
+
+        // Get or create pid_stat structure
+        struct pid_stat *p = get_or_allocate_pid_entry((pid_t) d.pid);
+
+        if (unlikely(!p->initialized)) {
+            // a new pid
+            p->initialized = true;
+
+            static __thread char name[MAX_PATH];
+
+            if (getInstanceName(d.pDataBlock, d.pObjectType, d.pi, name, sizeof(name))) {
+                // remove the PID suffix, if any
+                char pid[UINT64_MAX_LENGTH + 1]; // +1 for the underscore
+                pid[0] = '_';
+                print_uint64(&pid[1], p->pid);
+                size_t pid_len = strlen(pid);
+                size_t name_len = strlen(name);
+                if (pid_len < name_len) {
+                    char *compare = &name[name_len - pid_len];
+                    if (strcmp(pid, compare) == 0)
+                        *compare = '\0';
+                }
+
+                // remove the .exe suffix, if any
+                name_len = strlen(name);
+                size_t exe_len = strlen(".exe");
+                if(exe_len < name_len) {
+                    char *compare = &name[name_len - exe_len];
+                    if (strcmp(".exe", compare) == 0)
+                        *compare = '\0';
+                }
+            }
+            else
+                strncpyz(name, "unknown", sizeof(name) - 1);
+
+            if(strcmp(name, "wininit") == 0)
+                INIT_PID = p->pid;
+
+            string_freez(p->comm); // it may be detected in a previous run via GetAllProcessesInfo()
+            p->comm = string_strdupz(name);
+            p->got_info = false;
+            p->assigned_to_target = false;
+            added++;
+
+            COUNTER_DATA ppid = {.key = "Creating Process ID"};
+            perflibGetInstanceCounter(d.pDataBlock, d.pObjectType, d.pi, &ppid);
+            p->ppid = (pid_t) ppid.current.Data;
+
+            p->perflib[PDF_UTIME].key = "% User Time";
+            p->perflib[PDF_STIME].key = "% Privileged Time";
+            p->perflib[PDF_VMSIZE].key = "Virtual Bytes";
+            p->perflib[PDF_VMRSS].key = "Working Set";
+            p->perflib[PDF_VMSWAP].key = "Page File Bytes";
+            p->perflib[PDF_LREAD].key = "IO Read Bytes/sec";
+            p->perflib[PDF_LWRITE].key = "IO Write Bytes/sec";
+            p->perflib[PDF_OREAD].key = "IO Read Operations/sec";
+            p->perflib[PDF_OWRITE].key = "IO Write Operations/sec";
+            p->perflib[PDF_THREADS].key = "Thread Count";
+            p->perflib[PDF_HANDLES].key = "Handle Count";
+            p->perflib[PDF_MINFLT].key = "Page Faults/sec";
+            p->perflib[PDF_UPTIME].key = "Elapsed Time";
+        }
+
+        pid_collection_started(p);
+
+        // get all data from perflib (only the fields that have a key)
+        size_t failed = 0;
+        for (PID_FIELD f = 0; f < PDF_MAX; f++) {
+            if (!p->perflib[f].key)
+                continue;
+
+            if (!perflibGetInstanceCounter(d.pDataBlock, d.pObjectType, d.pi, &p->perflib[f])) {
+                failed++;
+                nd_log(NDLS_COLLECTORS, NDLP_ERR,
+                       "Cannot find field '%s' in processes data", p->perflib[f].key);
+            }
+        }
+
+        if(failed) {
+            pid_collection_failed(p);
+            continue;
+        }
+
+        // CPU time
+        p->values[PDF_UTIME] = perflib_cpu_utilization(&p->perflib[PDF_UTIME]);
+        p->values[PDF_STIME] = perflib_cpu_utilization(&p->perflib[PDF_STIME]);
+
+        // Memory
+        p->values[PDF_VMRSS] = perflib_value(&p->perflib[PDF_VMRSS]);
+        p->values[PDF_VMSIZE] = perflib_value(&p->perflib[PDF_VMSIZE]);
+        p->values[PDF_VMSWAP] = perflib_value(&p->perflib[PDF_VMSWAP]);
+
+        // I/O
+        p->values[PDF_LREAD] = perflib_rate(&p->perflib[PDF_LREAD]);
+        p->values[PDF_LWRITE] = perflib_rate(&p->perflib[PDF_LWRITE]);
+        p->values[PDF_OREAD] = perflib_rate(&p->perflib[PDF_OREAD]);
+        p->values[PDF_OWRITE] = perflib_rate(&p->perflib[PDF_OWRITE]);
+
+        // Threads
+        p->values[PDF_THREADS] = perflib_value(&p->perflib[PDF_THREADS]);
+
+        // Handle count
+        p->values[PDF_HANDLES] = perflib_value(&p->perflib[PDF_HANDLES]);
+
+        // Page faults
+        // Windows doesn't distinguish between minor and major page faults
+        p->values[PDF_MINFLT] = perflib_rate(&p->perflib[PDF_MINFLT]);
+
+        // Process uptime
+        // Convert 100-nanosecond units to seconds
+        p->values[PDF_UPTIME] = perflib_elapsed(&p->perflib[PDF_UPTIME]);
+
+        pid_collection_completed(p);
+    }
+
+    perflibFreePerformanceData();
+
+    if(added) {
+        GetAllProcessesInfo();
+
+#if (USE_APPS_GROUPS_CONF == 1)
+        for(struct pid_stat *p = root_of_pids(); p ;p = p->next) {
+            if(!p->assigned_to_target)
+                assign_app_group_target_to_pid(p);
+        }
+#endif
+    }
+
+    return true;
+}
+
+#endif
diff --git a/src/collectors/apps.plugin/apps_output.c b/src/collectors/apps.plugin/apps_output.c index 84928e6417d7c0..edc917a5c55eda 100644 --- a/src/collectors/apps.plugin/apps_output.c +++ b/src/collectors/apps.plugin/apps_output.c @@ -76,7 +76,7 @@ void send_resource_usage_to_netdata(usec_t dt) { "SET inode_changes = %zu\n" "SET link_changes = %zu\n" "SET pids = %zu\n" - "SET fds = %d\n" + "SET fds = %"PRIu32"\n" "SET targets = %zu\n" "SET new_pids = %zu\n" "END\n" @@ -89,7 +89,7 @@ void send_resource_usage_to_netdata(usec_t dt) { , filenames_allocated_counter , inodes_changed_counter , links_changed_counter - , all_pids_count + , all_pids_count() , all_files_len , apps_groups_targets_count , targets_assignment_counter @@ -103,103 +103,138 @@ void send_collected_data_to_netdata(struct target *root, const char *type, usec_ if (unlikely(!w->exposed)) continue; - send_BEGIN(type, w->clean_name, "processes", dt); - send_SET("processes", w->processes); + send_BEGIN(type, string2str(w->clean_name), "processes", dt); + send_SET("processes", w->values[PDF_PROCESSES]); send_END(); - send_BEGIN(type, w->clean_name, "threads", 
dt); - send_SET("threads", w->num_threads); + send_BEGIN(type, string2str(w->clean_name), "threads", dt); + send_SET("threads", w->values[PDF_THREADS]); send_END(); - if (unlikely(!w->processes && !w->is_other)) + if (unlikely(!w->values[PDF_PROCESSES])) continue; - send_BEGIN(type, w->clean_name, "cpu_utilization", dt); - send_SET("user", (kernel_uint_t)(w->utime * utime_fix_ratio) + (include_exited_childs ? ((kernel_uint_t)(w->cutime * cutime_fix_ratio)) : 0ULL)); - send_SET("system", (kernel_uint_t)(w->stime * stime_fix_ratio) + (include_exited_childs ? ((kernel_uint_t)(w->cstime * cstime_fix_ratio)) : 0ULL)); +#if (PROCESSES_HAVE_CPU_CHILDREN_TIME) + send_BEGIN(type, string2str(w->clean_name), "cpu_utilization", dt); + send_SET("user", (kernel_uint_t)(w->values[PDF_UTIME] * utime_fix_ratio) + (include_exited_childs ? ((kernel_uint_t)(w->values[PDF_CUTIME] * cutime_fix_ratio)) : 0ULL)); + send_SET("system", (kernel_uint_t)(w->values[PDF_STIME] * stime_fix_ratio) + (include_exited_childs ? ((kernel_uint_t)(w->values[PDF_CSTIME] * cstime_fix_ratio)) : 0ULL)); send_END(); +#else + send_BEGIN(type, string2str(w->clean_name), "cpu_utilization", dt); + send_SET("user", (kernel_uint_t)(w->values[PDF_UTIME] * utime_fix_ratio)); + send_SET("system", (kernel_uint_t)(w->values[PDF_STIME] * stime_fix_ratio)); + send_END(); +#endif -#if !defined(__FreeBSD__) && !defined(__APPLE__) +#if (PROCESSES_HAVE_CPU_GUEST_TIME == 1) if (enable_guest_charts) { - send_BEGIN(type, w->clean_name, "cpu_guest_utilization", dt); - send_SET("guest", (kernel_uint_t)(w->gtime * gtime_fix_ratio) + (include_exited_childs ? ((kernel_uint_t)(w->cgtime * cgtime_fix_ratio)) : 0ULL)); + send_BEGIN(type, string2str(w->clean_name), "cpu_guest_utilization", dt); + send_SET("guest", (kernel_uint_t)(w->values[PDF_GTIME] * gtime_fix_ratio) +#if (PROCESSES_HAVE_CPU_CHILDREN_TIME == 1) + + (include_exited_childs ? 
((kernel_uint_t)(w->values[PDF_CGTIME] * cgtime_fix_ratio)) : 0ULL) +#endif + ); send_END(); } +#endif - send_BEGIN(type, w->clean_name, "cpu_context_switches", dt); - send_SET("voluntary", w->status_voluntary_ctxt_switches); - send_SET("involuntary", w->status_nonvoluntary_ctxt_switches); + send_BEGIN(type, string2str(w->clean_name), "mem_private_usage", dt); +#if (PROCESSES_HAVE_VMSHARED == 1) + send_SET("mem", (w->values[PDF_VMRSS] > w->values[PDF_VMSHARED])?(w->values[PDF_VMRSS] - w->values[PDF_VMSHARED]) : 0ULL); +#else + send_SET("mem", w->values[PDF_VMRSS]); +#endif send_END(); - send_BEGIN(type, w->clean_name, "mem_private_usage", dt); - send_SET("mem", (w->status_vmrss > w->status_vmshared)?(w->status_vmrss - w->status_vmshared) : 0ULL); +#if (PROCESSES_HAVE_VOLCTX == 1) || (PROCESSES_HAVE_NVOLCTX == 1) + send_BEGIN(type, string2str(w->clean_name), "cpu_context_switches", dt); +#if (PROCESSES_HAVE_VOLCTX == 1) + send_SET("voluntary", w->values[PDF_VOLCTX]); +#endif +#if (PROCESSES_HAVE_NVOLCTX == 1) + send_SET("involuntary", w->values[PDF_NVOLCTX]); +#endif send_END(); #endif - send_BEGIN(type, w->clean_name, "mem_usage", dt); - send_SET("rss", w->status_vmrss); + send_BEGIN(type, string2str(w->clean_name), "mem_usage", dt); + send_SET("rss", w->values[PDF_VMRSS]); send_END(); -#if !defined(__APPLE__) - send_BEGIN(type, w->clean_name, "vmem_usage", dt); - send_SET("vmem", w->status_vmsize); + send_BEGIN(type, string2str(w->clean_name), "vmem_usage", dt); + send_SET("vmem", w->values[PDF_VMSIZE]); send_END(); -#endif - send_BEGIN(type, w->clean_name, "mem_page_faults", dt); - send_SET("minor", (kernel_uint_t)(w->minflt * minflt_fix_ratio) + (include_exited_childs ? ((kernel_uint_t)(w->cminflt * cminflt_fix_ratio)) : 0ULL)); - send_SET("major", (kernel_uint_t)(w->majflt * majflt_fix_ratio) + (include_exited_childs ? 
((kernel_uint_t)(w->cmajflt * cmajflt_fix_ratio)) : 0ULL)); + send_BEGIN(type, string2str(w->clean_name), "mem_page_faults", dt); + send_SET("minor", (kernel_uint_t)(w->values[PDF_MINFLT] * minflt_fix_ratio) +#if (PROCESSES_HAVE_CHILDREN_FLTS == 1) + + (include_exited_childs ? ((kernel_uint_t)(w->values[PDF_CMINFLT] * cminflt_fix_ratio)) : 0ULL) +#endif + ); +#if (PROCESSES_HAVE_MAJFLT == 1) + send_SET("major", (kernel_uint_t)(w->values[PDF_MAJFLT] * majflt_fix_ratio) +#if (PROCESSES_HAVE_CHILDREN_FLTS == 1) + + (include_exited_childs ? ((kernel_uint_t)(w->values[PDF_CMAJFLT] * cmajflt_fix_ratio)) : 0ULL) +#endif + ); +#endif send_END(); -#if !defined(__FreeBSD__) && !defined(__APPLE__) - send_BEGIN(type, w->clean_name, "swap_usage", dt); - send_SET("swap", w->status_vmswap); +#if (PROCESSES_HAVE_VMSWAP == 1) + send_BEGIN(type, string2str(w->clean_name), "swap_usage", dt); + send_SET("swap", w->values[PDF_VMSWAP]); send_END(); #endif - if (w->processes == 0) { - send_BEGIN(type, w->clean_name, "uptime", dt); + if (w->values[PDF_PROCESSES] == 0) { + send_BEGIN(type, string2str(w->clean_name), "uptime", dt); send_SET("uptime", 0); send_END(); if (enable_detailed_uptime_charts) { - send_BEGIN(type, w->clean_name, "uptime_summary", dt); + send_BEGIN(type, string2str(w->clean_name), "uptime_summary", dt); send_SET("min", 0); send_SET("avg", 0); send_SET("max", 0); send_END(); } } else { - send_BEGIN(type, w->clean_name, "uptime", dt); + send_BEGIN(type, string2str(w->clean_name), "uptime", dt); send_SET("uptime", w->uptime_max); send_END(); if (enable_detailed_uptime_charts) { - send_BEGIN(type, w->clean_name, "uptime_summary", dt); + send_BEGIN(type, string2str(w->clean_name), "uptime_summary", dt); send_SET("min", w->uptime_min); - send_SET("avg", w->processes > 0 ? w->uptime_sum / w->processes : 0); + send_SET("avg", w->values[PDF_PROCESSES] > 0 ? 
w->values[PDF_UPTIME] / w->values[PDF_PROCESSES] : 0); send_SET("max", w->uptime_max); send_END(); } } - send_BEGIN(type, w->clean_name, "disk_physical_io", dt); - send_SET("reads", w->io_storage_bytes_read); - send_SET("writes", w->io_storage_bytes_written); +#if (PROCESSES_HAVE_PHYSICAL_IO == 1) + send_BEGIN(type, string2str(w->clean_name), "disk_physical_io", dt); + send_SET("reads", w->values[PDF_PREAD]); + send_SET("writes", w->values[PDF_PWRITE]); send_END(); +#endif -#if !defined(__FreeBSD__) && !defined(__APPLE__) - send_BEGIN(type, w->clean_name, "disk_logical_io", dt); - send_SET("reads", w->io_logical_bytes_read); - send_SET("writes", w->io_logical_bytes_written); +#if (PROCESSES_HAVE_LOGICAL_IO == 1) + send_BEGIN(type, string2str(w->clean_name), "disk_logical_io", dt); + send_SET("reads", w->values[PDF_LREAD]); + send_SET("writes", w->values[PDF_LWRITE]); send_END(); #endif + if (enable_file_charts) { - send_BEGIN(type, w->clean_name, "fds_open_limit", dt); +#if (PROCESSES_HAVE_FDS == 1) + send_BEGIN(type, string2str(w->clean_name), "fds_open_limit", dt); send_SET("limit", w->max_open_files_percent * 100.0); send_END(); +#endif - send_BEGIN(type, w->clean_name, "fds_open", dt); + send_BEGIN(type, string2str(w->clean_name), "fds_open", dt); +#if (PROCESSES_HAVE_FDS == 1) send_SET("files", w->openfds.files); send_SET("sockets", w->openfds.sockets); send_SET("pipes", w->openfds.sockets); @@ -209,6 +244,10 @@ void send_collected_data_to_netdata(struct target *root, const char *type, usec_ send_SET("signal", w->openfds.signalfds); send_SET("eventpolls", w->openfds.eventpolls); send_SET("other", w->openfds.other); +#endif +#if (PROCESSES_HAVE_HANDLES == 1) + send_SET("handles", w->values[PDF_HANDLES]); +#endif send_END(); } } @@ -223,9 +262,9 @@ void send_charts_updates_to_netdata(struct target *root, const char *type, const if (debug_enabled) { for (w = root; w; w = w->next) { - if (unlikely(!w->target && w->processes)) { + if (unlikely(!w->target && 
w->values[PDF_PROCESSES])) { struct pid_on_target *pid_on_target; - fprintf(stderr, "apps.plugin: target '%s' has aggregated %u process(es):", w->name, w->processes); + fprintf(stderr, "apps.plugin: target '%s' has aggregated %"PRIu64" process(es):", string2str(w->name), w->values[PDF_PROCESSES]); for (pid_on_target = w->root_pid; pid_on_target; pid_on_target = pid_on_target->next) { fprintf(stderr, " %d", pid_on_target->pid); } @@ -235,101 +274,120 @@ void send_charts_updates_to_netdata(struct target *root, const char *type, const } for (w = root; w; w = w->next) { - if (likely(w->exposed || (!w->processes && !w->is_other))) + if (likely(w->exposed || (!w->values[PDF_PROCESSES]))) continue; - w->exposed = 1; + w->exposed = true; - fprintf(stdout, "CHART %s.%s_cpu_utilization '' '%s CPU utilization (100%% = 1 core)' 'percentage' cpu %s.cpu_utilization stacked 20001 %d\n", type, w->clean_name, title, type, update_every); - fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name); + fprintf(stdout, "CHART %s.%s_cpu_utilization '' '%s CPU utilization (100%% = 1 core)' 'percentage' cpu %s.cpu_utilization stacked 20001 %d\n", + type, string2str(w->clean_name), title, type, update_every); + fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, string2str(w->name)); fprintf(stdout, "CLABEL_COMMIT\n"); - fprintf(stdout, "DIMENSION user '' absolute 1 %llu\n", time_factor * RATES_DETAIL / 100LLU); - fprintf(stdout, "DIMENSION system '' absolute 1 %llu\n", time_factor * RATES_DETAIL / 100LLU); + fprintf(stdout, "DIMENSION user '' absolute 1 %llu\n", NSEC_PER_SEC / 100ULL); + fprintf(stdout, "DIMENSION system '' absolute 1 %llu\n", NSEC_PER_SEC / 100ULL); -#if !defined(__FreeBSD__) && !defined(__APPLE__) +#if (PROCESSES_HAVE_CPU_GUEST_TIME == 1) if (enable_guest_charts) { - fprintf(stdout, "CHART %s.%s_cpu_guest_utilization '' '%s CPU guest utlization (100%% = 1 core)' 'percentage' cpu %s.cpu_guest_utilization line 20005 %d\n", type, w->clean_name, title, type, update_every); - 
fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name); + fprintf(stdout, "CHART %s.%s_cpu_guest_utilization '' '%s CPU guest utlization (100%% = 1 core)' 'percentage' cpu %s.cpu_guest_utilization line 20005 %d\n", + type, string2str(w->clean_name), title, type, update_every); + fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, string2str(w->name)); fprintf(stdout, "CLABEL_COMMIT\n"); - fprintf(stdout, "DIMENSION guest '' absolute 1 %llu\n", time_factor * RATES_DETAIL / 100LLU); + fprintf(stdout, "DIMENSION guest '' absolute 1 %llu\n", NSEC_PER_SEC / 100ULL); } +#endif - fprintf(stdout, "CHART %s.%s_cpu_context_switches '' '%s CPU context switches' 'switches/s' cpu %s.cpu_context_switches stacked 20010 %d\n", type, w->clean_name, title, type, update_every); - fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name); + fprintf(stdout, "CHART %s.%s_mem_private_usage '' '%s memory usage without shared' 'MiB' mem %s.mem_private_usage area 20050 %d\n", + type, string2str(w->clean_name), title, type, update_every); + fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, string2str(w->name)); fprintf(stdout, "CLABEL_COMMIT\n"); - fprintf(stdout, "DIMENSION voluntary '' absolute 1 %llu\n", RATES_DETAIL); - fprintf(stdout, "DIMENSION involuntary '' absolute 1 %llu\n", RATES_DETAIL); + fprintf(stdout, "DIMENSION mem '' absolute %ld %ld\n", 1L, 1024L * 1024L); - fprintf(stdout, "CHART %s.%s_mem_private_usage '' '%s memory usage without shared' 'MiB' mem %s.mem_private_usage area 20050 %d\n", type, w->clean_name, title, type, update_every); - fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name); +#if (PROCESSES_HAVE_VOLCTX == 1) || (PROCESSES_HAVE_NVOLCTX == 1) + fprintf(stdout, "CHART %s.%s_cpu_context_switches '' '%s CPU context switches' 'switches/s' cpu %s.cpu_context_switches stacked 20010 %d\n", + type, string2str(w->clean_name), title, type, update_every); + fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, string2str(w->name)); fprintf(stdout, 
"CLABEL_COMMIT\n"); - fprintf(stdout, "DIMENSION mem '' absolute %ld %ld\n", 1L, 1024L); +#if (PROCESSES_HAVE_VOLCTX == 1) + fprintf(stdout, "DIMENSION voluntary '' absolute 1 %llu\n", RATES_DETAIL); +#endif +#if (PROCESSES_HAVE_NVOLCTX == 1) + fprintf(stdout, "DIMENSION involuntary '' absolute 1 %llu\n", RATES_DETAIL); +#endif #endif - fprintf(stdout, "CHART %s.%s_mem_usage '' '%s memory RSS usage' 'MiB' mem %s.mem_usage area 20055 %d\n", type, w->clean_name, title, type, update_every); - fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name); + fprintf(stdout, "CHART %s.%s_mem_usage '' '%s memory RSS usage' 'MiB' mem %s.mem_usage area 20055 %d\n", + type, string2str(w->clean_name), title, type, update_every); + fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, string2str(w->name)); fprintf(stdout, "CLABEL_COMMIT\n"); - fprintf(stdout, "DIMENSION rss '' absolute %ld %ld\n", 1L, 1024L); + fprintf(stdout, "DIMENSION rss '' absolute %ld %ld\n", 1L, 1024L * 1024L); -#if !defined(__APPLE__) - fprintf(stdout, "CHART %s.%s_vmem_usage '' '%s virtual memory size' 'MiB' mem %s.vmem_usage line 20065 %d\n", type, w->clean_name, title, type, update_every); - fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name); + fprintf(stdout, "CHART %s.%s_vmem_usage '' '%s virtual memory size' 'MiB' mem %s.vmem_usage line 20065 %d\n", + type, string2str(w->clean_name), title, type, update_every); + fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, string2str(w->name)); fprintf(stdout, "CLABEL_COMMIT\n"); - fprintf(stdout, "DIMENSION vmem '' absolute %ld %ld\n", 1L, 1024L); -#endif + fprintf(stdout, "DIMENSION vmem '' absolute %ld %ld\n", 1L, 1024L * 1024L); - fprintf(stdout, "CHART %s.%s_mem_page_faults '' '%s memory page faults' 'pgfaults/s' mem %s.mem_page_faults stacked 20060 %d\n", type, w->clean_name, title, type, update_every); - fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name); + fprintf(stdout, "CHART %s.%s_mem_page_faults '' '%s memory page faults' 'pgfaults/s' 
mem %s.mem_page_faults stacked 20060 %d\n", + type, string2str(w->clean_name), title, type, update_every); + fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, string2str(w->name)); fprintf(stdout, "CLABEL_COMMIT\n"); - fprintf(stdout, "DIMENSION major '' absolute 1 %llu\n", RATES_DETAIL); fprintf(stdout, "DIMENSION minor '' absolute 1 %llu\n", RATES_DETAIL); +#if (PROCESSES_HAVE_MAJFLT == 1) + fprintf(stdout, "DIMENSION major '' absolute 1 %llu\n", RATES_DETAIL); +#endif -#if !defined(__FreeBSD__) && !defined(__APPLE__) - fprintf(stdout, "CHART %s.%s_swap_usage '' '%s swap usage' 'MiB' mem %s.swap_usage area 20065 %d\n", type, w->clean_name, title, type, update_every); - fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name); +#if (PROCESSES_HAVE_VMSWAP == 1) + fprintf(stdout, "CHART %s.%s_swap_usage '' '%s swap usage' 'MiB' mem %s.swap_usage area 20065 %d\n", + type, string2str(w->clean_name), title, type, update_every); + fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, string2str(w->name)); fprintf(stdout, "CLABEL_COMMIT\n"); - fprintf(stdout, "DIMENSION swap '' absolute %ld %ld\n", 1L, 1024L); + fprintf(stdout, "DIMENSION swap '' absolute %ld %ld\n", 1L, 1024L * 1024L); #endif -#if !defined(__FreeBSD__) && !defined(__APPLE__) - fprintf(stdout, "CHART %s.%s_disk_physical_io '' '%s disk physical IO' 'KiB/s' disk %s.disk_physical_io area 20100 %d\n", type, w->clean_name, title, type, update_every); - fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name); +#if (PROCESSES_HAVE_PHYSICAL_IO == 1) + fprintf(stdout, "CHART %s.%s_disk_physical_io '' '%s disk physical IO' 'KiB/s' disk %s.disk_physical_io area 20100 %d\n", + type, string2str(w->clean_name), title, type, update_every); + fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, string2str(w->name)); fprintf(stdout, "CLABEL_COMMIT\n"); fprintf(stdout, "DIMENSION reads '' absolute 1 %llu\n", 1024LLU * RATES_DETAIL); fprintf(stdout, "DIMENSION writes '' absolute -1 %llu\n", 1024LLU * RATES_DETAIL); +#endif - 
fprintf(stdout, "CHART %s.%s_disk_logical_io '' '%s disk logical IO' 'KiB/s' disk %s.disk_logical_io area 20105 %d\n", type, w->clean_name, title, type, update_every); - fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name); +#if (PROCESSES_HAVE_LOGICAL_IO == 1) + fprintf(stdout, "CHART %s.%s_disk_logical_io '' '%s disk logical IO' 'KiB/s' disk %s.disk_logical_io area 20105 %d\n", + type, string2str(w->clean_name), title, type, update_every); + fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, string2str(w->name)); fprintf(stdout, "CLABEL_COMMIT\n"); fprintf(stdout, "DIMENSION reads '' absolute 1 %llu\n", 1024LLU * RATES_DETAIL); fprintf(stdout, "DIMENSION writes '' absolute -1 %llu\n", 1024LLU * RATES_DETAIL); -#else - fprintf(stdout, "CHART %s.%s_disk_physical_io '' '%s disk physical IO' 'blocks/s' disk %s.disk_physical_block_io area 20100 %d\n", type, w->clean_name, title, type, update_every); - fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name); - fprintf(stdout, "CLABEL_COMMIT\n"); - fprintf(stdout, "DIMENSION reads '' absolute 1 %llu\n", RATES_DETAIL); - fprintf(stdout, "DIMENSION writes '' absolute -1 %llu\n", RATES_DETAIL); #endif - fprintf(stdout, "CHART %s.%s_processes '' '%s processes' 'processes' processes %s.processes line 20150 %d\n", type, w->clean_name, title, type, update_every); - fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name); + fprintf(stdout, "CHART %s.%s_processes '' '%s processes' 'processes' processes %s.processes line 20150 %d\n", + type, string2str(w->clean_name), title, type, update_every); + fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, string2str(w->name)); fprintf(stdout, "CLABEL_COMMIT\n"); fprintf(stdout, "DIMENSION processes '' absolute 1 1\n"); - fprintf(stdout, "CHART %s.%s_threads '' '%s threads' 'threads' processes %s.threads line 20155 %d\n", type, w->clean_name, title, type, update_every); - fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name); + fprintf(stdout, "CHART %s.%s_threads '' '%s 
threads' 'threads' processes %s.threads line 20155 %d\n", + type, string2str(w->clean_name), title, type, update_every); + fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, string2str(w->name)); fprintf(stdout, "CLABEL_COMMIT\n"); fprintf(stdout, "DIMENSION threads '' absolute 1 1\n"); if (enable_file_charts) { - fprintf(stdout, "CHART %s.%s_fds_open_limit '' '%s open file descriptors limit' '%%' fds %s.fds_open_limit line 20200 %d\n", type, w->clean_name, title, type, update_every); - fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name); +#if (PROCESSES_HAVE_FDS == 1) + fprintf(stdout, "CHART %s.%s_fds_open_limit '' '%s open file descriptors limit' '%%' fds %s.fds_open_limit line 20200 %d\n", + type, string2str(w->clean_name), title, type, update_every); + fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, string2str(w->name)); fprintf(stdout, "CLABEL_COMMIT\n"); fprintf(stdout, "DIMENSION limit '' absolute 1 100\n"); +#endif - fprintf(stdout, "CHART %s.%s_fds_open '' '%s open files descriptors' 'fds' fds %s.fds_open stacked 20210 %d\n", type, w->clean_name, title, type, update_every); - fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name); + fprintf(stdout, "CHART %s.%s_fds_open '' '%s open files descriptors' 'fds' fds %s.fds_open stacked 20210 %d\n", + type, string2str(w->clean_name), title, type, update_every); + fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, string2str(w->name)); fprintf(stdout, "CLABEL_COMMIT\n"); +#if (PROCESSES_HAVE_FDS == 1) fprintf(stdout, "DIMENSION files '' absolute 1 1\n"); fprintf(stdout, "DIMENSION sockets '' absolute 1 1\n"); fprintf(stdout, "DIMENSION pipes '' absolute 1 1\n"); @@ -339,16 +397,22 @@ void send_charts_updates_to_netdata(struct target *root, const char *type, const fprintf(stdout, "DIMENSION signal '' absolute 1 1\n"); fprintf(stdout, "DIMENSION eventpolls '' absolute 1 1\n"); fprintf(stdout, "DIMENSION other '' absolute 1 1\n"); +#endif +#if (PROCESSES_HAVE_HANDLES == 1) + fprintf(stdout, "DIMENSION 
handles '' absolute 1 1\n"); +#endif } - fprintf(stdout, "CHART %s.%s_uptime '' '%s uptime' 'seconds' uptime %s.uptime line 20250 %d\n", type, w->clean_name, title, type, update_every); - fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name); + fprintf(stdout, "CHART %s.%s_uptime '' '%s uptime' 'seconds' uptime %s.uptime line 20250 %d\n", + type, string2str(w->clean_name), title, type, update_every); + fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, string2str(w->name)); fprintf(stdout, "CLABEL_COMMIT\n"); fprintf(stdout, "DIMENSION uptime '' absolute 1 1\n"); if (enable_detailed_uptime_charts) { - fprintf(stdout, "CHART %s.%s_uptime_summary '' '%s uptime summary' 'seconds' uptime %s.uptime_summary area 20255 %d\n", type, w->clean_name, title, type, update_every); - fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name); + fprintf(stdout, "CHART %s.%s_uptime_summary '' '%s uptime summary' 'seconds' uptime %s.uptime_summary area 20255 %d\n", + type, string2str(w->clean_name), title, type, update_every); + fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, string2str(w->name)); fprintf(stdout, "CLABEL_COMMIT\n"); fprintf(stdout, "DIMENSION min '' absolute 1 1\n"); fprintf(stdout, "DIMENSION avg '' absolute 1 1\n"); @@ -357,8 +421,8 @@ void send_charts_updates_to_netdata(struct target *root, const char *type, const } } +#if (PROCESSES_HAVE_STATE == 1) void send_proc_states_count(usec_t dt __maybe_unused) { -#if !defined(__FreeBSD__) && !defined(__APPLE__) static bool chart_added = false; // create chart for count of processes in different states if (!chart_added) { @@ -379,6 +443,6 @@ void send_proc_states_count(usec_t dt __maybe_unused) { send_SET(proc_states[i], proc_state_count[i]); } send_END(); -#endif } +#endif diff --git a/src/collectors/apps.plugin/apps_pid.c b/src/collectors/apps.plugin/apps_pid.c new file mode 100644 index 00000000000000..03841cd8001acb --- /dev/null +++ b/src/collectors/apps.plugin/apps_pid.c @@ -0,0 +1,759 @@ +// 
SPDX-License-Identifier: GPL-3.0-or-later + +#include "apps_plugin.h" + +static inline void link_pid_to_its_parent(struct pid_stat *p); + +// -------------------------------------------------------------------------------------------------------------------- +// The index of all pids + +#define SIMPLE_HASHTABLE_NAME _PID +#define SIMPLE_HASHTABLE_VALUE_TYPE struct pid_stat +#define SIMPLE_HASHTABLE_KEY_TYPE int32_t +#define SIMPLE_HASHTABLE_VALUE2KEY_FUNCTION pid_stat_to_pid_ptr +#define SIMPLE_HASHTABLE_COMPARE_KEYS_FUNCTION pid_ptr_eq +#define SIMPLE_HASHTABLE_SAMPLE_IMPLEMENTATION 0 +#include "libnetdata/simple_hashtable.h" + +static inline int32_t *pid_stat_to_pid_ptr(struct pid_stat *p) { + return &p->pid; +} + +static inline bool pid_ptr_eq(int32_t *a, int32_t *b) { + return *a == *b; +} + +struct { +#if (ALL_PIDS_ARE_READ_INSTANTLY == 0) + // Another pre-allocated list of all possible pids. + // We need it to assign them a unique sortlist id, so that we + // read parents before children. This is needed to prevent a situation where + // a child is found running, but until we read its parent, it has exited and + // its parent has accumulated its resources. 
+ struct { + size_t size; + struct pid_stat **array; + } sorted; +#endif + + struct { + size_t count; // the number of processes running + struct pid_stat *root; + SIMPLE_HASHTABLE_PID ht; + ARAL *aral; + } all_pids; +} pids = { 0 }; + +struct pid_stat *root_of_pids(void) { + return pids.all_pids.root; +} + +size_t all_pids_count(void) { + return pids.all_pids.count; +} + +void apps_pids_init(void) { + pids.all_pids.aral = aral_create("pid_stat", sizeof(struct pid_stat), 1, 65536, NULL, NULL, NULL, false, true); + simple_hashtable_init_PID(&pids.all_pids.ht, 1024); +} + +static inline uint64_t pid_hash(pid_t pid) { + return ((uint64_t)pid << 31) + (uint64_t)pid; // we remove 1 bit when shifting to make it different +} + +inline struct pid_stat *find_pid_entry(pid_t pid) { + if(pid < INIT_PID) return NULL; + + uint64_t hash = pid_hash(pid); + int32_t key = pid; + SIMPLE_HASHTABLE_SLOT_PID *sl = simple_hashtable_get_slot_PID(&pids.all_pids.ht, hash, &key, true); + return(SIMPLE_HASHTABLE_SLOT_DATA(sl)); +} + +struct pid_stat *get_or_allocate_pid_entry(pid_t pid) { + uint64_t hash = pid_hash(pid); + int32_t key = pid; + SIMPLE_HASHTABLE_SLOT_PID *sl = simple_hashtable_get_slot_PID(&pids.all_pids.ht, hash, &key, true); + struct pid_stat *p = SIMPLE_HASHTABLE_SLOT_DATA(sl); + if(likely(p)) + return p; + + p = aral_callocz(pids.all_pids.aral); + +#if (PROCESSES_HAVE_FDS == 1) + p->fds = mallocz(sizeof(struct pid_fd) * MAX_SPARE_FDS); + p->fds_size = MAX_SPARE_FDS; + init_pid_fds(p, 0, p->fds_size); +#endif + + p->pid = pid; + p->values[PDF_PROCESSES] = 1; + + DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(pids.all_pids.root, p, prev, next); + simple_hashtable_set_slot_PID(&pids.all_pids.ht, sl, hash, p); + pids.all_pids.count++; + + return p; +} + +void del_pid_entry(pid_t pid) { + uint64_t hash = pid_hash(pid); + int32_t key = pid; + SIMPLE_HASHTABLE_SLOT_PID *sl = simple_hashtable_get_slot_PID(&pids.all_pids.ht, hash, &key, true); + struct pid_stat *p = 
SIMPLE_HASHTABLE_SLOT_DATA(sl); + + if(unlikely(!p)) { + netdata_log_error("attempted to free pid %d that is not allocated.", pid); + return; + } + + debug_log("process %d %s exited, deleting it.", pid, pid_stat_comm(p)); + + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(pids.all_pids.root, p, prev, next); + simple_hashtable_del_slot_PID(&pids.all_pids.ht, sl); + +#if defined(OS_LINUX) + { + size_t i; + for(i = 0; i < p->fds_size; i++) + if(p->fds[i].filename) + freez(p->fds[i].filename); + } + + arl_free(p->status_arl); + + freez(p->fds_dirname); + freez(p->stat_filename); + freez(p->status_filename); + freez(p->limits_filename); + freez(p->io_filename); + freez(p->cmdline_filename); +#endif + +#if (PROCESSES_HAVE_FDS == 1) + freez(p->fds); +#endif + + string_freez(p->comm); + string_freez(p->cmdline); + aral_freez(pids.all_pids.aral, p); + + pids.all_pids.count--; +} + +// -------------------------------------------------------------------------------------------------------------------- + +static __thread pid_t current_pid; +static __thread kernel_uint_t current_pid_values[PDF_MAX]; + +void pid_collection_started(struct pid_stat *p) { + fatal_assert(sizeof(current_pid_values) == sizeof(p->values)); + current_pid = p->pid; + memcpy(current_pid_values, p->values, sizeof(current_pid_values)); + memset(p->values, 0, sizeof(p->values)); + p->values[PDF_PROCESSES] = 1; + p->read = true; +} + +void pid_collection_failed(struct pid_stat *p) { + fatal_assert(current_pid == p->pid); + fatal_assert(sizeof(current_pid_values) == sizeof(p->values)); + memcpy(p->values, current_pid_values, sizeof(p->values)); +} + +void pid_collection_completed(struct pid_stat *p) { + p->updated = true; + p->keep = false; + p->keeploops = 0; +} + +// -------------------------------------------------------------------------------------------------------------------- +// preloading of parents before their children + +#if (ALL_PIDS_ARE_READ_INSTANTLY == 0) +static inline size_t 
compute_new_sorted_size(size_t old_size, size_t required_size) { + size_t size = (required_size % 1024 == 0) ? required_size : required_size + 1024; + size = (size / 1024) * 1024; + + if(size < old_size * 2) + size = old_size * 2; + + return size; +} + +static int compar_pid_sortlist(const void *a, const void *b) { + const struct pid_stat *p1 = *(struct pid_stat **)a; + const struct pid_stat *p2 = *(struct pid_stat **)b; + + if(p1->sortlist > p2->sortlist) + return -1; + else + return 1; +} + +bool collect_parents_before_children(void) { + if (!pids.all_pids.count) return false; + + if (pids.all_pids.count > pids.sorted.size) { + size_t new_size = compute_new_sorted_size(pids.sorted.size, pids.all_pids.count); + freez(pids.sorted.array); + pids.sorted.array = mallocz(new_size * sizeof(struct pid_stat *)); + pids.sorted.size = new_size; + } + + size_t slc = 0; + struct pid_stat *p = NULL; + uint32_t sortlist = 1; + for (p = root_of_pids(); p && slc < pids.sorted.size; p = p->next) { + pids.sorted.array[slc++] = p; + + // assign a sortlist id to all it and its parents + for (struct pid_stat *pp = p; pp ; pp = pp->parent) + pp->sortlist = sortlist++; + } + size_t sorted = slc; + + static bool logged = false; + if (unlikely(p && !logged)) { + nd_log( + NDLS_COLLECTORS, + NDLP_ERR, + "Internal error: I was thinking I had %zu processes in my arrays, but it seems there are more.", + pids.all_pids.count); + logged = true; + } + + if (include_exited_childs && sorted) { + // Read parents before childs + // This is needed to prevent a situation where + // a child is found running, but until we read + // its parent, it has exited and its parent + // has accumulated its resources. 
+ + qsort((void *)pids.sorted.array, sorted, sizeof(struct pid_stat *), compar_pid_sortlist); + + // we forward read all running processes + // incrementally_collect_data_for_pid() is smart enough, + // not to read the same pid twice per iteration + for (slc = 0; slc < sorted; slc++) { + p = pids.sorted.array[slc]; + incrementally_collect_data_for_pid_stat(p, NULL); + } + } + + return true; +} +#endif + +// -------------------------------------------------------------------------------------------------------------------- + +static void log_parent_loop(struct pid_stat *p) { + CLEAN_BUFFER *wb = buffer_create(0, NULL); + buffer_sprintf(wb, "original pid %d (%s)", p->pid, string2str(p->comm)); + + size_t loops = 0; + for(struct pid_stat *t = p->parent; t && loops < 2 ;t = t->parent) { + buffer_sprintf(wb, " => %d (%s)", t->pid, string2str(t->comm)); + if(t == p->parent) loops++; + } + + buffer_sprintf(wb, " : broke loop at %d (%s)", p->pid, string2str(p->comm)); + + errno_clear(); + nd_log(NDLS_COLLECTORS, NDLP_WARNING, "Parents loop detected: %s", buffer_tostring(wb)); +} + +static inline bool is_already_a_parent(struct pid_stat *p, struct pid_stat *pp) { + for(struct pid_stat *t = pp; t ;t = t->parent) + if(t == p) return true; + + return false; +} + +static inline void link_pid_to_its_parent(struct pid_stat *p) { + p->parent = NULL; + if(unlikely(!p->ppid)) + return; + + if(unlikely(p->ppid == p->pid)) { + nd_log(NDLS_COLLECTORS, NDLP_WARNING, + "Process %d (%s) states parent %d, which is the same PID. 
Ignoring it.", + p->pid, string2str(p->comm), p->ppid); + p->ppid = 0; + return; + } + + struct pid_stat *pp = find_pid_entry(p->ppid); + if(likely(pp)) { + fatal_assert(pp->pid == p->ppid); + + if(!is_already_a_parent(p, pp)) { + p->parent = pp; + pp->children_count++; + } + else { + p->parent = pp; + log_parent_loop(p); + p->parent = NULL; + p->ppid = 0; + } + } +#if (PPID_SHOULD_BE_RUNNING == 1) + else { + nd_log(NDLS_COLLECTORS, NDLP_WARNING, + "pid %d %s states parent %d, but the later does not exist.", + p->pid, pid_stat_comm(p), p->ppid); + } +#endif +} + +static inline void link_all_processes_to_their_parents(void) { + // link all children to their parents + // and update children count on parents + for(struct pid_stat *p = root_of_pids(); p ; p = p->next) + link_pid_to_its_parent(p); +} + +// -------------------------------------------------------------------------------------------------------------------- + +void update_pid_comm(struct pid_stat *p, const char *comm) { + // some process names have ( and ), remove the parenthesis + size_t len = strlen(comm); + char buf[len + 1]; + if(comm[0] == '(' && comm[len - 1] == ')') { + memcpy(buf, &comm[1], len - 2); + buf[len - 2] = '\0'; + } + else + memcpy(buf, comm, sizeof(buf)); + + // check if the comm is changed + if(!p->comm || strcmp(pid_stat_comm(p), buf) != 0) { + // it is changed + + string_freez(p->comm); + p->comm = string_strdupz(buf); + +#if (PROCESSES_HAVE_CMDLINE == 1) + if(likely(proc_pid_cmdline_is_needed)) + managed_log(p, PID_LOG_CMDLINE, read_proc_pid_cmdline(p)); +#endif + + // the process changes comm, we may have to reassign it to + // an apps_groups.conf target. 
+ p->target = NULL; + } +} + +// -------------------------------------------------------------------------------------------------------------------- + +#if (PROCESSES_HAVE_CPU_CHILDREN_TIME == 1) || (PROCESSES_HAVE_CHILDREN_FLTS == 1) +//static inline int debug_print_process_and_parents(struct pid_stat *p, usec_t time) { +// char *prefix = "\\_ "; +// int indent = 0; +// +// if(p->parent) +// indent = debug_print_process_and_parents(p->parent, p->stat_collected_usec); +// else +// prefix = " > "; +// +// char buffer[indent + 1]; +// int i; +// +// for(i = 0; i < indent ;i++) buffer[i] = ' '; +// buffer[i] = '\0'; +// +// fprintf(stderr, " %s %s%s (%d %s %"PRIu64"" +// , buffer +// , prefix +// , pid_stat_comm(p) +// , p->pid +// , p->updated?"running":"exited" +// , p->stat_collected_usec - time +// ); +// +// if(p->values[PDF_UTIME]) fprintf(stderr, " utime=" KERNEL_UINT_FORMAT, p->values[PDF_UTIME]); +// if(p->values[PDF_STIME]) fprintf(stderr, " stime=" KERNEL_UINT_FORMAT, p->values[PDF_STIME]); +//#if (PROCESSES_HAVE_CPU_GUEST_TIME == 1) +// if(p->values[PDF_GTIME]) fprintf(stderr, " gtime=" KERNEL_UINT_FORMAT, p->values[PDF_GTIME]); +//#endif +//#if (PROCESSES_HAVE_CPU_CHILDREN_TIME == 1) +// if(p->values[PDF_CUTIME]) fprintf(stderr, " cutime=" KERNEL_UINT_FORMAT, p->values[PDF_CUTIME]); +// if(p->values[PDF_CSTIME]) fprintf(stderr, " cstime=" KERNEL_UINT_FORMAT, p->values[PDF_CSTIME]); +//#if (PROCESSES_HAVE_CPU_GUEST_TIME == 1) +// if(p->values[PDF_CGTIME]) fprintf(stderr, " cgtime=" KERNEL_UINT_FORMAT, p->values[PDF_CGTIME]); +//#endif +//#endif +// if(p->values[PDF_MINFLT]) fprintf(stderr, " minflt=" KERNEL_UINT_FORMAT, p->values[PDF_MINFLT]); +//#if (PROCESSES_HAVE_MAJFLT == 1) +// if(p->values[PDF_MAJFLT]) fprintf(stderr, " majflt=" KERNEL_UINT_FORMAT, p->values[PDF_MAJFLT]); +//#endif +//#if (PROCESSES_HAVE_CHILDREN_FLTS == 1) +// if(p->values[PDF_CMINFLT]) fprintf(stderr, " cminflt=" KERNEL_UINT_FORMAT, p->values[PDF_CMINFLT]); +// 
if(p->values[PDF_CMAJFLT]) fprintf(stderr, " cmajflt=" KERNEL_UINT_FORMAT, p->values[PDF_CMAJFLT]); +//#endif +// fprintf(stderr, ")\n"); +// +// return indent + 1; +//} +// +//static inline void debug_print_process_tree(struct pid_stat *p, char *msg __maybe_unused) { +// debug_log("%s: process %s (%d, %s) with parents:", msg, pid_stat_comm(p), p->pid, p->updated?"running":"exited"); +// debug_print_process_and_parents(p, p->stat_collected_usec); +//} +// +//static inline void debug_find_lost_child(struct pid_stat *pe, kernel_uint_t lost, int type) { +// int found = 0; +// struct pid_stat *p = NULL; +// +// for(p = root_of_pids(); p ; p = p->next) { +// if(p == pe) continue; +// +// switch(type) { +// case 1: +//#if (PROCESSES_HAVE_CHILDREN_FLTS == 1) +// if(p->values[PDF_CMINFLT] > lost) { +// fprintf(stderr, " > process %d (%s) could use the lost exited child minflt " KERNEL_UINT_FORMAT " of process %d (%s)\n", +// p->pid, pid_stat_comm(p), lost, pe->pid, pid_stat_comm(pe)); +// found++; +// } +//#endif +// break; +// +// case 2: +//#if (PROCESSES_HAVE_CHILDREN_FLTS == 1) +// if(p->values[PDF_CMAJFLT] > lost) { +// fprintf(stderr, " > process %d (%s) could use the lost exited child majflt " KERNEL_UINT_FORMAT " of process %d (%s)\n", +// p->pid, pid_stat_comm(p), lost, pe->pid, pid_stat_comm(pe)); +// found++; +// } +//#endif +// break; +// +// case 3: +//#if (PROCESSES_HAVE_CPU_CHILDREN_TIME == 1) +// if(p->values[PDF_CUTIME] > lost) { +// fprintf(stderr, " > process %d (%s) could use the lost exited child utime " KERNEL_UINT_FORMAT " of process %d (%s)\n", +// p->pid, pid_stat_comm(p), lost, pe->pid, pid_stat_comm(pe)); +// found++; +// } +//#endif +// break; +// +// case 4: +//#if (PROCESSES_HAVE_CPU_CHILDREN_TIME == 1) +// if(p->values[PDF_CSTIME] > lost) { +// fprintf(stderr, " > process %d (%s) could use the lost exited child stime " KERNEL_UINT_FORMAT " of process %d (%s)\n", +// p->pid, pid_stat_comm(p), lost, pe->pid, pid_stat_comm(pe)); +// found++; +// 
} +//#endif +// break; +// +// case 5: +//#if (PROCESSES_HAVE_CPU_CHILDREN_TIME == 1) && (PROCESSES_HAVE_CPU_GUEST_TIME == 1) +// if(p->values[PDF_CGTIME] > lost) { +// fprintf(stderr, " > process %d (%s) could use the lost exited child gtime " KERNEL_UINT_FORMAT " of process %d (%s)\n", +// p->pid, pid_stat_comm(p), lost, pe->pid, pid_stat_comm(pe)); +// found++; +// } +//#endif +// break; +// } +// } +// +// if(!found) { +// switch(type) { +// case 1: +// fprintf(stderr, " > cannot find any process to use the lost exited child minflt " KERNEL_UINT_FORMAT " of process %d (%s)\n", +// lost, pe->pid, pid_stat_comm(pe)); +// break; +// +// case 2: +// fprintf(stderr, " > cannot find any process to use the lost exited child majflt " KERNEL_UINT_FORMAT " of process %d (%s)\n", +// lost, pe->pid, pid_stat_comm(pe)); +// break; +// +// case 3: +// fprintf(stderr, " > cannot find any process to use the lost exited child utime " KERNEL_UINT_FORMAT " of process %d (%s)\n", +// lost, pe->pid, pid_stat_comm(pe)); +// break; +// +// case 4: +// fprintf(stderr, " > cannot find any process to use the lost exited child stime " KERNEL_UINT_FORMAT " of process %d (%s)\n", +// lost, pe->pid, pid_stat_comm(pe)); +// break; +// +// case 5: +// fprintf(stderr, " > cannot find any process to use the lost exited child gtime " KERNEL_UINT_FORMAT " of process %d (%s)\n", +// lost, pe->pid, pid_stat_comm(pe)); +// break; +// } +// } +//} + +static inline kernel_uint_t remove_exited_child_from_parent(kernel_uint_t *field, kernel_uint_t *pfield) { + kernel_uint_t absorbed = 0; + + if(*field > *pfield) { + absorbed += *pfield; + *field -= *pfield; + *pfield = 0; + } + else { + absorbed += *field; + *pfield -= *field; + *field = 0; + } + + return absorbed; +} + +static inline void process_exited_pids(void) { + /* + * WHY WE NEED THIS? 
+ * + * When a child process exits in Linux, its accumulated user time (utime) and its children's accumulated + * user time (cutime) are added to the parent's cutime. This means the parent process's cutime reflects + * the total user time spent by its exited children and their descendants + * + * This results in spikes in the charts. + * In this function we remove the exited children resources from the parent's cutime, but only for the + * children we have been monitoring and to the degree we have data for them. Since previously running + * children have already been reported by us, removing them is the right thing to do. + * + */ + + for(struct pid_stat *p = root_of_pids(); p ; p = p->next) { + if(p->updated || !p->stat_collected_usec) + continue; + + bool have_work = false; + +#if (PROCESSES_HAVE_CPU_CHILDREN_TIME == 1) + kernel_uint_t utime = (p->raw[PDF_UTIME] + p->raw[PDF_CUTIME]) * CPU_TO_NANOSECONDCORES; + kernel_uint_t stime = (p->raw[PDF_STIME] + p->raw[PDF_CSTIME]) * CPU_TO_NANOSECONDCORES; + if(utime + stime) have_work = true; +#if (PROCESSES_HAVE_CPU_GUEST_TIME == 1) + kernel_uint_t gtime = (p->raw[PDF_GTIME] + p->raw[PDF_CGTIME]) * CPU_TO_NANOSECONDCORES; + if(gtime) have_work = true; +#endif +#endif + +#if (PROCESSES_HAVE_CHILDREN_FLTS == 1) + kernel_uint_t minflt = (p->raw[PDF_MINFLT] + p->raw[PDF_CMINFLT]) * RATES_DETAIL; + if(minflt) have_work = true; +#if (PROCESSES_HAVE_MAJFLT == 1) + kernel_uint_t majflt = (p->raw[PDF_MAJFLT] + p->raw[PDF_CMAJFLT]) * RATES_DETAIL; + if(majflt) have_work = true; +#endif +#endif + + if(!have_work) + continue; + +// if(unlikely(debug_enabled)) { +// debug_log("Absorb %s (%d %s total resources: utime=" KERNEL_UINT_FORMAT " stime=" KERNEL_UINT_FORMAT " gtime=" KERNEL_UINT_FORMAT " minflt=" KERNEL_UINT_FORMAT " majflt=" KERNEL_UINT_FORMAT ")" +// , pid_stat_comm(p) +// , p->pid +// , p->updated?"running":"exited" +// , utime +// , stime +// , gtime +// , minflt +// , majflt +// ); +// debug_print_process_tree(p, 
"Searching parents"); +// } + + for(struct pid_stat *pp = p->parent; pp ; pp = pp->parent) { + if(!pp->updated) continue; + + kernel_uint_t absorbed; +#if (PROCESSES_HAVE_CPU_CHILDREN_TIME == 1) + absorbed = remove_exited_child_from_parent(&utime, &pp->values[PDF_CUTIME]); +// if(unlikely(debug_enabled && absorbed)) +// debug_log(" > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " utime (remaining: " KERNEL_UINT_FORMAT ")", +// pid_stat_comm(pp), pp->pid, pp->updated?"running":"exited", absorbed, utime); + + absorbed = remove_exited_child_from_parent(&stime, &pp->values[PDF_CSTIME]); +// if(unlikely(debug_enabled && absorbed)) +// debug_log(" > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " stime (remaining: " KERNEL_UINT_FORMAT ")", +// pid_stat_comm(pp), pp->pid, pp->updated?"running":"exited", absorbed, stime); + +#if (PROCESSES_HAVE_CPU_GUEST_TIME == 1) + absorbed = remove_exited_child_from_parent(&gtime, &pp->values[PDF_CGTIME]); +// if(unlikely(debug_enabled && absorbed)) +// debug_log(" > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " gtime (remaining: " KERNEL_UINT_FORMAT ")", +// pid_stat_comm(pp), pp->pid, pp->updated?"running":"exited", absorbed, gtime); +#endif +#endif + +#if (PROCESSES_HAVE_CHILDREN_FLTS == 1) + absorbed = remove_exited_child_from_parent(&minflt, &pp->values[PDF_CMINFLT]); +// if(unlikely(debug_enabled && absorbed)) +// debug_log(" > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " minflt (remaining: " KERNEL_UINT_FORMAT ")", +// pid_stat_comm(pp), pp->pid, pp->updated?"running":"exited", absorbed, minflt); + +#if (PROCESSES_HAVE_MAJFLT == 1) + absorbed = remove_exited_child_from_parent(&majflt, &pp->values[PDF_CMAJFLT]); +// if(unlikely(debug_enabled && absorbed)) +// debug_log(" > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " majflt (remaining: " KERNEL_UINT_FORMAT ")", +// pid_stat_comm(pp), pp->pid, pp->updated?"running":"exited", absorbed, majflt); +#endif +#endif + + (void)absorbed; + break; + } + +// 
if(unlikely(debug_enabled)) { +// if(utime) debug_find_lost_child(p, utime, 3); +// if(stime) debug_find_lost_child(p, stime, 4); +// if(gtime) debug_find_lost_child(p, gtime, 5); +// if(minflt) debug_find_lost_child(p, minflt, 1); +// if(majflt) debug_find_lost_child(p, majflt, 2); +// } + +// debug_log(" > remaining resources - KEEP - for another loop: %s (%d %s total resources: utime=" KERNEL_UINT_FORMAT " stime=" KERNEL_UINT_FORMAT " gtime=" KERNEL_UINT_FORMAT " minflt=" KERNEL_UINT_FORMAT " majflt=" KERNEL_UINT_FORMAT ")" +// , pid_stat_comm(p) +// , p->pid +// , p->updated?"running":"exited" +// , utime +// , stime +// , gtime +// , minflt +// , majflt +// ); + + bool done = true; + +#if (PROCESSES_HAVE_CPU_CHILDREN_TIME == 1) + p->values[PDF_UTIME] = utime / CPU_TO_NANOSECONDCORES; + p->values[PDF_STIME] = stime / CPU_TO_NANOSECONDCORES; + p->values[PDF_CUTIME] = 0; + p->values[PDF_CSTIME] = 0; + if(utime + stime) done = false; +#if (PROCESSES_HAVE_CPU_GUEST_TIME == 1) + p->values[PDF_GTIME] = gtime / CPU_TO_NANOSECONDCORES; + p->values[PDF_CGTIME] = 0; + if(gtime) done = false; +#endif +#endif + +#if (PROCESSES_HAVE_CHILDREN_FLTS == 1) + p->values[PDF_MINFLT] = minflt / RATES_DETAIL; + p->values[PDF_CMINFLT] = 0; + if(minflt) done = false; +#if (PROCESSES_HAVE_MAJFLT == 1) + p->values[PDF_MAJFLT] = majflt / RATES_DETAIL; + p->values[PDF_CMAJFLT] = 0; + if(majflt) done = false; +#endif +#endif + + p->keep = !done; + + if(p->keep) { + // we need to keep its exited parents too, to ensure we will have + // the information to reach the running parent at the next iteration + for (struct pid_stat *pp = p->parent; pp; pp = pp->parent) { + if (pp->updated) break; + pp->keep = true; + } + } + } +} +#endif + +// -------------------------------------------------------------------------------------------------------------------- +// the main loop for collecting process data + +static inline void clear_pid_rates(struct pid_stat *p) { + p->values[PDF_UTIME] = 0; + 
p->values[PDF_STIME] = 0; + +#if (PROCESSES_HAVE_CPU_GUEST_TIME == 1) + p->values[PDF_GTIME] = 0; +#endif + +#if (PROCESSES_HAVE_CPU_CHILDREN_TIME == 1) + p->values[PDF_CUTIME] = 0; + p->values[PDF_CSTIME] = 0; +#if (PROCESSES_HAVE_CPU_GUEST_TIME == 1) + p->values[PDF_CGTIME] = 0; +#endif +#endif + + p->values[PDF_MINFLT] = 0; +#if (PROCESSES_HAVE_MAJFLT == 1) + p->values[PDF_MAJFLT] = 0; +#endif + +#if (PROCESSES_HAVE_CHILDREN_FLTS == 1) + p->values[PDF_CMINFLT] = 0; + p->values[PDF_CMAJFLT] = 0; +#endif + +#if (PROCESSES_HAVE_LOGICAL_IO == 1) + p->values[PDF_LREAD] = 0; + p->values[PDF_LWRITE] = 0; +#endif + +#if (PROCESSES_HAVE_PHYSICAL_IO == 1) + p->values[PDF_PREAD] = 0; + p->values[PDF_PWRITE] = 0; +#endif + +#if (PROCESSES_HAVE_IO_CALLS == 1) + p->values[PDF_OREAD] = 0; + p->values[PDF_OWRITE] = 0; +#endif + +#if (PROCESSES_HAVE_VOLCTX == 1) + p->values[PDF_VOLCTX] = 0; +#endif + +#if (PROCESSES_HAVE_NVOLCTX == 1) + p->values[PDF_NVOLCTX] = 0; +#endif +} + +bool collect_data_for_all_pids(void) { + // mark all pids as unread +#if (INCREMENTAL_DATA_COLLECTION == 0) + usec_t now_mon_ut = now_monotonic_usec(); +#endif + + for(struct pid_stat *p = root_of_pids(); p ; p = p->next) { + p->read = p->updated = p->merged = false; + p->children_count = 0; + +#if (INCREMENTAL_DATA_COLLECTION == 0) + p->last_stat_collected_usec = p->stat_collected_usec; + p->last_io_collected_usec = p->io_collected_usec; + p->stat_collected_usec = p->io_collected_usec = now_mon_ut; +#endif + } + + // collect data for all pids + if(!OS_FUNCTION(apps_os_collect_all_pids)()) + return false; + + // build the process tree + link_all_processes_to_their_parents(); + +#if (PROCESSES_HAVE_CPU_CHILDREN_TIME == 1) || (PROCESSES_HAVE_CHILDREN_FLTS == 1) + // merge exited pids to their parents + process_exited_pids(); +#endif + + // the first iteration needs to be eliminated + // since we are looking for rates + if(unlikely(global_iterations_counter == 1)) { + for(struct pid_stat *p = root_of_pids(); 
p ; p = p->next) + if(p->read) clear_pid_rates(p); + } + + return true; +} diff --git a/src/collectors/apps.plugin/apps_pid_files.c b/src/collectors/apps.plugin/apps_pid_files.c new file mode 100644 index 00000000000000..097f487750d898 --- /dev/null +++ b/src/collectors/apps.plugin/apps_pid_files.c @@ -0,0 +1,430 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "apps_plugin.h" + +#if (PROCESSES_HAVE_FDS == 1) +// ---------------------------------------------------------------------------- +// file descriptor +// +// this is used to keep a global list of all open files of the system. +// it is needed in order to calculate the unique files processes have open. + +#define FILE_DESCRIPTORS_INCREASE_STEP 100 + +// types for struct file_descriptor->type +typedef enum __attribute__((packed)) fd_filetype { + FILETYPE_OTHER, + FILETYPE_FILE, + FILETYPE_PIPE, + FILETYPE_SOCKET, + FILETYPE_INOTIFY, + FILETYPE_EVENTFD, + FILETYPE_EVENTPOLL, + FILETYPE_TIMERFD, + FILETYPE_SIGNALFD +} FD_FILETYPE; + +struct file_descriptor { + avl_t avl; + +#ifdef NETDATA_INTERNAL_CHECKS + uint32_t magic; +#endif /* NETDATA_INTERNAL_CHECKS */ + + const char *name; + uint32_t hash; + uint32_t count; + uint32_t pos; + FD_FILETYPE type; +} *all_files = NULL; + +// ---------------------------------------------------------------------------- + +static inline void reallocate_target_fds(struct target *w) { + if(unlikely(!w)) + return; + + if(unlikely(!w->target_fds || w->target_fds_size < all_files_size)) { + w->target_fds = reallocz(w->target_fds, sizeof(int) * all_files_size); + memset(&w->target_fds[w->target_fds_size], 0, sizeof(int) * (all_files_size - w->target_fds_size)); + w->target_fds_size = all_files_size; + } +} + +static void aggregage_fd_type_on_openfds(FD_FILETYPE type, struct openfds *openfds) { + switch(type) { + case FILETYPE_SOCKET: + openfds->sockets++; + break; + + case FILETYPE_FILE: + openfds->files++; + break; + + case FILETYPE_PIPE: + openfds->pipes++; + break; + + 
case FILETYPE_INOTIFY: + openfds->inotifies++; + break; + + case FILETYPE_EVENTFD: + openfds->eventfds++; + break; + + case FILETYPE_TIMERFD: + openfds->timerfds++; + break; + + case FILETYPE_SIGNALFD: + openfds->signalfds++; + break; + + case FILETYPE_EVENTPOLL: + openfds->eventpolls++; + break; + + case FILETYPE_OTHER: + openfds->other++; + break; + } +} + +static inline void aggregate_fd_on_target(int fd, struct target *w) { + if(unlikely(!w)) + return; + + if(unlikely(w->target_fds[fd])) { + // it is already aggregated + // just increase its usage counter + w->target_fds[fd]++; + return; + } + + // increase its usage counter + // so that we will not add it again + w->target_fds[fd]++; + + aggregage_fd_type_on_openfds(all_files[fd].type, &w->openfds); +} + +void aggregate_pid_fds_on_targets(struct pid_stat *p) { + + if(unlikely(!p->updated)) { + // the process is not running + return; + } + + struct target +#if (PROCESSES_HAVE_UID == 1) + *u = p->uid_target, +#endif +#if (PROCESSES_HAVE_GID == 1) + *g = p->gid_target, +#endif + *w = p->target; + + reallocate_target_fds(w); +#if (PROCESSES_HAVE_UID == 1) + reallocate_target_fds(u); +#endif +#if (PROCESSES_HAVE_GID == 1) + reallocate_target_fds(g); +#endif + +#if (PROCESSES_HAVE_FDS == 1) + p->openfds.files = 0; + p->openfds.pipes = 0; + p->openfds.sockets = 0; + p->openfds.inotifies = 0; + p->openfds.eventfds = 0; + p->openfds.timerfds = 0; + p->openfds.signalfds = 0; + p->openfds.eventpolls = 0; + p->openfds.other = 0; + + uint32_t c, size = p->fds_size; + struct pid_fd *fds = p->fds; + for(c = 0; c < size ;c++) { + int fd = fds[c].fd; + + if(likely(fd <= 0 || (uint32_t)fd >= all_files_size)) + continue; + + aggregage_fd_type_on_openfds(all_files[fd].type, &p->openfds); + + aggregate_fd_on_target(fd, w); +#if (PROCESSES_HAVE_UID == 1) + aggregate_fd_on_target(fd, u); +#endif +#if (PROCESSES_HAVE_GID == 1) + aggregate_fd_on_target(fd, g); +#endif + } +#endif +} + +// 
---------------------------------------------------------------------------- + +int file_descriptor_compare(void* a, void* b) { +#ifdef NETDATA_INTERNAL_CHECKS + if(((struct file_descriptor *)a)->magic != 0x0BADCAFE || ((struct file_descriptor *)b)->magic != 0x0BADCAFE) + netdata_log_error("Corrupted index data detected. Please report this."); +#endif /* NETDATA_INTERNAL_CHECKS */ + + if(((struct file_descriptor *)a)->hash < ((struct file_descriptor *)b)->hash) + return -1; + + else if(((struct file_descriptor *)a)->hash > ((struct file_descriptor *)b)->hash) + return 1; + + else + return strcmp(((struct file_descriptor *)a)->name, ((struct file_descriptor *)b)->name); +} + +// int file_descriptor_iterator(avl_t *a) { if(a) {}; return 0; } + +avl_tree_type all_files_index = { + NULL, + file_descriptor_compare +}; + +static struct file_descriptor *file_descriptor_find(const char *name, uint32_t hash) { + struct file_descriptor tmp; + tmp.hash = (hash)?hash:simple_hash(name); + tmp.name = name; + tmp.count = 0; + tmp.pos = 0; +#ifdef NETDATA_INTERNAL_CHECKS + tmp.magic = 0x0BADCAFE; +#endif /* NETDATA_INTERNAL_CHECKS */ + + return (struct file_descriptor *)avl_search(&all_files_index, (avl_t *) &tmp); +} + +#define file_descriptor_add(fd) avl_insert(&all_files_index, (avl_t *)(fd)) +#define file_descriptor_remove(fd) avl_remove(&all_files_index, (avl_t *)(fd)) + +// ---------------------------------------------------------------------------- + +void file_descriptor_not_used(int id) { + if(id > 0 && (uint32_t)id < all_files_size) { + +#ifdef NETDATA_INTERNAL_CHECKS + if(all_files[id].magic != 0x0BADCAFE) { + netdata_log_error("Ignoring request to remove empty file id %d.", id); + return; + } +#endif /* NETDATA_INTERNAL_CHECKS */ + + debug_log("decreasing slot %d (count = %d).", id, all_files[id].count); + + if(all_files[id].count > 0) { + all_files[id].count--; + + if(!all_files[id].count) { + debug_log(" >> slot %d is empty.", id); + + 
if(unlikely(file_descriptor_remove(&all_files[id]) != (void *)&all_files[id])) + netdata_log_error("INTERNAL ERROR: removal of unused fd from index, removed a different fd"); + +#ifdef NETDATA_INTERNAL_CHECKS + all_files[id].magic = 0x00000000; +#endif /* NETDATA_INTERNAL_CHECKS */ + all_files_len--; + } + } + else + netdata_log_error("Request to decrease counter of fd %d (%s), while the use counter is 0", + id, all_files[id].name); + } + else + netdata_log_error("Request to decrease counter of fd %d, which is outside the array size (1 to %"PRIu32")", + id, all_files_size); +} + +static inline void all_files_grow() { + void *old = all_files; + + // there is no empty slot + all_files = reallocz(all_files, (all_files_size + FILE_DESCRIPTORS_INCREASE_STEP) * sizeof(struct file_descriptor)); + + // if the address changed, we have to rebuild the index + // since all pointers are now invalid + + if(unlikely(old && old != (void *)all_files)) { + all_files_index.root = NULL; + for(uint32_t i = 0; i < all_files_size; i++) { + if(!all_files[i].count) continue; + if(unlikely(file_descriptor_add(&all_files[i]) != (void *)&all_files[i])) + netdata_log_error("INTERNAL ERROR: duplicate indexing of fd during realloc."); + } + } + + // initialize the newly added entries + + for(uint32_t i = all_files_size; i < (all_files_size + FILE_DESCRIPTORS_INCREASE_STEP); i++) { + all_files[i].count = 0; + all_files[i].name = NULL; +#ifdef NETDATA_INTERNAL_CHECKS + all_files[i].magic = 0x00000000; +#endif /* NETDATA_INTERNAL_CHECKS */ + all_files[i].pos = i; + } + + if(unlikely(!all_files_size)) all_files_len = 1; + all_files_size += FILE_DESCRIPTORS_INCREASE_STEP; +} + +static inline uint32_t file_descriptor_set_on_empty_slot(const char *name, uint32_t hash, FD_FILETYPE type) { + // check we have enough memory to add it + if(!all_files || all_files_len == all_files_size) + all_files_grow(); + + debug_log(" >> searching for empty slot."); + + // search for an empty slot + + static int last_pos 
= 0; + uint32_t i, c; + for(i = 0, c = last_pos ; i < all_files_size ; i++, c++) { + if(c >= all_files_size) c = 0; + if(c == 0) continue; + + if(!all_files[c].count) { + debug_log(" >> Examining slot %d.", c); + +#ifdef NETDATA_INTERNAL_CHECKS + if(all_files[c].magic == 0x0BADCAFE && all_files[c].name && file_descriptor_find(all_files[c].name, all_files[c].hash)) + netdata_log_error("fd on position %"PRIu32" is not cleared properly. It still has %s in it.", c, all_files[c].name); +#endif /* NETDATA_INTERNAL_CHECKS */ + + debug_log(" >> %s fd position %d for %s (last name: %s)", all_files[c].name?"re-using":"using", c, name, all_files[c].name); + + freez((void *)all_files[c].name); + all_files[c].name = NULL; + last_pos = c; + break; + } + } + + all_files_len++; + + if(i == all_files_size) { + fatal("We should find an empty slot, but there isn't any"); + exit(1); + } + // else we have an empty slot in 'c' + + debug_log(" >> updating slot %d.", c); + + all_files[c].name = strdupz(name); + all_files[c].hash = hash; + all_files[c].type = type; + all_files[c].pos = c; + all_files[c].count = 1; +#ifdef NETDATA_INTERNAL_CHECKS + all_files[c].magic = 0x0BADCAFE; +#endif /* NETDATA_INTERNAL_CHECKS */ + if(unlikely(file_descriptor_add(&all_files[c]) != (void *)&all_files[c])) + netdata_log_error("INTERNAL ERROR: duplicate indexing of fd."); + + return c; +} + +uint32_t file_descriptor_find_or_add(const char *name, uint32_t hash) { + if(unlikely(!hash)) + hash = simple_hash(name); + + debug_log("adding or finding name '%s' with hash %u", name, hash); + + struct file_descriptor *fd = file_descriptor_find(name, hash); + if(fd) { + // found + debug_log(" >> found on slot %d", fd->pos); + + fd->count++; + return fd->pos; + } + // not found + + FD_FILETYPE type; + if(likely(name[0] == '/')) type = FILETYPE_FILE; + else if(likely(strncmp(name, "pipe:", 5) == 0)) type = FILETYPE_PIPE; + else if(likely(strncmp(name, "socket:", 7) == 0)) type = FILETYPE_SOCKET; + else 
if(likely(strncmp(name, "anon_inode:", 11) == 0)) { + const char *t = &name[11]; + + if(strcmp(t, "inotify") == 0) type = FILETYPE_INOTIFY; + else if(strcmp(t, "[eventfd]") == 0) type = FILETYPE_EVENTFD; + else if(strcmp(t, "[eventpoll]") == 0) type = FILETYPE_EVENTPOLL; + else if(strcmp(t, "[timerfd]") == 0) type = FILETYPE_TIMERFD; + else if(strcmp(t, "[signalfd]") == 0) type = FILETYPE_SIGNALFD; + else { + debug_log("UNKNOWN anonymous inode: %s", name); + type = FILETYPE_OTHER; + } + } + else if(likely(strcmp(name, "inotify") == 0)) type = FILETYPE_INOTIFY; + else { + debug_log("UNKNOWN linkname: %s", name); + type = FILETYPE_OTHER; + } + + return file_descriptor_set_on_empty_slot(name, hash, type); +} + +void clear_pid_fd(struct pid_fd *pfd) { + pfd->fd = 0; + +#if defined(OS_LINUX) + pfd->link_hash = 0; + pfd->inode = 0; + pfd->cache_iterations_counter = 0; + pfd->cache_iterations_reset = 0; +#endif +} + +void make_all_pid_fds_negative(struct pid_stat *p) { + struct pid_fd *pfd = p->fds, *pfdend = &p->fds[p->fds_size]; + while(pfd < pfdend) { + pfd->fd = -(pfd->fd); + pfd++; + } +} + +static inline void cleanup_negative_pid_fds(struct pid_stat *p) { + struct pid_fd *pfd = p->fds, *pfdend = &p->fds[p->fds_size]; + + while(pfd < pfdend) { + int fd = pfd->fd; + + if(unlikely(fd < 0)) { + file_descriptor_not_used(-(fd)); + clear_pid_fd(pfd); + } + + pfd++; + } +} + +void init_pid_fds(struct pid_stat *p, size_t first, size_t size) { + struct pid_fd *pfd = &p->fds[first], *pfdend = &p->fds[first + size]; + + while(pfd < pfdend) { +#if defined(OS_LINUX) + pfd->filename = NULL; +#endif + clear_pid_fd(pfd); + pfd++; + } +} + +int read_pid_file_descriptors(struct pid_stat *p, void *ptr) { + bool ret = OS_FUNCTION(apps_os_read_pid_fds)(p, ptr); + cleanup_negative_pid_fds(p); + + return ret ? 
1 : 0; +} +#endif \ No newline at end of file diff --git a/src/collectors/apps.plugin/apps_plugin.c b/src/collectors/apps.plugin/apps_plugin.c index 8fe1ff00816a7a..60f80c3c9b17d4 100644 --- a/src/collectors/apps.plugin/apps_plugin.c +++ b/src/collectors/apps.plugin/apps_plugin.c @@ -27,16 +27,16 @@ // options bool debug_enabled = false; -bool enable_guest_charts = false; + bool enable_detailed_uptime_charts = false; bool enable_users_charts = true; bool enable_groups_charts = true; bool include_exited_childs = true; -bool proc_pid_cmdline_is_needed = false; // true when we need to read /proc/cmdline +bool proc_pid_cmdline_is_needed = true; // true when we need to read /proc/cmdline -#if defined(__FreeBSD__) || defined(__APPLE__) +#if defined(OS_FREEBSD) || defined(OS_MACOS) || defined(OS_WINDOWS) bool enable_file_charts = false; -#else +#elif defined(OS_LINUX) bool enable_file_charts = true; #endif @@ -53,19 +53,16 @@ size_t targets_assignment_counter = 0, apps_groups_targets_count = 0; // # of apps_groups.conf targets -int - all_files_len = 0, - all_files_size = 0, - show_guest_time = 0, // 1 when guest values are collected - show_guest_time_old = 0; - -#if defined(__FreeBSD__) || defined(__APPLE__) -usec_t system_current_time_ut; -#else -kernel_uint_t system_uptime_secs; +#if (PROCESSES_HAVE_CPU_GUEST_TIME == 1) +bool enable_guest_charts = false; +bool show_guest_time = false; // set when guest values are collected #endif -// ---------------------------------------------------------------------------- +uint32_t + all_files_len = 0, + all_files_size = 0; + +// -------------------------------------------------------------------------------------------------------------------- // Normalization // // With normalization we lower the collected metrics by a factor to make them @@ -80,10 +77,12 @@ kernel_uint_t system_uptime_secs; // metric. 
// the total system time, as reported by /proc/stat +#if (ALL_PIDS_ARE_READ_INSTANTLY == 0) kernel_uint_t global_utime = 0, global_stime = 0, global_gtime = 0; +#endif // the normalization ratios, as calculated by normalize_utilization() NETDATA_DOUBLE @@ -98,21 +97,11 @@ NETDATA_DOUBLE cminflt_fix_ratio = 1.0, cmajflt_fix_ratio = 1.0; -// ---------------------------------------------------------------------------- -// factor for calculating correct CPU time values depending on units of raw data -unsigned int time_factor = 0; - -// ---------------------------------------------------------------------------- -// command line options +// -------------------------------------------------------------------------------------------------------------------- int update_every = 1; -#if defined(__APPLE__) -mach_timebase_info_data_t mach_info; -#endif - -#if !defined(__FreeBSD__) && !defined(__APPLE__) -int max_fds_cache_seconds = 60; +#if defined(OS_LINUX) proc_state proc_state_count[PROC_STATUS_END]; const char *proc_states[] = { [PROC_STATUS_RUNNING] = "running", @@ -127,412 +116,12 @@ const char *proc_states[] = { static char *user_config_dir = CONFIG_DIR; static char *stock_config_dir = LIBCONFIG_DIR; -struct target - *apps_groups_default_target = NULL, // the default target - *apps_groups_root_target = NULL, // apps_groups.conf defined - *users_root_target = NULL, // users - *groups_root_target = NULL; // user groups - size_t pagesize; -// ---------------------------------------------------------------------------- - -int managed_log(struct pid_stat *p, PID_LOG log, int status) { - if(unlikely(!status)) { - // netdata_log_error("command failed log %u, errno %d", log, errno); - - if(unlikely(debug_enabled || errno != ENOENT)) { - if(unlikely(debug_enabled || !(p->log_thrown & log))) { - p->log_thrown |= log; - switch(log) { - case PID_LOG_IO: - #if defined(__FreeBSD__) || defined(__APPLE__) - netdata_log_error("Cannot fetch process %d I/O info (command '%s')", p->pid, 
p->comm); - #else - netdata_log_error("Cannot process %s/proc/%d/io (command '%s')", netdata_configured_host_prefix, p->pid, p->comm); - #endif - break; - - case PID_LOG_STATUS: - #if defined(__FreeBSD__) || defined(__APPLE__) - netdata_log_error("Cannot fetch process %d status info (command '%s')", p->pid, p->comm); - #else - netdata_log_error("Cannot process %s/proc/%d/status (command '%s')", netdata_configured_host_prefix, p->pid, p->comm); - #endif - break; - - case PID_LOG_CMDLINE: - #if defined(__FreeBSD__) || defined(__APPLE__) - netdata_log_error("Cannot fetch process %d command line (command '%s')", p->pid, p->comm); - #else - netdata_log_error("Cannot process %s/proc/%d/cmdline (command '%s')", netdata_configured_host_prefix, p->pid, p->comm); - #endif - break; - - case PID_LOG_FDS: - #if defined(__FreeBSD__) || defined(__APPLE__) - netdata_log_error("Cannot fetch process %d files (command '%s')", p->pid, p->comm); - #else - netdata_log_error("Cannot process entries in %s/proc/%d/fd (command '%s')", netdata_configured_host_prefix, p->pid, p->comm); - #endif - break; - - case PID_LOG_LIMITS: - #if defined(__FreeBSD__) || defined(__APPLE__) - ; - #else - netdata_log_error("Cannot process %s/proc/%d/limits (command '%s')", netdata_configured_host_prefix, p->pid, p->comm); - #endif - - case PID_LOG_STAT: - break; - - default: - netdata_log_error("unhandled error for pid %d, command '%s'", p->pid, p->comm); - break; - } - } - } - errno_clear(); - } - else if(unlikely(p->log_thrown & log)) { - // netdata_log_error("unsetting log %u on pid %d", log, p->pid); - p->log_thrown &= ~log; - } - - return status; -} - -// ---------------------------------------------------------------------------- -// update statistics on the targets - -// 1. link all childs to their parents -// 2. go from bottom to top, marking as merged all children to their parents, -// this step links all parents without a target to the child target, if any -// 3. 
link all top level processes (the ones not merged) to default target -// 4. go from top to bottom, linking all children without a target to their parent target -// after this step all processes have a target. -// [5. for each killed pid (updated = 0), remove its usage from its target] -// 6. zero all apps_groups_targets -// 7. concentrate all values on the apps_groups_targets -// 8. remove all killed processes -// 9. find the unique file count for each target -// check: update_apps_groups_statistics() - -static void apply_apps_groups_targets_inheritance(void) { - struct pid_stat *p = NULL; - - // children that do not have a target - // inherit their target from their parent - int found = 1, loops = 0; - while(found) { - if(unlikely(debug_enabled)) loops++; - found = 0; - for(p = root_of_pids; p ; p = p->next) { - // if this process does not have a target, - // and it has a parent - // and its parent has a target - // then, set the parent's target to this process - if(unlikely(!p->target && p->parent && p->parent->target)) { - p->target = p->parent->target; - found++; - - if(debug_enabled || (p->target && p->target->debug_enabled)) - debug_log_int("TARGET INHERITANCE: %s is inherited by %d (%s) from its parent %d (%s).", p->target->name, p->pid, p->comm, p->parent->pid, p->parent->comm); - } - } - } - - // find all the procs with 0 childs and merge them to their parents - // repeat, until nothing more can be done. 
- int sortlist = 1; - found = 1; - while(found) { - if(unlikely(debug_enabled)) loops++; - found = 0; - - for(p = root_of_pids; p ; p = p->next) { - if(unlikely(!p->sortlist && !p->children_count)) - p->sortlist = sortlist++; - - if(unlikely( - !p->children_count // if this process does not have any children - && !p->merged // and is not already merged - && p->parent // and has a parent - && p->parent->children_count // and its parent has children - // and the target of this process and its parent is the same, - // or the parent does not have a target - && (p->target == p->parent->target || !p->parent->target) - && p->ppid != INIT_PID // and its parent is not init - )) { - // mark it as merged - p->parent->children_count--; - p->merged = true; - - // the parent inherits the child's target, if it does not have a target itself - if(unlikely(p->target && !p->parent->target)) { - p->parent->target = p->target; - - if(debug_enabled || (p->target && p->target->debug_enabled)) - debug_log_int("TARGET INHERITANCE: %s is inherited by %d (%s) from its child %d (%s).", p->target->name, p->parent->pid, p->parent->comm, p->pid, p->comm); - } - - found++; - } - } - - debug_log("TARGET INHERITANCE: merged %d processes", found); - } - - // init goes always to default target - struct pid_stat *pi = find_pid_entry(INIT_PID); - if(pi && !pi->matched_by_config) - pi->target = apps_groups_default_target; - - // pid 0 goes always to default target - pi = find_pid_entry(0); - if(pi && !pi->matched_by_config) - pi->target = apps_groups_default_target; - - // give a default target on all top level processes - if(unlikely(debug_enabled)) loops++; - for(p = root_of_pids; p ; p = p->next) { - // if the process is not merged itself - // then it is a top level process - if(unlikely(!p->merged && !p->target)) - p->target = apps_groups_default_target; - - // make sure all processes have a sortlist - if(unlikely(!p->sortlist)) - p->sortlist = sortlist++; - } - - pi = find_pid_entry(1); - if(pi) - 
pi->sortlist = sortlist++; - - // give a target to all merged child processes - found = 1; - while(found) { - if(unlikely(debug_enabled)) loops++; - found = 0; - for(p = root_of_pids; p ; p = p->next) { - if(unlikely(!p->target && p->merged && p->parent && p->parent->target)) { - p->target = p->parent->target; - found++; - - if(debug_enabled || (p->target && p->target->debug_enabled)) - debug_log_int("TARGET INHERITANCE: %s is inherited by %d (%s) from its parent %d (%s) at phase 2.", p->target->name, p->pid, p->comm, p->parent->pid, p->parent->comm); - } - } - } - - debug_log("apply_apps_groups_targets_inheritance() made %d loops on the process tree", loops); -} - -static size_t zero_all_targets(struct target *root) { - struct target *w; - size_t count = 0; - - for (w = root; w ; w = w->next) { - count++; - - w->minflt = 0; - w->majflt = 0; - w->utime = 0; - w->stime = 0; - w->gtime = 0; - w->cminflt = 0; - w->cmajflt = 0; - w->cutime = 0; - w->cstime = 0; - w->cgtime = 0; - w->num_threads = 0; - // w->rss = 0; - w->processes = 0; - - w->status_vmsize = 0; - w->status_vmrss = 0; - w->status_vmshared = 0; - w->status_rssfile = 0; - w->status_rssshmem = 0; - w->status_vmswap = 0; - w->status_voluntary_ctxt_switches = 0; - w->status_nonvoluntary_ctxt_switches = 0; - - w->io_logical_bytes_read = 0; - w->io_logical_bytes_written = 0; - w->io_read_calls = 0; - w->io_write_calls = 0; - w->io_storage_bytes_read = 0; - w->io_storage_bytes_written = 0; - w->io_cancelled_write_bytes = 0; - - // zero file counters - if(w->target_fds) { - memset(w->target_fds, 0, sizeof(int) * w->target_fds_size); - w->openfds.files = 0; - w->openfds.pipes = 0; - w->openfds.sockets = 0; - w->openfds.inotifies = 0; - w->openfds.eventfds = 0; - w->openfds.timerfds = 0; - w->openfds.signalfds = 0; - w->openfds.eventpolls = 0; - w->openfds.other = 0; - - w->max_open_files_percent = 0.0; - } - - w->uptime_min = 0; - w->uptime_sum = 0; - w->uptime_max = 0; - - if(unlikely(w->root_pid)) { - struct 
pid_on_target *pid_on_target = w->root_pid; - - while(pid_on_target) { - struct pid_on_target *pid_on_target_to_free = pid_on_target; - pid_on_target = pid_on_target->next; - freez(pid_on_target_to_free); - } - - w->root_pid = NULL; - } - } - - return count; -} - -static inline void aggregate_pid_on_target(struct target *w, struct pid_stat *p, struct target *o) { - (void)o; - - if(unlikely(!p->updated)) { - // the process is not running - return; - } - - if(unlikely(!w)) { - netdata_log_error("pid %d %s was left without a target!", p->pid, p->comm); - return; - } - - if(p->openfds_limits_percent > w->max_open_files_percent) - w->max_open_files_percent = p->openfds_limits_percent; - - w->cutime += p->cutime; - w->cstime += p->cstime; - w->cgtime += p->cgtime; - w->cminflt += p->cminflt; - w->cmajflt += p->cmajflt; - - w->utime += p->utime; - w->stime += p->stime; - w->gtime += p->gtime; - w->minflt += p->minflt; - w->majflt += p->majflt; - - // w->rss += p->rss; - - w->status_vmsize += p->status_vmsize; - w->status_vmrss += p->status_vmrss; - w->status_vmshared += p->status_vmshared; - w->status_rssfile += p->status_rssfile; - w->status_rssshmem += p->status_rssshmem; - w->status_vmswap += p->status_vmswap; - w->status_voluntary_ctxt_switches += p->status_voluntary_ctxt_switches; - w->status_nonvoluntary_ctxt_switches += p->status_nonvoluntary_ctxt_switches; - - w->io_logical_bytes_read += p->io_logical_bytes_read; - w->io_logical_bytes_written += p->io_logical_bytes_written; - w->io_read_calls += p->io_read_calls; - w->io_write_calls += p->io_write_calls; - w->io_storage_bytes_read += p->io_storage_bytes_read; - w->io_storage_bytes_written += p->io_storage_bytes_written; - w->io_cancelled_write_bytes += p->io_cancelled_write_bytes; - - w->processes++; - w->num_threads += p->num_threads; - - if(!w->uptime_min || p->uptime < w->uptime_min) w->uptime_min = p->uptime; - if(!w->uptime_max || w->uptime_max < p->uptime) w->uptime_max = p->uptime; - w->uptime_sum += 
p->uptime; - - if(unlikely(debug_enabled || w->debug_enabled)) { - debug_log_int("aggregating '%s' pid %d on target '%s' utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", gtime=" KERNEL_UINT_FORMAT ", cutime=" KERNEL_UINT_FORMAT ", cstime=" KERNEL_UINT_FORMAT ", cgtime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", cminflt=" KERNEL_UINT_FORMAT ", cmajflt=" KERNEL_UINT_FORMAT "", p->comm, p->pid, w->name, p->utime, p->stime, p->gtime, p->cutime, p->cstime, p->cgtime, p->minflt, p->majflt, p->cminflt, p->cmajflt); - - struct pid_on_target *pid_on_target = mallocz(sizeof(struct pid_on_target)); - pid_on_target->pid = p->pid; - pid_on_target->next = w->root_pid; - w->root_pid = pid_on_target; - } -} - -static void calculate_netdata_statistics(void) { - apply_apps_groups_targets_inheritance(); - - zero_all_targets(users_root_target); - zero_all_targets(groups_root_target); - apps_groups_targets_count = zero_all_targets(apps_groups_root_target); - - // this has to be done, before the cleanup - struct pid_stat *p = NULL; - struct target *w = NULL, *o = NULL; - - // concentrate everything on the targets - for(p = root_of_pids; p ; p = p->next) { - - // -------------------------------------------------------------------- - // apps_groups target - - aggregate_pid_on_target(p->target, p, NULL); - - - // -------------------------------------------------------------------- - // user target - - o = p->user_target; - if(likely(p->user_target && p->user_target->uid == p->uid)) - w = p->user_target; - else { - if(unlikely(debug_enabled && p->user_target)) - debug_log("pid %d (%s) switched user from %u (%s) to %u.", p->pid, p->comm, p->user_target->uid, p->user_target->name, p->uid); - - w = p->user_target = get_users_target(p->uid); - } - - aggregate_pid_on_target(w, p, o); - - - // -------------------------------------------------------------------- - // user group target - - o = p->group_target; - if(likely(p->group_target && 
p->group_target->gid == p->gid)) - w = p->group_target; - else { - if(unlikely(debug_enabled && p->group_target)) - debug_log("pid %d (%s) switched group from %u (%s) to %u.", p->pid, p->comm, p->group_target->gid, p->group_target->name, p->gid); - - w = p->group_target = get_groups_target(p->gid); - } - - aggregate_pid_on_target(w, p, o); - - - // -------------------------------------------------------------------- - // aggregate all file descriptors - - if(enable_file_charts) - aggregate_pid_fds_on_targets(p); - } - - cleanup_exited_pids(); -} - // ---------------------------------------------------------------------------- // update chart dimensions +#if (ALL_PIDS_ARE_READ_INSTANTLY == 0) static void normalize_utilization(struct target *root) { struct target *w; @@ -540,7 +129,7 @@ static void normalize_utilization(struct target *root) { // here we try to eliminate them by disabling childs processing either for specific dimensions // or entirely. Of course, either way, we disable it just a single iteration. 
- kernel_uint_t max_time = os_get_system_cpus() * time_factor * RATES_DETAIL; + kernel_uint_t max_time = os_get_system_cpus() * NSEC_PER_SEC; kernel_uint_t utime = 0, cutime = 0, stime = 0, cstime = 0, gtime = 0, cgtime = 0, minflt = 0, cminflt = 0, majflt = 0, cmajflt = 0; if(global_utime > max_time) global_utime = max_time; @@ -548,19 +137,19 @@ static void normalize_utilization(struct target *root) { if(global_gtime > max_time) global_gtime = max_time; for(w = root; w ; w = w->next) { - if(w->target || (!w->processes && !w->exposed)) continue; - - utime += w->utime; - stime += w->stime; - gtime += w->gtime; - cutime += w->cutime; - cstime += w->cstime; - cgtime += w->cgtime; - - minflt += w->minflt; - majflt += w->majflt; - cminflt += w->cminflt; - cmajflt += w->cmajflt; + if(w->target || (!w->values[PDF_PROCESSES] && !w->exposed)) continue; + + utime += w->values[PDF_UTIME]; + stime += w->values[PDF_STIME]; + gtime += w->values[PDF_GTIME]; + cutime += w->values[PDF_CUTIME]; + cstime += w->values[PDF_CSTIME]; + cgtime += w->values[PDF_CGTIME]; + + minflt += w->values[PDF_MINFLT]; + majflt += w->values[PDF_MAJFLT]; + cminflt += w->values[PDF_CMINFLT]; + cmajflt += w->values[PDF_CMAJFLT]; } if(global_utime || global_stime || global_gtime) { @@ -683,6 +272,7 @@ static void normalize_utilization(struct target *root) { , (kernel_uint_t)(cgtime * cgtime_fix_ratio) ); } +#endif // ---------------------------------------------------------------------------- // parse command line arguments @@ -690,6 +280,7 @@ static void normalize_utilization(struct target *root) { int check_proc_1_io() { int ret = 0; +#if defined(OS_LINUX) procfile *ff = procfile_open("/proc/1/io", NULL, PROCFILE_FLAG_NO_ERROR_ON_FILE_IO); if(!ff) goto cleanup; @@ -700,9 +291,13 @@ int check_proc_1_io() { cleanup: procfile_close(ff); +#endif + return ret; } +static bool profile_speed = false; + static void parse_args(int argc, char **argv) { int i, freq = 0; @@ -721,6 +316,7 @@ static void 
parse_args(int argc, char **argv) exit(0); } +#if defined(OS_LINUX) if(strcmp("test-permissions", argv[i]) == 0 || strcmp("-t", argv[i]) == 0) { if(!check_proc_1_io()) { perror("Tried to read /proc/1/io and it failed"); @@ -729,6 +325,7 @@ static void parse_args(int argc, char **argv) printf("OK\n"); exit(0); } +#endif if(strcmp("debug", argv[i]) == 0) { debug_enabled = true; @@ -738,7 +335,12 @@ static void parse_args(int argc, char **argv) continue; } -#if !defined(__FreeBSD__) && !defined(__APPLE__) + if(strcmp("profile-speed", argv[i]) == 0) { + profile_speed = true; + continue; + } + +#if defined(OS_LINUX) if(strcmp("fds-cache-secs", argv[i]) == 0) { if(argc <= i + 1) { fprintf(stderr, "Parameter 'fds-cache-secs' requires a number as argument.\n"); @@ -751,6 +353,7 @@ static void parse_args(int argc, char **argv) } #endif +#if (PROCESSES_HAVE_CPU_CHILDREN_TIME == 1) || (PROCESSES_HAVE_CHILDREN_FLTS == 1) if(strcmp("no-childs", argv[i]) == 0 || strcmp("without-childs", argv[i]) == 0) { include_exited_childs = 0; continue; @@ -760,7 +363,9 @@ static void parse_args(int argc, char **argv) include_exited_childs = 1; continue; } +#endif +#if (PROCESSES_HAVE_CPU_GUEST_TIME == 1) if(strcmp("with-guest", argv[i]) == 0) { enable_guest_charts = true; continue; @@ -770,7 +375,9 @@ static void parse_args(int argc, char **argv) enable_guest_charts = false; continue; } +#endif +#if (PROCESSES_HAVE_FDS == 1) if(strcmp("with-files", argv[i]) == 0) { enable_file_charts = 1; continue; @@ -780,16 +387,21 @@ static void parse_args(int argc, char **argv) enable_file_charts = 0; continue; } +#endif +#if (PROCESSES_HAVE_UID == 1) if(strcmp("no-users", argv[i]) == 0 || strcmp("without-users", argv[i]) == 0) { enable_users_charts = 0; continue; } +#endif +#if (PROCESSES_HAVE_GID == 1) if(strcmp("no-groups", argv[i]) == 0 || strcmp("without-groups", argv[i]) == 0) { enable_groups_charts = 0; continue; } +#endif if(strcmp("with-detailed-uptime", argv[i]) == 0) { 
enable_detailed_uptime_charts = 1; @@ -821,26 +433,36 @@ static void parse_args(int argc, char **argv) " it may include sensitive data such as passwords and tokens\n" " enabling this could be a security risk\n" "\n" +#if (PROCESSES_HAVE_CPU_CHILDREN_TIME == 1) || (PROCESSES_HAVE_CHILDREN_FLTS == 1) " with-childs\n" " without-childs enable / disable aggregating exited\n" " children resources into parents\n" " (default is enabled)\n" "\n" +#endif +#if (PROCESSES_HAVE_CPU_GUEST_TIME == 1) " with-guest\n" " without-guest enable / disable reporting guest charts\n" " (default is disabled)\n" "\n" +#endif +#if (PROCESSES_HAVE_FDS == 1) " with-files\n" " without-files enable / disable reporting files, sockets, pipes\n" " (default is enabled)\n" "\n" +#endif +#if (PROCESSES_HAVE_UID == 1) " without-users disable reporting per user charts\n" "\n" +#endif +#if (PROCESSES_HAVE_GID == 1) " without-groups disable reporting per user group charts\n" "\n" +#endif " with-detailed-uptime enable reporting min/avg/max uptime charts\n" "\n" -#if !defined(__FreeBSD__) && !defined(__APPLE__) +#if defined(OS_LINUX) " fds-cache-secs N cache the files of processed for N seconds\n" " caching is adaptive per file (when a file\n" " is found, it starts at 0 and while the file\n" @@ -852,15 +474,17 @@ static void parse_args(int argc, char **argv) " version or -v or -V print program version and exit\n" "\n" , NETDATA_VERSION -#if !defined(__FreeBSD__) && !defined(__APPLE__) +#if defined(OS_LINUX) , max_fds_cache_seconds #endif ); - exit(1); + exit(0); } +#if !defined(OS_WINDOWS) || !defined(RUN_UNDER_CLION) netdata_log_error("Cannot understand option %s", argv[i]); exit(1); +#endif } if(freq > 0) update_every = freq; @@ -879,7 +503,8 @@ static void parse_args(int argc, char **argv) netdata_log_info("Loaded config file '%s/apps_groups.conf'", user_config_dir); } -static int am_i_running_as_root() { +#if !defined(OS_WINDOWS) +static inline int am_i_running_as_root() { uid_t uid = getuid(), euid = 
geteuid(); if(uid == 0 || euid == 0) { @@ -892,7 +517,7 @@ static int am_i_running_as_root() { } #ifdef HAVE_SYS_CAPABILITY_H -static int check_capabilities() { +static inline int check_capabilities() { cap_t caps = cap_get_proc(); if(!caps) { netdata_log_error("Cannot get current capabilities."); @@ -936,22 +561,13 @@ static int check_capabilities() { return ret; } #else -static int check_capabilities() { +static inline int check_capabilities() { return 0; } #endif +#endif -static netdata_mutex_t apps_and_stdout_mutex = NETDATA_MUTEX_INITIALIZER; - -struct target *find_target_by_name(struct target *base, const char *name) { - struct target *t; - for(t = base; t ; t = t->next) { - if (strcmp(t->name, name) == 0) - return t; - } - - return NULL; -} +netdata_mutex_t apps_and_stdout_mutex = NETDATA_MUTEX_INITIALIZER; static bool apps_plugin_exit = false; @@ -1000,47 +616,35 @@ int main(int argc, char **argv) { #endif /* NETDATA_INTERNAL_CHECKS */ procfile_adaptive_initial_allocation = 1; - os_get_system_HZ(); -#if defined(__FreeBSD__) - time_factor = 1000000ULL / RATES_DETAIL; // FreeBSD uses usecs -#endif -#if defined(__APPLE__) - mach_timebase_info(&mach_info); - time_factor = 1000000ULL / RATES_DETAIL; -#endif -#if !defined(__FreeBSD__) && !defined(__APPLE__) - time_factor = system_hz; // Linux uses clock ticks -#endif - - os_get_system_pid_max(); os_get_system_cpus_uncached(); - + apps_orchestrators_and_aggregators_init(); // before parsing args! parse_args(argc, argv); +#if !defined(OS_WINDOWS) if(!check_capabilities() && !am_i_running_as_root() && !check_proc_1_io()) { uid_t uid = getuid(), euid = geteuid(); #ifdef HAVE_SYS_CAPABILITY_H netdata_log_error("apps.plugin should either run as root (now running with uid %u, euid %u) or have special capabilities. " - "Without these, apps.plugin cannot report disk I/O utilization of other processes. 
" - "To enable capabilities run: sudo setcap cap_dac_read_search,cap_sys_ptrace+ep %s; " - "To enable setuid to root run: sudo chown root:netdata %s; sudo chmod 4750 %s; " - , uid, euid, argv[0], argv[0], argv[0] - ); + "Without these, apps.plugin cannot report disk I/O utilization of other processes. " + "To enable capabilities run: sudo setcap cap_dac_read_search,cap_sys_ptrace+ep %s; " + "To enable setuid to root run: sudo chown root:netdata %s; sudo chmod 4750 %s; " + , uid, euid, argv[0], argv[0], argv[0]); #else netdata_log_error("apps.plugin should either run as root (now running with uid %u, euid %u) or have special capabilities. " - "Without these, apps.plugin cannot report disk I/O utilization of other processes. " - "Your system does not support capabilities. " - "To enable setuid to root run: sudo chown root:netdata %s; sudo chmod 4750 %s; " - , uid, euid, argv[0], argv[0] - ); + "Without these, apps.plugin cannot report disk I/O utilization of other processes. " + "Your system does not support capabilities. 
" + "To enable setuid to root run: sudo chown root:netdata %s; sudo chmod 4750 %s; " + , uid, euid, argv[0], argv[0]); #endif } +#endif netdata_log_info("started on pid %d", getpid()); - users_and_groups_init(); - pids_init(); + apps_users_and_groups_init(); + apps_pids_init(); + OS_FUNCTION(apps_os_init)(); // ------------------------------------------------------------------------ // the event loop for functions @@ -1062,15 +666,16 @@ int main(int argc, char **argv) { for(; !apps_plugin_exit ; global_iterations_counter++) { netdata_mutex_unlock(&apps_and_stdout_mutex); -#ifdef NETDATA_PROFILING -#warning "compiling for profiling" - static int profiling_count=0; - profiling_count++; - if(unlikely(profiling_count > 2000)) exit(0); - usec_t dt = update_every * USEC_PER_SEC; -#else - usec_t dt = heartbeat_next(&hb, step); -#endif + usec_t dt; + if(profile_speed) { + static int profiling_count=0; + profiling_count++; + if(unlikely(profiling_count > 500)) exit(0); + dt = update_every * USEC_PER_SEC; + } + else + dt = heartbeat_next(&hb, step); + netdata_mutex_lock(&apps_and_stdout_mutex); struct pollfd pollfd = { .fd = fileno(stdout), .events = POLLERR }; @@ -1083,9 +688,6 @@ int main(int argc, char **argv) { fatal("Received error on read pipe."); } - if(global_iterations_counter % 10 == 0) - get_MemTotal(); - if(!collect_data_for_all_pids()) { netdata_log_error("Cannot collect /proc data for running processes. 
Disabling apps.plugin..."); printf("DISABLE\n"); @@ -1093,30 +695,39 @@ int main(int argc, char **argv) { exit(1); } - calculate_netdata_statistics(); + aggregate_processes_to_targets(); + +#if (ALL_PIDS_ARE_READ_INSTANTLY == 0) + OS_FUNCTION(apps_os_read_global_cpu_utilization)(); normalize_utilization(apps_groups_root_target); +#endif if(send_resource_usage) send_resource_usage_to_netdata(dt); +#if (PROCESSES_HAVE_STATE == 1) send_proc_states_count(dt); - send_charts_updates_to_netdata(apps_groups_root_target, "app", "app_group", "Apps"); +#endif + + send_charts_updates_to_netdata(apps_groups_root_target, "app", "app_group", "Applications Groups"); send_collected_data_to_netdata(apps_groups_root_target, "app", dt); +#if (PROCESSES_HAVE_UID == 1) if (enable_users_charts) { - send_charts_updates_to_netdata(users_root_target, "user", "user", "Users"); + send_charts_updates_to_netdata(users_root_target, "user", "user", "User Processes"); send_collected_data_to_netdata(users_root_target, "user", dt); } +#endif +#if (PROCESSES_HAVE_GID == 1) if (enable_groups_charts) { - send_charts_updates_to_netdata(groups_root_target, "usergroup", "user_group", "User Groups"); + send_charts_updates_to_netdata(groups_root_target, "usergroup", "user_group", "User Group Processes"); send_collected_data_to_netdata(groups_root_target, "usergroup", dt); } +#endif fflush(stdout); - show_guest_time_old = show_guest_time; - debug_log("done Loop No %zu", global_iterations_counter); } netdata_mutex_unlock(&apps_and_stdout_mutex); diff --git a/src/collectors/apps.plugin/apps_plugin.h b/src/collectors/apps.plugin/apps_plugin.h index a085872d9f2960..bf6e3924a7f63a 100644 --- a/src/collectors/apps.plugin/apps_plugin.h +++ b/src/collectors/apps.plugin/apps_plugin.h @@ -6,11 +6,40 @@ #include "collectors/all.h" #include "libnetdata/libnetdata.h" -#ifdef __FreeBSD__ +#define OS_FUNC_CONCAT(a, b) a##b + +#if defined(OS_FREEBSD) #include -#endif -#ifdef __APPLE__ +#define OS_INIT_PID 1 +#define 
ALL_PIDS_ARE_READ_INSTANTLY 1 +#define PROCESSES_HAVE_CPU_GUEST_TIME 0 +#define PROCESSES_HAVE_CPU_CHILDREN_TIME 1 +#define PROCESSES_HAVE_VOLCTX 0 +#define PROCESSES_HAVE_NVOLCTX 0 +#define PROCESSES_HAVE_PHYSICAL_IO 0 +#define PROCESSES_HAVE_LOGICAL_IO 1 +#define PROCESSES_HAVE_IO_CALLS 0 +#define PROCESSES_HAVE_UID 1 +#define PROCESSES_HAVE_GID 1 +#define PROCESSES_HAVE_MAJFLT 1 +#define PROCESSES_HAVE_CHILDREN_FLTS 1 +#define PROCESSES_HAVE_VMSWAP 0 +#define PROCESSES_HAVE_VMSHARED 0 +#define PROCESSES_HAVE_RSSFILE 0 +#define PROCESSES_HAVE_RSSSHMEM 0 +#define PROCESSES_HAVE_FDS 1 +#define PROCESSES_HAVE_HANDLES 0 +#define PROCESSES_HAVE_CMDLINE 1 +#define PROCESSES_HAVE_PID_LIMITS 0 +#define PROCESSES_HAVE_COMM_AND_NAME 0 +#define PROCESSES_HAVE_STATE 0 +#define PPID_SHOULD_BE_RUNNING 1 +#define INCREMENTAL_DATA_COLLECTION 1 +#define CPU_TO_NANOSECONDCORES (1000) // convert microseconds to nanoseconds +#define OS_FUNCTION(func) OS_FUNC_CONCAT(func, _freebsd) + +#elif defined(OS_MACOS) #include #include #include @@ -18,40 +47,114 @@ #include #include // For mach_timebase_info_data_t and mach_timebase_info -extern mach_timebase_info_data_t mach_info; -#endif - -// ---------------------------------------------------------------------------- -// per O/S configuration - -// the minimum PID of the system -// this is also the pid of the init process -#define INIT_PID 1 - -// if the way apps.plugin will work, will read the entire process list, -// including the resource utilization of each process, instantly -// set this to 1 -// when set to 0, apps.plugin builds a sort list of processes, in order -// to process children processes, before parent processes -#if defined(__FreeBSD__) || defined(__APPLE__) -#define ALL_PIDS_ARE_READ_INSTANTLY 1 -#else -#define ALL_PIDS_ARE_READ_INSTANTLY 0 -#endif - -#if defined(__APPLE__) struct pid_info { struct kinfo_proc proc; struct proc_taskinfo taskinfo; struct proc_bsdinfo bsdinfo; struct rusage_info_v4 rusageinfo; }; + +#define 
OS_INIT_PID 1 +#define ALL_PIDS_ARE_READ_INSTANTLY 1 +#define PROCESSES_HAVE_CPU_GUEST_TIME 0 +#define PROCESSES_HAVE_CPU_CHILDREN_TIME 0 +#define PROCESSES_HAVE_VOLCTX 1 +#define PROCESSES_HAVE_NVOLCTX 0 +#define PROCESSES_HAVE_PHYSICAL_IO 0 +#define PROCESSES_HAVE_LOGICAL_IO 1 +#define PROCESSES_HAVE_IO_CALLS 0 +#define PROCESSES_HAVE_UID 1 +#define PROCESSES_HAVE_GID 1 +#define PROCESSES_HAVE_MAJFLT 1 +#define PROCESSES_HAVE_CHILDREN_FLTS 0 +#define PROCESSES_HAVE_VMSWAP 0 +#define PROCESSES_HAVE_VMSHARED 0 +#define PROCESSES_HAVE_RSSFILE 0 +#define PROCESSES_HAVE_RSSSHMEM 0 +#define PROCESSES_HAVE_FDS 1 +#define PROCESSES_HAVE_HANDLES 0 +#define PROCESSES_HAVE_CMDLINE 1 +#define PROCESSES_HAVE_PID_LIMITS 0 +#define PROCESSES_HAVE_COMM_AND_NAME 0 +#define PROCESSES_HAVE_STATE 0 +#define PPID_SHOULD_BE_RUNNING 1 +#define INCREMENTAL_DATA_COLLECTION 1 +#define CPU_TO_NANOSECONDCORES (1) // already in nanoseconds +#define OS_FUNCTION(func) OS_FUNC_CONCAT(func, _macos) + +#elif defined(OS_WINDOWS) +#include + +#define OS_INIT_PID 0 // dynamic, is set during data collection +#define ALL_PIDS_ARE_READ_INSTANTLY 1 +#define PROCESSES_HAVE_CPU_GUEST_TIME 0 +#define PROCESSES_HAVE_CPU_CHILDREN_TIME 0 +#define PROCESSES_HAVE_VOLCTX 0 +#define PROCESSES_HAVE_NVOLCTX 0 +#define PROCESSES_HAVE_PHYSICAL_IO 0 +#define PROCESSES_HAVE_LOGICAL_IO 1 +#define PROCESSES_HAVE_IO_CALLS 1 +#define PROCESSES_HAVE_UID 0 +#define PROCESSES_HAVE_GID 0 +#define PROCESSES_HAVE_MAJFLT 0 +#define PROCESSES_HAVE_CHILDREN_FLTS 0 +#define PROCESSES_HAVE_VMSWAP 1 +#define PROCESSES_HAVE_VMSHARED 0 +#define PROCESSES_HAVE_RSSFILE 0 +#define PROCESSES_HAVE_RSSSHMEM 0 +#define PROCESSES_HAVE_FDS 0 +#define PROCESSES_HAVE_HANDLES 1 +#define PROCESSES_HAVE_CMDLINE 0 +#define PROCESSES_HAVE_PID_LIMITS 0 +#define PROCESSES_HAVE_COMM_AND_NAME 1 +#define PROCESSES_HAVE_STATE 0 +#define PPID_SHOULD_BE_RUNNING 0 +#define INCREMENTAL_DATA_COLLECTION 0 +#define CPU_TO_NANOSECONDCORES (100) // convert 100ns to 
ns +#define OS_FUNCTION(func) OS_FUNC_CONCAT(func, _windows) + +#elif defined(OS_LINUX) +#define OS_INIT_PID 1 +#define ALL_PIDS_ARE_READ_INSTANTLY 0 +#define PROCESSES_HAVE_CPU_GUEST_TIME 1 +#define PROCESSES_HAVE_CPU_CHILDREN_TIME 1 +#define PROCESSES_HAVE_VOLCTX 1 +#define PROCESSES_HAVE_NVOLCTX 1 +#define PROCESSES_HAVE_PHYSICAL_IO 1 +#define PROCESSES_HAVE_LOGICAL_IO 1 +#define PROCESSES_HAVE_IO_CALLS 1 +#define PROCESSES_HAVE_UID 1 +#define PROCESSES_HAVE_GID 1 +#define PROCESSES_HAVE_MAJFLT 1 +#define PROCESSES_HAVE_CHILDREN_FLTS 1 +#define PROCESSES_HAVE_VMSWAP 1 +#define PROCESSES_HAVE_VMSHARED 1 +#define PROCESSES_HAVE_RSSFILE 1 +#define PROCESSES_HAVE_RSSSHMEM 1 +#define PROCESSES_HAVE_FDS 1 +#define PROCESSES_HAVE_HANDLES 0 +#define PROCESSES_HAVE_CMDLINE 1 +#define PROCESSES_HAVE_PID_LIMITS 1 +#define PROCESSES_HAVE_COMM_AND_NAME 0 +#define PROCESSES_HAVE_STATE 1 +#define PPID_SHOULD_BE_RUNNING 1 +#define USE_APPS_GROUPS_CONF 1 +#define INCREMENTAL_DATA_COLLECTION 1 +#define CPU_TO_NANOSECONDCORES (NSEC_PER_SEC / system_hz) +#define OS_FUNCTION(func) OS_FUNC_CONCAT(func, _linux) + +extern int max_fds_cache_seconds; + +#else +#error "Unsupported operating system" #endif -// ---------------------------------------------------------------------------- +// -------------------------------------------------------------------------------------------------------------------- + +extern pid_t INIT_PID; extern bool debug_enabled; -extern bool enable_guest_charts; + extern bool enable_detailed_uptime_charts; extern bool enable_users_charts; extern bool enable_groups_charts; @@ -68,19 +171,23 @@ extern size_t inodes_changed_counter, links_changed_counter, targets_assignment_counter, - all_pids_count, apps_groups_targets_count; -extern int +#if (PROCESSES_HAVE_CPU_GUEST_TIME == 1) +extern bool enable_guest_charts; +extern bool show_guest_time; +#endif + +extern uint32_t all_files_len, - all_files_size, - show_guest_time, - show_guest_time_old; + all_files_size; +#if 
(ALL_PIDS_ARE_READ_INSTANTLY == 0) extern kernel_uint_t global_utime, global_stime, global_gtime; +#endif // the normalization ratios, as calculated by normalize_utilization() extern NETDATA_DOUBLE @@ -95,19 +202,13 @@ extern NETDATA_DOUBLE cminflt_fix_ratio, cmajflt_fix_ratio; -#if defined(__FreeBSD__) || defined(__APPLE__) -extern usec_t system_current_time_ut; -#else -extern kernel_uint_t system_uptime_secs; -#endif - extern size_t pagesize; +extern netdata_mutex_t apps_and_stdout_mutex; + // ---------------------------------------------------------------------------- // string lengths -#define MAX_COMPARE_NAME 100 -#define MAX_NAME 100 #define MAX_CMDLINE 65536 // ---------------------------------------------------------------------------- @@ -117,13 +218,9 @@ extern size_t pagesize; // having a lot of spares, increases the CPU utilization of the plugin. #define MAX_SPARE_FDS 1 -#if !defined(__FreeBSD__) && !defined(__APPLE__) -extern int max_fds_cache_seconds; -#endif - // ---------------------------------------------------------------------------- // some variables for keeping track of processes count by states - +#if (PROCESSES_HAVE_STATE == 1) typedef enum { PROC_STATUS_RUNNING = 0, PROC_STATUS_SLEEPING_D, // uninterruptible sleep @@ -135,6 +232,7 @@ typedef enum { extern proc_state proc_state_count[PROC_STATUS_END]; extern const char *proc_states[]; +#endif // ---------------------------------------------------------------------------- // the rates we are going to send to netdata will have this detail a value of: @@ -144,6 +242,7 @@ extern const char *proc_states[]; // etc. 
#define RATES_DETAIL 10000ULL +#if (PROCESSES_HAVE_FDS == 1) struct openfds { kernel_uint_t files; kernel_uint_t pipes; @@ -155,8 +254,8 @@ struct openfds { kernel_uint_t eventpolls; kernel_uint_t other; }; - #define pid_openfds_sum(p) ((p)->openfds.files + (p)->openfds.pipes + (p)->openfds.sockets + (p)->openfds.inotifies + (p)->openfds.eventfds + (p)->openfds.timerfds + (p)->openfds.signalfds + (p)->openfds.eventpolls + (p)->openfds.other) +#endif // ---------------------------------------------------------------------------- // target @@ -172,69 +271,133 @@ struct pid_on_target { struct pid_on_target *next; }; -struct target { - char compare[MAX_COMPARE_NAME + 1]; - uint32_t comparehash; - size_t comparelen; +typedef enum __attribute__((packed)) { + TARGET_TYPE_APP_GROUP = 1, +#if (PROCESSES_HAVE_UID == 1) + TARGET_TYPE_UID, +#endif +#if (PROCESSES_HAVE_GID == 1) + TARGET_TYPE_GID, +#endif + TARGET_TYPE_TREE, +} TARGET_TYPE; - char id[MAX_NAME + 1]; - uint32_t idhash; +typedef enum __attribute__((packed)) { + // CPU utilization time + // The values are expressed in "NANOSECONDCORES". + // 1 x "NANOSECONDCORE" = 1 x NSEC_PER_SEC (1 billion). 
+ PDF_UTIME, // CPU user time + PDF_STIME, // CPU system time +#if (PROCESSES_HAVE_CPU_GUEST_TIME == 1) + PDF_GTIME, // CPU guest time +#endif +#if (PROCESSES_HAVE_CPU_CHILDREN_TIME == 1) + PDF_CUTIME, // exited children CPU user time + PDF_CSTIME, // exited children CPU system time +#if (PROCESSES_HAVE_CPU_GUEST_TIME == 1) + PDF_CGTIME, // exited children CPU guest time +#endif +#endif - char name[MAX_NAME + 1]; - char clean_name[MAX_NAME + 1]; // sanitized name used in chart id (need to replace at least dots) - uid_t uid; - gid_t gid; + PDF_MINFLT, // rate, unit: faults * RATES_DETAIL + +#if (PROCESSES_HAVE_MAJFLT == 1) + PDF_MAJFLT, // rate, unit: faults * RATES_DETAIL +#endif - bool is_other; - - kernel_uint_t minflt; - kernel_uint_t cminflt; - kernel_uint_t majflt; - kernel_uint_t cmajflt; - kernel_uint_t utime; - kernel_uint_t stime; - kernel_uint_t gtime; - kernel_uint_t cutime; - kernel_uint_t cstime; - kernel_uint_t cgtime; - kernel_uint_t num_threads; - // kernel_uint_t rss; - - kernel_uint_t status_vmsize; - kernel_uint_t status_vmrss; - kernel_uint_t status_vmshared; - kernel_uint_t status_rssfile; - kernel_uint_t status_rssshmem; - kernel_uint_t status_vmswap; - kernel_uint_t status_voluntary_ctxt_switches; - kernel_uint_t status_nonvoluntary_ctxt_switches; - - kernel_uint_t io_logical_bytes_read; - kernel_uint_t io_logical_bytes_written; - kernel_uint_t io_read_calls; - kernel_uint_t io_write_calls; - kernel_uint_t io_storage_bytes_read; - kernel_uint_t io_storage_bytes_written; - kernel_uint_t io_cancelled_write_bytes; +#if (PROCESSES_HAVE_CHILDREN_FLTS == 1) + PDF_CMINFLT, // rate, unit: faults * RATES_DETAIL + PDF_CMAJFLT, // rate, unit: faults * RATES_DETAIL +#endif - int *target_fds; - int target_fds_size; + PDF_VMSIZE, // the current virtual memory used by the process, in bytes + PDF_VMRSS, // the resident memory used by the process, in bytes - struct openfds openfds; +#if (PROCESSES_HAVE_VMSHARED == 1) + PDF_VMSHARED, // the shared memory used 
by the process, in bytes +#endif - NETDATA_DOUBLE max_open_files_percent; +#if (PROCESSES_HAVE_RSSFILE == 1) + PDF_RSSFILE, // unit: bytes +#endif + +#if (PROCESSES_HAVE_RSSSHMEM == 1) + PDF_RSSSHMEM, // unit: bytes +#endif + +#if (PROCESSES_HAVE_VMSWAP == 1) + PDF_VMSWAP, // the swap memory used by the process, in bytes +#endif + +#if (PROCESSES_HAVE_VOLCTX == 1) + PDF_VOLCTX, // rate, unit: switches * RATES_DETAIL +#endif + +#if (PROCESSES_HAVE_NVOLCTX == 1) + PDF_NVOLCTX, // rate, unit: switches * RATES_DETAIL +#endif + +#if (PROCESSES_HAVE_LOGICAL_IO == 1) + PDF_LREAD, // rate, logical reads in bytes/sec * RATES_DETAIL + PDF_LWRITE, // rate, logical writes in bytes/sec * RATES_DETAIL +#endif + +#if (PROCESSES_HAVE_PHYSICAL_IO == 1) + PDF_PREAD, // rate, physical reads in bytes/sec * RATES_DETAIL + PDF_PWRITE, // rate, physical writes in bytes/sec * RATES_DETAIL +#endif + +#if (PROCESSES_HAVE_IO_CALLS == 1) + PDF_OREAD, // rate, read ops/sec * RATES_DETAIL + PDF_OWRITE, // rate, write ops/sec * RATES_DETAIL +#endif + + PDF_UPTIME, // the process uptime in seconds + PDF_THREADS, // the number of threads + PDF_PROCESSES, // the number of processes + +#if (PROCESSES_HAVE_HANDLES == 1) + PDF_HANDLES, // the number of handles the process maintains +#endif + + // terminator + PDF_MAX +} PID_FIELD; + +struct target { + STRING *id; + STRING *name; + STRING *clean_name; + + TARGET_TYPE type; + union { + STRING *compare; +#if (PROCESSES_HAVE_UID == 1) + uid_t uid; +#endif +#if (PROCESSES_HAVE_GID == 1) + gid_t gid; +#endif + }; + + kernel_uint_t values[PDF_MAX]; kernel_uint_t uptime_min; - kernel_uint_t uptime_sum; kernel_uint_t uptime_max; - unsigned int processes; // how many processes have been merged to this - int exposed; // if set, we have sent this to netdata - int hidden; // if set, we set the hidden flag on the dimension - int debug_enabled; - int ends_with; - int starts_with; // if set, the compare string matches only the - // beginning of the command +#if 
(PROCESSES_HAVE_FDS == 1) + struct openfds openfds; + NETDATA_DOUBLE max_open_files_percent; + int *target_fds; + uint32_t target_fds_size; +#endif + + bool exposed:1; // if set, we have sent this to netdata + bool hidden:1; // if set, we set the hidden flag on the dimension + bool debug_enabled:1; + bool ends_with:1; + bool starts_with:1; // if set, the compare string matches only the + // beginning of the command struct pid_on_target *root_pid; // list of aggregated pids for target debugging @@ -264,6 +427,7 @@ typedef enum __attribute__((packed)) { // structure to store data for each process running // see: man proc for the description of the fields +#if (PROCESSES_HAVE_PID_LIMITS == 1) struct pid_limits { // kernel_uint_t max_cpu_time; // kernel_uint_t max_file_size; @@ -282,11 +446,12 @@ struct pid_limits { // kernel_uint_t max_realtime_priority; // kernel_uint_t max_realtime_timeout; }; +#endif struct pid_fd { int fd; -#if !defined(__FreeBSD__) && !defined(__APPLE__) +#if defined(OS_LINUX) ino_t inode; char *filename; uint32_t link_hash; @@ -295,6 +460,9 @@ struct pid_fd { #endif }; +#define pid_stat_comm(p) (string2str(p->comm)) +#define pid_stat_cmdline(p) (string2str(p->cmdline)) + struct pid_stat { int32_t pid; int32_t ppid; @@ -304,122 +472,77 @@ struct pid_stat { // int32_t tpgid; // uint64_t flags; - char state; - - char comm[MAX_COMPARE_NAME + 1]; - char *cmdline; - - // these are raw values collected - kernel_uint_t minflt_raw; - kernel_uint_t cminflt_raw; - kernel_uint_t majflt_raw; - kernel_uint_t cmajflt_raw; - kernel_uint_t utime_raw; - kernel_uint_t stime_raw; - kernel_uint_t gtime_raw; // guest_time - kernel_uint_t cutime_raw; - kernel_uint_t cstime_raw; - kernel_uint_t cgtime_raw; // cguest_time - - // these are rates - kernel_uint_t minflt; - kernel_uint_t cminflt; - kernel_uint_t majflt; - kernel_uint_t cmajflt; - kernel_uint_t utime; - kernel_uint_t stime; - kernel_uint_t gtime; - kernel_uint_t cutime; - kernel_uint_t cstime; - 
kernel_uint_t cgtime; - - // int64_t priority; - // int64_t nice; - int32_t num_threads; - // int64_t itrealvalue; - // kernel_uint_t collected_starttime; - // kernel_uint_t vsize; - // kernel_uint_t rss; - // kernel_uint_t rsslim; - // kernel_uint_t starcode; - // kernel_uint_t endcode; - // kernel_uint_t startstack; - // kernel_uint_t kstkesp; - // kernel_uint_t kstkeip; - // uint64_t signal; - // uint64_t blocked; - // uint64_t sigignore; - // uint64_t sigcatch; - // uint64_t wchan; - // uint64_t nswap; - // uint64_t cnswap; - // int32_t exit_signal; - // int32_t processor; - // uint32_t rt_priority; - // uint32_t policy; - // kernel_uint_t delayacct_blkio_ticks; + struct pid_stat *parent; + struct pid_stat *next; + struct pid_stat *prev; - uid_t uid; - gid_t gid; + struct target *target; // app_groups.conf targets - kernel_uint_t status_voluntary_ctxt_switches_raw; - kernel_uint_t status_nonvoluntary_ctxt_switches_raw; - - kernel_uint_t status_vmsize; - kernel_uint_t status_vmrss; - kernel_uint_t status_vmshared; - kernel_uint_t status_rssfile; - kernel_uint_t status_rssshmem; - kernel_uint_t status_vmswap; - kernel_uint_t status_voluntary_ctxt_switches; - kernel_uint_t status_nonvoluntary_ctxt_switches; -#ifndef __FreeBSD__ - ARL_BASE *status_arl; +#if (PROCESSES_HAVE_UID == 1) + struct target *uid_target; // uid based targets +#endif +#if (PROCESSES_HAVE_GID == 1) + struct target *gid_target; // gid based targets #endif - kernel_uint_t io_logical_bytes_read_raw; - kernel_uint_t io_logical_bytes_written_raw; - kernel_uint_t io_read_calls_raw; - kernel_uint_t io_write_calls_raw; - kernel_uint_t io_storage_bytes_read_raw; - kernel_uint_t io_storage_bytes_written_raw; - kernel_uint_t io_cancelled_write_bytes_raw; + STRING *comm; // the command name (short version) + STRING *name; // a better name, or NULL + STRING *cmdline; // the full command line (or on windows, the full pathname of the program) - kernel_uint_t io_logical_bytes_read; - kernel_uint_t 
io_logical_bytes_written; - kernel_uint_t io_read_calls; - kernel_uint_t io_write_calls; - kernel_uint_t io_storage_bytes_read; - kernel_uint_t io_storage_bytes_written; - kernel_uint_t io_cancelled_write_bytes; +#if defined(OS_WINDOWS) + COUNTER_DATA perflib[PDF_MAX]; +#else + kernel_uint_t raw[PDF_MAX]; +#endif - kernel_uint_t uptime; + kernel_uint_t values[PDF_MAX]; - struct pid_fd *fds; // array of fds it uses - size_t fds_size; // the size of the fds array +#if (PROCESSES_HAVE_UID == 1) + uid_t uid; +#endif +#if (PROCESSES_HAVE_GID == 1) + gid_t gid; +#endif +#if (ALL_PIDS_ARE_READ_INSTANTLY == 0) + uint32_t sortlist; // higher numbers = top on the process tree + // each process gets a unique number (non-sequential though) +#endif + +#if (PROCESSES_HAVE_FDS == 1) struct openfds openfds; +#if (PROCESSES_HAVE_PID_LIMITS == 1) struct pid_limits limits; - NETDATA_DOUBLE openfds_limits_percent; +#endif + struct pid_fd *fds; // array of fds it uses + uint32_t fds_size; // the size of the fds array +#endif - int sortlist; // higher numbers = top on the process tree - // each process gets a unique number + uint32_t children_count; // number of processes directly referencing this + // it is absorbed by apps_groups.conf inheritance + // don't rely on it for anything else. 
- int children_count; // number of processes directly referencing this - int keeploops; // increases by 1 every time keep is 1 and updated 0 + uint32_t keeploops; // increases by 1 every time keep is 1 and updated 0 PID_LOG log_thrown; - bool keep; // true when we need to keep this process in memory even after it exited - bool updated; // true when the process is currently running - bool merged; // true when it has been merged to its parent - bool read; // true when we have already read this process for this iteration - bool matched_by_config; + bool read:1; // true when we have already read this process for this iteration + bool updated:1; // true when the process is currently running + bool merged:1; // true when it has been merged to its parent + bool keep:1; // true when we need to keep this process in memory even after it exited - struct target *target; // app_groups.conf targets - struct target *user_target; // uid based targets - struct target *group_target; // gid based targets + bool matched_by_config:1; + +#if (PROCESSES_HAVE_STATE == 1) + char state; +#endif + +#if defined(OS_WINDOWS) + bool got_info:1; + bool assigned_to_target:1; + bool initialized:1; +#endif usec_t stat_collected_usec; usec_t last_stat_collected_usec; @@ -428,27 +551,30 @@ struct pid_stat { usec_t last_io_collected_usec; usec_t last_limits_collected_usec; +#if defined(OS_LINUX) + ARL_BASE *status_arl; char *fds_dirname; // the full directory name in /proc/PID/fd - char *stat_filename; char *status_filename; char *io_filename; char *cmdline_filename; char *limits_filename; - - struct pid_stat *parent; - struct pid_stat *prev; - struct pid_stat *next; +#endif }; // ---------------------------------------------------------------------------- +#if (PROCESSES_HAVE_UID == 1) || (PROCESSES_HAVE_GID == 1) struct user_or_group_id { avl_t avl; union { +#if (PROCESSES_HAVE_UID == 1) uid_t uid; +#endif +#if (PROCESSES_HAVE_GID == 1) gid_t gid; +#endif } id; char *name; @@ -457,39 +583,9 @@ struct 
user_or_group_id { struct user_or_group_id * next; }; - -extern struct target - *apps_groups_default_target, - *apps_groups_root_target, - *users_root_target, - *groups_root_target; - -extern struct pid_stat *root_of_pids; - -extern int update_every; -extern unsigned int time_factor; -extern kernel_uint_t MemTotal; - -#if (ALL_PIDS_ARE_READ_INSTANTLY == 0) -extern pid_t *all_pids_sortlist; #endif -#define APPS_PLUGIN_PROCESSES_FUNCTION_DESCRIPTION "Detailed information on the currently running processes." - -void function_processes(const char *transaction, char *function, - usec_t *stop_monotonic_ut __maybe_unused, bool *cancelled __maybe_unused, - BUFFER *payload __maybe_unused, HTTP_ACCESS access, - const char *source __maybe_unused, void *data __maybe_unused); - -struct target *find_target_by_name(struct target *base, const char *name); - -struct target *get_users_target(uid_t uid); -struct target *get_groups_target(gid_t gid); -int read_apps_groups_conf(const char *path, const char *file); - -void users_and_groups_init(void); -struct user_or_group_id *user_id_find(struct user_or_group_id *user_id_to_find); -struct user_or_group_id *group_id_find(struct user_or_group_id *group_id_to_find); +extern int update_every; // ---------------------------------------------------------------------------- // debugging @@ -515,46 +611,141 @@ static inline void debug_log_dummy(void) {} #define debug_log(fmt, args...) 
debug_log_dummy() #endif -int managed_log(struct pid_stat *p, PID_LOG log, int status); +bool managed_log(struct pid_stat *p, PID_LOG log, bool status); // ---------------------------------------------------------------------------- // macro to calculate the incremental rate of a value // each parameter is accessed only ONCE - so it is safe to pass function calls // or other macros as parameters -#define incremental_rate(rate_variable, last_kernel_variable, new_kernel_value, collected_usec, last_collected_usec) do { \ +#define incremental_rate(rate_variable, last_kernel_variable, new_kernel_value, collected_usec, last_collected_usec, multiplier) do { \ kernel_uint_t _new_tmp = new_kernel_value; \ - (rate_variable) = (_new_tmp - (last_kernel_variable)) * (USEC_PER_SEC * RATES_DETAIL) / ((collected_usec) - (last_collected_usec)); \ + (rate_variable) = (_new_tmp - (last_kernel_variable)) * (USEC_PER_SEC * multiplier) / ((collected_usec) - (last_collected_usec)); \ (last_kernel_variable) = _new_tmp; \ } while(0) // the same macro for struct pid members -#define pid_incremental_rate(type, var, value) \ - incremental_rate(var, var##_raw, value, p->type##_collected_usec, p->last_##type##_collected_usec) +#define pid_incremental_rate(type, idx, value) \ + incremental_rate(p->values[idx], p->raw[idx], value, p->type##_collected_usec, p->last_##type##_collected_usec, RATES_DETAIL) -int read_proc_pid_stat(struct pid_stat *p, void *ptr); -int read_proc_pid_limits(struct pid_stat *p, void *ptr); -int read_proc_pid_status(struct pid_stat *p, void *ptr); -int read_proc_pid_cmdline(struct pid_stat *p); -int read_proc_pid_io(struct pid_stat *p, void *ptr); -int read_pid_file_descriptors(struct pid_stat *p, void *ptr); -int read_global_time(void); -void get_MemTotal(void); +#define pid_incremental_cpu(type, idx, value) \ + incremental_rate(p->values[idx], p->raw[idx], value, p->type##_collected_usec, p->last_##type##_collected_usec, CPU_TO_NANOSECONDCORES) -bool 
collect_data_for_all_pids(void); -void cleanup_exited_pids(void); +void apps_orchestrators_and_aggregators_init(void); +void apps_users_and_groups_init(void); +void apps_pids_init(void); + +#if (PROCESSES_HAVE_CMDLINE == 1) +int read_proc_pid_cmdline(struct pid_stat *p); +#endif +#if (PROCESSES_HAVE_FDS == 1) void clear_pid_fd(struct pid_fd *pfd); void file_descriptor_not_used(int id); void init_pid_fds(struct pid_stat *p, size_t first, size_t size); void aggregate_pid_fds_on_targets(struct pid_stat *p); +int read_pid_file_descriptors(struct pid_stat *p, void *ptr); +void make_all_pid_fds_negative(struct pid_stat *p); +uint32_t file_descriptor_find_or_add(const char *name, uint32_t hash); +#endif + +// -------------------------------------------------------------------------------------------------------------------- +// data collection management + +bool collect_data_for_all_pids(void); + +void pid_collection_started(struct pid_stat *p); +void pid_collection_failed(struct pid_stat *p); +void pid_collection_completed(struct pid_stat *p); + +#if (INCREMENTAL_DATA_COLLECTION == 1) +bool collect_parents_before_children(void); +int incrementally_collect_data_for_pid(pid_t pid, void *ptr); +int incrementally_collect_data_for_pid_stat(struct pid_stat *p, void *ptr); +#endif + +// -------------------------------------------------------------------------------------------------------------------- +// pid management + +struct pid_stat *root_of_pids(void); +size_t all_pids_count(void); + +struct pid_stat *get_or_allocate_pid_entry(pid_t pid); +struct pid_stat *find_pid_entry(pid_t pid); +void del_pid_entry(pid_t pid); +void update_pid_comm(struct pid_stat *p, const char *comm); + + +// -------------------------------------------------------------------------------------------------------------------- +// targets management + +struct target *find_target_by_name(struct target *base, const char *name); +struct target *get_tree_target(struct pid_stat *p); + +void 
aggregate_processes_to_targets(void); + +#if (PROCESSES_HAVE_UID == 1) +extern struct target *users_root_target; +struct target *get_uid_target(uid_t uid); +struct user_or_group_id *user_id_find(struct user_or_group_id *user_id_to_find); +#endif + +#if (PROCESSES_HAVE_GID == 1) +extern struct target *groups_root_target; +struct target *get_gid_target(gid_t gid); +struct user_or_group_id *group_id_find(struct user_or_group_id *group_id_to_find); +#endif + +extern struct target *apps_groups_root_target; +int read_apps_groups_conf(const char *path, const char *file); + +// -------------------------------------------------------------------------------------------------------------------- +// output -void send_proc_states_count(usec_t dt); void send_charts_updates_to_netdata(struct target *root, const char *type, const char *lbl_name, const char *title); void send_collected_data_to_netdata(struct target *root, const char *type, usec_t dt); void send_resource_usage_to_netdata(usec_t dt); -void pids_init(void); -struct pid_stat *find_pid_entry(pid_t pid); +#if (PROCESSES_HAVE_STATE == 1) +void send_proc_states_count(usec_t dt); +#endif + +#define APPS_PLUGIN_PROCESSES_FUNCTION_DESCRIPTION "Detailed information on the currently running processes." 
+void function_processes(const char *transaction, char *function, + usec_t *stop_monotonic_ut __maybe_unused, bool *cancelled __maybe_unused, + BUFFER *payload __maybe_unused, HTTP_ACCESS access, + const char *source __maybe_unused, void *data __maybe_unused); + +// -------------------------------------------------------------------------------------------------------------------- +// operating system functions + +// one time initialization per operating system +void OS_FUNCTION(apps_os_init)(void); + +// collect all the available information for all processes running +bool OS_FUNCTION(apps_os_collect_all_pids)(void); + +bool OS_FUNCTION(apps_os_read_pid_status)(struct pid_stat *p, void *ptr); +bool OS_FUNCTION(apps_os_read_pid_stat)(struct pid_stat *p, void *ptr); +bool OS_FUNCTION(apps_os_read_pid_io)(struct pid_stat *p, void *ptr); + +#if (PROCESSES_HAVE_PID_LIMITS == 1) +bool OS_FUNCTION(apps_os_read_pid_limits)(struct pid_stat *p, void *ptr); +#endif + +#if (PROCESSES_HAVE_CMDLINE == 1) +bool OS_FUNCTION(apps_os_get_pid_cmdline)(struct pid_stat *p, char *cmdline, size_t bytes); +#endif + +#if (PROCESSES_HAVE_FDS == 1) +bool OS_FUNCTION(apps_os_read_pid_fds)(struct pid_stat *p, void *ptr); +#endif + +#if (ALL_PIDS_ARE_READ_INSTANTLY == 0) +bool OS_FUNCTION(apps_os_read_global_cpu_utilization)(void); +#endif + +// return the total physical memory of the system, in bytes +uint64_t OS_FUNCTION(apps_os_get_total_memory)(void); #endif //NETDATA_APPS_PLUGIN_H diff --git a/src/collectors/apps.plugin/apps_proc_meminfo.c b/src/collectors/apps.plugin/apps_proc_meminfo.c deleted file mode 100644 index a7227c213a77a6..00000000000000 --- a/src/collectors/apps.plugin/apps_proc_meminfo.c +++ /dev/null @@ -1,68 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "apps_plugin.h" - -kernel_uint_t MemTotal = 0; - -#ifdef __FreeBSD__ -static inline bool get_MemTotal_per_os(void) { - int mib[2] = {CTL_HW, HW_PHYSMEM}; - size_t size = sizeof(MemTotal); - if 
(sysctl(mib, 2, &MemTotal, &size, NULL, 0) == -1) { - netdata_log_error("Failed to get total memory using sysctl"); - return false; - } - // FreeBSD returns bytes; convert to kB - MemTotal /= 1024; - return true; -} -#endif // __FreeBSD__ - -#ifdef __APPLE__ -static inline bool get_MemTotal_per_os(void) { - int mib[2] = {CTL_HW, HW_MEMSIZE}; - size_t size = sizeof(MemTotal); - if (sysctl(mib, 2, &MemTotal, &size, NULL, 0) == -1) { - netdata_log_error("Failed to get total memory using sysctl"); - return false; - } - // MacOS returns bytes; convert to kB - MemTotal /= 1024; - return true; -} -#endif // __APPLE__ - -#if !defined(__FreeBSD__) && !defined(__APPLE__) -static inline bool get_MemTotal_per_os(void) { - char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s/proc/meminfo", netdata_configured_host_prefix); - - procfile *ff = procfile_open(filename, ": \t", PROCFILE_FLAG_DEFAULT); - if(!ff) - return false; - - ff = procfile_readall(ff); - if(!ff) - return false; - - size_t line, lines = procfile_lines(ff); - - for(line = 0; line < lines ;line++) { - size_t words = procfile_linewords(ff, line); - if(words == 3 && strcmp(procfile_lineword(ff, line, 0), "MemTotal") == 0 && strcmp(procfile_lineword(ff, line, 2), "kB") == 0) { - kernel_uint_t n = str2ull(procfile_lineword(ff, line, 1), NULL); - if(n) MemTotal = n; - break; - } - } - - procfile_close(ff); - - return true; -} -#endif - -void get_MemTotal(void) { - if(!get_MemTotal_per_os()) - MemTotal = 0; -} diff --git a/src/collectors/apps.plugin/apps_proc_pid_cmdline.c b/src/collectors/apps.plugin/apps_proc_pid_cmdline.c deleted file mode 100644 index 75a60fa3a97406..00000000000000 --- a/src/collectors/apps.plugin/apps_proc_pid_cmdline.c +++ /dev/null @@ -1,130 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "apps_plugin.h" - -#ifdef __APPLE__ -bool get_cmdline_per_os(struct pid_stat *p, char *cmdline, size_t maxBytes) { - int mib[3] = {CTL_KERN, KERN_PROCARGS2, p->pid}; - static 
char *args = NULL; - static size_t size = 0; - - size_t new_size; - if (sysctl(mib, 3, NULL, &new_size, NULL, 0) == -1) { - return false; - } - - if (new_size > size) { - if (args) - freez(args); - - args = (char *)mallocz(new_size); - size = new_size; - } - - memset(cmdline, 0, new_size < maxBytes ? new_size : maxBytes); - - size_t used_size = size; - if (sysctl(mib, 3, args, &used_size, NULL, 0) == -1) - return false; - - int argc; - memcpy(&argc, args, sizeof(argc)); - char *ptr = args + sizeof(argc); - used_size -= sizeof(argc); - - // Skip the executable path - while (*ptr && used_size > 0) { - ptr++; - used_size--; - } - - // Copy only the arguments to the cmdline buffer, skipping the environment variables - size_t i = 0, copied_args = 0; - bool inArg = false; - for (; used_size > 0 && i < maxBytes - 1 && copied_args < argc; --used_size, ++ptr) { - if (*ptr == '\0') { - if (inArg) { - cmdline[i++] = ' '; // Replace nulls between arguments with spaces - inArg = false; - copied_args++; - } - } else { - cmdline[i++] = *ptr; - inArg = true; - } - } - - if (i > 0 && cmdline[i - 1] == ' ') - i--; // Remove the trailing space if present - - cmdline[i] = '\0'; // Null-terminate the string - - return true; -} -#endif // __APPLE__ - -#if defined(__FreeBSD__) -static inline bool get_cmdline_per_os(struct pid_stat *p, char *cmdline, size_t bytes) { - size_t i, b = bytes - 1; - int mib[4]; - - mib[0] = CTL_KERN; - mib[1] = KERN_PROC; - mib[2] = KERN_PROC_ARGS; - mib[3] = p->pid; - if (unlikely(sysctl(mib, 4, cmdline, &b, NULL, 0))) - return false; - - cmdline[b] = '\0'; - for(i = 0; i < b ; i++) - if(unlikely(!cmdline[i])) cmdline[i] = ' '; - - return true; -} -#endif // __FreeBSD__ - -#if !defined(__FreeBSD__) && !defined(__APPLE__) -static inline bool get_cmdline_per_os(struct pid_stat *p, char *cmdline, size_t bytes) { - if(unlikely(!p->cmdline_filename)) { - char filename[FILENAME_MAX]; - snprintfz(filename, FILENAME_MAX, "%s/proc/%d/cmdline", 
netdata_configured_host_prefix, p->pid); - p->cmdline_filename = strdupz(filename); - } - - int fd = open(p->cmdline_filename, procfile_open_flags, 0666); - if(unlikely(fd == -1)) - return false; - - ssize_t i, b = read(fd, cmdline, bytes - 1); - close(fd); - - if(unlikely(b < 0)) - return false; - - cmdline[b] = '\0'; - for(i = 0; i < b ; i++) - if(unlikely(!cmdline[i])) cmdline[i] = ' '; - - return true; -} -#endif // !__FreeBSD__ !__APPLE__ - -int read_proc_pid_cmdline(struct pid_stat *p) { - static char cmdline[MAX_CMDLINE]; - - if(unlikely(!get_cmdline_per_os(p, cmdline, sizeof(cmdline)))) - goto cleanup; - - if(p->cmdline) freez(p->cmdline); - p->cmdline = strdupz(cmdline); - - debug_log("Read file '%s' contents: %s", p->cmdline_filename, p->cmdline); - - return 1; - -cleanup: - // copy the command to the command line - if(p->cmdline) freez(p->cmdline); - p->cmdline = strdupz(p->comm); - return 0; -} diff --git a/src/collectors/apps.plugin/apps_proc_pid_fd.c b/src/collectors/apps.plugin/apps_proc_pid_fd.c deleted file mode 100644 index 519b0794da51c2..00000000000000 --- a/src/collectors/apps.plugin/apps_proc_pid_fd.c +++ /dev/null @@ -1,753 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "apps_plugin.h" - -// ---------------------------------------------------------------------------- -// file descriptor -// -// this is used to keep a global list of all open files of the system. -// it is needed in order to calculate the unique files processes have open. 
- -#define FILE_DESCRIPTORS_INCREASE_STEP 100 - -// types for struct file_descriptor->type -typedef enum fd_filetype { - FILETYPE_OTHER, - FILETYPE_FILE, - FILETYPE_PIPE, - FILETYPE_SOCKET, - FILETYPE_INOTIFY, - FILETYPE_EVENTFD, - FILETYPE_EVENTPOLL, - FILETYPE_TIMERFD, - FILETYPE_SIGNALFD -} FD_FILETYPE; - -struct file_descriptor { - avl_t avl; - -#ifdef NETDATA_INTERNAL_CHECKS - uint32_t magic; -#endif /* NETDATA_INTERNAL_CHECKS */ - - const char *name; - uint32_t hash; - - FD_FILETYPE type; - int count; - int pos; -} *all_files = NULL; - -// ---------------------------------------------------------------------------- - -static inline void reallocate_target_fds(struct target *w) { - if(unlikely(!w)) - return; - - if(unlikely(!w->target_fds || w->target_fds_size < all_files_size)) { - w->target_fds = reallocz(w->target_fds, sizeof(int) * all_files_size); - memset(&w->target_fds[w->target_fds_size], 0, sizeof(int) * (all_files_size - w->target_fds_size)); - w->target_fds_size = all_files_size; - } -} - -static void aggregage_fd_type_on_openfds(FD_FILETYPE type, struct openfds *openfds) { - switch(type) { - case FILETYPE_FILE: - openfds->files++; - break; - - case FILETYPE_PIPE: - openfds->pipes++; - break; - - case FILETYPE_SOCKET: - openfds->sockets++; - break; - - case FILETYPE_INOTIFY: - openfds->inotifies++; - break; - - case FILETYPE_EVENTFD: - openfds->eventfds++; - break; - - case FILETYPE_TIMERFD: - openfds->timerfds++; - break; - - case FILETYPE_SIGNALFD: - openfds->signalfds++; - break; - - case FILETYPE_EVENTPOLL: - openfds->eventpolls++; - break; - - case FILETYPE_OTHER: - openfds->other++; - break; - } -} - -static inline void aggregate_fd_on_target(int fd, struct target *w) { - if(unlikely(!w)) - return; - - if(unlikely(w->target_fds[fd])) { - // it is already aggregated - // just increase its usage counter - w->target_fds[fd]++; - return; - } - - // increase its usage counter - // so that we will not add it again - w->target_fds[fd]++; - - 
aggregage_fd_type_on_openfds(all_files[fd].type, &w->openfds); -} - -void aggregate_pid_fds_on_targets(struct pid_stat *p) { - - if(unlikely(!p->updated)) { - // the process is not running - return; - } - - struct target *w = p->target, *u = p->user_target, *g = p->group_target; - - reallocate_target_fds(w); - reallocate_target_fds(u); - reallocate_target_fds(g); - - p->openfds.files = 0; - p->openfds.pipes = 0; - p->openfds.sockets = 0; - p->openfds.inotifies = 0; - p->openfds.eventfds = 0; - p->openfds.timerfds = 0; - p->openfds.signalfds = 0; - p->openfds.eventpolls = 0; - p->openfds.other = 0; - - long currentfds = 0; - size_t c, size = p->fds_size; - struct pid_fd *fds = p->fds; - for(c = 0; c < size ;c++) { - int fd = fds[c].fd; - - if(likely(fd <= 0 || fd >= all_files_size)) - continue; - - currentfds++; - aggregage_fd_type_on_openfds(all_files[fd].type, &p->openfds); - - aggregate_fd_on_target(fd, w); - aggregate_fd_on_target(fd, u); - aggregate_fd_on_target(fd, g); - } -} - -// ---------------------------------------------------------------------------- - -int file_descriptor_compare(void* a, void* b) { -#ifdef NETDATA_INTERNAL_CHECKS - if(((struct file_descriptor *)a)->magic != 0x0BADCAFE || ((struct file_descriptor *)b)->magic != 0x0BADCAFE) - netdata_log_error("Corrupted index data detected. 
Please report this."); -#endif /* NETDATA_INTERNAL_CHECKS */ - - if(((struct file_descriptor *)a)->hash < ((struct file_descriptor *)b)->hash) - return -1; - - else if(((struct file_descriptor *)a)->hash > ((struct file_descriptor *)b)->hash) - return 1; - - else - return strcmp(((struct file_descriptor *)a)->name, ((struct file_descriptor *)b)->name); -} - -// int file_descriptor_iterator(avl_t *a) { if(a) {}; return 0; } - -avl_tree_type all_files_index = { - NULL, - file_descriptor_compare -}; - -static struct file_descriptor *file_descriptor_find(const char *name, uint32_t hash) { - struct file_descriptor tmp; - tmp.hash = (hash)?hash:simple_hash(name); - tmp.name = name; - tmp.count = 0; - tmp.pos = 0; -#ifdef NETDATA_INTERNAL_CHECKS - tmp.magic = 0x0BADCAFE; -#endif /* NETDATA_INTERNAL_CHECKS */ - - return (struct file_descriptor *)avl_search(&all_files_index, (avl_t *) &tmp); -} - -#define file_descriptor_add(fd) avl_insert(&all_files_index, (avl_t *)(fd)) -#define file_descriptor_remove(fd) avl_remove(&all_files_index, (avl_t *)(fd)) - -// ---------------------------------------------------------------------------- - -void file_descriptor_not_used(int id) { - if(id > 0 && id < all_files_size) { - -#ifdef NETDATA_INTERNAL_CHECKS - if(all_files[id].magic != 0x0BADCAFE) { - netdata_log_error("Ignoring request to remove empty file id %d.", id); - return; - } -#endif /* NETDATA_INTERNAL_CHECKS */ - - debug_log("decreasing slot %d (count = %d).", id, all_files[id].count); - - if(all_files[id].count > 0) { - all_files[id].count--; - - if(!all_files[id].count) { - debug_log(" >> slot %d is empty.", id); - - if(unlikely(file_descriptor_remove(&all_files[id]) != (void *)&all_files[id])) - netdata_log_error("INTERNAL ERROR: removal of unused fd from index, removed a different fd"); - -#ifdef NETDATA_INTERNAL_CHECKS - all_files[id].magic = 0x00000000; -#endif /* NETDATA_INTERNAL_CHECKS */ - all_files_len--; - } - } - else - netdata_log_error("Request to decrease 
counter of fd %d (%s), while the use counter is 0", - id, - all_files[id].name); - } - else - netdata_log_error("Request to decrease counter of fd %d, which is outside the array size (1 to %d)", - id, - all_files_size); -} - -static inline void all_files_grow() { - void *old = all_files; - int i; - - // there is no empty slot - debug_log("extending fd array to %d entries", all_files_size + FILE_DESCRIPTORS_INCREASE_STEP); - - all_files = reallocz(all_files, (all_files_size + FILE_DESCRIPTORS_INCREASE_STEP) * sizeof(struct file_descriptor)); - - // if the address changed, we have to rebuild the index - // since all pointers are now invalid - - if(unlikely(old && old != (void *)all_files)) { - debug_log(" >> re-indexing."); - - all_files_index.root = NULL; - for(i = 0; i < all_files_size; i++) { - if(!all_files[i].count) continue; - if(unlikely(file_descriptor_add(&all_files[i]) != (void *)&all_files[i])) - netdata_log_error("INTERNAL ERROR: duplicate indexing of fd during realloc."); - } - - debug_log(" >> re-indexing done."); - } - - // initialize the newly added entries - - for(i = all_files_size; i < (all_files_size + FILE_DESCRIPTORS_INCREASE_STEP); i++) { - all_files[i].count = 0; - all_files[i].name = NULL; -#ifdef NETDATA_INTERNAL_CHECKS - all_files[i].magic = 0x00000000; -#endif /* NETDATA_INTERNAL_CHECKS */ - all_files[i].pos = i; - } - - if(unlikely(!all_files_size)) all_files_len = 1; - all_files_size += FILE_DESCRIPTORS_INCREASE_STEP; -} - -static inline int file_descriptor_set_on_empty_slot(const char *name, uint32_t hash, FD_FILETYPE type) { - // check we have enough memory to add it - if(!all_files || all_files_len == all_files_size) - all_files_grow(); - - debug_log(" >> searching for empty slot."); - - // search for an empty slot - - static int last_pos = 0; - int i, c; - for(i = 0, c = last_pos ; i < all_files_size ; i++, c++) { - if(c >= all_files_size) c = 0; - if(c == 0) continue; - - if(!all_files[c].count) { - debug_log(" >> Examining slot 
%d.", c); - -#ifdef NETDATA_INTERNAL_CHECKS - if(all_files[c].magic == 0x0BADCAFE && all_files[c].name && file_descriptor_find(all_files[c].name, all_files[c].hash)) - netdata_log_error("fd on position %d is not cleared properly. It still has %s in it.", c, all_files[c].name); -#endif /* NETDATA_INTERNAL_CHECKS */ - - debug_log(" >> %s fd position %d for %s (last name: %s)", all_files[c].name?"re-using":"using", c, name, all_files[c].name); - - freez((void *)all_files[c].name); - all_files[c].name = NULL; - last_pos = c; - break; - } - } - - all_files_len++; - - if(i == all_files_size) { - fatal("We should find an empty slot, but there isn't any"); - exit(1); - } - // else we have an empty slot in 'c' - - debug_log(" >> updating slot %d.", c); - - all_files[c].name = strdupz(name); - all_files[c].hash = hash; - all_files[c].type = type; - all_files[c].pos = c; - all_files[c].count = 1; -#ifdef NETDATA_INTERNAL_CHECKS - all_files[c].magic = 0x0BADCAFE; -#endif /* NETDATA_INTERNAL_CHECKS */ - if(unlikely(file_descriptor_add(&all_files[c]) != (void *)&all_files[c])) - netdata_log_error("INTERNAL ERROR: duplicate indexing of fd."); - - debug_log("using fd position %d (name: %s)", c, all_files[c].name); - - return c; -} - -static inline int file_descriptor_find_or_add(const char *name, uint32_t hash) { - if(unlikely(!hash)) - hash = simple_hash(name); - - debug_log("adding or finding name '%s' with hash %u", name, hash); - - struct file_descriptor *fd = file_descriptor_find(name, hash); - if(fd) { - // found - debug_log(" >> found on slot %d", fd->pos); - - fd->count++; - return fd->pos; - } - // not found - - FD_FILETYPE type; - if(likely(name[0] == '/')) type = FILETYPE_FILE; - else if(likely(strncmp(name, "pipe:", 5) == 0)) type = FILETYPE_PIPE; - else if(likely(strncmp(name, "socket:", 7) == 0)) type = FILETYPE_SOCKET; - else if(likely(strncmp(name, "anon_inode:", 11) == 0)) { - const char *t = &name[11]; - - if(strcmp(t, "inotify") == 0) type = FILETYPE_INOTIFY; - 
else if(strcmp(t, "[eventfd]") == 0) type = FILETYPE_EVENTFD; - else if(strcmp(t, "[eventpoll]") == 0) type = FILETYPE_EVENTPOLL; - else if(strcmp(t, "[timerfd]") == 0) type = FILETYPE_TIMERFD; - else if(strcmp(t, "[signalfd]") == 0) type = FILETYPE_SIGNALFD; - else { - debug_log("UNKNOWN anonymous inode: %s", name); - type = FILETYPE_OTHER; - } - } - else if(likely(strcmp(name, "inotify") == 0)) type = FILETYPE_INOTIFY; - else { - debug_log("UNKNOWN linkname: %s", name); - type = FILETYPE_OTHER; - } - - return file_descriptor_set_on_empty_slot(name, hash, type); -} - -void clear_pid_fd(struct pid_fd *pfd) { - pfd->fd = 0; - -#if !defined(__FreeBSD__) && !defined(__APPLE__) - pfd->link_hash = 0; - pfd->inode = 0; - pfd->cache_iterations_counter = 0; - pfd->cache_iterations_reset = 0; -#endif -} - -static inline void make_all_pid_fds_negative(struct pid_stat *p) { - struct pid_fd *pfd = p->fds, *pfdend = &p->fds[p->fds_size]; - while(pfd < pfdend) { - pfd->fd = -(pfd->fd); - pfd++; - } -} - -static inline void cleanup_negative_pid_fds(struct pid_stat *p) { - struct pid_fd *pfd = p->fds, *pfdend = &p->fds[p->fds_size]; - - while(pfd < pfdend) { - int fd = pfd->fd; - - if(unlikely(fd < 0)) { - file_descriptor_not_used(-(fd)); - clear_pid_fd(pfd); - } - - pfd++; - } -} - -void init_pid_fds(struct pid_stat *p, size_t first, size_t size) { - struct pid_fd *pfd = &p->fds[first], *pfdend = &p->fds[first + size]; - - while(pfd < pfdend) { -#if !defined(__FreeBSD__) && !defined(__APPLE__) - pfd->filename = NULL; -#endif - clear_pid_fd(pfd); - pfd++; - } -} - -#ifdef __APPLE__ -static bool read_pid_file_descriptors_per_os(struct pid_stat *p, void *ptr __maybe_unused) { - static struct proc_fdinfo *fds = NULL; - static int fdsCapacity = 0; - - int bufferSize = proc_pidinfo(p->pid, PROC_PIDLISTFDS, 0, NULL, 0); - if (bufferSize <= 0) { - netdata_log_error("Failed to get the size of file descriptors for PID %d", p->pid); - return false; - } - - // Resize buffer if necessary - if 
(bufferSize > fdsCapacity) { - if(fds) - freez(fds); - - fds = mallocz(bufferSize); - fdsCapacity = bufferSize; - } - - int num_fds = proc_pidinfo(p->pid, PROC_PIDLISTFDS, 0, fds, bufferSize) / PROC_PIDLISTFD_SIZE; - if (num_fds <= 0) { - netdata_log_error("Failed to get the file descriptors for PID %d", p->pid); - return false; - } - - for (int i = 0; i < num_fds; i++) { - switch (fds[i].proc_fdtype) { - case PROX_FDTYPE_VNODE: { - struct vnode_fdinfowithpath vi; - if (proc_pidfdinfo(p->pid, fds[i].proc_fd, PROC_PIDFDVNODEPATHINFO, &vi, sizeof(vi)) > 0) - p->openfds.files++; - else - p->openfds.other++; - - break; - } - case PROX_FDTYPE_SOCKET: { - p->openfds.sockets++; - break; - } - case PROX_FDTYPE_PIPE: { - p->openfds.pipes++; - break; - } - - default: - p->openfds.other++; - break; - } - } - - return true; -} -#endif // __APPLE__ - -#if defined(__FreeBSD__) -static bool read_pid_file_descriptors_per_os(struct pid_stat *p, void *ptr) { - int mib[4]; - size_t size; - struct kinfo_file *fds; - static char *fdsbuf; - char *bfdsbuf, *efdsbuf; - char fdsname[FILENAME_MAX + 1]; -#define SHM_FORMAT_LEN 31 // format: 21 + size: 10 - char shm_name[FILENAME_MAX - SHM_FORMAT_LEN + 1]; - - // we make all pid fds negative, so that - // we can detect unused file descriptors - // at the end, to free them - make_all_pid_fds_negative(p); - - mib[0] = CTL_KERN; - mib[1] = KERN_PROC; - mib[2] = KERN_PROC_FILEDESC; - mib[3] = p->pid; - - if (unlikely(sysctl(mib, 4, NULL, &size, NULL, 0))) { - netdata_log_error("sysctl error: Can't get file descriptors data size for pid %d", p->pid); - return false; - } - if (likely(size > 0)) - fdsbuf = reallocz(fdsbuf, size); - if (unlikely(sysctl(mib, 4, fdsbuf, &size, NULL, 0))) { - netdata_log_error("sysctl error: Can't get file descriptors data for pid %d", p->pid); - return false; - } - - bfdsbuf = fdsbuf; - efdsbuf = fdsbuf + size; - while (bfdsbuf < efdsbuf) { - fds = (struct kinfo_file *)(uintptr_t)bfdsbuf; - if 
(unlikely(fds->kf_structsize == 0)) - break; - - // do not process file descriptors for current working directory, root directory, - // jail directory, ktrace vnode, text vnode and controlling terminal - if (unlikely(fds->kf_fd < 0)) { - bfdsbuf += fds->kf_structsize; - continue; - } - - // get file descriptors array index - size_t fdid = fds->kf_fd; - - // check if the fds array is small - if (unlikely(fdid >= p->fds_size)) { - // it is small, extend it - - debug_log("extending fd memory slots for %s from %d to %d", p->comm, p->fds_size, fdid + MAX_SPARE_FDS); - - p->fds = reallocz(p->fds, (fdid + MAX_SPARE_FDS) * sizeof(struct pid_fd)); - - // and initialize it - init_pid_fds(p, p->fds_size, (fdid + MAX_SPARE_FDS) - p->fds_size); - p->fds_size = fdid + MAX_SPARE_FDS; - } - - if (unlikely(p->fds[fdid].fd == 0)) { - // we don't know this fd, get it - - switch (fds->kf_type) { - case KF_TYPE_FIFO: - case KF_TYPE_VNODE: - if (unlikely(!fds->kf_path[0])) { - sprintf(fdsname, "other: inode: %lu", fds->kf_un.kf_file.kf_file_fileid); - break; - } - sprintf(fdsname, "%s", fds->kf_path); - break; - case KF_TYPE_SOCKET: - switch (fds->kf_sock_domain) { - case AF_INET: - case AF_INET6: -#if __FreeBSD_version < 1400074 - if (fds->kf_sock_protocol == IPPROTO_TCP) - sprintf(fdsname, "socket: %d %lx", fds->kf_sock_protocol, fds->kf_un.kf_sock.kf_sock_inpcb); - else -#endif - sprintf(fdsname, "socket: %d %lx", fds->kf_sock_protocol, fds->kf_un.kf_sock.kf_sock_pcb); - break; - case AF_UNIX: - /* print address of pcb and connected pcb */ - sprintf(fdsname, "socket: %lx %lx", fds->kf_un.kf_sock.kf_sock_pcb, fds->kf_un.kf_sock.kf_sock_unpconn); - break; - default: - /* print protocol number and socket address */ -#if __FreeBSD_version < 1200031 - sprintf(fdsname, "socket: other: %d %s %s", fds->kf_sock_protocol, fds->kf_sa_local.__ss_pad1, fds->kf_sa_local.__ss_pad2); -#else - sprintf(fdsname, "socket: other: %d %s %s", fds->kf_sock_protocol, fds->kf_un.kf_sock.kf_sa_local.__ss_pad1, 
fds->kf_un.kf_sock.kf_sa_local.__ss_pad2); -#endif - } - break; - case KF_TYPE_PIPE: - sprintf(fdsname, "pipe: %lu %lu", fds->kf_un.kf_pipe.kf_pipe_addr, fds->kf_un.kf_pipe.kf_pipe_peer); - break; - case KF_TYPE_PTS: -#if __FreeBSD_version < 1200031 - sprintf(fdsname, "other: pts: %u", fds->kf_un.kf_pts.kf_pts_dev); -#else - sprintf(fdsname, "other: pts: %lu", fds->kf_un.kf_pts.kf_pts_dev); -#endif - break; - case KF_TYPE_SHM: - strncpyz(shm_name, fds->kf_path, FILENAME_MAX - SHM_FORMAT_LEN); - sprintf(fdsname, "other: shm: %s size: %lu", shm_name, fds->kf_un.kf_file.kf_file_size); - break; - case KF_TYPE_SEM: - sprintf(fdsname, "other: sem: %u", fds->kf_un.kf_sem.kf_sem_value); - break; - default: - sprintf(fdsname, "other: pid: %d fd: %d", fds->kf_un.kf_proc.kf_pid, fds->kf_fd); - } - - // if another process already has this, we will get - // the same id - p->fds[fdid].fd = file_descriptor_find_or_add(fdsname, 0); - } - - // else make it positive again, we need it - // of course, the actual file may have changed - - else - p->fds[fdid].fd = -p->fds[fdid].fd; - - bfdsbuf += fds->kf_structsize; - } - - return true; -} -#endif // __FreeBSD__ - -#if !defined(__FreeBSD__) && !defined(__APPLE__) -static bool read_pid_file_descriptors_per_os(struct pid_stat *p, void *ptr __maybe_unused) { - if(unlikely(!p->fds_dirname)) { - char dirname[FILENAME_MAX+1]; - snprintfz(dirname, FILENAME_MAX, "%s/proc/%d/fd", netdata_configured_host_prefix, p->pid); - p->fds_dirname = strdupz(dirname); - } - - DIR *fds = opendir(p->fds_dirname); - if(unlikely(!fds)) return false; - - struct dirent *de; - char linkname[FILENAME_MAX + 1]; - - // we make all pid fds negative, so that - // we can detect unused file descriptors - // at the end, to free them - make_all_pid_fds_negative(p); - - while((de = readdir(fds))) { - // we need only files with numeric names - - if(unlikely(de->d_name[0] < '0' || de->d_name[0] > '9')) - continue; - - // get its number - int fdid = (int) str2l(de->d_name); - 
if(unlikely(fdid < 0)) continue; - - // check if the fds array is small - if(unlikely((size_t)fdid >= p->fds_size)) { - // it is small, extend it - - debug_log("extending fd memory slots for %s from %d to %d" - , p->comm - , p->fds_size - , fdid + MAX_SPARE_FDS - ); - - p->fds = reallocz(p->fds, (fdid + MAX_SPARE_FDS) * sizeof(struct pid_fd)); - - // and initialize it - init_pid_fds(p, p->fds_size, (fdid + MAX_SPARE_FDS) - p->fds_size); - p->fds_size = (size_t)fdid + MAX_SPARE_FDS; - } - - if(unlikely(p->fds[fdid].fd < 0 && de->d_ino != p->fds[fdid].inode)) { - // inodes do not match, clear the previous entry - inodes_changed_counter++; - file_descriptor_not_used(-p->fds[fdid].fd); - clear_pid_fd(&p->fds[fdid]); - } - - if(p->fds[fdid].fd < 0 && p->fds[fdid].cache_iterations_counter > 0) { - p->fds[fdid].fd = -p->fds[fdid].fd; - p->fds[fdid].cache_iterations_counter--; - continue; - } - - if(unlikely(!p->fds[fdid].filename)) { - filenames_allocated_counter++; - char fdname[FILENAME_MAX + 1]; - snprintfz(fdname, FILENAME_MAX, "%s/proc/%d/fd/%s", netdata_configured_host_prefix, p->pid, de->d_name); - p->fds[fdid].filename = strdupz(fdname); - } - - file_counter++; - ssize_t l = readlink(p->fds[fdid].filename, linkname, FILENAME_MAX); - if(unlikely(l == -1)) { - // cannot read the link - - if(debug_enabled || (p->target && p->target->debug_enabled)) - netdata_log_error("Cannot read link %s", p->fds[fdid].filename); - - if(unlikely(p->fds[fdid].fd < 0)) { - file_descriptor_not_used(-p->fds[fdid].fd); - clear_pid_fd(&p->fds[fdid]); - } - - continue; - } - else - linkname[l] = '\0'; - - uint32_t link_hash = simple_hash(linkname); - - if(unlikely(p->fds[fdid].fd < 0 && p->fds[fdid].link_hash != link_hash)) { - // the link changed - links_changed_counter++; - file_descriptor_not_used(-p->fds[fdid].fd); - clear_pid_fd(&p->fds[fdid]); - } - - if(unlikely(p->fds[fdid].fd == 0)) { - // we don't know this fd, get it - - // if another process already has this, we will get - // 
the same id - p->fds[fdid].fd = file_descriptor_find_or_add(linkname, link_hash); - p->fds[fdid].inode = de->d_ino; - p->fds[fdid].link_hash = link_hash; - } - else { - // else make it positive again, we need it - p->fds[fdid].fd = -p->fds[fdid].fd; - } - - // caching control - // without this we read all the files on every iteration - if(max_fds_cache_seconds > 0) { - size_t spread = ((size_t)max_fds_cache_seconds > 10) ? 10 : (size_t)max_fds_cache_seconds; - - // cache it for a few iterations - size_t max = ((size_t) max_fds_cache_seconds + (fdid % spread)) / (size_t) update_every; - p->fds[fdid].cache_iterations_reset++; - - if(unlikely(p->fds[fdid].cache_iterations_reset % spread == (size_t) fdid % spread)) - p->fds[fdid].cache_iterations_reset++; - - if(unlikely((fdid <= 2 && p->fds[fdid].cache_iterations_reset > 5) || - p->fds[fdid].cache_iterations_reset > max)) { - // for stdin, stdout, stderr (fdid <= 2) we have checked a few times, or if it goes above the max, goto max - p->fds[fdid].cache_iterations_reset = max; - } - - p->fds[fdid].cache_iterations_counter = p->fds[fdid].cache_iterations_reset; - } - } - - closedir(fds); - - return true; -} -#endif // !__FreeBSD__ !__APPLE - -int read_pid_file_descriptors(struct pid_stat *p, void *ptr) { - bool ret = read_pid_file_descriptors_per_os(p, ptr); - cleanup_negative_pid_fds(p); - - return ret ? 
1 : 0; -} diff --git a/src/collectors/apps.plugin/apps_proc_pid_io.c b/src/collectors/apps.plugin/apps_proc_pid_io.c deleted file mode 100644 index 0fef3fc2420612..00000000000000 --- a/src/collectors/apps.plugin/apps_proc_pid_io.c +++ /dev/null @@ -1,95 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "apps_plugin.h" - -static inline void clear_pid_io(struct pid_stat *p) { - p->io_logical_bytes_read = 0; - p->io_logical_bytes_written = 0; - p->io_read_calls = 0; - p->io_write_calls = 0; - p->io_storage_bytes_read = 0; - p->io_storage_bytes_written = 0; - p->io_cancelled_write_bytes = 0; -} - -#if defined(__FreeBSD__) -static inline bool read_proc_pid_io_per_os(struct pid_stat *p, void *ptr) { - struct kinfo_proc *proc_info = (struct kinfo_proc *)ptr; - - pid_incremental_rate(io, p->io_storage_bytes_read, proc_info->ki_rusage.ru_inblock); - pid_incremental_rate(io, p->io_storage_bytes_written, proc_info->ki_rusage.ru_oublock); - - p->io_logical_bytes_read = 0; - p->io_logical_bytes_written = 0; - p->io_read_calls = 0; - p->io_write_calls = 0; - p->io_cancelled_write_bytes = 0; - - return true; -} -#endif - -#ifdef __APPLE__ -static inline bool read_proc_pid_io_per_os(struct pid_stat *p, void *ptr) { - struct pid_info *pi = ptr; - - // On MacOS, the proc_pid_rusage provides disk_io_statistics which includes io bytes read and written - // but does not provide the same level of detail as Linux, like separating logical and physical I/O bytes. 
- pid_incremental_rate(io, p->io_storage_bytes_read, pi->rusageinfo.ri_diskio_bytesread); - pid_incremental_rate(io, p->io_storage_bytes_written, pi->rusageinfo.ri_diskio_byteswritten); - - p->io_logical_bytes_read = 0; - p->io_logical_bytes_written = 0; - p->io_read_calls = 0; - p->io_write_calls = 0; - p->io_cancelled_write_bytes = 0; - - return true; -} -#endif // __APPLE__ - -#if !defined(__FreeBSD__) && !defined(__APPLE__) -static inline int read_proc_pid_io_per_os(struct pid_stat *p, void *ptr __maybe_unused) { - static procfile *ff = NULL; - - if(unlikely(!p->io_filename)) { - char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s/proc/%d/io", netdata_configured_host_prefix, p->pid); - p->io_filename = strdupz(filename); - } - - // open the file - ff = procfile_reopen(ff, p->io_filename, NULL, PROCFILE_FLAG_NO_ERROR_ON_FILE_IO); - if(unlikely(!ff)) goto cleanup; - - ff = procfile_readall(ff); - if(unlikely(!ff)) goto cleanup; - - pid_incremental_rate(io, p->io_logical_bytes_read, str2kernel_uint_t(procfile_lineword(ff, 0, 1))); - pid_incremental_rate(io, p->io_logical_bytes_written, str2kernel_uint_t(procfile_lineword(ff, 1, 1))); - pid_incremental_rate(io, p->io_read_calls, str2kernel_uint_t(procfile_lineword(ff, 2, 1))); - pid_incremental_rate(io, p->io_write_calls, str2kernel_uint_t(procfile_lineword(ff, 3, 1))); - pid_incremental_rate(io, p->io_storage_bytes_read, str2kernel_uint_t(procfile_lineword(ff, 4, 1))); - pid_incremental_rate(io, p->io_storage_bytes_written, str2kernel_uint_t(procfile_lineword(ff, 5, 1))); - pid_incremental_rate(io, p->io_cancelled_write_bytes, str2kernel_uint_t(procfile_lineword(ff, 6, 1))); - - return true; - -cleanup: - clear_pid_io(p); - return false; -} -#endif // !__FreeBSD__ !__APPLE__ - -int read_proc_pid_io(struct pid_stat *p, void *ptr) { - p->last_io_collected_usec = p->io_collected_usec; - p->io_collected_usec = now_monotonic_usec(); - calls_counter++; - - bool ret = read_proc_pid_io_per_os(p, ptr); 
- - if(unlikely(global_iterations_counter == 1)) - clear_pid_io(p); - - return ret ? 1 : 0; -} diff --git a/src/collectors/apps.plugin/apps_proc_pid_limits.c b/src/collectors/apps.plugin/apps_proc_pid_limits.c deleted file mode 100644 index 7485086ba33c82..00000000000000 --- a/src/collectors/apps.plugin/apps_proc_pid_limits.c +++ /dev/null @@ -1,151 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "apps_plugin.h" - -// ---------------------------------------------------------------------------- - -#define MAX_PROC_PID_LIMITS 8192 -#define PROC_PID_LIMITS_MAX_OPEN_FILES_KEY "\nMax open files " - -static inline kernel_uint_t get_proc_pid_limits_limit(char *buf, const char *key, size_t key_len, kernel_uint_t def) { - char *line = strstr(buf, key); - if(!line) - return def; - - char *v = &line[key_len]; - while(isspace(*v)) v++; - - if(strcmp(v, "unlimited") == 0) - return 0; - - return str2ull(v, NULL); -} - -#if defined(__FreeBSD__) || defined(__APPLE__) -int read_proc_pid_limits_per_os(struct pid_stat *p, void *ptr __maybe_unused) { - return false; -} -#endif - -#if !defined(__FreeBSD__) && !defined(__APPLE__) -static inline bool read_proc_pid_limits_per_os(struct pid_stat *p, void *ptr __maybe_unused) { - static char proc_pid_limits_buffer[MAX_PROC_PID_LIMITS + 1]; - bool ret = false; - bool read_limits = false; - - errno_clear(); - proc_pid_limits_buffer[0] = '\0'; - - kernel_uint_t all_fds = pid_openfds_sum(p); - if(all_fds < p->limits.max_open_files / 2 && p->io_collected_usec > p->last_limits_collected_usec && p->io_collected_usec - p->last_limits_collected_usec <= 60 * USEC_PER_SEC) { - // too frequent, we want to collect limits once per minute - ret = true; - goto cleanup; - } - - if(unlikely(!p->limits_filename)) { - char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s/proc/%d/limits", netdata_configured_host_prefix, p->pid); - p->limits_filename = strdupz(filename); - } - - int fd = open(p->limits_filename, 
procfile_open_flags, 0666); - if(unlikely(fd == -1)) goto cleanup; - - ssize_t bytes = read(fd, proc_pid_limits_buffer, MAX_PROC_PID_LIMITS); - close(fd); - - if(bytes <= 0) - goto cleanup; - - // make it '\0' terminated - if(bytes < MAX_PROC_PID_LIMITS) - proc_pid_limits_buffer[bytes] = '\0'; - else - proc_pid_limits_buffer[MAX_PROC_PID_LIMITS - 1] = '\0'; - - p->limits.max_open_files = get_proc_pid_limits_limit(proc_pid_limits_buffer, PROC_PID_LIMITS_MAX_OPEN_FILES_KEY, sizeof(PROC_PID_LIMITS_MAX_OPEN_FILES_KEY) - 1, 0); - if(p->limits.max_open_files == 1) { - // it seems a bug in the kernel or something similar - // it sets max open files to 1 but the number of files - // the process has open are more than 1... - // https://github.com/netdata/netdata/issues/15443 - p->limits.max_open_files = 0; - ret = true; - goto cleanup; - } - - p->last_limits_collected_usec = p->io_collected_usec; - read_limits = true; - - ret = true; - -cleanup: - if(p->limits.max_open_files) - p->openfds_limits_percent = (NETDATA_DOUBLE)all_fds * 100.0 / (NETDATA_DOUBLE)p->limits.max_open_files; - else - p->openfds_limits_percent = 0.0; - - if(p->openfds_limits_percent > 100.0) { - if(!(p->log_thrown & PID_LOG_LIMITS_DETAIL)) { - char *line; - - if(!read_limits) { - proc_pid_limits_buffer[0] = '\0'; - line = "NOT READ"; - } - else { - line = strstr(proc_pid_limits_buffer, PROC_PID_LIMITS_MAX_OPEN_FILES_KEY); - if (line) { - line++; // skip the initial newline - - char *end = strchr(line, '\n'); - if (end) - *end = '\0'; - } - } - - netdata_log_info( - "FDS_LIMITS: PID %d (%s) is using " - "%0.2f %% of its fds limits, " - "open fds = %"PRIu64 "(" - "files = %"PRIu64 ", " - "pipes = %"PRIu64 ", " - "sockets = %"PRIu64", " - "inotifies = %"PRIu64", " - "eventfds = %"PRIu64", " - "timerfds = %"PRIu64", " - "signalfds = %"PRIu64", " - "eventpolls = %"PRIu64" " - "other = %"PRIu64" " - "), open fds limit = %"PRIu64", " - "%s, " - "original line [%s]", - p->pid, p->comm, 
p->openfds_limits_percent, all_fds, - p->openfds.files, - p->openfds.pipes, - p->openfds.sockets, - p->openfds.inotifies, - p->openfds.eventfds, - p->openfds.timerfds, - p->openfds.signalfds, - p->openfds.eventpolls, - p->openfds.other, - p->limits.max_open_files, - read_limits ? "and we have read the limits AFTER counting the fds" - : "but we have read the limits BEFORE counting the fds", - line); - - p->log_thrown |= PID_LOG_LIMITS_DETAIL; - } - } - else - p->log_thrown &= ~PID_LOG_LIMITS_DETAIL; - - return ret; -} -#endif // !__FreeBSD__ !__APPLE__ - -int read_proc_pid_limits(struct pid_stat *p, void *ptr) { - return read_proc_pid_limits_per_os(p, ptr) ? 1 : 0; -} diff --git a/src/collectors/apps.plugin/apps_proc_pid_stat.c b/src/collectors/apps.plugin/apps_proc_pid_stat.c deleted file mode 100644 index 8767f783122dba..00000000000000 --- a/src/collectors/apps.plugin/apps_proc_pid_stat.c +++ /dev/null @@ -1,293 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "apps_plugin.h" - -// ---------------------------------------------------------------------------- - -static inline void assign_target_to_pid(struct pid_stat *p) { - targets_assignment_counter++; - - uint32_t hash = simple_hash(p->comm); - size_t pclen = strlen(p->comm); - - struct target *w; - for(w = apps_groups_root_target; w ; w = w->next) { - // if(debug_enabled || (p->target && p->target->debug_enabled)) debug_log_int("\t\tcomparing '%s' with '%s'", w->compare, p->comm); - - // find it - 4 cases: - // 1. the target is not a pattern - // 2. the target has the prefix - // 3. the target has the suffix - // 4. 
the target is something inside cmdline - - if(unlikely(( (!w->starts_with && !w->ends_with && w->comparehash == hash && !strcmp(w->compare, p->comm)) - || (w->starts_with && !w->ends_with && !strncmp(w->compare, p->comm, w->comparelen)) - || (!w->starts_with && w->ends_with && pclen >= w->comparelen && !strcmp(w->compare, &p->comm[pclen - w->comparelen])) - || (proc_pid_cmdline_is_needed && w->starts_with && w->ends_with && p->cmdline && strstr(p->cmdline, w->compare)) - ))) { - - p->matched_by_config = true; - if(w->target) p->target = w->target; - else p->target = w; - - if(debug_enabled || (p->target && p->target->debug_enabled)) - debug_log_int("%s linked to target %s", p->comm, p->target->name); - - break; - } - } -} - -static inline void update_pid_comm(struct pid_stat *p, const char *comm) { - if(strcmp(p->comm, comm) != 0) { - if(unlikely(debug_enabled)) { - if(p->comm[0]) - debug_log("\tpid %d (%s) changed name to '%s'", p->pid, p->comm, comm); - else - debug_log("\tJust added %d (%s)", p->pid, comm); - } - - strncpyz(p->comm, comm, MAX_COMPARE_NAME); - - // /proc//cmdline - if(likely(proc_pid_cmdline_is_needed)) - managed_log(p, PID_LOG_CMDLINE, read_proc_pid_cmdline(p)); - - assign_target_to_pid(p); - } -} - -static inline void clear_pid_stat(struct pid_stat *p, bool threads) { - p->minflt = 0; - p->cminflt = 0; - p->majflt = 0; - p->cmajflt = 0; - p->utime = 0; - p->stime = 0; - p->gtime = 0; - p->cutime = 0; - p->cstime = 0; - p->cgtime = 0; - - if(threads) - p->num_threads = 0; - - // p->rss = 0; -} - -#if defined(__FreeBSD__) -static inline bool read_proc_pid_stat_per_os(struct pid_stat *p, void *ptr) { - struct kinfo_proc *proc_info = (struct kinfo_proc *)ptr; - if (unlikely(proc_info->ki_tdflags & TDF_IDLETD)) - goto cleanup; - - char *comm = proc_info->ki_comm; - p->ppid = proc_info->ki_ppid; - - update_pid_comm(p, comm); - - pid_incremental_rate(stat, p->minflt, (kernel_uint_t)proc_info->ki_rusage.ru_minflt); - pid_incremental_rate(stat, 
p->cminflt, (kernel_uint_t)proc_info->ki_rusage_ch.ru_minflt); - pid_incremental_rate(stat, p->majflt, (kernel_uint_t)proc_info->ki_rusage.ru_majflt); - pid_incremental_rate(stat, p->cmajflt, (kernel_uint_t)proc_info->ki_rusage_ch.ru_majflt); - pid_incremental_rate(stat, p->utime, (kernel_uint_t)proc_info->ki_rusage.ru_utime.tv_sec * 100 + proc_info->ki_rusage.ru_utime.tv_usec / 10000); - pid_incremental_rate(stat, p->stime, (kernel_uint_t)proc_info->ki_rusage.ru_stime.tv_sec * 100 + proc_info->ki_rusage.ru_stime.tv_usec / 10000); - pid_incremental_rate(stat, p->cutime, (kernel_uint_t)proc_info->ki_rusage_ch.ru_utime.tv_sec * 100 + proc_info->ki_rusage_ch.ru_utime.tv_usec / 10000); - pid_incremental_rate(stat, p->cstime, (kernel_uint_t)proc_info->ki_rusage_ch.ru_stime.tv_sec * 100 + proc_info->ki_rusage_ch.ru_stime.tv_usec / 10000); - - p->num_threads = proc_info->ki_numthreads; - - usec_t started_ut = timeval_usec(&proc_info->ki_start); - p->uptime = (system_current_time_ut > started_ut) ? 
(system_current_time_ut - started_ut) / USEC_PER_SEC : 0; - - if(enable_guest_charts) { - enable_guest_charts = false; - netdata_log_info("Guest charts aren't supported by FreeBSD"); - } - - if(unlikely(debug_enabled || (p->target && p->target->debug_enabled))) - debug_log_int("READ PROC/PID/STAT: %s/proc/%d/stat, process: '%s' on target '%s' (dt=%llu) VALUES: utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", cutime=" KERNEL_UINT_FORMAT ", cstime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", cminflt=" KERNEL_UINT_FORMAT ", cmajflt=" KERNEL_UINT_FORMAT ", threads=%d", netdata_configured_host_prefix, p->pid, p->comm, (p->target)?p->target->name:"UNSET", p->stat_collected_usec - p->last_stat_collected_usec, p->utime, p->stime, p->cutime, p->cstime, p->minflt, p->majflt, p->cminflt, p->cmajflt, p->num_threads); - - if(unlikely(global_iterations_counter == 1)) - clear_pid_stat(p, false); - - return true; - -cleanup: - clear_pid_stat(p, true); - return false; -} -#endif // __FreeBSD__ - -#ifdef __APPLE__ -static inline bool read_proc_pid_stat_per_os(struct pid_stat *p, void *ptr) { - struct pid_info *pi = ptr; - - p->ppid = pi->proc.kp_eproc.e_ppid; - - // Update command name and target if changed - char comm[PROC_PIDPATHINFO_MAXSIZE]; - int ret = proc_name(p->pid, comm, sizeof(comm)); - if (ret <= 0) - strncpyz(comm, "unknown", sizeof(comm) - 1); - - update_pid_comm(p, comm); - - kernel_uint_t userCPU = (pi->taskinfo.pti_total_user * mach_info.numer) / mach_info.denom / NSEC_PER_USEC / 10000; - kernel_uint_t systemCPU = (pi->taskinfo.pti_total_system * mach_info.numer) / mach_info.denom / NSEC_PER_USEC / 10000; - - // Map the values from taskinfo to the pid_stat structure - pid_incremental_rate(stat, p->minflt, pi->taskinfo.pti_faults); - pid_incremental_rate(stat, p->majflt, pi->taskinfo.pti_pageins); - pid_incremental_rate(stat, p->utime, userCPU); - pid_incremental_rate(stat, p->stime, systemCPU); - p->num_threads = 
pi->taskinfo.pti_threadnum; - - usec_t started_ut = timeval_usec(&pi->proc.kp_proc.p_starttime); - p->uptime = (system_current_time_ut > started_ut) ? (system_current_time_ut - started_ut) / USEC_PER_SEC : 0; - - // Note: Some values such as guest time, cutime, cstime, etc., are not directly available in MacOS. - // You might need to approximate or leave them unset depending on your needs. - - if(unlikely(debug_enabled || (p->target && p->target->debug_enabled))) { - debug_log_int("READ PROC/PID/STAT for MacOS: process: '%s' on target '%s' VALUES: utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", threads=%d", - p->comm, (p->target) ? p->target->name : "UNSET", p->utime, p->stime, p->minflt, p->majflt, p->num_threads); - } - - if(unlikely(global_iterations_counter == 1)) - clear_pid_stat(p, false); - - // MacOS doesn't have a direct concept of process state like Linux, - // so updating process state count might need a different approach. 
- - return true; -} -#endif // __APPLE__ - -#if !defined(__FreeBSD__) && !defined(__APPLE__) -static inline void update_proc_state_count(char proc_stt) { - switch (proc_stt) { - case 'S': - proc_state_count[PROC_STATUS_SLEEPING] += 1; - break; - case 'R': - proc_state_count[PROC_STATUS_RUNNING] += 1; - break; - case 'D': - proc_state_count[PROC_STATUS_SLEEPING_D] += 1; - break; - case 'Z': - proc_state_count[PROC_STATUS_ZOMBIE] += 1; - break; - case 'T': - proc_state_count[PROC_STATUS_STOPPED] += 1; - break; - default: - break; - } -} - -static inline bool read_proc_pid_stat_per_os(struct pid_stat *p, void *ptr __maybe_unused) { - static procfile *ff = NULL; - - if(unlikely(!p->stat_filename)) { - char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s/proc/%d/stat", netdata_configured_host_prefix, p->pid); - p->stat_filename = strdupz(filename); - } - - int set_quotes = (!ff)?1:0; - - ff = procfile_reopen(ff, p->stat_filename, NULL, PROCFILE_FLAG_NO_ERROR_ON_FILE_IO); - if(unlikely(!ff)) goto cleanup; - - // if(set_quotes) procfile_set_quotes(ff, "()"); - if(unlikely(set_quotes)) - procfile_set_open_close(ff, "(", ")"); - - ff = procfile_readall(ff); - if(unlikely(!ff)) goto cleanup; - - // p->pid = str2pid_t(procfile_lineword(ff, 0, 0)); - char *comm = procfile_lineword(ff, 0, 1); - p->state = *(procfile_lineword(ff, 0, 2)); - p->ppid = (int32_t)str2pid_t(procfile_lineword(ff, 0, 3)); - // p->pgrp = (int32_t)str2pid_t(procfile_lineword(ff, 0, 4)); - // p->session = (int32_t)str2pid_t(procfile_lineword(ff, 0, 5)); - // p->tty_nr = (int32_t)str2pid_t(procfile_lineword(ff, 0, 6)); - // p->tpgid = (int32_t)str2pid_t(procfile_lineword(ff, 0, 7)); - // p->flags = str2uint64_t(procfile_lineword(ff, 0, 8)); - - update_pid_comm(p, comm); - - pid_incremental_rate(stat, p->minflt, str2kernel_uint_t(procfile_lineword(ff, 0, 9))); - pid_incremental_rate(stat, p->cminflt, str2kernel_uint_t(procfile_lineword(ff, 0, 10))); - pid_incremental_rate(stat, p->majflt, 
str2kernel_uint_t(procfile_lineword(ff, 0, 11))); - pid_incremental_rate(stat, p->cmajflt, str2kernel_uint_t(procfile_lineword(ff, 0, 12))); - pid_incremental_rate(stat, p->utime, str2kernel_uint_t(procfile_lineword(ff, 0, 13))); - pid_incremental_rate(stat, p->stime, str2kernel_uint_t(procfile_lineword(ff, 0, 14))); - pid_incremental_rate(stat, p->cutime, str2kernel_uint_t(procfile_lineword(ff, 0, 15))); - pid_incremental_rate(stat, p->cstime, str2kernel_uint_t(procfile_lineword(ff, 0, 16))); - // p->priority = str2kernel_uint_t(procfile_lineword(ff, 0, 17)); - // p->nice = str2kernel_uint_t(procfile_lineword(ff, 0, 18)); - p->num_threads = (int32_t) str2uint32_t(procfile_lineword(ff, 0, 19), NULL); - // p->itrealvalue = str2kernel_uint_t(procfile_lineword(ff, 0, 20)); - kernel_uint_t collected_starttime = str2kernel_uint_t(procfile_lineword(ff, 0, 21)) / system_hz; - p->uptime = (system_uptime_secs > collected_starttime)?(system_uptime_secs - collected_starttime):0; - // p->vsize = str2kernel_uint_t(procfile_lineword(ff, 0, 22)); - // p->rss = str2kernel_uint_t(procfile_lineword(ff, 0, 23)); - // p->rsslim = str2kernel_uint_t(procfile_lineword(ff, 0, 24)); - // p->starcode = str2kernel_uint_t(procfile_lineword(ff, 0, 25)); - // p->endcode = str2kernel_uint_t(procfile_lineword(ff, 0, 26)); - // p->startstack = str2kernel_uint_t(procfile_lineword(ff, 0, 27)); - // p->kstkesp = str2kernel_uint_t(procfile_lineword(ff, 0, 28)); - // p->kstkeip = str2kernel_uint_t(procfile_lineword(ff, 0, 29)); - // p->signal = str2kernel_uint_t(procfile_lineword(ff, 0, 30)); - // p->blocked = str2kernel_uint_t(procfile_lineword(ff, 0, 31)); - // p->sigignore = str2kernel_uint_t(procfile_lineword(ff, 0, 32)); - // p->sigcatch = str2kernel_uint_t(procfile_lineword(ff, 0, 33)); - // p->wchan = str2kernel_uint_t(procfile_lineword(ff, 0, 34)); - // p->nswap = str2kernel_uint_t(procfile_lineword(ff, 0, 35)); - // p->cnswap = str2kernel_uint_t(procfile_lineword(ff, 0, 36)); - // 
p->exit_signal = str2kernel_uint_t(procfile_lineword(ff, 0, 37)); - // p->processor = str2kernel_uint_t(procfile_lineword(ff, 0, 38)); - // p->rt_priority = str2kernel_uint_t(procfile_lineword(ff, 0, 39)); - // p->policy = str2kernel_uint_t(procfile_lineword(ff, 0, 40)); - // p->delayacct_blkio_ticks = str2kernel_uint_t(procfile_lineword(ff, 0, 41)); - - if(enable_guest_charts) { - pid_incremental_rate(stat, p->gtime, str2kernel_uint_t(procfile_lineword(ff, 0, 42))); - pid_incremental_rate(stat, p->cgtime, str2kernel_uint_t(procfile_lineword(ff, 0, 43))); - - if (show_guest_time || p->gtime || p->cgtime) { - p->utime -= (p->utime >= p->gtime) ? p->gtime : p->utime; - p->cutime -= (p->cutime >= p->cgtime) ? p->cgtime : p->cutime; - show_guest_time = 1; - } - } - - if(unlikely(debug_enabled || (p->target && p->target->debug_enabled))) - debug_log_int("READ PROC/PID/STAT: %s/proc/%d/stat, process: '%s' on target '%s' (dt=%llu) VALUES: utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", cutime=" KERNEL_UINT_FORMAT ", cstime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", cminflt=" KERNEL_UINT_FORMAT ", cmajflt=" KERNEL_UINT_FORMAT ", threads=%d", netdata_configured_host_prefix, p->pid, p->comm, (p->target)?p->target->name:"UNSET", p->stat_collected_usec - p->last_stat_collected_usec, p->utime, p->stime, p->cutime, p->cstime, p->minflt, p->majflt, p->cminflt, p->cmajflt, p->num_threads); - - if(unlikely(global_iterations_counter == 1)) - clear_pid_stat(p, false); - - update_proc_state_count(p->state); - return true; - -cleanup: - clear_pid_stat(p, true); - return false; -} -#endif // !__FreeBSD__ !__APPLE__ - -int read_proc_pid_stat(struct pid_stat *p, void *ptr) { - p->last_stat_collected_usec = p->stat_collected_usec; - p->stat_collected_usec = now_monotonic_usec(); - calls_counter++; - - if(!read_proc_pid_stat_per_os(p, ptr)) - return 0; - - return 1; -} diff --git a/src/collectors/apps.plugin/apps_proc_pid_status.c 
b/src/collectors/apps.plugin/apps_proc_pid_status.c deleted file mode 100644 index 364d480473c8c2..00000000000000 --- a/src/collectors/apps.plugin/apps_proc_pid_status.c +++ /dev/null @@ -1,192 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "apps_plugin.h" - -#if defined(__FreeBSD__) -static inline bool read_proc_pid_status_per_os(struct pid_stat *p, void *ptr) { - struct kinfo_proc *proc_info = (struct kinfo_proc *)ptr; - - p->uid = proc_info->ki_uid; - p->gid = proc_info->ki_groups[0]; - p->status_vmsize = proc_info->ki_size / 1024; // in KiB - p->status_vmrss = proc_info->ki_rssize * pagesize / 1024; // in KiB - // TODO: what about shared and swap memory on FreeBSD? - return true; -} -#endif - -#ifdef __APPLE__ -static inline bool read_proc_pid_status_per_os(struct pid_stat *p, void *ptr) { - struct pid_info *pi = ptr; - - p->uid = pi->bsdinfo.pbi_uid; - p->gid = pi->bsdinfo.pbi_gid; - p->status_vmsize = pi->taskinfo.pti_virtual_size / 1024; // Convert bytes to KiB - p->status_vmrss = pi->taskinfo.pti_resident_size / 1024; // Convert bytes to KiB - // p->status_vmswap = rusageinfo.ri_swapins + rusageinfo.ri_swapouts; // This is not directly available, consider an alternative representation - p->status_voluntary_ctxt_switches = pi->taskinfo.pti_csw; - // p->status_nonvoluntary_ctxt_switches = taskinfo.pti_nivcsw; - - return true; -} -#endif // __APPLE__ - -#if !defined(__FreeBSD__) && !defined(__APPLE__) -struct arl_callback_ptr { - struct pid_stat *p; - procfile *ff; - size_t line; -}; - -void arl_callback_status_uid(const char *name, uint32_t hash, const char *value, void *dst) { - (void)name; (void)hash; (void)value; - struct arl_callback_ptr *aptr = (struct arl_callback_ptr *)dst; - if(unlikely(procfile_linewords(aptr->ff, aptr->line) < 5)) return; - - //const char *real_uid = procfile_lineword(aptr->ff, aptr->line, 1); - const char *effective_uid = procfile_lineword(aptr->ff, aptr->line, 2); - //const char *saved_uid = 
procfile_lineword(aptr->ff, aptr->line, 3); - //const char *filesystem_uid = procfile_lineword(aptr->ff, aptr->line, 4); - - if(likely(effective_uid && *effective_uid)) - aptr->p->uid = (uid_t)str2l(effective_uid); -} - -void arl_callback_status_gid(const char *name, uint32_t hash, const char *value, void *dst) { - (void)name; (void)hash; (void)value; - struct arl_callback_ptr *aptr = (struct arl_callback_ptr *)dst; - if(unlikely(procfile_linewords(aptr->ff, aptr->line) < 5)) return; - - //const char *real_gid = procfile_lineword(aptr->ff, aptr->line, 1); - const char *effective_gid = procfile_lineword(aptr->ff, aptr->line, 2); - //const char *saved_gid = procfile_lineword(aptr->ff, aptr->line, 3); - //const char *filesystem_gid = procfile_lineword(aptr->ff, aptr->line, 4); - - if(likely(effective_gid && *effective_gid)) - aptr->p->gid = (uid_t)str2l(effective_gid); -} - -void arl_callback_status_vmsize(const char *name, uint32_t hash, const char *value, void *dst) { - (void)name; (void)hash; (void)value; - struct arl_callback_ptr *aptr = (struct arl_callback_ptr *)dst; - if(unlikely(procfile_linewords(aptr->ff, aptr->line) < 3)) return; - - aptr->p->status_vmsize = str2kernel_uint_t(procfile_lineword(aptr->ff, aptr->line, 1)); -} - -void arl_callback_status_vmswap(const char *name, uint32_t hash, const char *value, void *dst) { - (void)name; (void)hash; (void)value; - struct arl_callback_ptr *aptr = (struct arl_callback_ptr *)dst; - if(unlikely(procfile_linewords(aptr->ff, aptr->line) < 3)) return; - - aptr->p->status_vmswap = str2kernel_uint_t(procfile_lineword(aptr->ff, aptr->line, 1)); -} - -void arl_callback_status_vmrss(const char *name, uint32_t hash, const char *value, void *dst) { - (void)name; (void)hash; (void)value; - struct arl_callback_ptr *aptr = (struct arl_callback_ptr *)dst; - if(unlikely(procfile_linewords(aptr->ff, aptr->line) < 3)) return; - - aptr->p->status_vmrss = str2kernel_uint_t(procfile_lineword(aptr->ff, aptr->line, 1)); -} - -void 
arl_callback_status_rssfile(const char *name, uint32_t hash, const char *value, void *dst) { - (void)name; (void)hash; (void)value; - struct arl_callback_ptr *aptr = (struct arl_callback_ptr *)dst; - if(unlikely(procfile_linewords(aptr->ff, aptr->line) < 3)) return; - - aptr->p->status_rssfile = str2kernel_uint_t(procfile_lineword(aptr->ff, aptr->line, 1)); -} - -void arl_callback_status_rssshmem(const char *name, uint32_t hash, const char *value, void *dst) { - (void)name; (void)hash; (void)value; - struct arl_callback_ptr *aptr = (struct arl_callback_ptr *)dst; - if(unlikely(procfile_linewords(aptr->ff, aptr->line) < 3)) return; - - aptr->p->status_rssshmem = str2kernel_uint_t(procfile_lineword(aptr->ff, aptr->line, 1)); -} - -void arl_callback_status_voluntary_ctxt_switches(const char *name, uint32_t hash, const char *value, void *dst) { - (void)name; (void)hash; (void)value; - struct arl_callback_ptr *aptr = (struct arl_callback_ptr *)dst; - if(unlikely(procfile_linewords(aptr->ff, aptr->line) < 2)) return; - - struct pid_stat *p = aptr->p; - pid_incremental_rate(stat, p->status_voluntary_ctxt_switches, str2kernel_uint_t(procfile_lineword(aptr->ff, aptr->line, 1))); -} - -void arl_callback_status_nonvoluntary_ctxt_switches(const char *name, uint32_t hash, const char *value, void *dst) { - (void)name; (void)hash; (void)value; - struct arl_callback_ptr *aptr = (struct arl_callback_ptr *)dst; - if(unlikely(procfile_linewords(aptr->ff, aptr->line) < 2)) return; - - struct pid_stat *p = aptr->p; - pid_incremental_rate(stat, p->status_nonvoluntary_ctxt_switches, str2kernel_uint_t(procfile_lineword(aptr->ff, aptr->line, 1))); -} - -static inline bool read_proc_pid_status_per_os(struct pid_stat *p, void *ptr __maybe_unused) { - static struct arl_callback_ptr arl_ptr; - static procfile *ff = NULL; - - if(unlikely(!p->status_arl)) { - p->status_arl = arl_create("/proc/pid/status", NULL, 60); - arl_expect_custom(p->status_arl, "Uid", arl_callback_status_uid, &arl_ptr); - 
arl_expect_custom(p->status_arl, "Gid", arl_callback_status_gid, &arl_ptr); - arl_expect_custom(p->status_arl, "VmSize", arl_callback_status_vmsize, &arl_ptr); - arl_expect_custom(p->status_arl, "VmRSS", arl_callback_status_vmrss, &arl_ptr); - arl_expect_custom(p->status_arl, "RssFile", arl_callback_status_rssfile, &arl_ptr); - arl_expect_custom(p->status_arl, "RssShmem", arl_callback_status_rssshmem, &arl_ptr); - arl_expect_custom(p->status_arl, "VmSwap", arl_callback_status_vmswap, &arl_ptr); - arl_expect_custom(p->status_arl, "voluntary_ctxt_switches", arl_callback_status_voluntary_ctxt_switches, &arl_ptr); - arl_expect_custom(p->status_arl, "nonvoluntary_ctxt_switches", arl_callback_status_nonvoluntary_ctxt_switches, &arl_ptr); - } - - if(unlikely(!p->status_filename)) { - char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s/proc/%d/status", netdata_configured_host_prefix, p->pid); - p->status_filename = strdupz(filename); - } - - ff = procfile_reopen(ff, p->status_filename, (!ff)?" 
\t:,-()/":NULL, PROCFILE_FLAG_NO_ERROR_ON_FILE_IO); - if(unlikely(!ff)) return false; - - ff = procfile_readall(ff); - if(unlikely(!ff)) return false; - - calls_counter++; - - // let ARL use this pid - arl_ptr.p = p; - arl_ptr.ff = ff; - - size_t lines = procfile_lines(ff), l; - arl_begin(p->status_arl); - - for(l = 0; l < lines ;l++) { - // debug_log("CHECK: line %zu of %zu, key '%s' = '%s'", l, lines, procfile_lineword(ff, l, 0), procfile_lineword(ff, l, 1)); - arl_ptr.line = l; - if(unlikely(arl_check(p->status_arl, - procfile_lineword(ff, l, 0), - procfile_lineword(ff, l, 1)))) break; - } - - p->status_vmshared = p->status_rssfile + p->status_rssshmem; - - // debug_log("%s uid %d, gid %d, VmSize %zu, VmRSS %zu, RssFile %zu, RssShmem %zu, shared %zu", p->comm, (int)p->uid, (int)p->gid, p->status_vmsize, p->status_vmrss, p->status_rssfile, p->status_rssshmem, p->status_vmshared); - - return true; -} -#endif // !__FreeBSD__ !__APPLE__ - -int read_proc_pid_status(struct pid_stat *p, void *ptr) { - p->status_vmsize = 0; - p->status_vmrss = 0; - p->status_vmshared = 0; - p->status_rssfile = 0; - p->status_rssshmem = 0; - p->status_vmswap = 0; - p->status_voluntary_ctxt_switches = 0; - p->status_nonvoluntary_ctxt_switches = 0; - - return read_proc_pid_status_per_os(p, ptr) ? 1 : 0; -} diff --git a/src/collectors/apps.plugin/apps_proc_pids.c b/src/collectors/apps.plugin/apps_proc_pids.c deleted file mode 100644 index b53060d60bc85d..00000000000000 --- a/src/collectors/apps.plugin/apps_proc_pids.c +++ /dev/null @@ -1,720 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "apps_plugin.h" - -static struct pid_stat **all_pids = NULL; -size_t all_pids_count = 0; // the number of processes running - -struct pid_stat *root_of_pids = NULL; // global linked list of all processes running - -#if (ALL_PIDS_ARE_READ_INSTANTLY == 0) -// Another pre-allocated list of all possible pids. 
-// We need it to assign them a unique sortlist id, so that we -// read parents before children. This is needed to prevent a situation where -// a child is found running, but until we read its parent, it has exited and -// its parent has accumulated its resources. -pid_t *all_pids_sortlist = NULL; -#endif - -void pids_init(void) { -#if (ALL_PIDS_ARE_READ_INSTANTLY == 0) - all_pids_sortlist = callocz(sizeof(pid_t), (size_t)pid_max + 1); -#endif - - all_pids = callocz(sizeof(struct pid_stat *), (size_t) pid_max + 1); -} - -inline struct pid_stat *find_pid_entry(pid_t pid) { - return all_pids[pid]; -} - -static inline struct pid_stat *get_or_allocate_pid_entry(pid_t pid) { - struct pid_stat *p = find_pid_entry(pid); - if(likely(p)) - return p; - - p = callocz(sizeof(struct pid_stat), 1); - p->fds = mallocz(sizeof(struct pid_fd) * MAX_SPARE_FDS); - p->fds_size = MAX_SPARE_FDS; - init_pid_fds(p, 0, p->fds_size); - p->pid = pid; - - DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(root_of_pids, p, prev, next); - all_pids[pid] = p; - all_pids_count++; - - return p; -} - -static inline void del_pid_entry(pid_t pid) { - struct pid_stat *p = find_pid_entry(pid); - - if(unlikely(!p)) { - netdata_log_error("attempted to free pid %d that is not allocated.", pid); - return; - } - - debug_log("process %d %s exited, deleting it.", pid, p->comm); - - DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(root_of_pids, p, prev, next); - -#if !defined(__FreeBSD__) && !defined(__APPLE__) - { - size_t i; - for(i = 0; i < p->fds_size; i++) - if(p->fds[i].filename) - freez(p->fds[i].filename); - } - arl_free(p->status_arl); -#endif - - freez(p->fds); - freez(p->fds_dirname); - freez(p->stat_filename); - freez(p->status_filename); - freez(p->limits_filename); - freez(p->io_filename); - freez(p->cmdline_filename); - freez(p->cmdline); - freez(p); - - all_pids[pid] = NULL; - all_pids_count--; -} - -static inline int collect_data_for_pid(pid_t pid, void *ptr) { - if(unlikely(pid < 0 || pid > pid_max)) { - 
netdata_log_error("Invalid pid %d read (expected %d to %d). Ignoring process.", pid, 0, pid_max); - return 0; - } - - struct pid_stat *p = get_or_allocate_pid_entry(pid); - if(unlikely(!p || p->read)) return 0; - p->read = true; - - // debug_log("Reading process %d (%s), sortlist %d", p->pid, p->comm, p->sortlist); - - // -------------------------------------------------------------------- - // /proc//stat - - if(unlikely(!managed_log(p, PID_LOG_STAT, read_proc_pid_stat(p, ptr)))) - // there is no reason to proceed if we cannot get its status - return 0; - - // check its parent pid - if(unlikely(p->ppid < 0 || p->ppid > pid_max)) { - netdata_log_error("Pid %d (command '%s') states invalid parent pid %d. Using 0.", pid, p->comm, p->ppid); - p->ppid = 0; - } - - // -------------------------------------------------------------------- - // /proc//io - - managed_log(p, PID_LOG_IO, read_proc_pid_io(p, ptr)); - - // -------------------------------------------------------------------- - // /proc//status - - if(unlikely(!managed_log(p, PID_LOG_STATUS, read_proc_pid_status(p, ptr)))) - // there is no reason to proceed if we cannot get its status - return 0; - - // -------------------------------------------------------------------- - // /proc//fd - - if(enable_file_charts) { - managed_log(p, PID_LOG_FDS, read_pid_file_descriptors(p, ptr)); - managed_log(p, PID_LOG_LIMITS, read_proc_pid_limits(p, ptr)); - } - - // -------------------------------------------------------------------- - // done! 
- - if(unlikely(debug_enabled && include_exited_childs && all_pids_count && p->ppid && all_pids[p->ppid] && !all_pids[p->ppid]->read)) - debug_log("Read process %d (%s) sortlisted %d, but its parent %d (%s) sortlisted %d, is not read", p->pid, p->comm, p->sortlist, all_pids[p->ppid]->pid, all_pids[p->ppid]->comm, all_pids[p->ppid]->sortlist); - - // mark it as updated - p->updated = true; - p->keep = false; - p->keeploops = 0; - - return 1; -} - -void cleanup_exited_pids(void) { - size_t c; - struct pid_stat *p = NULL; - - for(p = root_of_pids; p ;) { - if(!p->updated && (!p->keep || p->keeploops > 0)) { - if(unlikely(debug_enabled && (p->keep || p->keeploops))) - debug_log(" > CLEANUP cannot keep exited process %d (%s) anymore - removing it.", p->pid, p->comm); - - for(c = 0; c < p->fds_size; c++) - if(p->fds[c].fd > 0) { - file_descriptor_not_used(p->fds[c].fd); - clear_pid_fd(&p->fds[c]); - } - - pid_t r = p->pid; - p = p->next; - del_pid_entry(r); - } - else { - if(unlikely(p->keep)) p->keeploops++; - p->keep = false; - p = p->next; - } - } -} - -// ---------------------------------------------------------------------------- - -static inline void link_all_processes_to_their_parents(void) { - struct pid_stat *p, *pp; - - // link all children to their parents - // and update children count on parents - for(p = root_of_pids; p ; p = p->next) { - // for each process found - - p->sortlist = 0; - p->parent = NULL; - - if(unlikely(!p->ppid)) { - //unnecessary code from apps_plugin.c - //p->parent = NULL; - continue; - } - - pp = all_pids[p->ppid]; - if(likely(pp)) { - p->parent = pp; - pp->children_count++; - - if(unlikely(debug_enabled || (p->target && p->target->debug_enabled))) - debug_log_int("child %d (%s, %s) on target '%s' has parent %d (%s, %s). 
Parent: utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", gtime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", cutime=" KERNEL_UINT_FORMAT ", cstime=" KERNEL_UINT_FORMAT ", cgtime=" KERNEL_UINT_FORMAT ", cminflt=" KERNEL_UINT_FORMAT ", cmajflt=" KERNEL_UINT_FORMAT "", p->pid, p->comm, p->updated?"running":"exited", (p->target)?p->target->name:"UNSET", pp->pid, pp->comm, pp->updated?"running":"exited", pp->utime, pp->stime, pp->gtime, pp->minflt, pp->majflt, pp->cutime, pp->cstime, pp->cgtime, pp->cminflt, pp->cmajflt); - } - else { - p->parent = NULL; - netdata_log_error("pid %d %s states parent %d, but the later does not exist.", p->pid, p->comm, p->ppid); - } - } -} - -// ---------------------------------------------------------------------------- - -static inline int debug_print_process_and_parents(struct pid_stat *p, usec_t time) { - char *prefix = "\\_ "; - int indent = 0; - - if(p->parent) - indent = debug_print_process_and_parents(p->parent, p->stat_collected_usec); - else - prefix = " > "; - - char buffer[indent + 1]; - int i; - - for(i = 0; i < indent ;i++) buffer[i] = ' '; - buffer[i] = '\0'; - - fprintf(stderr, " %s %s%s (%d %s %"PRIu64"" - , buffer - , prefix - , p->comm - , p->pid - , p->updated?"running":"exited" - , p->stat_collected_usec - time - ); - - if(p->utime) fprintf(stderr, " utime=" KERNEL_UINT_FORMAT, p->utime); - if(p->stime) fprintf(stderr, " stime=" KERNEL_UINT_FORMAT, p->stime); - if(p->gtime) fprintf(stderr, " gtime=" KERNEL_UINT_FORMAT, p->gtime); - if(p->cutime) fprintf(stderr, " cutime=" KERNEL_UINT_FORMAT, p->cutime); - if(p->cstime) fprintf(stderr, " cstime=" KERNEL_UINT_FORMAT, p->cstime); - if(p->cgtime) fprintf(stderr, " cgtime=" KERNEL_UINT_FORMAT, p->cgtime); - if(p->minflt) fprintf(stderr, " minflt=" KERNEL_UINT_FORMAT, p->minflt); - if(p->cminflt) fprintf(stderr, " cminflt=" KERNEL_UINT_FORMAT, p->cminflt); - if(p->majflt) fprintf(stderr, " majflt=" KERNEL_UINT_FORMAT, 
p->majflt); - if(p->cmajflt) fprintf(stderr, " cmajflt=" KERNEL_UINT_FORMAT, p->cmajflt); - fprintf(stderr, ")\n"); - - return indent + 1; -} - -static inline void debug_print_process_tree(struct pid_stat *p, char *msg __maybe_unused) { - debug_log("%s: process %s (%d, %s) with parents:", msg, p->comm, p->pid, p->updated?"running":"exited"); - debug_print_process_and_parents(p, p->stat_collected_usec); -} - -static inline void debug_find_lost_child(struct pid_stat *pe, kernel_uint_t lost, int type) { - int found = 0; - struct pid_stat *p = NULL; - - for(p = root_of_pids; p ; p = p->next) { - if(p == pe) continue; - - switch(type) { - case 1: - if(p->cminflt > lost) { - fprintf(stderr, " > process %d (%s) could use the lost exited child minflt " KERNEL_UINT_FORMAT " of process %d (%s)\n", p->pid, p->comm, lost, pe->pid, pe->comm); - found++; - } - break; - - case 2: - if(p->cmajflt > lost) { - fprintf(stderr, " > process %d (%s) could use the lost exited child majflt " KERNEL_UINT_FORMAT " of process %d (%s)\n", p->pid, p->comm, lost, pe->pid, pe->comm); - found++; - } - break; - - case 3: - if(p->cutime > lost) { - fprintf(stderr, " > process %d (%s) could use the lost exited child utime " KERNEL_UINT_FORMAT " of process %d (%s)\n", p->pid, p->comm, lost, pe->pid, pe->comm); - found++; - } - break; - - case 4: - if(p->cstime > lost) { - fprintf(stderr, " > process %d (%s) could use the lost exited child stime " KERNEL_UINT_FORMAT " of process %d (%s)\n", p->pid, p->comm, lost, pe->pid, pe->comm); - found++; - } - break; - - case 5: - if(p->cgtime > lost) { - fprintf(stderr, " > process %d (%s) could use the lost exited child gtime " KERNEL_UINT_FORMAT " of process %d (%s)\n", p->pid, p->comm, lost, pe->pid, pe->comm); - found++; - } - break; - } - } - - if(!found) { - switch(type) { - case 1: - fprintf(stderr, " > cannot find any process to use the lost exited child minflt " KERNEL_UINT_FORMAT " of process %d (%s)\n", lost, pe->pid, pe->comm); - break; - - case 2: 
- fprintf(stderr, " > cannot find any process to use the lost exited child majflt " KERNEL_UINT_FORMAT " of process %d (%s)\n", lost, pe->pid, pe->comm); - break; - - case 3: - fprintf(stderr, " > cannot find any process to use the lost exited child utime " KERNEL_UINT_FORMAT " of process %d (%s)\n", lost, pe->pid, pe->comm); - break; - - case 4: - fprintf(stderr, " > cannot find any process to use the lost exited child stime " KERNEL_UINT_FORMAT " of process %d (%s)\n", lost, pe->pid, pe->comm); - break; - - case 5: - fprintf(stderr, " > cannot find any process to use the lost exited child gtime " KERNEL_UINT_FORMAT " of process %d (%s)\n", lost, pe->pid, pe->comm); - break; - } - } -} - -static inline kernel_uint_t remove_exited_child_from_parent(kernel_uint_t *field, kernel_uint_t *pfield) { - kernel_uint_t absorbed = 0; - - if(*field > *pfield) { - absorbed += *pfield; - *field -= *pfield; - *pfield = 0; - } - else { - absorbed += *field; - *pfield -= *field; - *field = 0; - } - - return absorbed; -} - -static inline void process_exited_pids() { - struct pid_stat *p; - - for(p = root_of_pids; p ; p = p->next) { - if(p->updated || !p->stat_collected_usec) - continue; - - kernel_uint_t utime = (p->utime_raw + p->cutime_raw) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); - kernel_uint_t stime = (p->stime_raw + p->cstime_raw) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); - kernel_uint_t gtime = (p->gtime_raw + p->cgtime_raw) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); - kernel_uint_t minflt = (p->minflt_raw + p->cminflt_raw) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); - kernel_uint_t majflt = (p->majflt_raw + p->cmajflt_raw) * (USEC_PER_SEC * RATES_DETAIL) / (p->stat_collected_usec - p->last_stat_collected_usec); - - if(utime + stime + gtime + minflt + majflt == 0) - continue; - - 
if(unlikely(debug_enabled)) { - debug_log("Absorb %s (%d %s total resources: utime=" KERNEL_UINT_FORMAT " stime=" KERNEL_UINT_FORMAT " gtime=" KERNEL_UINT_FORMAT " minflt=" KERNEL_UINT_FORMAT " majflt=" KERNEL_UINT_FORMAT ")" - , p->comm - , p->pid - , p->updated?"running":"exited" - , utime - , stime - , gtime - , minflt - , majflt - ); - debug_print_process_tree(p, "Searching parents"); - } - - struct pid_stat *pp; - for(pp = p->parent; pp ; pp = pp->parent) { - if(!pp->updated) continue; - - kernel_uint_t absorbed; - absorbed = remove_exited_child_from_parent(&utime, &pp->cutime); - if(unlikely(debug_enabled && absorbed)) - debug_log(" > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " utime (remaining: " KERNEL_UINT_FORMAT ")", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, utime); - - absorbed = remove_exited_child_from_parent(&stime, &pp->cstime); - if(unlikely(debug_enabled && absorbed)) - debug_log(" > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " stime (remaining: " KERNEL_UINT_FORMAT ")", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, stime); - - absorbed = remove_exited_child_from_parent(>ime, &pp->cgtime); - if(unlikely(debug_enabled && absorbed)) - debug_log(" > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " gtime (remaining: " KERNEL_UINT_FORMAT ")", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, gtime); - - absorbed = remove_exited_child_from_parent(&minflt, &pp->cminflt); - if(unlikely(debug_enabled && absorbed)) - debug_log(" > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " minflt (remaining: " KERNEL_UINT_FORMAT ")", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, minflt); - - absorbed = remove_exited_child_from_parent(&majflt, &pp->cmajflt); - if(unlikely(debug_enabled && absorbed)) - debug_log(" > process %s (%d %s) absorbed " KERNEL_UINT_FORMAT " majflt (remaining: " KERNEL_UINT_FORMAT ")", pp->comm, pp->pid, pp->updated?"running":"exited", absorbed, majflt); - } - - 
if(unlikely(utime + stime + gtime + minflt + majflt > 0)) { - if(unlikely(debug_enabled)) { - if(utime) debug_find_lost_child(p, utime, 3); - if(stime) debug_find_lost_child(p, stime, 4); - if(gtime) debug_find_lost_child(p, gtime, 5); - if(minflt) debug_find_lost_child(p, minflt, 1); - if(majflt) debug_find_lost_child(p, majflt, 2); - } - - p->keep = true; - - debug_log(" > remaining resources - KEEP - for another loop: %s (%d %s total resources: utime=" KERNEL_UINT_FORMAT " stime=" KERNEL_UINT_FORMAT " gtime=" KERNEL_UINT_FORMAT " minflt=" KERNEL_UINT_FORMAT " majflt=" KERNEL_UINT_FORMAT ")" - , p->comm - , p->pid - , p->updated?"running":"exited" - , utime - , stime - , gtime - , minflt - , majflt - ); - - for(pp = p->parent; pp ; pp = pp->parent) { - if(pp->updated) break; - pp->keep = true; - - debug_log(" > - KEEP - parent for another loop: %s (%d %s)" - , pp->comm - , pp->pid - , pp->updated?"running":"exited" - ); - } - - p->utime_raw = utime * (p->stat_collected_usec - p->last_stat_collected_usec) / (USEC_PER_SEC * RATES_DETAIL); - p->stime_raw = stime * (p->stat_collected_usec - p->last_stat_collected_usec) / (USEC_PER_SEC * RATES_DETAIL); - p->gtime_raw = gtime * (p->stat_collected_usec - p->last_stat_collected_usec) / (USEC_PER_SEC * RATES_DETAIL); - p->minflt_raw = minflt * (p->stat_collected_usec - p->last_stat_collected_usec) / (USEC_PER_SEC * RATES_DETAIL); - p->majflt_raw = majflt * (p->stat_collected_usec - p->last_stat_collected_usec) / (USEC_PER_SEC * RATES_DETAIL); - p->cutime_raw = p->cstime_raw = p->cgtime_raw = p->cminflt_raw = p->cmajflt_raw = 0; - - debug_log(" "); - } - else - debug_log(" > totally absorbed - DONE - %s (%d %s)" - , p->comm - , p->pid - , p->updated?"running":"exited" - ); - } -} - -// ---------------------------------------------------------------------------- - -// 1. read all files in /proc -// 2. for each numeric directory: -// i. read /proc/pid/stat -// ii. read /proc/pid/status -// iii. 
read /proc/pid/io (requires root access) -// iii. read the entries in directory /proc/pid/fd (requires root access) -// for each entry: -// a. find or create a struct file_descriptor -// b. cleanup any old/unused file_descriptors - -// after all these, some pids may be linked to targets, while others may not - -// in case of errors, only 1 every 1000 errors is printed -// to avoid filling up all disk space -// if debug is enabled, all errors are printed - -static inline void mark_pid_as_unread(struct pid_stat *p) { - p->read = false; // mark it as not read, so that collect_data_for_pid() will read it - p->updated = false; - p->merged = false; - p->children_count = 0; - p->parent = NULL; -} - -#if defined(__FreeBSD__) || defined(__APPLE__) -static inline void get_current_time(void) { - struct timeval current_time; - gettimeofday(¤t_time, NULL); - system_current_time_ut = timeval_usec(¤t_time); -} -#endif - -#if defined(__FreeBSD__) -static inline bool collect_data_for_all_pids_per_os(void) { - // Mark all processes as unread before collecting new data - struct pid_stat *p = NULL; - if(all_pids_count) { - for(p = root_of_pids; p ; p = p->next) - mark_pid_as_unread(p); - } - - int i, procnum; - - static size_t procbase_size = 0; - static struct kinfo_proc *procbase = NULL; - - size_t new_procbase_size; - - int mib[3] = { CTL_KERN, KERN_PROC, KERN_PROC_PROC }; - if (unlikely(sysctl(mib, 3, NULL, &new_procbase_size, NULL, 0))) { - netdata_log_error("sysctl error: Can't get processes data size"); - return false; - } - - // give it some air for processes that may be started - // during this little time. 
- new_procbase_size += 100 * sizeof(struct kinfo_proc); - - // increase the buffer if needed - if(new_procbase_size > procbase_size) { - procbase_size = new_procbase_size; - procbase = reallocz(procbase, procbase_size); - } - - // sysctl() gets from new_procbase_size the buffer size - // and also returns to it the amount of data filled in - new_procbase_size = procbase_size; - - // get the processes from the system - if (unlikely(sysctl(mib, 3, procbase, &new_procbase_size, NULL, 0))) { - netdata_log_error("sysctl error: Can't get processes data"); - return false; - } - - // based on the amount of data filled in - // calculate the number of processes we got - procnum = new_procbase_size / sizeof(struct kinfo_proc); - - get_current_time(); - - for (i = 0 ; i < procnum ; ++i) { - pid_t pid = procbase[i].ki_pid; - if (pid <= 0) continue; - collect_data_for_pid(pid, &procbase[i]); - } - - return true; -} -#endif // __FreeBSD__ - -#if defined(__APPLE__) -static inline bool collect_data_for_all_pids_per_os(void) { - // Mark all processes as unread before collecting new data - struct pid_stat *p; - if(all_pids_count) { - for(p = root_of_pids; p; p = p->next) - mark_pid_as_unread(p); - } - - static pid_t *pids = NULL; - static int allocatedProcessCount = 0; - - // Get the number of processes - int numberOfProcesses = proc_listpids(PROC_ALL_PIDS, 0, NULL, 0); - if (numberOfProcesses <= 0) { - netdata_log_error("Failed to retrieve the process count"); - return false; - } - - // Allocate or reallocate space to hold all the process IDs if necessary - if (numberOfProcesses > allocatedProcessCount) { - // Allocate additional space to avoid frequent reallocations - allocatedProcessCount = numberOfProcesses + 100; - pids = reallocz(pids, allocatedProcessCount * sizeof(pid_t)); - } - - // this is required, otherwise the PIDs become totally random - memset(pids, 0, allocatedProcessCount * sizeof(pid_t)); - - // get the list of PIDs - numberOfProcesses = proc_listpids(PROC_ALL_PIDS, 
0, pids, allocatedProcessCount * sizeof(pid_t)); - if (numberOfProcesses <= 0) { - netdata_log_error("Failed to retrieve the process IDs"); - return false; - } - - get_current_time(); - - // Collect data for each process - for (int i = 0; i < numberOfProcesses; ++i) { - pid_t pid = pids[i]; - if (pid <= 0) continue; - - struct pid_info pi = { 0 }; - - int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PID, pid}; - - size_t procSize = sizeof(pi.proc); - if(sysctl(mib, 4, &pi.proc, &procSize, NULL, 0) == -1) { - netdata_log_error("Failed to get proc for PID %d", pid); - continue; - } - if(procSize == 0) // no such process - continue; - - int st = proc_pidinfo(pid, PROC_PIDTASKINFO, 0, &pi.taskinfo, sizeof(pi.taskinfo)); - if (st <= 0) { - netdata_log_error("Failed to get task info for PID %d", pid); - continue; - } - - st = proc_pidinfo(pid, PROC_PIDTBSDINFO, 0, &pi.bsdinfo, sizeof(pi.bsdinfo)); - if (st <= 0) { - netdata_log_error("Failed to get BSD info for PID %d", pid); - continue; - } - - st = proc_pid_rusage(pid, RUSAGE_INFO_V4, (rusage_info_t *)&pi.rusageinfo); - if (st < 0) { - netdata_log_error("Failed to get resource usage info for PID %d", pid); - continue; - } - - collect_data_for_pid(pid, &pi); - } - - return true; -} -#endif // __APPLE__ - -#if !defined(__FreeBSD__) && !defined(__APPLE__) -static int compar_pid(const void *pid1, const void *pid2) { - - struct pid_stat *p1 = all_pids[*((pid_t *)pid1)]; - struct pid_stat *p2 = all_pids[*((pid_t *)pid2)]; - - if(p1->sortlist > p2->sortlist) - return -1; - else - return 1; -} - -static inline bool collect_data_for_all_pids_per_os(void) { - struct pid_stat *p = NULL; - - // clear process state counter - memset(proc_state_count, 0, sizeof proc_state_count); - - if(all_pids_count) { - size_t slc = 0; - for(p = root_of_pids; p ; p = p->next) { - mark_pid_as_unread(p); - all_pids_sortlist[slc++] = p->pid; - } - - if(unlikely(slc != all_pids_count)) { - netdata_log_error("Internal error: I was thinking I had %zu 
processes in my arrays, but it seems there are %zu.", all_pids_count, slc); - all_pids_count = slc; - } - - if(include_exited_childs) { - // Read parents before childs - // This is needed to prevent a situation where - // a child is found running, but until we read - // its parent, it has exited and its parent - // has accumulated its resources. - - qsort((void *)all_pids_sortlist, (size_t)all_pids_count, sizeof(pid_t), compar_pid); - - // we forward read all running processes - // collect_data_for_pid() is smart enough, - // not to read the same pid twice per iteration - for(slc = 0; slc < all_pids_count; slc++) { - collect_data_for_pid(all_pids_sortlist[slc], NULL); - } - } - } - - static char uptime_filename[FILENAME_MAX + 1] = ""; - if(*uptime_filename == '\0') - snprintfz(uptime_filename, FILENAME_MAX, "%s/proc/uptime", netdata_configured_host_prefix); - - system_uptime_secs = (kernel_uint_t)(uptime_msec(uptime_filename) / MSEC_PER_SEC); - - char dirname[FILENAME_MAX + 1]; - - snprintfz(dirname, FILENAME_MAX, "%s/proc", netdata_configured_host_prefix); - DIR *dir = opendir(dirname); - if(!dir) return false; - - struct dirent *de = NULL; - - while((de = readdir(dir))) { - char *endptr = de->d_name; - - if(unlikely(de->d_type != DT_DIR || de->d_name[0] < '0' || de->d_name[0] > '9')) - continue; - - pid_t pid = (pid_t) strtoul(de->d_name, &endptr, 10); - - // make sure we read a valid number - if(unlikely(endptr == de->d_name || *endptr != '\0')) - continue; - - collect_data_for_pid(pid, NULL); - } - closedir(dir); - - return true; -} -#endif // !__FreeBSD__ && !__APPLE__ - -bool collect_data_for_all_pids(void) { - if(!collect_data_for_all_pids_per_os()) - return false; - - if(!all_pids_count) - return false; - - // we need /proc/stat to normalize the cpu consumption of the exited childs - read_global_time(); - - // build the process tree - link_all_processes_to_their_parents(); - - // normally this is done - // however we may have processes exited while we 
collected values - // so let's find the exited ones - // we do this by collecting the ownership of process - // if we manage to get the ownership, the process still runs - process_exited_pids(); - - return true; -} diff --git a/src/collectors/apps.plugin/apps_proc_stat.c b/src/collectors/apps.plugin/apps_proc_stat.c deleted file mode 100644 index 8564ddd55eede0..00000000000000 --- a/src/collectors/apps.plugin/apps_proc_stat.c +++ /dev/null @@ -1,154 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "apps_plugin.h" - -#if defined(__APPLE__) -int read_global_time(void) { - static kernel_uint_t utime_raw = 0, stime_raw = 0, ntime_raw = 0; - static usec_t collected_usec = 0, last_collected_usec = 0; - - host_cpu_load_info_data_t cpuinfo; - mach_msg_type_number_t count = HOST_CPU_LOAD_INFO_COUNT; - - if (host_statistics(mach_host_self(), HOST_CPU_LOAD_INFO, (host_info_t)&cpuinfo, &count) != KERN_SUCCESS) { - // Handle error - goto cleanup; - } - - last_collected_usec = collected_usec; - collected_usec = now_monotonic_usec(); - - calls_counter++; - - // Convert ticks to time - // Note: MacOS does not separate nice time from user time in the CPU stats, so you might need to adjust this logic - kernel_uint_t global_ntime = 0; // Assuming you want to keep track of nice time separately - - incremental_rate(global_utime, utime_raw, cpuinfo.cpu_ticks[CPU_STATE_USER] + cpuinfo.cpu_ticks[CPU_STATE_NICE], collected_usec, last_collected_usec); - incremental_rate(global_ntime, ntime_raw, cpuinfo.cpu_ticks[CPU_STATE_NICE], collected_usec, last_collected_usec); - incremental_rate(global_stime, stime_raw, cpuinfo.cpu_ticks[CPU_STATE_SYSTEM], collected_usec, last_collected_usec); - - global_utime += global_ntime; - - if(unlikely(global_iterations_counter == 1)) { - global_utime = 0; - global_stime = 0; - global_gtime = 0; - } - - return 1; - -cleanup: - global_utime = 0; - global_stime = 0; - global_gtime = 0; - return 0; -} -#endif // __APPLE__ - - -#if 
defined(__FreeBSD__) -int read_global_time(void) { - static kernel_uint_t utime_raw = 0, stime_raw = 0, ntime_raw = 0; - static usec_t collected_usec = 0, last_collected_usec = 0; - long cp_time[CPUSTATES]; - - if (unlikely(CPUSTATES != 5)) { - goto cleanup; - } else { - static int mib[2] = {0, 0}; - - if (unlikely(GETSYSCTL_SIMPLE("kern.cp_time", mib, cp_time))) { - goto cleanup; - } - } - - last_collected_usec = collected_usec; - collected_usec = now_monotonic_usec(); - - calls_counter++; - - // temporary - it is added global_ntime; - kernel_uint_t global_ntime = 0; - - incremental_rate(global_utime, utime_raw, cp_time[0] * 100LLU / system_hz, collected_usec, last_collected_usec); - incremental_rate(global_ntime, ntime_raw, cp_time[1] * 100LLU / system_hz, collected_usec, last_collected_usec); - incremental_rate(global_stime, stime_raw, cp_time[2] * 100LLU / system_hz, collected_usec, last_collected_usec); - - global_utime += global_ntime; - - if(unlikely(global_iterations_counter == 1)) { - global_utime = 0; - global_stime = 0; - global_gtime = 0; - } - - return 1; - -cleanup: - global_utime = 0; - global_stime = 0; - global_gtime = 0; - return 0; -} -#endif // __APPLE__ - -#if !defined(__FreeBSD__) && !defined(__APPLE__) -int read_global_time(void) { - static char filename[FILENAME_MAX + 1] = ""; - static procfile *ff = NULL; - static kernel_uint_t utime_raw = 0, stime_raw = 0, gtime_raw = 0, gntime_raw = 0, ntime_raw = 0; - static usec_t collected_usec = 0, last_collected_usec = 0; - - if(unlikely(!ff)) { - snprintfz(filename, FILENAME_MAX, "%s/proc/stat", netdata_configured_host_prefix); - ff = procfile_open(filename, " \t:", PROCFILE_FLAG_DEFAULT); - if(unlikely(!ff)) goto cleanup; - } - - ff = procfile_readall(ff); - if(unlikely(!ff)) goto cleanup; - - last_collected_usec = collected_usec; - collected_usec = now_monotonic_usec(); - - calls_counter++; - - // temporary - it is added global_ntime; - kernel_uint_t global_ntime = 0; - - 
incremental_rate(global_utime, utime_raw, str2kernel_uint_t(procfile_lineword(ff, 0, 1)), collected_usec, last_collected_usec); - incremental_rate(global_ntime, ntime_raw, str2kernel_uint_t(procfile_lineword(ff, 0, 2)), collected_usec, last_collected_usec); - incremental_rate(global_stime, stime_raw, str2kernel_uint_t(procfile_lineword(ff, 0, 3)), collected_usec, last_collected_usec); - incremental_rate(global_gtime, gtime_raw, str2kernel_uint_t(procfile_lineword(ff, 0, 10)), collected_usec, last_collected_usec); - - global_utime += global_ntime; - - if(enable_guest_charts) { - // temporary - it is added global_ntime; - kernel_uint_t global_gntime = 0; - - // guest nice time, on guest time - incremental_rate(global_gntime, gntime_raw, str2kernel_uint_t(procfile_lineword(ff, 0, 11)), collected_usec, last_collected_usec); - - global_gtime += global_gntime; - - // remove guest time from user time - global_utime -= (global_utime > global_gtime) ? global_gtime : global_utime; - } - - if(unlikely(global_iterations_counter == 1)) { - global_utime = 0; - global_stime = 0; - global_gtime = 0; - } - - return 1; - -cleanup: - global_utime = 0; - global_stime = 0; - global_gtime = 0; - return 0; -} -#endif // !__FreeBSD__ !__APPLE__ diff --git a/src/collectors/apps.plugin/apps_targets.c b/src/collectors/apps.plugin/apps_targets.c index 7deaa798ca8d07..c35eb1119eed5b 100644 --- a/src/collectors/apps.plugin/apps_targets.c +++ b/src/collectors/apps.plugin/apps_targets.c @@ -2,22 +2,226 @@ #include "apps_plugin.h" -// ---------------------------------------------------------------------------- -// apps_groups.conf -// aggregate all processes in groups, to have a limited number of dimensions +pid_t INIT_PID = OS_INIT_PID; + +static STRING *get_clean_name(STRING *name) { + char buf[string_strlen(name) + 1]; + memcpy(buf, string2str(name), string_strlen(name) + 1); + netdata_fix_chart_name(buf); + for (char *d = buf; *d ; d++) { + if (*d == '.') *d = '_'; + } + return 
string_strdupz(buf); +} + +static inline STRING *get_numeric_string(uint64_t n) { + char buf[UINT64_MAX_LENGTH]; + print_uint64(buf, n); + return string_strdupz(buf); +} + +struct target *find_target_by_name(struct target *base, const char *name) { + struct target *t; + for(t = base; t ; t = t->next) { + if (string_strcmp(t->name, name) == 0) + return t; + } + + return NULL; +} + +// -------------------------------------------------------------------------------------------------------------------- +// Tree + +static inline STRING *comm_from_cmdline(STRING *comm, STRING *cmdline) { + if(!cmdline) return sanitize_chart_meta_string(comm); + + const char *cl = string2str(cmdline); + size_t len = string_strlen(cmdline); + + char buf_cmd[len + 1]; + // if it is enclosed in (), remove the parenthesis + if(cl[0] == '(' && cl[len - 1] == ')') { + memcpy(buf_cmd, &cl[1], len - 2); + buf_cmd[len - 2] = '\0'; + } + else + memcpy(buf_cmd, cl, sizeof(buf_cmd)); + + char *start = strstr(buf_cmd, string2str(comm)); + if(start) { + char *end = start + string_strlen(comm); + while(*end && !isspace((uint8_t)*end) && *end != '/' && *end != '\\') end++; + *end = '\0'; + + sanitize_chart_meta(start); + return string_strdupz(start); + } + + return sanitize_chart_meta_string(comm); +} + +struct comm_list { + STRING *comm; +}; + +struct managed_list { + size_t used; + size_t size; + struct comm_list *array; +}; + +static struct { + struct managed_list managers; + struct managed_list aggregators; +} tree = { + .managers = { + .array = NULL, + .size = 0, + .used = 0, + }, + .aggregators = { + .array = NULL, + .size = 0, + .used = 0, + } +}; + +static void managed_list_clear(struct managed_list *list) { + for(size_t c = 0; c < list->used ; c++) + string_freez(list->array[c].comm); + + freez(list->array); + list->array = NULL; + list->used = 0; + list->size = 0; +} + +static void managed_list_add(struct managed_list *list, const char *s) { + if(list->used >= list->size) { + if(!list->size) + 
list->size = 10; + else + list->size *= 2; + list->array = reallocz(list->array, sizeof(*list->array) * list->size); + } + + list->array[list->used++].comm = string_strdupz(s); +} + +static STRING *KernelAggregator = NULL; + +void apps_orchestrators_and_aggregators_init(void) { + KernelAggregator = string_strdupz("kernel"); + + managed_list_clear(&tree.managers); +#if defined(OS_LINUX) + managed_list_add(&tree.managers, "init"); // linux systems + managed_list_add(&tree.managers, "systemd"); // lxc containers and host systems (this also catches "systemd --user") + managed_list_add(&tree.managers, "containerd-shim"); // docker containers + managed_list_add(&tree.managers, "docker-init"); // docker containers + managed_list_add(&tree.managers, "dumb-init"); // some docker containers use this + managed_list_add(&tree.managers, "gnome-shell"); // gnome user applications +#elif defined(OS_WINDOWS) + managed_list_add(&tree.managers, "System"); + managed_list_add(&tree.managers, "services"); + managed_list_add(&tree.managers, "wininit"); +#elif defined(OS_FREEBSD) + managed_list_add(&tree.managers, "init"); +#elif defined(OS_MACOS) + managed_list_add(&tree.managers, "launchd"); +#endif + + managed_list_clear(&tree.aggregators); +#if defined(OS_LINUX) + managed_list_add(&tree.aggregators, "kthread"); +#elif defined(OS_WINDOWS) +#elif defined(OS_FREEBSD) + managed_list_add(&tree.aggregators, "kernel"); +#elif defined(OS_MACOS) +#endif +} + +static inline bool is_orchestrator(struct pid_stat *p) { + for(size_t c = 0; c < tree.managers.used ; c++) { + if(p->comm == tree.managers.array[c].comm) + return true; + } + + return false; +} + +static inline bool is_aggregator(struct pid_stat *p) { + for(size_t c = 0; c < tree.aggregators.used ; c++) { + if(p->comm == tree.aggregators.array[c].comm) + return true; + } + + return false; +} + +struct target *get_tree_target(struct pid_stat *p) { +// // skip fast all the children that are more than 3 levels down +// while(p->parent && 
p->parent->pid != INIT_PID && p->parent->parent && p->parent->parent->parent) +// p = p->parent; + + // keep the children of INIT_PID, and process orchestrators + while(p->parent && p->parent->pid != INIT_PID && p->parent->pid != 0 && !is_orchestrator(p->parent)) + p = p->parent; + + // merge all processes into process aggregators + STRING *search_for = string_dup(p->comm); + bool aggregator = false; + if((p->ppid == 0 && p->pid != INIT_PID) || (p->parent && is_aggregator(p->parent))) { + aggregator = true; + search_for = string_dup(KernelAggregator); + } + + if(!aggregator) { +#if (PROCESSES_HAVE_COMM_AND_NAME == 1) + search_for = sanitize_chart_meta_string(p->name ? p->name : p->comm); +#else + search_for = comm_from_cmdline(p->comm, p->cmdline); +#endif + } + + struct target *w; + for(w = apps_groups_root_target; w ; w = w->next) { + if (w->name == search_for) { + string_freez(search_for); + return w; + } + } + + w = callocz(sizeof(struct target), 1); + w->type = TARGET_TYPE_TREE; + w->starts_with = w->ends_with = false; + w->compare = string_dup(p->comm); + w->id = search_for; + w->name = string_dup(search_for); + w->clean_name = get_clean_name(w->name); + + w->next = apps_groups_root_target; + apps_groups_root_target = w; -struct target *get_users_target(uid_t uid) { + return w; +} + +// -------------------------------------------------------------------------------------------------------------------- +// Users + +#if (PROCESSES_HAVE_UID == 1) +struct target *users_root_target = NULL; + +struct target *get_uid_target(uid_t uid) { struct target *w; for(w = users_root_target ; w ; w = w->next) if(w->uid == uid) return w; w = callocz(sizeof(struct target), 1); - snprintfz(w->compare, MAX_COMPARE_NAME, "%u", uid); - w->comparehash = simple_hash(w->compare); - w->comparelen = strlen(w->compare); - - snprintfz(w->id, MAX_NAME, "%u", uid); - w->idhash = simple_hash(w->id); + w->type = TARGET_TYPE_UID; + w->uid = uid; + w->id = get_numeric_string(uid); struct 
user_or_group_id user_id_to_find = { .id = { @@ -27,41 +231,41 @@ struct target *get_users_target(uid_t uid) { struct user_or_group_id *user_or_group_id = user_id_find(&user_id_to_find); if(user_or_group_id && user_or_group_id->name && *user_or_group_id->name) - snprintfz(w->name, MAX_NAME, "%s", user_or_group_id->name); - + w->name = string_strdupz(user_or_group_id->name); else { struct passwd *pw = getpwuid(uid); if(!pw || !pw->pw_name || !*pw->pw_name) - snprintfz(w->name, MAX_NAME, "%u", uid); + w->name = get_numeric_string(uid); else - snprintfz(w->name, MAX_NAME, "%s", pw->pw_name); + w->name = string_strdupz(pw->pw_name); } - strncpyz(w->clean_name, w->name, MAX_NAME); - netdata_fix_chart_name(w->clean_name); - - w->uid = uid; + w->clean_name = get_clean_name(w->name); w->next = users_root_target; users_root_target = w; - debug_log("added uid %u ('%s') target", w->uid, w->name); + debug_log("added uid %u ('%s') target", w->uid, string2str(w->name)); return w; } +#endif + +// -------------------------------------------------------------------------------------------------------------------- +// Groups -struct target *get_groups_target(gid_t gid) { +#if (PROCESSES_HAVE_GID == 1) +struct target *groups_root_target = NULL; + +struct target *get_gid_target(gid_t gid) { struct target *w; for(w = groups_root_target ; w ; w = w->next) if(w->gid == gid) return w; w = callocz(sizeof(struct target), 1); - snprintfz(w->compare, MAX_COMPARE_NAME, "%u", gid); - w->comparehash = simple_hash(w->compare); - w->comparelen = strlen(w->compare); - - snprintfz(w->id, MAX_NAME, "%u", gid); - w->idhash = simple_hash(w->id); + w->type = TARGET_TYPE_GID; + w->gid = gid; + w->id = get_numeric_string(gid); struct user_or_group_id group_id_to_find = { .id = { @@ -70,21 +274,17 @@ struct target *get_groups_target(gid_t gid) { }; struct user_or_group_id *group_id = group_id_find(&group_id_to_find); - if(group_id && group_id->name && *group_id->name) { - snprintfz(w->name, MAX_NAME, "%s", 
group_id->name); - } + if(group_id && group_id->name) + w->name = string_strdupz(group_id->name); else { struct group *gr = getgrgid(gid); if(!gr || !gr->gr_name || !*gr->gr_name) - snprintfz(w->name, MAX_NAME, "%u", gid); + w->name = get_numeric_string(gid); else - snprintfz(w->name, MAX_NAME, "%s", gr->gr_name); + w->name = string_strdupz(gr->gr_name); } - strncpyz(w->clean_name, w->name, MAX_NAME); - netdata_fix_chart_name(w->clean_name); - - w->gid = gid; + w->clean_name = get_clean_name(w->name); w->next = groups_root_target; groups_root_target = w; @@ -93,87 +293,101 @@ struct target *get_groups_target(gid_t gid) { return w; } +#endif + +// -------------------------------------------------------------------------------------------------------------------- +// apps_groups.conf + +struct target *apps_groups_root_target = NULL; // find or create a new target // there are targets that are just aggregated to other target (the second argument) static struct target *get_apps_groups_target(const char *id, struct target *target, const char *name) { - int tdebug = 0, thidden = target?target->hidden:0, ends_with = 0; - const char *nid = id; - - // extract the options - while(nid[0] == '-' || nid[0] == '+' || nid[0] == '*') { - if(nid[0] == '-') thidden = 1; - if(nid[0] == '+') tdebug = 1; - if(nid[0] == '*') ends_with = 1; - nid++; + bool tdebug = false, thidden = target ? 
target->hidden : false, ends_with = false, starts_with = false; + + STRING *id_lookup = NULL; + STRING *name_lookup = NULL; + + // extract the options from the id + { + size_t len = strlen(id); + char buf[len + 1]; + memcpy(buf, id, sizeof(buf)); + + if(buf[len - 1] == '*') { + buf[--len] = '\0'; + starts_with = true; + } + + const char *nid = buf; + while (nid[0] == '-' || nid[0] == '+' || nid[0] == '*') { + if (nid[0] == '-') thidden = true; + if (nid[0] == '+') tdebug = true; + if (nid[0] == '*') ends_with = true; + nid++; + } + + id_lookup = string_strdupz(nid); + } + + // extract the options from the name + { + size_t len = strlen(name); + char buf[len + 1]; + memcpy(buf, name, sizeof(buf)); + + const char *nn = buf; + while (nn[0] == '-' || nn[0] == '+') { + if (nn[0] == '-') thidden = true; + if (nn[0] == '+') tdebug = true; + nn++; + } + + name_lookup = string_strdupz(nn); } - uint32_t hash = simple_hash(id); // find if it already exists struct target *w, *last = apps_groups_root_target; for(w = apps_groups_root_target ; w ; w = w->next) { - if(w->idhash == hash && strncmp(nid, w->id, MAX_NAME) == 0) + if(w->id == id_lookup) { + string_freez(id_lookup); + string_freez(name_lookup); return w; + } last = w; } // find an existing target if(unlikely(!target)) { - while(*name == '-') { - if(*name == '-') thidden = 1; - name++; - } - - for(target = apps_groups_root_target ; target != NULL ; target = target->next) { - if(!target->target && strcmp(name, target->name) == 0) + for(target = apps_groups_root_target ; target ; target = target->next) { + if(!target->target && name_lookup == target->name) break; } - - if(unlikely(debug_enabled)) { - if(unlikely(target)) - debug_log("REUSING TARGET NAME '%s' on ID '%s'", target->name, target->id); - else - debug_log("NEW TARGET NAME '%s' on ID '%s'", name, id); - } } if(target && target->target) - fatal("Internal Error: request to link process '%s' to target '%s' which is linked to target '%s'", id, target->id, 
target->target->id); + fatal("Internal Error: request to link process '%s' to target '%s' which is linked to target '%s'", + id, string2str(target->id), string2str(target->target->id)); w = callocz(sizeof(struct target), 1); - strncpyz(w->id, nid, MAX_NAME); - w->idhash = simple_hash(w->id); + w->type = TARGET_TYPE_APP_GROUP; + w->compare = string_dup(id_lookup); + w->starts_with = starts_with; + w->ends_with = ends_with; + w->id = string_dup(id_lookup); if(unlikely(!target)) - // copy the name - strncpyz(w->name, name, MAX_NAME); + w->name = string_dup(name_lookup); // copy the name else - // copy the id - strncpyz(w->name, nid, MAX_NAME); + w->name = string_dup(id_lookup); // copy the id // dots are used to distinguish chart type and id in streaming, so we should replace them - strncpyz(w->clean_name, w->name, MAX_NAME); - netdata_fix_chart_name(w->clean_name); - for (char *d = w->clean_name; *d; d++) { - if (*d == '.') - *d = '_'; - } - - strncpyz(w->compare, nid, MAX_COMPARE_NAME); - size_t len = strlen(w->compare); - if(w->compare[len - 1] == '*') { - w->compare[len - 1] = '\0'; - w->starts_with = 1; - } - w->ends_with = ends_with; + w->clean_name = get_clean_name(w->name); if(w->starts_with && w->ends_with) proc_pid_cmdline_is_needed = true; - w->comparehash = simple_hash(w->compare); - w->comparelen = strlen(w->compare); - w->hidden = thidden; #ifdef NETDATA_INTERNAL_CHECKS w->debug_enabled = tdebug; @@ -188,13 +402,17 @@ static struct target *get_apps_groups_target(const char *id, struct target *targ else apps_groups_root_target = w; debug_log("ADDING TARGET ID '%s', process name '%s' (%s), aggregated on target '%s', options: %s %s" - , w->id - , w->compare, (w->starts_with && w->ends_with)?"substring":((w->starts_with)?"prefix":((w->ends_with)?"suffix":"exact")) - , w->target?w->target->name:w->name + , string2str(w->id) + , string2str(w->compare) + , (w->starts_with && w->ends_with)?"substring":((w->starts_with)?"prefix":((w->ends_with)?"suffix":"exact")) 
+ , w->target?w->target->name:w->name , (w->hidden)?"hidden":"-" , (w->debug_enabled)?"debug":"-" ); + string_freez(id_lookup); + string_freez(name_lookup); + return w; } @@ -219,6 +437,8 @@ int read_apps_groups_conf(const char *path, const char *file) { size_t line, lines = procfile_lines(ff); + bool managers_reset_done = false; + for(line = 0; line < lines ;line++) { size_t word, words = procfile_linewords(ff, line); if(!words) continue; @@ -226,6 +446,27 @@ int read_apps_groups_conf(const char *path, const char *file) { char *name = procfile_lineword(ff, line, 0); if(!name || !*name) continue; + if(strcmp(name, "managers") == 0) { + if(!managers_reset_done) { + managers_reset_done = true; + managed_list_clear(&tree.managers); + } + + for(word = 0; word < words ;word++) { + char *s = procfile_lineword(ff, line, word); + if (!s || !*s) continue; + if (*s == '#') break; + + // is this the first word? skip it + if(s == name) continue; + + managed_list_add(&tree.managers, s); + } + + // done with managers, proceed to next line + continue; + } + // find a possibly existing target struct target *w = NULL; @@ -252,15 +493,5 @@ int read_apps_groups_conf(const char *path, const char *file) { } procfile_close(ff); - - apps_groups_default_target = get_apps_groups_target("p+!o@w#e$i^r&7*5(-i)l-o_", NULL, "other"); // match nothing - if(!apps_groups_default_target) - fatal("Cannot create default target"); - apps_groups_default_target->is_other = true; - - // allow the user to override group 'other' - if(apps_groups_default_target->target) - apps_groups_default_target = apps_groups_default_target->target; - return 0; } diff --git a/src/collectors/apps.plugin/apps_users_and_groups.c b/src/collectors/apps.plugin/apps_users_and_groups.c index d28b39e7983527..8a8b50ff54176f 100644 --- a/src/collectors/apps.plugin/apps_users_and_groups.c +++ b/src/collectors/apps.plugin/apps_users_and_groups.c @@ -2,6 +2,8 @@ #include "apps_plugin.h" +#if (PROCESSES_HAVE_UID == 1) || 
(PROCESSES_HAVE_GID == 1) + // ---------------------------------------------------------------------------- // read users and groups from files @@ -19,7 +21,8 @@ struct user_or_group_ids { char filename[FILENAME_MAX + 1]; }; -int user_id_compare(void* a, void* b) { +#if (PROCESSES_HAVE_UID == 1) +static int user_id_compare(void* a, void* b) { if(((struct user_or_group_id *)a)->id.uid < ((struct user_or_group_id *)b)->id.uid) return -1; @@ -30,7 +33,7 @@ int user_id_compare(void* a, void* b) { return 0; } -struct user_or_group_ids all_user_ids = { +static struct user_or_group_ids all_user_ids = { .type = USER_ID, .index = { @@ -42,8 +45,10 @@ struct user_or_group_ids all_user_ids = { .filename = "", }; +#endif -int group_id_compare(void* a, void* b) { +#if (PROCESSES_HAVE_GID == 1) +static int group_id_compare(void* a, void* b) { if(((struct user_or_group_id *)a)->id.gid < ((struct user_or_group_id *)b)->id.gid) return -1; @@ -54,7 +59,7 @@ int group_id_compare(void* a, void* b) { return 0; } -struct user_or_group_ids all_group_ids = { +static struct user_or_group_ids all_group_ids = { .type = GROUP_ID, .index = { @@ -66,9 +71,10 @@ struct user_or_group_ids all_group_ids = { .filename = "", }; +#endif -int file_changed(const struct stat *statbuf __maybe_unused, struct timespec *last_modification_time __maybe_unused) { -#if defined(__APPLE__) +static int file_changed(const struct stat *statbuf __maybe_unused, struct timespec *last_modification_time __maybe_unused) { +#if defined(OS_MACOS) || defined(OS_WINDOWS) return 0; #else if(likely(statbuf->st_mtim.tv_sec == last_modification_time->tv_sec && @@ -81,7 +87,7 @@ int file_changed(const struct stat *statbuf __maybe_unused, struct timespec *las #endif } -int read_user_or_group_ids(struct user_or_group_ids *ids, struct timespec *last_modification_time) { +static int read_user_or_group_ids(struct user_or_group_ids *ids, struct timespec *last_modification_time) { struct stat statbuf; if(unlikely(stat(ids->filename, 
&statbuf))) return 1; @@ -109,10 +115,14 @@ int read_user_or_group_ids(struct user_or_group_ids *ids, struct timespec *last_ struct user_or_group_id *user_or_group_id = callocz(1, sizeof(struct user_or_group_id)); +#if (PROCESSES_HAVE_UID == 1) if(ids->type == USER_ID) user_or_group_id->id.uid = (uid_t) str2ull(id_string, NULL); - else +#endif +#if (PROCESSES_HAVE_GID == 1) + if(ids->type == GROUP_ID) user_or_group_id->id.gid = (uid_t) str2ull(id_string, NULL); +#endif user_or_group_id->name = strdupz(name); user_or_group_id->updated = 1; @@ -172,6 +182,7 @@ int read_user_or_group_ids(struct user_or_group_ids *ids, struct timespec *last_ return 0; } +#if (PROCESSES_HAVE_UID == 1) struct user_or_group_id *user_id_find(struct user_or_group_id *user_id_to_find) { if(*netdata_configured_host_prefix) { static struct timespec last_passwd_modification_time; @@ -183,7 +194,9 @@ struct user_or_group_id *user_id_find(struct user_or_group_id *user_id_to_find) return NULL; } +#endif +#if (PROCESSES_HAVE_GID == 1) struct user_or_group_id *group_id_find(struct user_or_group_id *group_id_to_find) { if(*netdata_configured_host_prefix) { static struct timespec last_group_modification_time; @@ -195,12 +208,17 @@ struct user_or_group_id *group_id_find(struct user_or_group_id *group_id_to_find return NULL; } +#endif +#endif -void users_and_groups_init(void) { +void apps_users_and_groups_init(void) { +#if (PROCESSES_HAVE_UID == 1) snprintfz(all_user_ids.filename, FILENAME_MAX, "%s/etc/passwd", netdata_configured_host_prefix); debug_log("passwd file: '%s'", all_user_ids.filename); +#endif +#if (PROCESSES_HAVE_GID == 1) snprintfz(all_group_ids.filename, FILENAME_MAX, "%s/etc/group", netdata_configured_host_prefix); debug_log("group file: '%s'", all_group_ids.filename); +#endif } - diff --git a/src/collectors/apps.plugin/busy_threads.c b/src/collectors/apps.plugin/busy_threads.c new file mode 100644 index 00000000000000..490c66148685cf --- /dev/null +++ 
b/src/collectors/apps.plugin/busy_threads.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +/* + * A very simple pthreads program to spawn N busy threads. + * It is just used for validating apps.plugin CPU utilization + * calculations per operating system. + * + * Compile with: + * + * gcc -O2 -ggdb -o busy_threads busy_threads.c -pthread + * + * Run as: + * + * busy_threads 2 + * + * The above will create 2 busy threads, each using 1 core in user time. + * + */ + +#include +#include +#include +#include +#include + +volatile int keep_running = 1; + +void handle_signal(int signal) { + keep_running = 0; +} + +void *busy_loop(void *arg) { + while (keep_running) { + // Busy loop to keep CPU at 100% + } + return NULL; +} + +int main(int argc, char *argv[]) { + if (argc != 2) { + fprintf(stderr, "Usage: %s \n", argv[0]); + exit(EXIT_FAILURE); + } + + int num_threads = atoi(argv[1]); + if (num_threads <= 0) { + fprintf(stderr, "Number of threads must be a positive integer.\n"); + exit(EXIT_FAILURE); + } + + // Register the signal handler to gracefully exit on Ctrl-C + signal(SIGINT, handle_signal); + + pthread_t *threads = malloc(sizeof(pthread_t) * num_threads); + if (threads == NULL) { + perror("malloc"); + exit(EXIT_FAILURE); + } + + // Create threads + for (int i = 0; i < num_threads; i++) { + if (pthread_create(&threads[i], NULL, busy_loop, NULL) != 0) { + perror("pthread_create"); + free(threads); + exit(EXIT_FAILURE); + } + } + + // Wait for threads to finish (they never will unless interrupted) + for (int i = 0; i < num_threads; i++) { + pthread_join(threads[i], NULL); + } + + free(threads); + return 0; +} diff --git a/src/collectors/windows.plugin/GetSystemUptime.c b/src/collectors/windows.plugin/GetSystemUptime.c index 9ed939ca081254..59bf9d8556d01f 100644 --- a/src/collectors/windows.plugin/GetSystemUptime.c +++ b/src/collectors/windows.plugin/GetSystemUptime.c @@ -1,34 +1,34 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include 
"windows_plugin.h" -#include "windows-internals.h" - -int do_GetSystemUptime(int update_every, usec_t dt __maybe_unused) { - ULONGLONG uptime = GetTickCount64(); // in milliseconds - - static RRDSET *st = NULL; - static RRDDIM *rd_uptime = NULL; - if (!st) { - st = rrdset_create_localhost( - "system" - , "uptime" - , NULL - , "uptime" - , "system.uptime" - , "System Uptime" - , "seconds" - , PLUGIN_WINDOWS_NAME - , "GetSystemUptime" - , NETDATA_CHART_PRIO_SYSTEM_UPTIME - , update_every - , RRDSET_TYPE_LINE - ); - - rd_uptime = rrddim_add(st, "uptime", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); - } - - rrddim_set_by_pointer(st, rd_uptime, (collected_number)uptime); - rrdset_done(st); - - return 0; -} +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "windows_plugin.h" +#include "windows-internals.h" + +int do_GetSystemUptime(int update_every, usec_t dt __maybe_unused) { + ULONGLONG uptime = GetTickCount64(); // in milliseconds + + static RRDSET *st = NULL; + static RRDDIM *rd_uptime = NULL; + if (!st) { + st = rrdset_create_localhost( + "system" + , "uptime" + , NULL + , "uptime" + , "system.uptime" + , "System Uptime" + , "seconds" + , PLUGIN_WINDOWS_NAME + , "GetSystemUptime" + , NETDATA_CHART_PRIO_SYSTEM_UPTIME + , update_every + , RRDSET_TYPE_LINE + ); + + rd_uptime = rrddim_add(st, "uptime", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st, rd_uptime, (collected_number)uptime); + rrdset_done(st); + + return 0; +} diff --git a/src/collectors/windows.plugin/perflib-memory.c b/src/collectors/windows.plugin/perflib-memory.c index d9c35ce76517c5..e26729cdab7d4b 100644 --- a/src/collectors/windows.plugin/perflib-memory.c +++ b/src/collectors/windows.plugin/perflib-memory.c @@ -1,219 +1,219 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "windows_plugin.h" -#include "windows-internals.h" - -#define _COMMON_PLUGIN_NAME "windows.plugin" -#define _COMMON_PLUGIN_MODULE_NAME "PerflibMemory" -#include 
"../common-contexts/common-contexts.h" - -struct swap { - RRDSET *operations; - RRDDIM *rd_op_read; - RRDDIM *rd_op_write; - - RRDSET *pages; - RRDDIM *rd_page_read; - RRDDIM *rd_page_write; - - COUNTER_DATA pageReadsTotal; - COUNTER_DATA pageWritesTotal; - COUNTER_DATA pageInputTotal; - COUNTER_DATA pageOutputTotal; -}; - -struct system_pool { - RRDSET *pool; - RRDDIM *rd_paged; - RRDDIM *rd_nonpaged; - - COUNTER_DATA pagedData; - COUNTER_DATA nonPagedData; -}; - -struct swap localSwap = { 0 }; -struct system_pool localPool = { 0 }; - -void initialize_swap_keys(struct swap *p) { - // SWAP Operations - p->pageReadsTotal.key = "Page Reads/sec"; - p->pageWritesTotal.key = "Page Writes/s"; - - // Swap Pages - p->pageInputTotal.key = "Pages Input/sec"; - p->pageOutputTotal.key = "Pages Output/s"; -} - -void initialize_pool_keys(struct system_pool *p) { - p->pagedData.key = "Pool Paged Bytes"; - p->nonPagedData.key = "Pool Nonpaged Bytes"; -} - -static void initialize(void) { - initialize_swap_keys(&localSwap); - initialize_pool_keys(&localPool); -} - -static void do_memory_swap(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, int update_every) -{ - perflibGetObjectCounter(pDataBlock, pObjectType, &localSwap.pageReadsTotal); - perflibGetObjectCounter(pDataBlock, pObjectType, &localSwap.pageWritesTotal); - perflibGetObjectCounter(pDataBlock, pObjectType, &localSwap.pageInputTotal); - perflibGetObjectCounter(pDataBlock, pObjectType, &localSwap.pageOutputTotal); - - if (!localSwap.operations) { - localSwap.operations = rrdset_create_localhost( - "mem" - , "swap_operations", NULL - , "swap" - , "mem.swap_iops" - - , "Swap Operations" - , "operations/s" - , PLUGIN_WINDOWS_NAME - , "PerflibMemory" - , NETDATA_CHART_PRIO_MEM_SWAPIO - , update_every - , RRDSET_TYPE_STACKED - ); - - localSwap.rd_op_read = rrddim_add(localSwap.operations, "read", NULL, - 1, 1, RRD_ALGORITHM_INCREMENTAL); - localSwap.rd_op_write = rrddim_add(localSwap.operations, "write", NULL, - 1, -1, 
RRD_ALGORITHM_INCREMENTAL); - } - - rrddim_set_by_pointer(localSwap.operations, - localSwap.rd_op_read, - (collected_number)localSwap.pageReadsTotal.current.Data); - - rrddim_set_by_pointer(localSwap.operations, - localSwap.rd_op_write, - (collected_number)localSwap.pageWritesTotal.current.Data); - rrdset_done(localSwap.operations); - - if (!localSwap.pages) { - localSwap.pages = rrdset_create_localhost( - "mem" - , "swap_pages", NULL - , "swap" - , "mem.swap_pages_io" - - , "Swap Pages" - , "pages/s" - , PLUGIN_WINDOWS_NAME - , "PerflibMemory" - , NETDATA_CHART_PRIO_MEM_SWAP_PAGES - , update_every - , RRDSET_TYPE_STACKED - ); - - localSwap.rd_page_read = rrddim_add(localSwap.pages, "read", NULL, - 1, 1, RRD_ALGORITHM_INCREMENTAL); - localSwap.rd_page_write = rrddim_add(localSwap.pages, "write", NULL, - 1, -1, RRD_ALGORITHM_INCREMENTAL); - } - - rrddim_set_by_pointer(localSwap.pages, - localSwap.rd_page_read, - (collected_number)localSwap.pageInputTotal.current.Data); - - rrddim_set_by_pointer(localSwap.pages, - localSwap.rd_page_write, - (collected_number)localSwap.pageOutputTotal.current.Data); - rrdset_done(localSwap.pages); -} - -static void do_memory_system_pool(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, int update_every) -{ - perflibGetObjectCounter(pDataBlock, pObjectType, &localPool.nonPagedData); - perflibGetObjectCounter(pDataBlock, pObjectType, &localPool.pagedData); - - if (!localPool.pool) { - localPool.pool = rrdset_create_localhost( - "mem" - , "system_pool", NULL - , "mem" - , "mem.system_pool_size" - - , "System Memory Pool" - , "bytes" - , PLUGIN_WINDOWS_NAME - , "PerflibMemory" - , NETDATA_CHART_PRIO_MEM_SYSTEM_POOL - , update_every - , RRDSET_TYPE_STACKED - ); - - localPool.rd_paged = rrddim_add(localPool.pool, "paged", NULL, - 1, 1, RRD_ALGORITHM_ABSOLUTE); - localPool.rd_nonpaged = rrddim_add(localPool.pool, "pool-paged", NULL, - 1, 1, RRD_ALGORITHM_ABSOLUTE); - } - - rrddim_set_by_pointer(localPool.pool, - localPool.rd_paged, 
- (collected_number)localPool.pagedData.current.Data); - - rrddim_set_by_pointer(localPool.pool, - localPool.rd_nonpaged, - (collected_number)localPool.nonPagedData.current.Data); - rrdset_done(localPool.pool); -} - -static bool do_memory(PERF_DATA_BLOCK *pDataBlock, int update_every) { - PERF_OBJECT_TYPE *pObjectType = perflibFindObjectTypeByName(pDataBlock, "Memory"); - if (!pObjectType) - return false; - - static COUNTER_DATA pagesPerSec = { .key = "Pages/sec" }; - static COUNTER_DATA pageFaultsPerSec = { .key = "Page Faults/sec" }; - - if(perflibGetObjectCounter(pDataBlock, pObjectType, &pageFaultsPerSec) && - perflibGetObjectCounter(pDataBlock, pObjectType, &pagesPerSec)) { - ULONGLONG total = pageFaultsPerSec.current.Data; - ULONGLONG major = pagesPerSec.current.Data; - ULONGLONG minor = (total > major) ? total - major : 0; - common_mem_pgfaults(minor, major, update_every); - } - - static COUNTER_DATA availableBytes = { .key = "Available Bytes" }; - static COUNTER_DATA availableKBytes = { .key = "Available KBytes" }; - static COUNTER_DATA availableMBytes = { .key = "Available MBytes" }; - ULONGLONG available_bytes = 0; - - if(perflibGetObjectCounter(pDataBlock, pObjectType, &availableBytes)) - available_bytes = availableBytes.current.Data; - else if(perflibGetObjectCounter(pDataBlock, pObjectType, &availableKBytes)) - available_bytes = availableKBytes.current.Data * 1024; - else if(perflibGetObjectCounter(pDataBlock, pObjectType, &availableMBytes)) - available_bytes = availableMBytes.current.Data * 1024 * 1024; - - common_mem_available(available_bytes, update_every); - - do_memory_swap(pDataBlock, pObjectType, update_every); - - do_memory_system_pool(pDataBlock, pObjectType, update_every); - - return true; -} - -int do_PerflibMemory(int update_every, usec_t dt __maybe_unused) { - static bool initialized = false; - - if(unlikely(!initialized)) { - initialize(); - initialized = true; - } - - DWORD id = RegistryFindIDByName("Memory"); - if(id == 
PERFLIB_REGISTRY_NAME_NOT_FOUND)
-        return -1;
-
-    PERF_DATA_BLOCK *pDataBlock = perflibGetPerformanceData(id);
-    if(!pDataBlock) return -1;
-
-    do_memory(pDataBlock, update_every);
-
-    return 0;
-}
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "windows_plugin.h"
+#include "windows-internals.h"
+
+#define _COMMON_PLUGIN_NAME "windows.plugin"
+#define _COMMON_PLUGIN_MODULE_NAME "PerflibMemory"
+#include "../common-contexts/common-contexts.h"
+
+// State for the two swap charts: chart and dimension handles plus the
+// perflib counters that feed them.
+struct swap {
+    RRDSET *operations;
+    RRDDIM *rd_op_read;
+    RRDDIM *rd_op_write;
+
+    RRDSET *pages;
+    RRDDIM *rd_page_read;
+    RRDDIM *rd_page_write;
+
+    COUNTER_DATA pageReadsTotal;
+    COUNTER_DATA pageWritesTotal;
+    COUNTER_DATA pageInputTotal;
+    COUNTER_DATA pageOutputTotal;
+};
+
+// State for the system memory pool chart: chart and dimension handles plus
+// the paged / non-paged pool counters.
+struct system_pool {
+    RRDSET *pool;
+    RRDDIM *rd_paged;
+    RRDDIM *rd_nonpaged;
+
+    COUNTER_DATA pagedData;
+    COUNTER_DATA nonPagedData;
+};
+
+struct swap localSwap = { 0 };
+struct system_pool localPool = { 0 };
+
+// Assign the perflib counter names used to look up the swap counters.
+// The names must match the counters of the Windows "Memory" object exactly;
+// all four of them end in "/sec".
+void initialize_swap_keys(struct swap *p) {
+    // SWAP Operations
+    p->pageReadsTotal.key = "Page Reads/sec";
+    p->pageWritesTotal.key = "Page Writes/sec";
+
+    // Swap Pages
+    p->pageInputTotal.key = "Pages Input/sec";
+    p->pageOutputTotal.key = "Pages Output/sec";
+}
+
+// Assign the perflib counter names used to look up the pool counters.
+void initialize_pool_keys(struct system_pool *p) {
+    p->pagedData.key = "Pool Paged Bytes";
+    p->nonPagedData.key = "Pool Nonpaged Bytes";
+}
+
+// One-time setup of the counter keys of both file-level state structs.
+static void initialize(void) {
+    initialize_swap_keys(&localSwap);
+    initialize_pool_keys(&localPool);
+}
+
+// Read the four swap counters from the "Memory" object and feed the
+// "mem.swap_iops" and "mem.swap_pages_io" charts. Each chart is created on
+// the first call and reused afterwards. The results of the counter lookups
+// are not checked: the dimensions are always set from current.Data.
+static void do_memory_swap(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, int update_every)
+{
+    perflibGetObjectCounter(pDataBlock, pObjectType, &localSwap.pageReadsTotal);
+    perflibGetObjectCounter(pDataBlock, pObjectType, &localSwap.pageWritesTotal);
+    perflibGetObjectCounter(pDataBlock, pObjectType, &localSwap.pageInputTotal);
+    perflibGetObjectCounter(pDataBlock, pObjectType, &localSwap.pageOutputTotal);
+
+    if (!localSwap.operations) {
+        localSwap.operations = rrdset_create_localhost(
+                "mem"
+                , "swap_operations", NULL
+                , "swap"
+                , "mem.swap_iops"
+
+                , "Swap Operations"
+                , "operations/s"
+                , PLUGIN_WINDOWS_NAME
+                , "PerflibMemory"
+                , NETDATA_CHART_PRIO_MEM_SWAPIO
+                , update_every
+                , RRDSET_TYPE_STACKED
+        );
+
+        // NOTE(review): the -1 divisor presumably plots writes below the axis - confirm
+        localSwap.rd_op_read = rrddim_add(localSwap.operations, "read", NULL,
+                                          1, 1, RRD_ALGORITHM_INCREMENTAL);
+        localSwap.rd_op_write = rrddim_add(localSwap.operations, "write", NULL,
+                                           1, -1, RRD_ALGORITHM_INCREMENTAL);
+    }
+
+    rrddim_set_by_pointer(localSwap.operations,
+                          localSwap.rd_op_read,
+                          (collected_number)localSwap.pageReadsTotal.current.Data);
+
+    rrddim_set_by_pointer(localSwap.operations,
+                          localSwap.rd_op_write,
+                          (collected_number)localSwap.pageWritesTotal.current.Data);
+    rrdset_done(localSwap.operations);
+
+    if (!localSwap.pages) {
+        localSwap.pages = rrdset_create_localhost(
+                "mem"
+                , "swap_pages", NULL
+                , "swap"
+                , "mem.swap_pages_io"
+
+                , "Swap Pages"
+                , "pages/s"
+                , PLUGIN_WINDOWS_NAME
+                , "PerflibMemory"
+                , NETDATA_CHART_PRIO_MEM_SWAP_PAGES
+                , update_every
+                , RRDSET_TYPE_STACKED
+        );
+
+        localSwap.rd_page_read = rrddim_add(localSwap.pages, "read", NULL,
+                                            1, 1, RRD_ALGORITHM_INCREMENTAL);
+        localSwap.rd_page_write = rrddim_add(localSwap.pages, "write", NULL,
+                                             1, -1, RRD_ALGORITHM_INCREMENTAL);
+    }
+
+    rrddim_set_by_pointer(localSwap.pages,
+                          localSwap.rd_page_read,
+                          (collected_number)localSwap.pageInputTotal.current.Data);
+
+    rrddim_set_by_pointer(localSwap.pages,
+                          localSwap.rd_page_write,
+                          (collected_number)localSwap.pageOutputTotal.current.Data);
+    rrdset_done(localSwap.pages);
+}
+
+// Read the paged / non-paged pool counters and feed the
+// "mem.system_pool_size" chart, which is created on the first call.
+static void do_memory_system_pool(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, int update_every)
+{
+    perflibGetObjectCounter(pDataBlock, pObjectType, &localPool.nonPagedData);
+    perflibGetObjectCounter(pDataBlock, pObjectType, &localPool.pagedData);
+
+    if (!localPool.pool) {
+        localPool.pool = rrdset_create_localhost(
+                "mem"
+                , "system_pool", NULL
+                , "mem"
+                , "mem.system_pool_size"
+
+                , "System Memory Pool"
+                , "bytes"
+                , PLUGIN_WINDOWS_NAME
+                , "PerflibMemory"
+                , NETDATA_CHART_PRIO_MEM_SYSTEM_POOL
+                , update_every
+                , RRDSET_TYPE_STACKED
+        );
+
+        localPool.rd_paged = rrddim_add(localPool.pool, "paged", NULL,
+                                        1, 1, RRD_ALGORITHM_ABSOLUTE);
+        // this dimension carries "Pool Nonpaged Bytes", so it is named accordingly
+        localPool.rd_nonpaged = rrddim_add(localPool.pool, "non-paged", NULL,
+                                           1, 1, RRD_ALGORITHM_ABSOLUTE);
+    }
+
+    rrddim_set_by_pointer(localPool.pool,
+                          localPool.rd_paged,
+                          (collected_number)localPool.pagedData.current.Data);
+
+    rrddim_set_by_pointer(localPool.pool,
+                          localPool.rd_nonpaged,
+                          (collected_number)localPool.nonPagedData.current.Data);
+    rrdset_done(localPool.pool);
+}
+
+// Collect everything that comes from the perflib "Memory" object: page
+// faults, available memory, swap and system pool charts.
+// Returns false when the data block has no "Memory" object, true otherwise.
+static bool do_memory(PERF_DATA_BLOCK *pDataBlock, int update_every) {
+    PERF_OBJECT_TYPE *pObjectType = perflibFindObjectTypeByName(pDataBlock, "Memory");
+    if (!pObjectType)
+        return false;
+
+    static COUNTER_DATA pagesPerSec = { .key = "Pages/sec" };
+    static COUNTER_DATA pageFaultsPerSec = { .key = "Page Faults/sec" };
+
+    // page faults are reported only when both counters were read;
+    // minor faults are total minus major, clamped at zero
+    if(perflibGetObjectCounter(pDataBlock, pObjectType, &pageFaultsPerSec) &&
+        perflibGetObjectCounter(pDataBlock, pObjectType, &pagesPerSec)) {
+        ULONGLONG total = pageFaultsPerSec.current.Data;
+        ULONGLONG major = pagesPerSec.current.Data;
+        ULONGLONG minor = (total > major) ? total - major : 0;
+        common_mem_pgfaults(minor, major, update_every);
+    }
+
+    static COUNTER_DATA availableBytes = { .key = "Available Bytes" };
+    static COUNTER_DATA availableKBytes = { .key = "Available KBytes" };
+    static COUNTER_DATA availableMBytes = { .key = "Available MBytes" };
+    ULONGLONG available_bytes = 0;
+
+    // use the first counter that can be read, scaled to bytes;
+    // when none can be read, 0 is reported
+    if(perflibGetObjectCounter(pDataBlock, pObjectType, &availableBytes))
+        available_bytes = availableBytes.current.Data;
+    else if(perflibGetObjectCounter(pDataBlock, pObjectType, &availableKBytes))
+        available_bytes = availableKBytes.current.Data * 1024;
+    else if(perflibGetObjectCounter(pDataBlock, pObjectType, &availableMBytes))
+        available_bytes = availableMBytes.current.Data * 1024 * 1024;
+
+    common_mem_available(available_bytes, update_every);
+
+    do_memory_swap(pDataBlock, pObjectType, update_every);
+
+    do_memory_system_pool(pDataBlock, pObjectType, update_every);
+
+    return true;
+}
+
+// Module entry point. Initializes the counter keys on the first call, then
+// fetches the performance data of the "Memory" registry entry and collects it.
+// Returns -1 when the registry name is not found or no data block is
+// returned, 0 otherwise (the result of do_memory() is not propagated).
+int do_PerflibMemory(int update_every, usec_t dt __maybe_unused) {
+    static bool initialized = false;
+
+    if(unlikely(!initialized)) {
+        initialize();
+        initialized = true;
+    }
+
+    DWORD id = RegistryFindIDByName("Memory");
+    if(id == PERFLIB_REGISTRY_NAME_NOT_FOUND)
+        return -1;
+
+    PERF_DATA_BLOCK *pDataBlock = perflibGetPerformanceData(id);
+    if(!pDataBlock) return -1;
+
+    do_memory(pDataBlock, update_every);
+
+    return 0;
+}
diff --git a/src/collectors/windows.plugin/perflib-network.c b/src/collectors/windows.plugin/perflib-network.c
index e6cb086ca7b706..308984c6bbfd1e 100644
--- a/src/collectors/windows.plugin/perflib-network.c
+++ b/src/collectors/windows.plugin/perflib-network.c
@@ -1,714 +1,713 @@
-// SPDX-License-Identifier: GPL-3.0-or-later
-
-#include "windows_plugin.h"
-#include "windows-internals.h"
-
-
-#define ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, counter)                                      \
-    do {                                                                                                     \
-        if ((p)->packets.counter.key) {                                                                      \
-            packets += perflibGetObjectCounter((pDataBlock), (pObjectType), &(p)->packets.counter) ?
1 : 0; \ - } \ - } while (0) - -#define SET_DIM_IF_KEY_AND_UPDATED(p, field) \ - do { \ - if ((p)->packets.field.key && (p)->packets.field.updated) { \ - rrddim_set_by_pointer( \ - (p)->packets.st, (p)->packets.rd_##field, (collected_number)(p)->packets.field.current.Data); \ - } \ - } while (0) - -#define ADD_RRD_DIM_IF_KEY(packet_field, id, name, multiplier, algorithm) \ - do { \ - if (p->packets.packet_field.key) \ - p->packets.rd_##packet_field = rrddim_add(st, id, name, multiplier, 1, algorithm); \ - } while (0) - -// -------------------------------------------------------------------------------------------------------------------- -// network protocols - -struct network_protocol { - const char *protocol; - - struct { - COUNTER_DATA received; - COUNTER_DATA sent; - COUNTER_DATA delivered; - COUNTER_DATA forwarded; - - COUNTER_DATA InDiscards; - COUNTER_DATA OutDiscards; - COUNTER_DATA InHdrErrors; - COUNTER_DATA InAddrErrors; - COUNTER_DATA InUnknownProtos; - COUNTER_DATA InTooBigErrors; - COUNTER_DATA InTruncatedPkts; - COUNTER_DATA InNoRoutes; - COUNTER_DATA OutNoRoutes; - - COUNTER_DATA InEchoReps; - COUNTER_DATA OutEchoReps; - COUNTER_DATA InDestUnreachs; - COUNTER_DATA OutDestUnreachs; - COUNTER_DATA InRedirects; - COUNTER_DATA OutRedirects; - COUNTER_DATA InEchos; - COUNTER_DATA OutEchos; - COUNTER_DATA InRouterAdvert; - COUNTER_DATA OutRouterAdvert; - COUNTER_DATA InRouterSelect; - COUNTER_DATA OutRouterSelect; - COUNTER_DATA InTimeExcds; - COUNTER_DATA OutTimeExcds; - COUNTER_DATA InParmProbs; - COUNTER_DATA OutParmProbs; - COUNTER_DATA InTimestamps; - COUNTER_DATA OutTimestamps; - COUNTER_DATA InTimestampReps; - COUNTER_DATA OutTimestampReps; - - RRDSET *st; - RRDDIM *rd_received; - RRDDIM *rd_sent; - RRDDIM *rd_forwarded; - RRDDIM *rd_delivered; - - RRDDIM *rd_InDiscards; - RRDDIM *rd_OutDiscards; - RRDDIM *rd_InHdrErrors; - RRDDIM *rd_InAddrErrors; - RRDDIM *rd_InUnknownProtos; - RRDDIM *rd_InTooBigErrors; - RRDDIM *rd_InTruncatedPkts; - RRDDIM 
*rd_InNoRoutes; - RRDDIM *rd_OutNoRoutes; - - RRDDIM *rd_InEchoReps; - RRDDIM *rd_OutEchoReps; - RRDDIM *rd_InDestUnreachs; - RRDDIM *rd_OutDestUnreachs; - RRDDIM *rd_InRedirects; - RRDDIM *rd_OutRedirects; - RRDDIM *rd_InEchos; - RRDDIM *rd_OutEchos; - RRDDIM *rd_InRouterAdvert; - RRDDIM *rd_OutRouterAdvert; - RRDDIM *rd_InRouterSelect; - RRDDIM *rd_OutRouterSelect; - RRDDIM *rd_InTimeExcds; - RRDDIM *rd_OutTimeExcds; - RRDDIM *rd_InParmProbs; - RRDDIM *rd_OutParmProbs; - RRDDIM *rd_InTimestamps; - RRDDIM *rd_OutTimestamps; - RRDDIM *rd_InTimestampReps; - RRDDIM *rd_OutTimestampReps; - - const char *type; - const char *id; - const char *family; - const char *context; - const char *title; - long priority; - } packets; - -} networks[] = { - { - .protocol = "IPv4", - .packets = { - .received = { .key = "Datagrams Received/sec" }, - .sent = { .key = "Datagrams Sent/sec" }, - .delivered = { .key = "Datagrams Received Delivered/sec" }, - .forwarded = { .key = "Datagrams Forwarded/sec" }, - .type = "ipv4", - .id = "packets", - .family = "packets", - .context = "ipv4.packets", - .title = "IPv4 Packets", - .priority = NETDATA_CHART_PRIO_IPV4_PACKETS, - }, - }, - { - .protocol = "IPv6", - .packets = { - .received = { .key = "Datagrams Received/sec" }, - .sent = { .key = "Datagrams Sent/sec" }, - .delivered = { .key = "Datagrams Received Delivered/sec" }, - .forwarded = { .key = "Datagrams Forwarded/sec" }, - .type = "ipv6", - .id = "packets", - .family = "packets", - .context = "ip6.packets", - .title = "IPv6 Packets", - .priority = NETDATA_CHART_PRIO_IPV6_PACKETS, - }, - }, - { - .protocol = "TCPv4", - .packets = { - .received = { .key = "Segments Received/sec" }, - .sent = { .key = "Segments Sent/sec" }, - .type = "ipv4", - .id = "tcppackets", - .family = "tcp", - .context = "ipv4.tcppackets", - .title = "IPv4 TCP Packets", - .priority = NETDATA_CHART_PRIO_IPV4_TCP_PACKETS, - }, - }, - { - .protocol = "TCPv6", - .packets = { - .received = { .key = "Segments Received/sec" 
}, - .sent = { .key = "Segments Sent/sec" }, - .type = "ipv6", - .id = "tcppackets", - .family = "tcp6", - .context = "ipv6.tcppackets", - .title = "IPv6 TCP Packets", - .priority = NETDATA_CHART_PRIO_IPV6_TCP_PACKETS, - }, - }, - { - .protocol = "UDPv4", - .packets = { - .received = { .key = "Datagrams Received/sec" }, - .sent = { .key = "Datagrams Sent/sec" }, - .type = "ipv4", - .id = "udppackets", - .family = "udp", - .context = "ipv4.udppackets", - .title = "IPv4 UDP Packets", - .priority = NETDATA_CHART_PRIO_IPV4_UDP_PACKETS, - }, - }, - { - .protocol = "UDPv6", - .packets = { - .received = { .key = "Datagrams Received/sec" }, - .sent = { .key = "Datagrams Sent/sec" }, - .type = "ipv6", - .id = "udppackets", - .family = "udp6", - .context = "ipv6.udppackets", - .title = "IPv6 UDP Packets", - .priority = NETDATA_CHART_PRIO_IPV6_UDP_PACKETS, - }, - }, - { - .protocol = "ICMP", - .packets = { - .received = { .key = "Messages Received/sec" }, - .sent = { .key = "Messages Sent/sec" }, - .type = "ipv4", - .id = "icmp", - .family = "icmp", - .context = "ipv4.icmp", - .title = "IPv4 ICMP Packets", - .priority = NETDATA_CHART_PRIO_IPV4_ICMP_PACKETS, - }, - }, - { - .protocol = "ICMPv6", - .packets = { - .received = { .key = "Messages Received/sec" }, - .sent = { .key = "Messages Sent/sec" }, - .type = "ipv6", - .id = "icmp", - .family = "icmp6", - .context = "ipv6.icmp", - .title = "IPv6 ICMP Packets", - .priority = NETDATA_CHART_PRIO_IPV6_ICMP_PACKETS, - }, - }, - - { - .protocol = "IPv4", - .packets = { - .InDiscards = { .key = "Datagrams Received Discarded" }, - .OutDiscards = { .key = "Datagrams Outbound Discarded" }, - .OutNoRoutes = { .key = "Datagrams Outbound No Route" }, - .InAddrErrors = { .key = "Datagrams Received Address Errors" }, - .InHdrErrors = { .key = "Datagrams Received Header Errors" }, - .InUnknownProtos = { .key = "Datagrams Received Unknown Protocol" }, - .type = "ipv4", - .id = "errors", - .family = "errors", - .context = "ipv4.errors", - 
.title = "IPv4 errors", - .priority = NETDATA_CHART_PRIO_IPV4_ERRORS, - }, - }, - { - .protocol = "IPv6", - .packets = { - .InDiscards = { .key = "Datagrams Received Discarded" }, - .OutDiscards = { .key = "Datagrams Outbound Discarded" }, - .OutNoRoutes = { .key = "Datagrams Outbound No Route" }, - .InAddrErrors = { .key = "Datagrams Received Address Errors" }, - .InHdrErrors = { .key = "Datagrams Received Header Errors" }, - .InUnknownProtos = { .key = "Datagrams Received Unknown Protocol" }, - .type = "ipv6", - .id = "errors", - .family = "errors", - .context = "ipv6.errors", - .title = "IPv6 errors", - .priority = NETDATA_CHART_PRIO_IPV6_ERRORS, - }, - }, - { - .protocol = "ICMP", - .packets = - { - .InEchoReps = {.key = "Received Echo Reply/sec"}, - .OutEchoReps = {.key = "Received Echo Reply/sec"}, - .InDestUnreachs = {.key = "Received Dest. Unreachable"}, - .OutDestUnreachs = {.key = "Sent Destination Unreachable"}, - .InRedirects = {.key = "Received Redirect/sec"}, - .OutRedirects = {.key = "Sent Redirect/sec"}, - .InEchos = {.key = "Received Echo/sec"}, - .OutEchos = {.key = "Sent Echo/sec"}, - .InRouterAdvert = {.key = NULL}, - .OutRouterAdvert = {.key = NULL}, - .InRouterSelect = {.key = NULL}, - .OutRouterSelect = {.key = NULL}, - .InTimeExcds = {.key = "Received Time Exceeded"}, - .OutTimeExcds = {.key = "Sent Time Exceeded"}, - .InParmProbs = {.key = "Received Parameter Problem"}, - .OutParmProbs = {.key = "Sent Parameter Problem"}, - .InTimestamps = {.key = "Received Timestamp/sec"}, - .OutTimestamps = {.key = "Sent Timestamp/sec"}, - .InTimestampReps = {.key = "Received Timestamp Reply/sec"}, - .OutTimestampReps = {.key = "Sent Timestamp Reply/sec"}, - - .type = "ipv4", - .id = "icmpmsg", - .family = "icmp", - .context = "ipv4.icmpmsg", - .title = "IPv4 ICMP Packets", - .priority = NETDATA_CHART_PRIO_IPV4_ICMP_MESSAGES, - }, - }, - { - .protocol = "ICMPv6", - .packets = - { - .InEchoReps = {.key = "Received Echo Reply/sec"}, - .OutEchoReps = {.key = 
"Received Echo Reply/sec"}, - .InDestUnreachs = {.key = "Received Dest. Unreachable"}, - .OutDestUnreachs = {.key = "Sent Destination Unreachable"}, - .InRedirects = {.key = "Received Redirect/sec"}, - .OutRedirects = {.key = "Sent Redirect/sec"}, - .InEchos = {.key = "Received Echo/sec"}, - .OutEchos = {.key = "Sent Echo/sec"}, - .InRouterAdvert = {.key = NULL}, - .OutRouterAdvert = {.key = NULL}, - .InRouterSelect = {.key = NULL}, - .OutRouterSelect = {.key = NULL}, - .InTimeExcds = {.key = "Received Time Exceeded"}, - .OutTimeExcds = {.key = "Sent Time Exceeded"}, - .InParmProbs = {.key = "Received Parameter Problem"}, - .OutParmProbs = {.key = "Sent Parameter Problem"}, - .InTimestamps = {.key = "Received Timestamp/sec"}, - .OutTimestamps = {.key = "Sent Timestamp/sec"}, - .InTimestampReps = {.key = "Received Timestamp Reply/sec"}, - .OutTimestampReps = {.key = "Sent Timestamp Reply/sec"}, - - .type = "ipv6", - .id = "icmpmsg", - .family = "icmp", - .context = "ipv6.icmpmsg", - .title = "IPv6 ICMP Packets", - .priority = NETDATA_CHART_PRIO_IPV6_ICMP_MESSAGES, - }, - }, - - // terminator - { - .protocol = NULL, - } -}; - -struct network_protocol tcp46 = { - .packets = { - .type = "ip", - .id = "tcppackets", - .family = "tcp", - .context = "ip.tcppackets", - .title = "TCP Packets", - .priority = NETDATA_CHART_PRIO_IP_TCP_PACKETS, - } -}; - -static void protocol_packets_chart_update(struct network_protocol *p, int update_every) { - if(!p->packets.st) { - p->packets.st = rrdset_create_localhost( - p->packets.type - , p->packets.id - , NULL - , p->packets.family - , NULL - , p->packets.title - , "packets/s" - , PLUGIN_WINDOWS_NAME - , "PerflibNetwork" - , p->packets.priority - , update_every - , RRDSET_TYPE_AREA - ); - - RRDSET *st = p->packets.st; - - ADD_RRD_DIM_IF_KEY(received, "received", NULL, 1, RRD_ALGORITHM_INCREMENTAL); - ADD_RRD_DIM_IF_KEY(sent, "sent", NULL, -1, RRD_ALGORITHM_INCREMENTAL); - ADD_RRD_DIM_IF_KEY(forwarded, "forwarded", NULL, -1, 
RRD_ALGORITHM_INCREMENTAL); - ADD_RRD_DIM_IF_KEY(delivered, "delivered", NULL, 1, RRD_ALGORITHM_INCREMENTAL); - ADD_RRD_DIM_IF_KEY(InDiscards, "InDiscards", NULL, 1, RRD_ALGORITHM_INCREMENTAL); - ADD_RRD_DIM_IF_KEY(OutDiscards, "OutDiscards", NULL, -1, RRD_ALGORITHM_INCREMENTAL); - ADD_RRD_DIM_IF_KEY(InHdrErrors, "InHdrErrors", NULL, 1, RRD_ALGORITHM_INCREMENTAL); - ADD_RRD_DIM_IF_KEY(InAddrErrors, "InAddrErrors", NULL, 1, RRD_ALGORITHM_INCREMENTAL); - ADD_RRD_DIM_IF_KEY(InUnknownProtos, "InUnknownProtos", NULL, 1, RRD_ALGORITHM_INCREMENTAL); - ADD_RRD_DIM_IF_KEY(InTooBigErrors, "InTooBigErrors", NULL, 1, RRD_ALGORITHM_INCREMENTAL); - ADD_RRD_DIM_IF_KEY(InTruncatedPkts, "InTruncatedPkts", NULL, 1, RRD_ALGORITHM_INCREMENTAL); - ADD_RRD_DIM_IF_KEY(InNoRoutes, "InNoRoutes", NULL, 1, RRD_ALGORITHM_INCREMENTAL); - ADD_RRD_DIM_IF_KEY(OutNoRoutes, "OutNoRoutes", NULL, -1, RRD_ALGORITHM_INCREMENTAL); - ADD_RRD_DIM_IF_KEY(InEchoReps, "InType0", "InEchoReps", 1, RRD_ALGORITHM_INCREMENTAL); - ADD_RRD_DIM_IF_KEY(OutEchoReps, "OutType0", "OutEchoReps", -1, RRD_ALGORITHM_INCREMENTAL); - ADD_RRD_DIM_IF_KEY(InDestUnreachs, "InType3", "InDestUnreachs", 1, RRD_ALGORITHM_INCREMENTAL); - ADD_RRD_DIM_IF_KEY(OutDestUnreachs, "OutType3", "OutDestUnreachs", -1, RRD_ALGORITHM_INCREMENTAL); - ADD_RRD_DIM_IF_KEY(InRedirects, "InType5", "InRedirects", 1, RRD_ALGORITHM_INCREMENTAL); - ADD_RRD_DIM_IF_KEY(OutRedirects, "OutType5", "OutRedirects", -1, RRD_ALGORITHM_INCREMENTAL); - ADD_RRD_DIM_IF_KEY(InEchos, "InType8", "InEchos", 1, RRD_ALGORITHM_INCREMENTAL); - ADD_RRD_DIM_IF_KEY(OutEchos, "OutType8", "OutEchos", -1, RRD_ALGORITHM_INCREMENTAL); - ADD_RRD_DIM_IF_KEY(InRouterAdvert, "InType9", "InRouterAdvert", 1, RRD_ALGORITHM_INCREMENTAL); - ADD_RRD_DIM_IF_KEY(OutRouterAdvert, "OutType9", "OutRouterAdvert", -1, RRD_ALGORITHM_INCREMENTAL); - ADD_RRD_DIM_IF_KEY(InRouterSelect, "InType10", "InRouterSelect", 1, RRD_ALGORITHM_INCREMENTAL); - ADD_RRD_DIM_IF_KEY(OutRouterSelect, "OutType10", 
"OutRouterSelect", -1, RRD_ALGORITHM_INCREMENTAL); - ADD_RRD_DIM_IF_KEY(InTimeExcds, "InType11", "InTimeExcds", 1, RRD_ALGORITHM_INCREMENTAL); - ADD_RRD_DIM_IF_KEY(OutTimeExcds, "OutType11", "OutTimeExcds", -1, RRD_ALGORITHM_INCREMENTAL); - ADD_RRD_DIM_IF_KEY(InParmProbs, "InType12", "InParmProbs", 1, RRD_ALGORITHM_INCREMENTAL); - ADD_RRD_DIM_IF_KEY(OutParmProbs, "OutType12", "OutParmProbs", -1, RRD_ALGORITHM_INCREMENTAL); - ADD_RRD_DIM_IF_KEY(InTimestamps, "InType13", "InTimestamps", 1, RRD_ALGORITHM_INCREMENTAL); - ADD_RRD_DIM_IF_KEY(OutTimestamps, "OutType13", "OutTimestamps", -1, RRD_ALGORITHM_INCREMENTAL); - ADD_RRD_DIM_IF_KEY(InTimestampReps, "InType14", "InTimestampReps", 1, RRD_ALGORITHM_INCREMENTAL); - ADD_RRD_DIM_IF_KEY(OutTimestampReps, "OutType14", "OutTimestampReps", -1, RRD_ALGORITHM_INCREMENTAL); - - } - - SET_DIM_IF_KEY_AND_UPDATED(p, received); - SET_DIM_IF_KEY_AND_UPDATED(p, sent); - - SET_DIM_IF_KEY_AND_UPDATED(p, forwarded); - SET_DIM_IF_KEY_AND_UPDATED(p, delivered); - SET_DIM_IF_KEY_AND_UPDATED(p, InDiscards); - SET_DIM_IF_KEY_AND_UPDATED(p, OutDiscards); - SET_DIM_IF_KEY_AND_UPDATED(p, InHdrErrors); - SET_DIM_IF_KEY_AND_UPDATED(p, InAddrErrors); - SET_DIM_IF_KEY_AND_UPDATED(p, InUnknownProtos); - SET_DIM_IF_KEY_AND_UPDATED(p, InTooBigErrors); - SET_DIM_IF_KEY_AND_UPDATED(p, InTruncatedPkts); - SET_DIM_IF_KEY_AND_UPDATED(p, InNoRoutes); - SET_DIM_IF_KEY_AND_UPDATED(p, OutNoRoutes); - SET_DIM_IF_KEY_AND_UPDATED(p, InEchoReps); - SET_DIM_IF_KEY_AND_UPDATED(p, OutEchoReps); - SET_DIM_IF_KEY_AND_UPDATED(p, InDestUnreachs); - SET_DIM_IF_KEY_AND_UPDATED(p, OutDestUnreachs); - SET_DIM_IF_KEY_AND_UPDATED(p, InRedirects); - SET_DIM_IF_KEY_AND_UPDATED(p, OutRedirects); - SET_DIM_IF_KEY_AND_UPDATED(p, InEchos); - SET_DIM_IF_KEY_AND_UPDATED(p, OutEchos); - SET_DIM_IF_KEY_AND_UPDATED(p, InRouterAdvert); - SET_DIM_IF_KEY_AND_UPDATED(p, OutRouterAdvert); - SET_DIM_IF_KEY_AND_UPDATED(p, InRouterSelect); - SET_DIM_IF_KEY_AND_UPDATED(p, OutRouterSelect); - 
SET_DIM_IF_KEY_AND_UPDATED(p, InTimeExcds); - SET_DIM_IF_KEY_AND_UPDATED(p, OutTimeExcds); - SET_DIM_IF_KEY_AND_UPDATED(p, InParmProbs); - SET_DIM_IF_KEY_AND_UPDATED(p, OutParmProbs); - SET_DIM_IF_KEY_AND_UPDATED(p, InTimestamps); - SET_DIM_IF_KEY_AND_UPDATED(p, OutTimestamps); - SET_DIM_IF_KEY_AND_UPDATED(p, InTimestampReps); - SET_DIM_IF_KEY_AND_UPDATED(p, OutTimestampReps); - - rrdset_done(p->packets.st); -} - -static bool do_network_protocol(PERF_DATA_BLOCK *pDataBlock, int update_every, struct network_protocol *p) { - if(!p || !p->protocol) return false; - - PERF_OBJECT_TYPE *pObjectType = perflibFindObjectTypeByName(pDataBlock, p->protocol); - if(!pObjectType) return false; - - size_t packets = 0; - ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, received); - ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, sent); - ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, delivered); - ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, forwarded); - - ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, InDiscards); - ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, OutDiscards); - ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, InHdrErrors); - ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, InAddrErrors); - ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, InUnknownProtos); - ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, InTooBigErrors); - ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, InTruncatedPkts); - ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, InNoRoutes); - ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, OutNoRoutes); - ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, InEchoReps); - ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, OutEchoReps); - ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, InDestUnreachs); - ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, OutDestUnreachs); - - ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, InRedirects); - 
ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, OutRedirects); - ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, InEchos); - ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, OutEchos); - ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, InRouterAdvert); - ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, OutRouterAdvert); - ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, InRouterSelect); - ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, OutRouterSelect); - ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, InTimeExcds); - ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, OutTimeExcds); - ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, InParmProbs); - ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, OutParmProbs); - ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, InTimestamps); - ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, OutTimestamps); - ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, InTimestampReps); - ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, OutTimestampReps); - - if(packets) - protocol_packets_chart_update(p, update_every); - - return true; -} - -// -------------------------------------------------------------------------------------------------------------------- -// network interfaces - -struct network_interface { - bool collected_metadata; - - struct { - COUNTER_DATA received; - COUNTER_DATA sent; - - RRDSET *st; - RRDDIM *rd_received; - RRDDIM *rd_sent; - } packets; - - struct { - COUNTER_DATA received; - COUNTER_DATA sent; - - RRDSET *st; - RRDDIM *rd_received; - RRDDIM *rd_sent; - } traffic; -}; - -static DICTIONARY *physical_interfaces = NULL, *virtual_interfaces = NULL; - -static void network_interface_init(struct network_interface *ni) { - ni->packets.received.key = "Packets Received/sec"; - ni->packets.sent.key = "Packets Sent/sec"; - - ni->traffic.received.key = "Bytes Received/sec"; - ni->traffic.sent.key = "Bytes Sent/sec"; -} - -void 
dict_interface_insert_cb(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { - struct network_interface *ni = value; - network_interface_init(ni); -} - -static void initialize(void) { - physical_interfaces = dictionary_create_advanced(DICT_OPTION_DONT_OVERWRITE_VALUE | - DICT_OPTION_FIXED_SIZE, NULL, sizeof(struct network_interface)); - - virtual_interfaces = dictionary_create_advanced(DICT_OPTION_DONT_OVERWRITE_VALUE | - DICT_OPTION_FIXED_SIZE, NULL, sizeof(struct network_interface)); - - dictionary_register_insert_callback(physical_interfaces, dict_interface_insert_cb, NULL); - dictionary_register_insert_callback(virtual_interfaces, dict_interface_insert_cb, NULL); -} - -static void add_interface_labels(RRDSET *st, const char *name, bool physical) { - rrdlabels_add(st->rrdlabels, "device", name, RRDLABEL_SRC_AUTO); - rrdlabels_add(st->rrdlabels, "interface_type", physical ? "real" : "virtual", RRDLABEL_SRC_AUTO); -} - -static bool is_physical_interface(const char *name) { - void *d = dictionary_get(physical_interfaces, name); - return d ? true : false; -} - -static bool do_network_interface(PERF_DATA_BLOCK *pDataBlock, int update_every, bool physical) { - DICTIONARY *dict = physical_interfaces; - - PERF_OBJECT_TYPE *pObjectType = perflibFindObjectTypeByName(pDataBlock, physical ? 
"Network Interface" : "Network Adapter"); - if(!pObjectType) return false; - - uint64_t total_received = 0, total_sent = 0; - - PERF_INSTANCE_DEFINITION *pi = NULL; - for(LONG i = 0; i < pObjectType->NumInstances ; i++) { - pi = perflibForEachInstance(pDataBlock, pObjectType, pi); - if(!pi) break; - - if(!getInstanceName(pDataBlock, pObjectType, pi, windows_shared_buffer, sizeof(windows_shared_buffer))) - strncpyz(windows_shared_buffer, "[unknown]", sizeof(windows_shared_buffer) - 1); - - if(strcasecmp(windows_shared_buffer, "_Total") == 0) - continue; - - if(!physical && is_physical_interface(windows_shared_buffer)) - // this virtual interface is already reported as physical interface - continue; - - struct network_interface *d = dictionary_set(dict, windows_shared_buffer, NULL, sizeof(*d)); - - if(!d->collected_metadata) { - // TODO - get metadata about the network interface - d->collected_metadata = true; - } - - if(perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->traffic.received) && - perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->traffic.sent)) { - - if(d->traffic.received.current.Data == 0 && d->traffic.sent.current.Data == 0) - // this interface has not received or sent any traffic - continue; - - if (unlikely(!d->traffic.st)) { - d->traffic.st = rrdset_create_localhost( - "net", - windows_shared_buffer, - NULL, - windows_shared_buffer, - "net.net", - "Bandwidth", - "kilobits/s", - PLUGIN_WINDOWS_NAME, - "PerflibNetwork", - NETDATA_CHART_PRIO_FIRST_NET_IFACE, - update_every, - RRDSET_TYPE_AREA); - - rrdset_flag_set(d->traffic.st, RRDSET_FLAG_DETAIL); - - add_interface_labels(d->traffic.st, windows_shared_buffer, physical); - - d->traffic.rd_received = rrddim_add(d->traffic.st, "received", NULL, 8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); - d->traffic.rd_sent = rrddim_add(d->traffic.st, "sent", NULL, -8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); - } - - total_received += d->traffic.received.current.Data; - total_sent += 
d->traffic.sent.current.Data; - - rrddim_set_by_pointer(d->traffic.st, d->traffic.rd_received, (collected_number)d->traffic.received.current.Data); - rrddim_set_by_pointer(d->traffic.st, d->traffic.rd_sent, (collected_number)d->traffic.sent.current.Data); - rrdset_done(d->traffic.st); - } - - if(perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->packets.received) && - perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->packets.sent)) { - - if (unlikely(!d->packets.st)) { - d->packets.st = rrdset_create_localhost( - "net_packets", - windows_shared_buffer, - NULL, - windows_shared_buffer, - "net.packets", - "Packets", - "packets/s", - PLUGIN_WINDOWS_NAME, - "PerflibNetwork", - NETDATA_CHART_PRIO_FIRST_NET_IFACE + 1, - update_every, - RRDSET_TYPE_LINE); - - rrdset_flag_set(d->packets.st, RRDSET_FLAG_DETAIL); - - add_interface_labels(d->traffic.st, windows_shared_buffer, physical); - - d->packets.rd_received = rrddim_add(d->packets.st, "received", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - d->packets.rd_sent = rrddim_add(d->packets.st, "sent", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); - } - - rrddim_set_by_pointer(d->packets.st, d->packets.rd_received, (collected_number)d->packets.received.current.Data); - rrddim_set_by_pointer(d->packets.st, d->packets.rd_sent, (collected_number)d->packets.sent.current.Data); - rrdset_done(d->packets.st); - } - } - - if(physical) { - static RRDSET *st = NULL; - static RRDDIM *rd_received = NULL, *rd_sent = NULL; - - if (unlikely(!st)) { - st = rrdset_create_localhost( - "system", - "net", - NULL, - "network", - "system.net", - "Physical Network Interfaces Aggregated Bandwidth", - "kilobits/s", - PLUGIN_WINDOWS_NAME, - "PerflibNetwork", - NETDATA_CHART_PRIO_SYSTEM_NET, - update_every, - RRDSET_TYPE_AREA); - - rd_received = rrddim_add(st, "received", NULL, 8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); - rd_sent = rrddim_add(st, "sent", NULL, -8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); - } - - 
rrddim_set_by_pointer(st, rd_received, (collected_number)total_received); - rrddim_set_by_pointer(st, rd_sent, (collected_number)total_sent); - rrdset_done(st); - } - - return true; -} - -int do_PerflibNetwork(int update_every, usec_t dt __maybe_unused) { - static bool initialized = false; - - if(unlikely(!initialized)) { - initialize(); - initialized = true; - } - - DWORD id = RegistryFindIDByName("Network Interface"); - if(id == PERFLIB_REGISTRY_NAME_NOT_FOUND) - return -1; - - PERF_DATA_BLOCK *pDataBlock = perflibGetPerformanceData(id); - if(!pDataBlock) return -1; - - do_network_interface(pDataBlock, update_every, true); - do_network_interface(pDataBlock, update_every, false); - - struct network_protocol *tcp4 = NULL, *tcp6 = NULL; - for(size_t i = 0; networks[i].protocol ;i++) { - do_network_protocol(pDataBlock, update_every, &networks[i]); - - if(!tcp4 && strcmp(networks[i].protocol, "TCPv4") == 0) - tcp4 = &networks[i]; - if(!tcp6 && strcmp(networks[i].protocol, "TCPv6") == 0) - tcp6 = &networks[i]; - } - - if(tcp4 && tcp6) { - tcp46.packets.received = tcp4->packets.received; - tcp46.packets.sent = tcp4->packets.sent; - tcp46.packets.received.current.Data += tcp6->packets.received.current.Data; - tcp46.packets.sent.current.Data += tcp6->packets.sent.current.Data; - protocol_packets_chart_update(&tcp46, update_every); - } - return 0; -} +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "windows_plugin.h" +#include "windows-internals.h" + +#define ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, counter) \ + do { \ + if ((p)->packets.counter.key) { \ + packets += perflibGetObjectCounter((pDataBlock), (pObjectType), &(p)->packets.counter) ? 
1 : 0; \ + } \ + } while (0) + +#define SET_DIM_IF_KEY_AND_UPDATED(p, field) \ + do { \ + if ((p)->packets.field.key && (p)->packets.field.updated) { \ + rrddim_set_by_pointer( \ + (p)->packets.st, (p)->packets.rd_##field, (collected_number)(p)->packets.field.current.Data); \ + } \ + } while (0) + +#define ADD_RRD_DIM_IF_KEY(packet_field, id, name, multiplier, algorithm) \ + do { \ + if (p->packets.packet_field.key) \ + p->packets.rd_##packet_field = rrddim_add(st, id, name, multiplier, 1, algorithm); \ + } while (0) + +// -------------------------------------------------------------------------------------------------------------------- +// network protocols + +struct network_protocol { + const char *protocol; + + struct { + COUNTER_DATA received; + COUNTER_DATA sent; + COUNTER_DATA delivered; + COUNTER_DATA forwarded; + + COUNTER_DATA InDiscards; + COUNTER_DATA OutDiscards; + COUNTER_DATA InHdrErrors; + COUNTER_DATA InAddrErrors; + COUNTER_DATA InUnknownProtos; + COUNTER_DATA InTooBigErrors; + COUNTER_DATA InTruncatedPkts; + COUNTER_DATA InNoRoutes; + COUNTER_DATA OutNoRoutes; + + COUNTER_DATA InEchoReps; + COUNTER_DATA OutEchoReps; + COUNTER_DATA InDestUnreachs; + COUNTER_DATA OutDestUnreachs; + COUNTER_DATA InRedirects; + COUNTER_DATA OutRedirects; + COUNTER_DATA InEchos; + COUNTER_DATA OutEchos; + COUNTER_DATA InRouterAdvert; + COUNTER_DATA OutRouterAdvert; + COUNTER_DATA InRouterSelect; + COUNTER_DATA OutRouterSelect; + COUNTER_DATA InTimeExcds; + COUNTER_DATA OutTimeExcds; + COUNTER_DATA InParmProbs; + COUNTER_DATA OutParmProbs; + COUNTER_DATA InTimestamps; + COUNTER_DATA OutTimestamps; + COUNTER_DATA InTimestampReps; + COUNTER_DATA OutTimestampReps; + + RRDSET *st; + RRDDIM *rd_received; + RRDDIM *rd_sent; + RRDDIM *rd_forwarded; + RRDDIM *rd_delivered; + + RRDDIM *rd_InDiscards; + RRDDIM *rd_OutDiscards; + RRDDIM *rd_InHdrErrors; + RRDDIM *rd_InAddrErrors; + RRDDIM *rd_InUnknownProtos; + RRDDIM *rd_InTooBigErrors; + RRDDIM *rd_InTruncatedPkts; + RRDDIM 
*rd_InNoRoutes; + RRDDIM *rd_OutNoRoutes; + + RRDDIM *rd_InEchoReps; + RRDDIM *rd_OutEchoReps; + RRDDIM *rd_InDestUnreachs; + RRDDIM *rd_OutDestUnreachs; + RRDDIM *rd_InRedirects; + RRDDIM *rd_OutRedirects; + RRDDIM *rd_InEchos; + RRDDIM *rd_OutEchos; + RRDDIM *rd_InRouterAdvert; + RRDDIM *rd_OutRouterAdvert; + RRDDIM *rd_InRouterSelect; + RRDDIM *rd_OutRouterSelect; + RRDDIM *rd_InTimeExcds; + RRDDIM *rd_OutTimeExcds; + RRDDIM *rd_InParmProbs; + RRDDIM *rd_OutParmProbs; + RRDDIM *rd_InTimestamps; + RRDDIM *rd_OutTimestamps; + RRDDIM *rd_InTimestampReps; + RRDDIM *rd_OutTimestampReps; + + const char *type; + const char *id; + const char *family; + const char *context; + const char *title; + long priority; + } packets; + +} networks[] = { + { + .protocol = "IPv4", + .packets = { + .received = { .key = "Datagrams Received/sec" }, + .sent = { .key = "Datagrams Sent/sec" }, + .delivered = { .key = "Datagrams Received Delivered/sec" }, + .forwarded = { .key = "Datagrams Forwarded/sec" }, + .type = "ipv4", + .id = "packets", + .family = "packets", + .context = "ipv4.packets", + .title = "IPv4 Packets", + .priority = NETDATA_CHART_PRIO_IPV4_PACKETS, + }, + }, + { + .protocol = "IPv6", + .packets = { + .received = { .key = "Datagrams Received/sec" }, + .sent = { .key = "Datagrams Sent/sec" }, + .delivered = { .key = "Datagrams Received Delivered/sec" }, + .forwarded = { .key = "Datagrams Forwarded/sec" }, + .type = "ipv6", + .id = "packets", + .family = "packets", + .context = "ip6.packets", + .title = "IPv6 Packets", + .priority = NETDATA_CHART_PRIO_IPV6_PACKETS, + }, + }, + { + .protocol = "TCPv4", + .packets = { + .received = { .key = "Segments Received/sec" }, + .sent = { .key = "Segments Sent/sec" }, + .type = "ipv4", + .id = "tcppackets", + .family = "tcp", + .context = "ipv4.tcppackets", + .title = "IPv4 TCP Packets", + .priority = NETDATA_CHART_PRIO_IPV4_TCP_PACKETS, + }, + }, + { + .protocol = "TCPv6", + .packets = { + .received = { .key = "Segments Received/sec" 
}, + .sent = { .key = "Segments Sent/sec" }, + .type = "ipv6", + .id = "tcppackets", + .family = "tcp6", + .context = "ipv6.tcppackets", + .title = "IPv6 TCP Packets", + .priority = NETDATA_CHART_PRIO_IPV6_TCP_PACKETS, + }, + }, + { + .protocol = "UDPv4", + .packets = { + .received = { .key = "Datagrams Received/sec" }, + .sent = { .key = "Datagrams Sent/sec" }, + .type = "ipv4", + .id = "udppackets", + .family = "udp", + .context = "ipv4.udppackets", + .title = "IPv4 UDP Packets", + .priority = NETDATA_CHART_PRIO_IPV4_UDP_PACKETS, + }, + }, + { + .protocol = "UDPv6", + .packets = { + .received = { .key = "Datagrams Received/sec" }, + .sent = { .key = "Datagrams Sent/sec" }, + .type = "ipv6", + .id = "udppackets", + .family = "udp6", + .context = "ipv6.udppackets", + .title = "IPv6 UDP Packets", + .priority = NETDATA_CHART_PRIO_IPV6_UDP_PACKETS, + }, + }, + { + .protocol = "ICMP", + .packets = { + .received = { .key = "Messages Received/sec" }, + .sent = { .key = "Messages Sent/sec" }, + .type = "ipv4", + .id = "icmp", + .family = "icmp", + .context = "ipv4.icmp", + .title = "IPv4 ICMP Packets", + .priority = NETDATA_CHART_PRIO_IPV4_ICMP_PACKETS, + }, + }, + { + .protocol = "ICMPv6", + .packets = { + .received = { .key = "Messages Received/sec" }, + .sent = { .key = "Messages Sent/sec" }, + .type = "ipv6", + .id = "icmp", + .family = "icmp6", + .context = "ipv6.icmp", + .title = "IPv6 ICMP Packets", + .priority = NETDATA_CHART_PRIO_IPV6_ICMP_PACKETS, + }, + }, + + { + .protocol = "IPv4", + .packets = { + .InDiscards = { .key = "Datagrams Received Discarded" }, + .OutDiscards = { .key = "Datagrams Outbound Discarded" }, + .OutNoRoutes = { .key = "Datagrams Outbound No Route" }, + .InAddrErrors = { .key = "Datagrams Received Address Errors" }, + .InHdrErrors = { .key = "Datagrams Received Header Errors" }, + .InUnknownProtos = { .key = "Datagrams Received Unknown Protocol" }, + .type = "ipv4", + .id = "errors", + .family = "errors", + .context = "ipv4.errors", + 
.title = "IPv4 errors", + .priority = NETDATA_CHART_PRIO_IPV4_ERRORS, + }, + }, + { + .protocol = "IPv6", + .packets = { + .InDiscards = { .key = "Datagrams Received Discarded" }, + .OutDiscards = { .key = "Datagrams Outbound Discarded" }, + .OutNoRoutes = { .key = "Datagrams Outbound No Route" }, + .InAddrErrors = { .key = "Datagrams Received Address Errors" }, + .InHdrErrors = { .key = "Datagrams Received Header Errors" }, + .InUnknownProtos = { .key = "Datagrams Received Unknown Protocol" }, + .type = "ipv6", + .id = "errors", + .family = "errors", + .context = "ipv6.errors", + .title = "IPv6 errors", + .priority = NETDATA_CHART_PRIO_IPV6_ERRORS, + }, + }, + { + .protocol = "ICMP", + .packets = + { + .InEchoReps = {.key = "Received Echo Reply/sec"}, + .OutEchoReps = {.key = "Received Echo Reply/sec"}, + .InDestUnreachs = {.key = "Received Dest. Unreachable"}, + .OutDestUnreachs = {.key = "Sent Destination Unreachable"}, + .InRedirects = {.key = "Received Redirect/sec"}, + .OutRedirects = {.key = "Sent Redirect/sec"}, + .InEchos = {.key = "Received Echo/sec"}, + .OutEchos = {.key = "Sent Echo/sec"}, + .InRouterAdvert = {.key = NULL}, + .OutRouterAdvert = {.key = NULL}, + .InRouterSelect = {.key = NULL}, + .OutRouterSelect = {.key = NULL}, + .InTimeExcds = {.key = "Received Time Exceeded"}, + .OutTimeExcds = {.key = "Sent Time Exceeded"}, + .InParmProbs = {.key = "Received Parameter Problem"}, + .OutParmProbs = {.key = "Sent Parameter Problem"}, + .InTimestamps = {.key = "Received Timestamp/sec"}, + .OutTimestamps = {.key = "Sent Timestamp/sec"}, + .InTimestampReps = {.key = "Received Timestamp Reply/sec"}, + .OutTimestampReps = {.key = "Sent Timestamp Reply/sec"}, + + .type = "ipv4", + .id = "icmpmsg", + .family = "icmp", + .context = "ipv4.icmpmsg", + .title = "IPv4 ICMP Packets", + .priority = NETDATA_CHART_PRIO_IPV4_ICMP_MESSAGES, + }, + }, + { + .protocol = "ICMPv6", + .packets = + { + .InEchoReps = {.key = "Received Echo Reply/sec"}, + .OutEchoReps = {.key = 
"Received Echo Reply/sec"}, + .InDestUnreachs = {.key = "Received Dest. Unreachable"}, + .OutDestUnreachs = {.key = "Sent Destination Unreachable"}, + .InRedirects = {.key = "Received Redirect/sec"}, + .OutRedirects = {.key = "Sent Redirect/sec"}, + .InEchos = {.key = "Received Echo/sec"}, + .OutEchos = {.key = "Sent Echo/sec"}, + .InRouterAdvert = {.key = NULL}, + .OutRouterAdvert = {.key = NULL}, + .InRouterSelect = {.key = NULL}, + .OutRouterSelect = {.key = NULL}, + .InTimeExcds = {.key = "Received Time Exceeded"}, + .OutTimeExcds = {.key = "Sent Time Exceeded"}, + .InParmProbs = {.key = "Received Parameter Problem"}, + .OutParmProbs = {.key = "Sent Parameter Problem"}, + .InTimestamps = {.key = "Received Timestamp/sec"}, + .OutTimestamps = {.key = "Sent Timestamp/sec"}, + .InTimestampReps = {.key = "Received Timestamp Reply/sec"}, + .OutTimestampReps = {.key = "Sent Timestamp Reply/sec"}, + + .type = "ipv6", + .id = "icmpmsg", + .family = "icmp", + .context = "ipv6.icmpmsg", + .title = "IPv6 ICMP Packets", + .priority = NETDATA_CHART_PRIO_IPV6_ICMP_MESSAGES, + }, + }, + + // terminator + { + .protocol = NULL, + } +}; + +struct network_protocol tcp46 = { + .packets = { + .type = "ip", + .id = "tcppackets", + .family = "tcp", + .context = "ip.tcppackets", + .title = "TCP Packets", + .priority = NETDATA_CHART_PRIO_IP_TCP_PACKETS, + } +}; + +static void protocol_packets_chart_update(struct network_protocol *p, int update_every) { + if(!p->packets.st) { + p->packets.st = rrdset_create_localhost( + p->packets.type + , p->packets.id + , NULL + , p->packets.family + , NULL + , p->packets.title + , "packets/s" + , PLUGIN_WINDOWS_NAME + , "PerflibNetwork" + , p->packets.priority + , update_every + , RRDSET_TYPE_AREA + ); + + RRDSET *st = p->packets.st; + + ADD_RRD_DIM_IF_KEY(received, "received", NULL, 1, RRD_ALGORITHM_INCREMENTAL); + ADD_RRD_DIM_IF_KEY(sent, "sent", NULL, -1, RRD_ALGORITHM_INCREMENTAL); + ADD_RRD_DIM_IF_KEY(forwarded, "forwarded", NULL, -1, 
RRD_ALGORITHM_INCREMENTAL); + ADD_RRD_DIM_IF_KEY(delivered, "delivered", NULL, 1, RRD_ALGORITHM_INCREMENTAL); + ADD_RRD_DIM_IF_KEY(InDiscards, "InDiscards", NULL, 1, RRD_ALGORITHM_INCREMENTAL); + ADD_RRD_DIM_IF_KEY(OutDiscards, "OutDiscards", NULL, -1, RRD_ALGORITHM_INCREMENTAL); + ADD_RRD_DIM_IF_KEY(InHdrErrors, "InHdrErrors", NULL, 1, RRD_ALGORITHM_INCREMENTAL); + ADD_RRD_DIM_IF_KEY(InAddrErrors, "InAddrErrors", NULL, 1, RRD_ALGORITHM_INCREMENTAL); + ADD_RRD_DIM_IF_KEY(InUnknownProtos, "InUnknownProtos", NULL, 1, RRD_ALGORITHM_INCREMENTAL); + ADD_RRD_DIM_IF_KEY(InTooBigErrors, "InTooBigErrors", NULL, 1, RRD_ALGORITHM_INCREMENTAL); + ADD_RRD_DIM_IF_KEY(InTruncatedPkts, "InTruncatedPkts", NULL, 1, RRD_ALGORITHM_INCREMENTAL); + ADD_RRD_DIM_IF_KEY(InNoRoutes, "InNoRoutes", NULL, 1, RRD_ALGORITHM_INCREMENTAL); + ADD_RRD_DIM_IF_KEY(OutNoRoutes, "OutNoRoutes", NULL, -1, RRD_ALGORITHM_INCREMENTAL); + ADD_RRD_DIM_IF_KEY(InEchoReps, "InType0", "InEchoReps", 1, RRD_ALGORITHM_INCREMENTAL); + ADD_RRD_DIM_IF_KEY(OutEchoReps, "OutType0", "OutEchoReps", -1, RRD_ALGORITHM_INCREMENTAL); + ADD_RRD_DIM_IF_KEY(InDestUnreachs, "InType3", "InDestUnreachs", 1, RRD_ALGORITHM_INCREMENTAL); + ADD_RRD_DIM_IF_KEY(OutDestUnreachs, "OutType3", "OutDestUnreachs", -1, RRD_ALGORITHM_INCREMENTAL); + ADD_RRD_DIM_IF_KEY(InRedirects, "InType5", "InRedirects", 1, RRD_ALGORITHM_INCREMENTAL); + ADD_RRD_DIM_IF_KEY(OutRedirects, "OutType5", "OutRedirects", -1, RRD_ALGORITHM_INCREMENTAL); + ADD_RRD_DIM_IF_KEY(InEchos, "InType8", "InEchos", 1, RRD_ALGORITHM_INCREMENTAL); + ADD_RRD_DIM_IF_KEY(OutEchos, "OutType8", "OutEchos", -1, RRD_ALGORITHM_INCREMENTAL); + ADD_RRD_DIM_IF_KEY(InRouterAdvert, "InType9", "InRouterAdvert", 1, RRD_ALGORITHM_INCREMENTAL); + ADD_RRD_DIM_IF_KEY(OutRouterAdvert, "OutType9", "OutRouterAdvert", -1, RRD_ALGORITHM_INCREMENTAL); + ADD_RRD_DIM_IF_KEY(InRouterSelect, "InType10", "InRouterSelect", 1, RRD_ALGORITHM_INCREMENTAL); + ADD_RRD_DIM_IF_KEY(OutRouterSelect, "OutType10", 
"OutRouterSelect", -1, RRD_ALGORITHM_INCREMENTAL); + ADD_RRD_DIM_IF_KEY(InTimeExcds, "InType11", "InTimeExcds", 1, RRD_ALGORITHM_INCREMENTAL); + ADD_RRD_DIM_IF_KEY(OutTimeExcds, "OutType11", "OutTimeExcds", -1, RRD_ALGORITHM_INCREMENTAL); + ADD_RRD_DIM_IF_KEY(InParmProbs, "InType12", "InParmProbs", 1, RRD_ALGORITHM_INCREMENTAL); + ADD_RRD_DIM_IF_KEY(OutParmProbs, "OutType12", "OutParmProbs", -1, RRD_ALGORITHM_INCREMENTAL); + ADD_RRD_DIM_IF_KEY(InTimestamps, "InType13", "InTimestamps", 1, RRD_ALGORITHM_INCREMENTAL); + ADD_RRD_DIM_IF_KEY(OutTimestamps, "OutType13", "OutTimestamps", -1, RRD_ALGORITHM_INCREMENTAL); + ADD_RRD_DIM_IF_KEY(InTimestampReps, "InType14", "InTimestampReps", 1, RRD_ALGORITHM_INCREMENTAL); + ADD_RRD_DIM_IF_KEY(OutTimestampReps, "OutType14", "OutTimestampReps", -1, RRD_ALGORITHM_INCREMENTAL); + + } + + SET_DIM_IF_KEY_AND_UPDATED(p, received); + SET_DIM_IF_KEY_AND_UPDATED(p, sent); + + SET_DIM_IF_KEY_AND_UPDATED(p, forwarded); + SET_DIM_IF_KEY_AND_UPDATED(p, delivered); + SET_DIM_IF_KEY_AND_UPDATED(p, InDiscards); + SET_DIM_IF_KEY_AND_UPDATED(p, OutDiscards); + SET_DIM_IF_KEY_AND_UPDATED(p, InHdrErrors); + SET_DIM_IF_KEY_AND_UPDATED(p, InAddrErrors); + SET_DIM_IF_KEY_AND_UPDATED(p, InUnknownProtos); + SET_DIM_IF_KEY_AND_UPDATED(p, InTooBigErrors); + SET_DIM_IF_KEY_AND_UPDATED(p, InTruncatedPkts); + SET_DIM_IF_KEY_AND_UPDATED(p, InNoRoutes); + SET_DIM_IF_KEY_AND_UPDATED(p, OutNoRoutes); + SET_DIM_IF_KEY_AND_UPDATED(p, InEchoReps); + SET_DIM_IF_KEY_AND_UPDATED(p, OutEchoReps); + SET_DIM_IF_KEY_AND_UPDATED(p, InDestUnreachs); + SET_DIM_IF_KEY_AND_UPDATED(p, OutDestUnreachs); + SET_DIM_IF_KEY_AND_UPDATED(p, InRedirects); + SET_DIM_IF_KEY_AND_UPDATED(p, OutRedirects); + SET_DIM_IF_KEY_AND_UPDATED(p, InEchos); + SET_DIM_IF_KEY_AND_UPDATED(p, OutEchos); + SET_DIM_IF_KEY_AND_UPDATED(p, InRouterAdvert); + SET_DIM_IF_KEY_AND_UPDATED(p, OutRouterAdvert); + SET_DIM_IF_KEY_AND_UPDATED(p, InRouterSelect); + SET_DIM_IF_KEY_AND_UPDATED(p, OutRouterSelect); + 
SET_DIM_IF_KEY_AND_UPDATED(p, InTimeExcds); + SET_DIM_IF_KEY_AND_UPDATED(p, OutTimeExcds); + SET_DIM_IF_KEY_AND_UPDATED(p, InParmProbs); + SET_DIM_IF_KEY_AND_UPDATED(p, OutParmProbs); + SET_DIM_IF_KEY_AND_UPDATED(p, InTimestamps); + SET_DIM_IF_KEY_AND_UPDATED(p, OutTimestamps); + SET_DIM_IF_KEY_AND_UPDATED(p, InTimestampReps); + SET_DIM_IF_KEY_AND_UPDATED(p, OutTimestampReps); + + rrdset_done(p->packets.st); +} + +static bool do_network_protocol(PERF_DATA_BLOCK *pDataBlock, int update_every, struct network_protocol *p) { + if(!p || !p->protocol) return false; + + PERF_OBJECT_TYPE *pObjectType = perflibFindObjectTypeByName(pDataBlock, p->protocol); + if(!pObjectType) return false; + + size_t packets = 0; + ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, received); + ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, sent); + ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, delivered); + ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, forwarded); + + ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, InDiscards); + ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, OutDiscards); + ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, InHdrErrors); + ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, InAddrErrors); + ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, InUnknownProtos); + ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, InTooBigErrors); + ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, InTruncatedPkts); + ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, InNoRoutes); + ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, OutNoRoutes); + ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, InEchoReps); + ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, OutEchoReps); + ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, InDestUnreachs); + ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, OutDestUnreachs); + + ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, InRedirects); + 
ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, OutRedirects); + ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, InEchos); + ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, OutEchos); + ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, InRouterAdvert); + ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, OutRouterAdvert); + ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, InRouterSelect); + ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, OutRouterSelect); + ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, InTimeExcds); + ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, OutTimeExcds); + ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, InParmProbs); + ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, OutParmProbs); + ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, InTimestamps); + ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, OutTimestamps); + ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, InTimestampReps); + ADD_PACKET_IF_KEY(p, packets, pDataBlock, pObjectType, OutTimestampReps); + + if(packets) + protocol_packets_chart_update(p, update_every); + + return true; +} + +// -------------------------------------------------------------------------------------------------------------------- +// network interfaces + +struct network_interface { + bool collected_metadata; + + struct { + COUNTER_DATA received; + COUNTER_DATA sent; + + RRDSET *st; + RRDDIM *rd_received; + RRDDIM *rd_sent; + } packets; + + struct { + COUNTER_DATA received; + COUNTER_DATA sent; + + RRDSET *st; + RRDDIM *rd_received; + RRDDIM *rd_sent; + } traffic; +}; + +static DICTIONARY *physical_interfaces = NULL, *virtual_interfaces = NULL; + +static void network_interface_init(struct network_interface *ni) { + ni->packets.received.key = "Packets Received/sec"; + ni->packets.sent.key = "Packets Sent/sec"; + + ni->traffic.received.key = "Bytes Received/sec"; + ni->traffic.sent.key = "Bytes Sent/sec"; +} + +void 
dict_interface_insert_cb(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { + struct network_interface *ni = value; + network_interface_init(ni); +} + +static void initialize(void) { + physical_interfaces = dictionary_create_advanced(DICT_OPTION_DONT_OVERWRITE_VALUE | + DICT_OPTION_FIXED_SIZE, NULL, sizeof(struct network_interface)); + + virtual_interfaces = dictionary_create_advanced(DICT_OPTION_DONT_OVERWRITE_VALUE | + DICT_OPTION_FIXED_SIZE, NULL, sizeof(struct network_interface)); + + dictionary_register_insert_callback(physical_interfaces, dict_interface_insert_cb, NULL); + dictionary_register_insert_callback(virtual_interfaces, dict_interface_insert_cb, NULL); +} + +static void add_interface_labels(RRDSET *st, const char *name, bool physical) { + rrdlabels_add(st->rrdlabels, "device", name, RRDLABEL_SRC_AUTO); + rrdlabels_add(st->rrdlabels, "interface_type", physical ? "real" : "virtual", RRDLABEL_SRC_AUTO); +} + +static bool is_physical_interface(const char *name) { + void *d = dictionary_get(physical_interfaces, name); + return d ? true : false; +} + +static bool do_network_interface(PERF_DATA_BLOCK *pDataBlock, int update_every, bool physical) { + DICTIONARY *dict = physical_interfaces; + + PERF_OBJECT_TYPE *pObjectType = perflibFindObjectTypeByName(pDataBlock, physical ? 
"Network Interface" : "Network Adapter"); + if(!pObjectType) return false; + + uint64_t total_received = 0, total_sent = 0; + + PERF_INSTANCE_DEFINITION *pi = NULL; + for(LONG i = 0; i < pObjectType->NumInstances ; i++) { + pi = perflibForEachInstance(pDataBlock, pObjectType, pi); + if(!pi) break; + + if(!getInstanceName(pDataBlock, pObjectType, pi, windows_shared_buffer, sizeof(windows_shared_buffer))) + strncpyz(windows_shared_buffer, "[unknown]", sizeof(windows_shared_buffer) - 1); + + if(strcasecmp(windows_shared_buffer, "_Total") == 0) + continue; + + if(!physical && is_physical_interface(windows_shared_buffer)) + // this virtual interface is already reported as physical interface + continue; + + struct network_interface *d = dictionary_set(dict, windows_shared_buffer, NULL, sizeof(*d)); + + if(!d->collected_metadata) { + // TODO - get metadata about the network interface + d->collected_metadata = true; + } + + if(perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->traffic.received) && + perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->traffic.sent)) { + + if(d->traffic.received.current.Data == 0 && d->traffic.sent.current.Data == 0) + // this interface has not received or sent any traffic + continue; + + if (unlikely(!d->traffic.st)) { + d->traffic.st = rrdset_create_localhost( + "net", + windows_shared_buffer, + NULL, + windows_shared_buffer, + "net.net", + "Bandwidth", + "kilobits/s", + PLUGIN_WINDOWS_NAME, + "PerflibNetwork", + NETDATA_CHART_PRIO_FIRST_NET_IFACE, + update_every, + RRDSET_TYPE_AREA); + + rrdset_flag_set(d->traffic.st, RRDSET_FLAG_DETAIL); + + add_interface_labels(d->traffic.st, windows_shared_buffer, physical); + + d->traffic.rd_received = rrddim_add(d->traffic.st, "received", NULL, 8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + d->traffic.rd_sent = rrddim_add(d->traffic.st, "sent", NULL, -8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + } + + total_received += d->traffic.received.current.Data; + total_sent += 
d->traffic.sent.current.Data; + + rrddim_set_by_pointer(d->traffic.st, d->traffic.rd_received, (collected_number)d->traffic.received.current.Data); + rrddim_set_by_pointer(d->traffic.st, d->traffic.rd_sent, (collected_number)d->traffic.sent.current.Data); + rrdset_done(d->traffic.st); + } + + if(perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->packets.received) && + perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->packets.sent)) { + + if (unlikely(!d->packets.st)) { + d->packets.st = rrdset_create_localhost( + "net_packets", + windows_shared_buffer, + NULL, + windows_shared_buffer, + "net.packets", + "Packets", + "packets/s", + PLUGIN_WINDOWS_NAME, + "PerflibNetwork", + NETDATA_CHART_PRIO_FIRST_NET_IFACE + 1, + update_every, + RRDSET_TYPE_LINE); + + rrdset_flag_set(d->packets.st, RRDSET_FLAG_DETAIL); + + add_interface_labels(d->traffic.st, windows_shared_buffer, physical); + + d->packets.rd_received = rrddim_add(d->packets.st, "received", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + d->packets.rd_sent = rrddim_add(d->packets.st, "sent", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(d->packets.st, d->packets.rd_received, (collected_number)d->packets.received.current.Data); + rrddim_set_by_pointer(d->packets.st, d->packets.rd_sent, (collected_number)d->packets.sent.current.Data); + rrdset_done(d->packets.st); + } + } + + if(physical) { + static RRDSET *st = NULL; + static RRDDIM *rd_received = NULL, *rd_sent = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "system", + "net", + NULL, + "network", + "system.net", + "Physical Network Interfaces Aggregated Bandwidth", + "kilobits/s", + PLUGIN_WINDOWS_NAME, + "PerflibNetwork", + NETDATA_CHART_PRIO_SYSTEM_NET, + update_every, + RRDSET_TYPE_AREA); + + rd_received = rrddim_add(st, "received", NULL, 8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + rd_sent = rrddim_add(st, "sent", NULL, -8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + } + + 
rrddim_set_by_pointer(st, rd_received, (collected_number)total_received); + rrddim_set_by_pointer(st, rd_sent, (collected_number)total_sent); + rrdset_done(st); + } + + return true; +} + +int do_PerflibNetwork(int update_every, usec_t dt __maybe_unused) { + static bool initialized = false; + + if(unlikely(!initialized)) { + initialize(); + initialized = true; + } + + DWORD id = RegistryFindIDByName("Network Interface"); + if(id == PERFLIB_REGISTRY_NAME_NOT_FOUND) + return -1; + + PERF_DATA_BLOCK *pDataBlock = perflibGetPerformanceData(id); + if(!pDataBlock) return -1; + + do_network_interface(pDataBlock, update_every, true); + do_network_interface(pDataBlock, update_every, false); + + struct network_protocol *tcp4 = NULL, *tcp6 = NULL; + for(size_t i = 0; networks[i].protocol ;i++) { + do_network_protocol(pDataBlock, update_every, &networks[i]); + + if(!tcp4 && strcmp(networks[i].protocol, "TCPv4") == 0) + tcp4 = &networks[i]; + if(!tcp6 && strcmp(networks[i].protocol, "TCPv6") == 0) + tcp6 = &networks[i]; + } + + if(tcp4 && tcp6) { + tcp46.packets.received = tcp4->packets.received; + tcp46.packets.sent = tcp4->packets.sent; + tcp46.packets.received.current.Data += tcp6->packets.received.current.Data; + tcp46.packets.sent.current.Data += tcp6->packets.sent.current.Data; + protocol_packets_chart_update(&tcp46, update_every); + } + return 0; +} diff --git a/src/collectors/windows.plugin/perflib-objects.c b/src/collectors/windows.plugin/perflib-objects.c index 6628ff8640158e..cb1bc8d22e9fd2 100644 --- a/src/collectors/windows.plugin/perflib-objects.c +++ b/src/collectors/windows.plugin/perflib-objects.c @@ -1,47 +1,47 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "windows_plugin.h" -#include "windows-internals.h" - -#define _COMMON_PLUGIN_NAME "windows.plugin" -#define _COMMON_PLUGIN_MODULE_NAME "PerflibObjects" -#include "../common-contexts/common-contexts.h" - -static void initialize(void) { - ; -} - -static bool do_objects(PERF_DATA_BLOCK 
*pDataBlock, int update_every) { - PERF_OBJECT_TYPE *pObjectType = perflibFindObjectTypeByName(pDataBlock, "Objects"); - if (!pObjectType) - return false; - - static COUNTER_DATA semaphores = { .key = "Semaphores" }; - - if(perflibGetObjectCounter(pDataBlock, pObjectType, &semaphores)) { - ULONGLONG sem = semaphores.current.Data; - common_semaphore_ipc(sem, WINDOWS_MAX_KERNEL_OBJECT, _COMMON_PLUGIN_MODULE_NAME, update_every); - } - - return true; -} - -int do_PerflibObjects(int update_every, usec_t dt __maybe_unused) { - static bool initialized = false; - - if(unlikely(!initialized)) { - initialize(); - initialized = true; - } - - DWORD id = RegistryFindIDByName("Objects"); - if(id == PERFLIB_REGISTRY_NAME_NOT_FOUND) - return -1; - - PERF_DATA_BLOCK *pDataBlock = perflibGetPerformanceData(id); - if(!pDataBlock) return -1; - - do_objects(pDataBlock, update_every); - - return 0; -} +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "windows_plugin.h" +#include "windows-internals.h" + +#define _COMMON_PLUGIN_NAME "windows.plugin" +#define _COMMON_PLUGIN_MODULE_NAME "PerflibObjects" +#include "../common-contexts/common-contexts.h" + +static void initialize(void) { + ; +} + +static bool do_objects(PERF_DATA_BLOCK *pDataBlock, int update_every) { + PERF_OBJECT_TYPE *pObjectType = perflibFindObjectTypeByName(pDataBlock, "Objects"); + if (!pObjectType) + return false; + + static COUNTER_DATA semaphores = { .key = "Semaphores" }; + + if(perflibGetObjectCounter(pDataBlock, pObjectType, &semaphores)) { + ULONGLONG sem = semaphores.current.Data; + common_semaphore_ipc(sem, WINDOWS_MAX_KERNEL_OBJECT, _COMMON_PLUGIN_MODULE_NAME, update_every); + } + + return true; +} + +int do_PerflibObjects(int update_every, usec_t dt __maybe_unused) { + static bool initialized = false; + + if(unlikely(!initialized)) { + initialize(); + initialized = true; + } + + DWORD id = RegistryFindIDByName("Objects"); + if(id == PERFLIB_REGISTRY_NAME_NOT_FOUND) + return -1; + + PERF_DATA_BLOCK 
*pDataBlock = perflibGetPerformanceData(id); + if(!pDataBlock) return -1; + + do_objects(pDataBlock, update_every); + + return 0; +} diff --git a/src/collectors/windows.plugin/perflib-processes.c b/src/collectors/windows.plugin/perflib-processes.c index 92aa243b92f90f..70e388eed9bbdb 100644 --- a/src/collectors/windows.plugin/perflib-processes.c +++ b/src/collectors/windows.plugin/perflib-processes.c @@ -1,58 +1,58 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "windows_plugin.h" -#include "windows-internals.h" - -#define _COMMON_PLUGIN_NAME "windows.plugin" -#define _COMMON_PLUGIN_MODULE_NAME "PerflibProcesses" -#include "../common-contexts/common-contexts.h" - -static void initialize(void) { - ; -} - -static bool do_processes(PERF_DATA_BLOCK *pDataBlock, int update_every) { - PERF_OBJECT_TYPE *pObjectType = perflibFindObjectTypeByName(pDataBlock, "System"); - if (!pObjectType) - return false; - - static COUNTER_DATA processesRunning = { .key = "Processes" }; - static COUNTER_DATA contextSwitchPerSec = { .key = "Context Switches/sec" }; - static COUNTER_DATA threads = { .key = "Threads" }; - - if(perflibGetObjectCounter(pDataBlock, pObjectType, &processesRunning)) { - ULONGLONG running = processesRunning.current.Data; - common_system_processes(running, update_every); - } - - if(perflibGetObjectCounter(pDataBlock, pObjectType, &contextSwitchPerSec)) { - ULONGLONG contexts = contextSwitchPerSec.current.Data; - common_system_context_switch(contexts, update_every); - } - - if(perflibGetObjectCounter(pDataBlock, pObjectType, &threads)) { - ULONGLONG totalThreads = threads.current.Data; - common_system_threads(totalThreads, update_every); - } - return true; -} - -int do_PerflibProcesses(int update_every, usec_t dt __maybe_unused) { - static bool initialized = false; - - if(unlikely(!initialized)) { - initialize(); - initialized = true; - } - - DWORD id = RegistryFindIDByName("System"); - if(id == PERFLIB_REGISTRY_NAME_NOT_FOUND) - return -1; - - 
PERF_DATA_BLOCK *pDataBlock = perflibGetPerformanceData(id); - if(!pDataBlock) return -1; - - do_processes(pDataBlock, update_every); - - return 0; -} +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "windows_plugin.h" +#include "windows-internals.h" + +#define _COMMON_PLUGIN_NAME "windows.plugin" +#define _COMMON_PLUGIN_MODULE_NAME "PerflibProcesses" +#include "../common-contexts/common-contexts.h" + +static void initialize(void) { + ; +} + +static bool do_processes(PERF_DATA_BLOCK *pDataBlock, int update_every) { + PERF_OBJECT_TYPE *pObjectType = perflibFindObjectTypeByName(pDataBlock, "System"); + if (!pObjectType) + return false; + + static COUNTER_DATA processesRunning = { .key = "Processes" }; + static COUNTER_DATA contextSwitchPerSec = { .key = "Context Switches/sec" }; + static COUNTER_DATA threads = { .key = "Threads" }; + + if(perflibGetObjectCounter(pDataBlock, pObjectType, &processesRunning)) { + ULONGLONG running = processesRunning.current.Data; + common_system_processes(running, update_every); + } + + if(perflibGetObjectCounter(pDataBlock, pObjectType, &contextSwitchPerSec)) { + ULONGLONG contexts = contextSwitchPerSec.current.Data; + common_system_context_switch(contexts, update_every); + } + + if(perflibGetObjectCounter(pDataBlock, pObjectType, &threads)) { + ULONGLONG totalThreads = threads.current.Data; + common_system_threads(totalThreads, update_every); + } + return true; +} + +int do_PerflibProcesses(int update_every, usec_t dt __maybe_unused) { + static bool initialized = false; + + if(unlikely(!initialized)) { + initialize(); + initialized = true; + } + + DWORD id = RegistryFindIDByName("System"); + if(id == PERFLIB_REGISTRY_NAME_NOT_FOUND) + return -1; + + PERF_DATA_BLOCK *pDataBlock = perflibGetPerformanceData(id); + if(!pDataBlock) return -1; + + do_processes(pDataBlock, update_every); + + return 0; +} diff --git a/src/collectors/windows.plugin/perflib-processor.c b/src/collectors/windows.plugin/perflib-processor.c index 
4c7d86c90c8660..a3df0fced1ae9c 100644 --- a/src/collectors/windows.plugin/perflib-processor.c +++ b/src/collectors/windows.plugin/perflib-processor.c @@ -1,205 +1,205 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "windows_plugin.h" -#include "windows-internals.h" - -#define _COMMON_PLUGIN_NAME "windows.plugin" -#define _COMMON_PLUGIN_MODULE_NAME "PerflibProcesses" -#include "../common-contexts/common-contexts.h" - -struct processor { - bool collected_metadata; - - RRDSET *st; - RRDDIM *rd_user; - RRDDIM *rd_system; - RRDDIM *rd_irq; - RRDDIM *rd_dpc; - RRDDIM *rd_idle; - -// RRDSET *st2; -// RRDDIM *rd2_busy; - - COUNTER_DATA percentProcessorTime; - COUNTER_DATA percentUserTime; - COUNTER_DATA percentPrivilegedTime; - COUNTER_DATA percentDPCTime; - COUNTER_DATA percentInterruptTime; - COUNTER_DATA percentIdleTime; - - COUNTER_DATA interruptsPerSec; -}; - -struct processor total = { 0 }; - -void initialize_processor_keys(struct processor *p) { - p->percentProcessorTime.key = "% Processor Time"; - p->percentUserTime.key = "% User Time"; - p->percentPrivilegedTime.key = "% Privileged Time"; - p->percentDPCTime.key = "% DPC Time"; - p->percentInterruptTime.key = "% Interrupt Time"; - p->percentIdleTime.key = "% Idle Time"; - p->interruptsPerSec.key = "Interrupts/sec"; -} - -void dict_processor_insert_cb(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { - struct processor *p = value; - initialize_processor_keys(p); -} - -static DICTIONARY *processors = NULL; - -static void initialize(void) { - initialize_processor_keys(&total); - - processors = dictionary_create_advanced(DICT_OPTION_DONT_OVERWRITE_VALUE | - DICT_OPTION_FIXED_SIZE, NULL, sizeof(struct processor)); - - dictionary_register_insert_callback(processors, dict_processor_insert_cb, NULL); -} - -static bool do_processors(PERF_DATA_BLOCK *pDataBlock, int update_every) { - PERF_OBJECT_TYPE *pObjectType = perflibFindObjectTypeByName(pDataBlock, "Processor"); - 
if(!pObjectType) return false; - - static const RRDVAR_ACQUIRED *cpus_var = NULL; - int cores_found = 0; - uint64_t totalIPC = 0; - - PERF_INSTANCE_DEFINITION *pi = NULL; - for(LONG i = 0; i < pObjectType->NumInstances ; i++) { - pi = perflibForEachInstance(pDataBlock, pObjectType, pi); - if(!pi) break; - - if(!getInstanceName(pDataBlock, pObjectType, pi, windows_shared_buffer, sizeof(windows_shared_buffer))) - strncpyz(windows_shared_buffer, "[unknown]", sizeof(windows_shared_buffer) - 1); - - bool is_total = false; - struct processor *p; - int cpu = -1; - if(strcasecmp(windows_shared_buffer, "_Total") == 0) { - p = &total; - is_total = true; - cpu = -1; - } - else { - p = dictionary_set(processors, windows_shared_buffer, NULL, sizeof(*p)); - is_total = false; - cpu = str2i(windows_shared_buffer); - snprintfz(windows_shared_buffer, sizeof(windows_shared_buffer), "cpu%d", cpu); - - if(cpu + 1 > cores_found) - cores_found = cpu + 1; - } - - if(!is_total && !p->collected_metadata) { - // TODO collect processor metadata - p->collected_metadata = true; - } - - perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &p->percentProcessorTime); - perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &p->percentUserTime); - perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &p->percentPrivilegedTime); - perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &p->percentDPCTime); - perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &p->percentInterruptTime); - perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &p->percentIdleTime); - - perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &p->interruptsPerSec); - - if(!p->st) { - p->st = rrdset_create_localhost( - is_total ? "system" : "cpu" - , is_total ? "cpu" : windows_shared_buffer, NULL - , is_total ? "cpu" : "utilization" - , is_total ? "system.cpu" : "cpu.cpu" - , is_total ? "Total CPU Utilization" : "Core Utilization" - , "percentage" - , PLUGIN_WINDOWS_NAME - , "PerflibProcessor" - , is_total ? 
NETDATA_CHART_PRIO_SYSTEM_CPU : NETDATA_CHART_PRIO_CPU_PER_CORE - , update_every - , RRDSET_TYPE_STACKED - ); - - p->rd_irq = rrddim_add(p->st, "interrupts", "irq", 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); - p->rd_user = rrddim_add(p->st, "user", NULL, 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); - p->rd_system = rrddim_add(p->st, "privileged", "system", 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); - p->rd_dpc = rrddim_add(p->st, "dpc", NULL, 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); - p->rd_idle = rrddim_add(p->st, "idle", NULL, 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); - rrddim_hide(p->st, "idle"); - - if(!is_total) - rrdlabels_add(p->st->rrdlabels, "cpu", windows_shared_buffer, RRDLABEL_SRC_AUTO); - else - cpus_var = rrdvar_host_variable_add_and_acquire(localhost, "active_processors"); - } - - uint64_t user = p->percentUserTime.current.Data; - uint64_t system = p->percentPrivilegedTime.current.Data; - uint64_t dpc = p->percentDPCTime.current.Data; - uint64_t irq = p->percentInterruptTime.current.Data; - uint64_t idle = p->percentIdleTime.current.Data; - - totalIPC += p->interruptsPerSec.current.Data; - - rrddim_set_by_pointer(p->st, p->rd_user, (collected_number)user); - rrddim_set_by_pointer(p->st, p->rd_system, (collected_number)system); - rrddim_set_by_pointer(p->st, p->rd_irq, (collected_number)irq); - rrddim_set_by_pointer(p->st, p->rd_dpc, (collected_number)dpc); - rrddim_set_by_pointer(p->st, p->rd_idle, (collected_number)idle); - rrdset_done(p->st); - -// if(!p->st2) { -// p->st2 = rrdset_create_localhost( -// is_total ? "system" : "cpu2" -// , is_total ? "cpu3" : buffer -// , NULL -// , is_total ? "utilization" : buffer -// , is_total ? "system.cpu3" : "cpu2.cpu" -// , is_total ? "Total CPU Utilization" : "Core Utilization" -// , "percentage" -// , PLUGIN_WINDOWS_NAME -// , "PerflibProcessor" -// , is_total ? 
NETDATA_CHART_PRIO_SYSTEM_CPU : NETDATA_CHART_PRIO_CPU_PER_CORE -// , update_every -// , RRDSET_TYPE_STACKED -// ); -// -// p->rd2_busy = perflib_rrddim_add(p->st2, "busy", NULL, 1, 1, &p->percentProcessorTime); -// rrddim_hide(p->st2, "idle"); -// -// if(!is_total) -// rrdlabels_add(p->st->rrdlabels, "cpu", buffer, RRDLABEL_SRC_AUTO); -// } -// -// perflib_rrddim_set_by_pointer(p->st2, p->rd2_busy, &p->percentProcessorTime); -// rrdset_done(p->st2); - } - - if(cpus_var) - rrdvar_host_variable_set(localhost, cpus_var, cores_found); - - common_interrupts(totalIPC, update_every, NULL); - - return true; -} - -int do_PerflibProcessor(int update_every, usec_t dt __maybe_unused) { - static bool initialized = false; - - if(unlikely(!initialized)) { - initialize(); - initialized = true; - } - - DWORD id = RegistryFindIDByName("Processor"); - if(id == PERFLIB_REGISTRY_NAME_NOT_FOUND) - return -1; - - PERF_DATA_BLOCK *pDataBlock = perflibGetPerformanceData(id); - if(!pDataBlock) return -1; - - do_processors(pDataBlock, update_every); - - return 0; -} +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "windows_plugin.h" +#include "windows-internals.h" + +#define _COMMON_PLUGIN_NAME "windows.plugin" +#define _COMMON_PLUGIN_MODULE_NAME "PerflibProcesses" +#include "../common-contexts/common-contexts.h" + +struct processor { + bool collected_metadata; + + RRDSET *st; + RRDDIM *rd_user; + RRDDIM *rd_system; + RRDDIM *rd_irq; + RRDDIM *rd_dpc; + RRDDIM *rd_idle; + +// RRDSET *st2; +// RRDDIM *rd2_busy; + + COUNTER_DATA percentProcessorTime; + COUNTER_DATA percentUserTime; + COUNTER_DATA percentPrivilegedTime; + COUNTER_DATA percentDPCTime; + COUNTER_DATA percentInterruptTime; + COUNTER_DATA percentIdleTime; + + COUNTER_DATA interruptsPerSec; +}; + +struct processor total = { 0 }; + +void initialize_processor_keys(struct processor *p) { + p->percentProcessorTime.key = "% Processor Time"; + p->percentUserTime.key = "% User Time"; + p->percentPrivilegedTime.key = "% Privileged 
Time"; + p->percentDPCTime.key = "% DPC Time"; + p->percentInterruptTime.key = "% Interrupt Time"; + p->percentIdleTime.key = "% Idle Time"; + p->interruptsPerSec.key = "Interrupts/sec"; +} + +void dict_processor_insert_cb(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { + struct processor *p = value; + initialize_processor_keys(p); +} + +static DICTIONARY *processors = NULL; + +static void initialize(void) { + initialize_processor_keys(&total); + + processors = dictionary_create_advanced(DICT_OPTION_DONT_OVERWRITE_VALUE | + DICT_OPTION_FIXED_SIZE, NULL, sizeof(struct processor)); + + dictionary_register_insert_callback(processors, dict_processor_insert_cb, NULL); +} + +static bool do_processors(PERF_DATA_BLOCK *pDataBlock, int update_every) { + PERF_OBJECT_TYPE *pObjectType = perflibFindObjectTypeByName(pDataBlock, "Processor"); + if(!pObjectType) return false; + + static const RRDVAR_ACQUIRED *cpus_var = NULL; + int cores_found = 0; + uint64_t totalIPC = 0; + + PERF_INSTANCE_DEFINITION *pi = NULL; + for(LONG i = 0; i < pObjectType->NumInstances ; i++) { + pi = perflibForEachInstance(pDataBlock, pObjectType, pi); + if(!pi) break; + + if(!getInstanceName(pDataBlock, pObjectType, pi, windows_shared_buffer, sizeof(windows_shared_buffer))) + strncpyz(windows_shared_buffer, "[unknown]", sizeof(windows_shared_buffer) - 1); + + bool is_total = false; + struct processor *p; + int cpu = -1; + if(strcasecmp(windows_shared_buffer, "_Total") == 0) { + p = &total; + is_total = true; + cpu = -1; + } + else { + p = dictionary_set(processors, windows_shared_buffer, NULL, sizeof(*p)); + is_total = false; + cpu = str2i(windows_shared_buffer); + snprintfz(windows_shared_buffer, sizeof(windows_shared_buffer), "cpu%d", cpu); + + if(cpu + 1 > cores_found) + cores_found = cpu + 1; + } + + if(!is_total && !p->collected_metadata) { + // TODO collect processor metadata + p->collected_metadata = true; + } + + perflibGetInstanceCounter(pDataBlock, 
pObjectType, pi, &p->percentProcessorTime); + perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &p->percentUserTime); + perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &p->percentPrivilegedTime); + perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &p->percentDPCTime); + perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &p->percentInterruptTime); + perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &p->percentIdleTime); + + perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &p->interruptsPerSec); + + if(!p->st) { + p->st = rrdset_create_localhost( + is_total ? "system" : "cpu" + , is_total ? "cpu" : windows_shared_buffer, NULL + , is_total ? "cpu" : "utilization" + , is_total ? "system.cpu" : "cpu.cpu" + , is_total ? "Total CPU Utilization" : "Core Utilization" + , "percentage" + , PLUGIN_WINDOWS_NAME + , "PerflibProcessor" + , is_total ? NETDATA_CHART_PRIO_SYSTEM_CPU : NETDATA_CHART_PRIO_CPU_PER_CORE + , update_every + , RRDSET_TYPE_STACKED + ); + + p->rd_irq = rrddim_add(p->st, "interrupts", "irq", 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + p->rd_user = rrddim_add(p->st, "user", NULL, 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + p->rd_system = rrddim_add(p->st, "privileged", "system", 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + p->rd_dpc = rrddim_add(p->st, "dpc", NULL, 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + p->rd_idle = rrddim_add(p->st, "idle", NULL, 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + rrddim_hide(p->st, "idle"); + + if(!is_total) + rrdlabels_add(p->st->rrdlabels, "cpu", windows_shared_buffer, RRDLABEL_SRC_AUTO); + else + cpus_var = rrdvar_host_variable_add_and_acquire(localhost, "active_processors"); + } + + uint64_t user = p->percentUserTime.current.Data; + uint64_t system = p->percentPrivilegedTime.current.Data; + uint64_t dpc = p->percentDPCTime.current.Data; + uint64_t irq = p->percentInterruptTime.current.Data; + uint64_t idle = p->percentIdleTime.current.Data; + + totalIPC += 
p->interruptsPerSec.current.Data; + + rrddim_set_by_pointer(p->st, p->rd_user, (collected_number)user); + rrddim_set_by_pointer(p->st, p->rd_system, (collected_number)system); + rrddim_set_by_pointer(p->st, p->rd_irq, (collected_number)irq); + rrddim_set_by_pointer(p->st, p->rd_dpc, (collected_number)dpc); + rrddim_set_by_pointer(p->st, p->rd_idle, (collected_number)idle); + rrdset_done(p->st); + +// if(!p->st2) { +// p->st2 = rrdset_create_localhost( +// is_total ? "system" : "cpu2" +// , is_total ? "cpu3" : buffer +// , NULL +// , is_total ? "utilization" : buffer +// , is_total ? "system.cpu3" : "cpu2.cpu" +// , is_total ? "Total CPU Utilization" : "Core Utilization" +// , "percentage" +// , PLUGIN_WINDOWS_NAME +// , "PerflibProcessor" +// , is_total ? NETDATA_CHART_PRIO_SYSTEM_CPU : NETDATA_CHART_PRIO_CPU_PER_CORE +// , update_every +// , RRDSET_TYPE_STACKED +// ); +// +// p->rd2_busy = perflib_rrddim_add(p->st2, "busy", NULL, 1, 1, &p->percentProcessorTime); +// rrddim_hide(p->st2, "idle"); +// +// if(!is_total) +// rrdlabels_add(p->st->rrdlabels, "cpu", buffer, RRDLABEL_SRC_AUTO); +// } +// +// perflib_rrddim_set_by_pointer(p->st2, p->rd2_busy, &p->percentProcessorTime); +// rrdset_done(p->st2); + } + + if(cpus_var) + rrdvar_host_variable_set(localhost, cpus_var, cores_found); + + common_interrupts(totalIPC, update_every, NULL); + + return true; +} + +int do_PerflibProcessor(int update_every, usec_t dt __maybe_unused) { + static bool initialized = false; + + if(unlikely(!initialized)) { + initialize(); + initialized = true; + } + + DWORD id = RegistryFindIDByName("Processor"); + if(id == PERFLIB_REGISTRY_NAME_NOT_FOUND) + return -1; + + PERF_DATA_BLOCK *pDataBlock = perflibGetPerformanceData(id); + if(!pDataBlock) return -1; + + do_processors(pDataBlock, update_every); + + return 0; +} diff --git a/src/collectors/windows.plugin/perflib-rrd.c b/src/collectors/windows.plugin/perflib-rrd.c index d425307eecaf44..410318c9358a29 100644 --- 
a/src/collectors/windows.plugin/perflib-rrd.c +++ b/src/collectors/windows.plugin/perflib-rrd.c @@ -1,411 +1,412 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "perflib-rrd.h" - -#define COLLECTED_NUMBER_PRECISION 10000 - -RRDDIM *perflib_rrddim_add(RRDSET *st, const char *id, const char *name, collected_number multiplier, collected_number divider, COUNTER_DATA *cd) { - RRD_ALGORITHM algorithm = RRD_ALGORITHM_ABSOLUTE; - - switch (cd->current.CounterType) { - case PERF_COUNTER_COUNTER: - case PERF_SAMPLE_COUNTER: - case PERF_COUNTER_BULK_COUNT: - // (N1 - N0) / ((D1 - D0) / F) - // multiplier *= cd->current.Frequency / 10000000; - // tested, the frequency is not that useful for netdata - // we get right results without it. - algorithm = RRD_ALGORITHM_INCREMENTAL; - break; - - case PERF_COUNTER_QUEUELEN_TYPE: - case PERF_COUNTER_100NS_QUEUELEN_TYPE: - case PERF_COUNTER_OBJ_TIME_QUEUELEN_TYPE: - case PERF_COUNTER_LARGE_QUEUELEN_TYPE: - case PERF_AVERAGE_BULK: // normally not displayed - // (N1 - N0) / (D1 - D0) - algorithm = RRD_ALGORITHM_INCREMENTAL; - break; - - case PERF_OBJ_TIME_TIMER: - case PERF_COUNTER_TIMER: - case PERF_100NSEC_TIMER: - case PERF_PRECISION_SYSTEM_TIMER: - case PERF_PRECISION_100NS_TIMER: - case PERF_PRECISION_OBJECT_TIMER: - case PERF_SAMPLE_FRACTION: - // 100 * (N1 - N0) / (D1 - D0) - multiplier *= 100; - algorithm = RRD_ALGORITHM_INCREMENTAL; - break; - - case PERF_COUNTER_TIMER_INV: - case PERF_100NSEC_TIMER_INV: - // 100 * (1 - ((N1 - N0) / (D1 - D0))) - divider *= COLLECTED_NUMBER_PRECISION; - algorithm = RRD_ALGORITHM_ABSOLUTE; - break; - - case PERF_COUNTER_MULTI_TIMER: - // 100 * ((N1 - N0) / ((D1 - D0) / TB)) / B1 - divider *= COLLECTED_NUMBER_PRECISION; - algorithm = RRD_ALGORITHM_ABSOLUTE; - break; - - case PERF_100NSEC_MULTI_TIMER: - // 100 * ((N1 - N0) / (D1 - D0)) / B1 - divider *= COLLECTED_NUMBER_PRECISION; - algorithm = RRD_ALGORITHM_ABSOLUTE; - break; - - case PERF_COUNTER_MULTI_TIMER_INV: - case 
PERF_100NSEC_MULTI_TIMER_INV: - // 100 * (B1 - ((N1 - N0) / (D1 - D0))) - divider *= COLLECTED_NUMBER_PRECISION; - algorithm = RRD_ALGORITHM_ABSOLUTE; - break; - - case PERF_COUNTER_RAWCOUNT: - case PERF_COUNTER_LARGE_RAWCOUNT: - // N as decimal - algorithm = RRD_ALGORITHM_ABSOLUTE; - break; - - case PERF_COUNTER_RAWCOUNT_HEX: - case PERF_COUNTER_LARGE_RAWCOUNT_HEX: - // N as hexadecimal - algorithm = RRD_ALGORITHM_ABSOLUTE; - break; - - case PERF_COUNTER_DELTA: - case PERF_COUNTER_LARGE_DELTA: - // N1 - N0 - algorithm = RRD_ALGORITHM_ABSOLUTE; - break; - - case PERF_RAW_FRACTION: - case PERF_LARGE_RAW_FRACTION: - // 100 * N / B - algorithm = RRD_ALGORITHM_ABSOLUTE; - divider *= COLLECTED_NUMBER_PRECISION; - break; - - case PERF_AVERAGE_TIMER: - // ((N1 - N0) / TB) / (B1 - B0) - // divider *= cd->current.Frequency / 10000000; - algorithm = RRD_ALGORITHM_INCREMENTAL; - break; - - case PERF_ELAPSED_TIME: - // (D0 - N0) / F - algorithm = RRD_ALGORITHM_ABSOLUTE; - break; - - case PERF_COUNTER_TEXT: - case PERF_SAMPLE_BASE: - case PERF_AVERAGE_BASE: - case PERF_COUNTER_MULTI_BASE: - case PERF_RAW_BASE: - case PERF_COUNTER_NODATA: - case PERF_PRECISION_TIMESTAMP: - default: - break; - } - - return rrddim_add(st, id, name, multiplier, divider, algorithm); -} - -#define VALID_DELTA(cd) \ - ((cd)->previous.Time > 0 && (cd)->current.Data >= (cd)->previous.Data && (cd)->current.Time > (cd)->previous.Time) - -collected_number perflib_rrddim_set_by_pointer(RRDSET *st, RRDDIM *rd, COUNTER_DATA *cd) { - ULONGLONG numerator = 0; - LONGLONG denominator = 0; - double doubleValue = 0.0; - collected_number value; - - switch(cd->current.CounterType) { - case PERF_COUNTER_COUNTER: - case PERF_SAMPLE_COUNTER: - case PERF_COUNTER_BULK_COUNT: - // (N1 - N0) / ((D1 - D0) / F) - value = (collected_number)cd->current.Data; - break; - - case PERF_COUNTER_QUEUELEN_TYPE: - case PERF_COUNTER_100NS_QUEUELEN_TYPE: - case PERF_COUNTER_OBJ_TIME_QUEUELEN_TYPE: - case PERF_COUNTER_LARGE_QUEUELEN_TYPE: 
- case PERF_AVERAGE_BULK: // normally not displayed - // (N1 - N0) / (D1 - D0) - value = (collected_number)cd->current.Data; - break; - - case PERF_OBJ_TIME_TIMER: - case PERF_COUNTER_TIMER: - case PERF_100NSEC_TIMER: - case PERF_PRECISION_SYSTEM_TIMER: - case PERF_PRECISION_100NS_TIMER: - case PERF_PRECISION_OBJECT_TIMER: - case PERF_SAMPLE_FRACTION: - // 100 * (N1 - N0) / (D1 - D0) - value = (collected_number)cd->current.Data; - break; - - case PERF_COUNTER_TIMER_INV: - case PERF_100NSEC_TIMER_INV: - // 100 * (1 - ((N1 - N0) / (D1 - D0))) - if(!VALID_DELTA(cd)) return 0; - numerator = cd->current.Data - cd->previous.Data; - denominator = cd->current.Time - cd->previous.Time; - doubleValue = 100.0 * (1.0 - ((double)numerator / (double)denominator)); - // printf("Display value is (timer-inv): %f%%\n", doubleValue); - value = (collected_number)(doubleValue * COLLECTED_NUMBER_PRECISION); - break; - - case PERF_COUNTER_MULTI_TIMER: - // 100 * ((N1 - N0) / ((D1 - D0) / TB)) / B1 - if(!VALID_DELTA(cd)) return 0; - numerator = cd->current.Data - cd->previous.Data; - denominator = cd->current.Time - cd->previous.Time; - denominator /= cd->current.Frequency; - doubleValue = 100.0 * ((double)numerator / (double)denominator) / cd->current.MultiCounterData; - // printf("Display value is (multi-timer): %f%%\n", doubleValue); - value = (collected_number)(doubleValue * COLLECTED_NUMBER_PRECISION); - break; - - case PERF_100NSEC_MULTI_TIMER: - // 100 * ((N1 - N0) / (D1 - D0)) / B1 - if(!VALID_DELTA(cd)) return 0; - numerator = cd->current.Data - cd->previous.Data; - denominator = cd->current.Time - cd->previous.Time; - doubleValue = 100.0 * ((double)numerator / (double)denominator) / (double)cd->current.MultiCounterData; - // printf("Display value is (100ns multi-timer): %f%%\n", doubleValue); - value = (collected_number)(doubleValue * COLLECTED_NUMBER_PRECISION); - break; - - case PERF_COUNTER_MULTI_TIMER_INV: - case PERF_100NSEC_MULTI_TIMER_INV: - // 100 * (B1 - ((N1 - N0) / 
(D1 - D0))) - if(!VALID_DELTA(cd)) return 0; - numerator = cd->current.Data - cd->previous.Data; - denominator = cd->current.Time - cd->previous.Time; - doubleValue = 100.0 * ((double)cd->current.MultiCounterData - ((double)numerator / (double)denominator)); - // printf("Display value is (multi-timer-inv): %f%%\n", doubleValue); - value = (collected_number)(doubleValue * COLLECTED_NUMBER_PRECISION); - break; - - case PERF_COUNTER_RAWCOUNT: - case PERF_COUNTER_LARGE_RAWCOUNT: - // N as decimal - value = (collected_number)cd->current.Data; - break; - - case PERF_COUNTER_RAWCOUNT_HEX: - case PERF_COUNTER_LARGE_RAWCOUNT_HEX: - // N as hexadecimal - value = (collected_number)cd->current.Data; - break; - - case PERF_COUNTER_DELTA: - case PERF_COUNTER_LARGE_DELTA: - if(!VALID_DELTA(cd)) return 0; - value = (collected_number)(cd->current.Data - cd->previous.Data); - break; - - case PERF_RAW_FRACTION: - case PERF_LARGE_RAW_FRACTION: - // 100 * N / B - if(!cd->current.Time) return 0; - doubleValue = 100.0 * (double)cd->current.Data / (double)cd->current.Time; - // printf("Display value is (fraction): %f%%\n", doubleValue); - value = (collected_number)(doubleValue * COLLECTED_NUMBER_PRECISION); - break; - - default: - return 0; - } - - return rrddim_set_by_pointer(st, rd, value); -} - -/* -double perflibCalculateValue(RAW_DATA *current, RAW_DATA *previous) { - ULONGLONG numerator = 0; - LONGLONG denominator = 0; - double doubleValue = 0.0; - DWORD dwordValue = 0; - - if (NULL == previous) { - // Return error if the counter type requires two samples to calculate the value. - switch (current->CounterType) { - default: - if (PERF_DELTA_COUNTER != (current->CounterType & PERF_DELTA_COUNTER)) - break; - __fallthrough; - // fallthrough - - case PERF_AVERAGE_TIMER: // Special case. - case PERF_AVERAGE_BULK: // Special case. 
- // printf(" > The counter type requires two samples but only one sample was provided.\n"); - return NAN; - } - } - else { - if (current->CounterType != previous->CounterType) { - // printf(" > The samples have inconsistent counter types.\n"); - return NAN; - } - - // Check for integer overflow or bad data from provider (the data from - // sample 2 must be greater than the data from sample 1). - if (current->Data < previous->Data) - { - // Can happen for various reasons. Commonly occurs with the Process counterset when - // multiple processes have the same name and one of them starts or stops. - // Normally you'll just drop the older sample and continue. - // printf("> current (%llu) is smaller than previous (%llu).\n", current->Data, previous->Data); - return NAN; - } - } - - switch (current->CounterType) { - case PERF_COUNTER_COUNTER: - case PERF_SAMPLE_COUNTER: - case PERF_COUNTER_BULK_COUNT: - // (N1 - N0) / ((D1 - D0) / F) - numerator = current->Data - previous->Data; - denominator = current->Time - previous->Time; - dwordValue = (DWORD)(numerator / ((double)denominator / current->Frequency)); - //printf("Display value is (counter): %lu%s\n", (unsigned long)dwordValue, - // (previous->CounterType == PERF_SAMPLE_COUNTER) ? 
"" : "/sec"); - return (double)dwordValue; - - case PERF_COUNTER_QUEUELEN_TYPE: - case PERF_COUNTER_100NS_QUEUELEN_TYPE: - case PERF_COUNTER_OBJ_TIME_QUEUELEN_TYPE: - case PERF_COUNTER_LARGE_QUEUELEN_TYPE: - case PERF_AVERAGE_BULK: // normally not displayed - // (N1 - N0) / (D1 - D0) - numerator = current->Data - previous->Data; - denominator = current->Time - previous->Time; - doubleValue = (double)numerator / denominator; - if (previous->CounterType != PERF_AVERAGE_BULK) { - // printf("Display value is (queuelen): %f\n", doubleValue); - return doubleValue; - } - return NAN; - - case PERF_OBJ_TIME_TIMER: - case PERF_COUNTER_TIMER: - case PERF_100NSEC_TIMER: - case PERF_PRECISION_SYSTEM_TIMER: - case PERF_PRECISION_100NS_TIMER: - case PERF_PRECISION_OBJECT_TIMER: - case PERF_SAMPLE_FRACTION: - // 100 * (N1 - N0) / (D1 - D0) - numerator = current->Data - previous->Data; - denominator = current->Time - previous->Time; - doubleValue = (double)(100 * numerator) / denominator; - // printf("Display value is (timer): %f%%\n", doubleValue); - return doubleValue; - - case PERF_COUNTER_TIMER_INV: - // 100 * (1 - ((N1 - N0) / (D1 - D0))) - numerator = current->Data - previous->Data; - denominator = current->Time - previous->Time; - doubleValue = 100 * (1 - ((double)numerator / denominator)); - // printf("Display value is (timer-inv): %f%%\n", doubleValue); - return doubleValue; - - case PERF_100NSEC_TIMER_INV: - // 100 * (1- (N1 - N0) / (D1 - D0)) - numerator = current->Data - previous->Data; - denominator = current->Time - previous->Time; - doubleValue = 100 * (1 - (double)numerator / denominator); - // printf("Display value is (100ns-timer-inv): %f%%\n", doubleValue); - return doubleValue; - - case PERF_COUNTER_MULTI_TIMER: - // 100 * ((N1 - N0) / ((D1 - D0) / TB)) / B1 - numerator = current->Data - previous->Data; - denominator = current->Time - previous->Time; - denominator /= current->Frequency; - doubleValue = 100 * ((double)numerator / denominator) / 
current->MultiCounterData; - // printf("Display value is (multi-timer): %f%%\n", doubleValue); - return doubleValue; - - case PERF_100NSEC_MULTI_TIMER: - // 100 * ((N1 - N0) / (D1 - D0)) / B1 - numerator = current->Data - previous->Data; - denominator = current->Time - previous->Time; - doubleValue = 100 * ((double)numerator / (double)denominator) / (double)current->MultiCounterData; - // printf("Display value is (100ns multi-timer): %f%%\n", doubleValue); - return doubleValue; - - case PERF_COUNTER_MULTI_TIMER_INV: - case PERF_100NSEC_MULTI_TIMER_INV: - // 100 * (B1 - ((N1 - N0) / (D1 - D0))) - numerator = current->Data - previous->Data; - denominator = current->Time - previous->Time; - doubleValue = 100.0 * ((double)current->MultiCounterData - ((double)numerator / (double)denominator)); - // printf("Display value is (multi-timer-inv): %f%%\n", doubleValue); - return doubleValue; - - case PERF_COUNTER_RAWCOUNT: - case PERF_COUNTER_LARGE_RAWCOUNT: - // N as decimal - // printf("Display value is (rawcount): %llu\n", current->Data); - return (double)current->Data; - - case PERF_COUNTER_RAWCOUNT_HEX: - case PERF_COUNTER_LARGE_RAWCOUNT_HEX: - // N as hexadecimal - // printf("Display value is (hex): 0x%llx\n", current->Data); - return (double)current->Data; - - case PERF_COUNTER_DELTA: - case PERF_COUNTER_LARGE_DELTA: - // N1 - N0 - // printf("Display value is (delta): %llu\n", current->Data - previous->Data); - return (double)(current->Data - previous->Data); - - case PERF_RAW_FRACTION: - case PERF_LARGE_RAW_FRACTION: - // 100 * N / B - doubleValue = 100.0 * (double)current->Data / (double)current->Time; - // printf("Display value is (fraction): %f%%\n", doubleValue); - return doubleValue; - - case PERF_AVERAGE_TIMER: - // ((N1 - N0) / TB) / (B1 - B0) - numerator = current->Data - previous->Data; - denominator = current->Time - previous->Time; - doubleValue = (double)numerator / (double)current->Frequency / (double)denominator; - // printf("Display value is (average 
timer): %f seconds\n", doubleValue); - return doubleValue; - - case PERF_ELAPSED_TIME: - // (D0 - N0) / F - doubleValue = (double)(current->Time - current->Data) / (double)current->Frequency; - // printf("Display value is (elapsed time): %f seconds\n", doubleValue); - return doubleValue; - - case PERF_COUNTER_TEXT: - case PERF_SAMPLE_BASE: - case PERF_AVERAGE_BASE: - case PERF_COUNTER_MULTI_BASE: - case PERF_RAW_BASE: - case PERF_COUNTER_NODATA: - case PERF_PRECISION_TIMESTAMP: - // printf(" > Non-printing counter type: 0x%08x\n", current->CounterType); - return NAN; - break; - - default: - // printf(" > Unrecognized counter type: 0x%08x\n", current->CounterType); - return NAN; - break; - } -} -*/ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "perflib-rrd.h" +#include + +#define COLLECTED_NUMBER_PRECISION 10000 + +RRDDIM *perflib_rrddim_add(RRDSET *st, const char *id, const char *name, collected_number multiplier, collected_number divider, COUNTER_DATA *cd) { + RRD_ALGORITHM algorithm = RRD_ALGORITHM_ABSOLUTE; + + switch (cd->current.CounterType) { + case PERF_COUNTER_COUNTER: + case PERF_SAMPLE_COUNTER: + case PERF_COUNTER_BULK_COUNT: + // (N1 - N0) / ((D1 - D0) / F) + // multiplier *= cd->current.Frequency / 10000000; + // tested, the frequency is not that useful for netdata + // we get right results without it. 
+ algorithm = RRD_ALGORITHM_INCREMENTAL; + break; + + case PERF_COUNTER_QUEUELEN_TYPE: + case PERF_COUNTER_100NS_QUEUELEN_TYPE: + case PERF_COUNTER_OBJ_TIME_QUEUELEN_TYPE: + case PERF_COUNTER_LARGE_QUEUELEN_TYPE: + case PERF_AVERAGE_BULK: // normally not displayed + // (N1 - N0) / (D1 - D0) + algorithm = RRD_ALGORITHM_INCREMENTAL; + break; + + case PERF_OBJ_TIME_TIMER: + case PERF_COUNTER_TIMER: + case PERF_100NSEC_TIMER: + case PERF_PRECISION_SYSTEM_TIMER: + case PERF_PRECISION_100NS_TIMER: + case PERF_PRECISION_OBJECT_TIMER: + case PERF_SAMPLE_FRACTION: + // 100 * (N1 - N0) / (D1 - D0) + multiplier *= 100; + algorithm = RRD_ALGORITHM_INCREMENTAL; + break; + + case PERF_COUNTER_TIMER_INV: + case PERF_100NSEC_TIMER_INV: + // 100 * (1 - ((N1 - N0) / (D1 - D0))) + divider *= COLLECTED_NUMBER_PRECISION; + algorithm = RRD_ALGORITHM_ABSOLUTE; + break; + + case PERF_COUNTER_MULTI_TIMER: + // 100 * ((N1 - N0) / ((D1 - D0) / TB)) / B1 + divider *= COLLECTED_NUMBER_PRECISION; + algorithm = RRD_ALGORITHM_ABSOLUTE; + break; + + case PERF_100NSEC_MULTI_TIMER: + // 100 * ((N1 - N0) / (D1 - D0)) / B1 + divider *= COLLECTED_NUMBER_PRECISION; + algorithm = RRD_ALGORITHM_ABSOLUTE; + break; + + case PERF_COUNTER_MULTI_TIMER_INV: + case PERF_100NSEC_MULTI_TIMER_INV: + // 100 * (B1 - ((N1 - N0) / (D1 - D0))) + divider *= COLLECTED_NUMBER_PRECISION; + algorithm = RRD_ALGORITHM_ABSOLUTE; + break; + + case PERF_COUNTER_RAWCOUNT: + case PERF_COUNTER_LARGE_RAWCOUNT: + // N as decimal + algorithm = RRD_ALGORITHM_ABSOLUTE; + break; + + case PERF_COUNTER_RAWCOUNT_HEX: + case PERF_COUNTER_LARGE_RAWCOUNT_HEX: + // N as hexadecimal + algorithm = RRD_ALGORITHM_ABSOLUTE; + break; + + case PERF_COUNTER_DELTA: + case PERF_COUNTER_LARGE_DELTA: + // N1 - N0 + algorithm = RRD_ALGORITHM_ABSOLUTE; + break; + + case PERF_RAW_FRACTION: + case PERF_LARGE_RAW_FRACTION: + // 100 * N / B + algorithm = RRD_ALGORITHM_ABSOLUTE; + divider *= COLLECTED_NUMBER_PRECISION; + break; + + case PERF_AVERAGE_TIMER: + // 
((N1 - N0) / TB) / (B1 - B0) + // divider *= cd->current.Frequency / 10000000; + algorithm = RRD_ALGORITHM_INCREMENTAL; + break; + + case PERF_ELAPSED_TIME: + // (D0 - N0) / F + algorithm = RRD_ALGORITHM_ABSOLUTE; + break; + + case PERF_COUNTER_TEXT: + case PERF_SAMPLE_BASE: + case PERF_AVERAGE_BASE: + case PERF_COUNTER_MULTI_BASE: + case PERF_RAW_BASE: + case PERF_COUNTER_NODATA: + case PERF_PRECISION_TIMESTAMP: + default: + break; + } + + return rrddim_add(st, id, name, multiplier, divider, algorithm); +} + +#define VALID_DELTA(cd) \ + ((cd)->previous.Time > 0 && (cd)->current.Data >= (cd)->previous.Data && (cd)->current.Time > (cd)->previous.Time) + +collected_number perflib_rrddim_set_by_pointer(RRDSET *st, RRDDIM *rd, COUNTER_DATA *cd) { + ULONGLONG numerator = 0; + LONGLONG denominator = 0; + double doubleValue = 0.0; + collected_number value; + + switch(cd->current.CounterType) { + case PERF_COUNTER_COUNTER: + case PERF_SAMPLE_COUNTER: + case PERF_COUNTER_BULK_COUNT: + // (N1 - N0) / ((D1 - D0) / F) + value = (collected_number)cd->current.Data; + break; + + case PERF_COUNTER_QUEUELEN_TYPE: + case PERF_COUNTER_100NS_QUEUELEN_TYPE: + case PERF_COUNTER_OBJ_TIME_QUEUELEN_TYPE: + case PERF_COUNTER_LARGE_QUEUELEN_TYPE: + case PERF_AVERAGE_BULK: // normally not displayed + // (N1 - N0) / (D1 - D0) + value = (collected_number)cd->current.Data; + break; + + case PERF_OBJ_TIME_TIMER: + case PERF_COUNTER_TIMER: + case PERF_100NSEC_TIMER: + case PERF_PRECISION_SYSTEM_TIMER: + case PERF_PRECISION_100NS_TIMER: + case PERF_PRECISION_OBJECT_TIMER: + case PERF_SAMPLE_FRACTION: + // 100 * (N1 - N0) / (D1 - D0) + value = (collected_number)cd->current.Data; + break; + + case PERF_COUNTER_TIMER_INV: + case PERF_100NSEC_TIMER_INV: + // 100 * (1 - ((N1 - N0) / (D1 - D0))) + if(!VALID_DELTA(cd)) return 0; + numerator = cd->current.Data - cd->previous.Data; + denominator = cd->current.Time - cd->previous.Time; + doubleValue = 100.0 * (1.0 - ((double)numerator / 
(double)denominator)); + // printf("Display value is (timer-inv): %f%%\n", doubleValue); + value = (collected_number)(doubleValue * COLLECTED_NUMBER_PRECISION); + break; + + case PERF_COUNTER_MULTI_TIMER: + // 100 * ((N1 - N0) / ((D1 - D0) / TB)) / B1 + if(!VALID_DELTA(cd)) return 0; + numerator = cd->current.Data - cd->previous.Data; + denominator = cd->current.Time - cd->previous.Time; + denominator /= cd->current.Frequency; + doubleValue = 100.0 * ((double)numerator / (double)denominator) / cd->current.MultiCounterData; + // printf("Display value is (multi-timer): %f%%\n", doubleValue); + value = (collected_number)(doubleValue * COLLECTED_NUMBER_PRECISION); + break; + + case PERF_100NSEC_MULTI_TIMER: + // 100 * ((N1 - N0) / (D1 - D0)) / B1 + if(!VALID_DELTA(cd)) return 0; + numerator = cd->current.Data - cd->previous.Data; + denominator = cd->current.Time - cd->previous.Time; + doubleValue = 100.0 * ((double)numerator / (double)denominator) / (double)cd->current.MultiCounterData; + // printf("Display value is (100ns multi-timer): %f%%\n", doubleValue); + value = (collected_number)(doubleValue * COLLECTED_NUMBER_PRECISION); + break; + + case PERF_COUNTER_MULTI_TIMER_INV: + case PERF_100NSEC_MULTI_TIMER_INV: + // 100 * (B1 - ((N1 - N0) / (D1 - D0))) + if(!VALID_DELTA(cd)) return 0; + numerator = cd->current.Data - cd->previous.Data; + denominator = cd->current.Time - cd->previous.Time; + doubleValue = 100.0 * ((double)cd->current.MultiCounterData - ((double)numerator / (double)denominator)); + // printf("Display value is (multi-timer-inv): %f%%\n", doubleValue); + value = (collected_number)(doubleValue * COLLECTED_NUMBER_PRECISION); + break; + + case PERF_COUNTER_RAWCOUNT: + case PERF_COUNTER_LARGE_RAWCOUNT: + // N as decimal + value = (collected_number)cd->current.Data; + break; + + case PERF_COUNTER_RAWCOUNT_HEX: + case PERF_COUNTER_LARGE_RAWCOUNT_HEX: + // N as hexadecimal + value = (collected_number)cd->current.Data; + break; + + case PERF_COUNTER_DELTA: + 
case PERF_COUNTER_LARGE_DELTA: + if(!VALID_DELTA(cd)) return 0; + value = (collected_number)(cd->current.Data - cd->previous.Data); + break; + + case PERF_RAW_FRACTION: + case PERF_LARGE_RAW_FRACTION: + // 100 * N / B + if(!cd->current.Time) return 0; + doubleValue = 100.0 * (double)cd->current.Data / (double)cd->current.Time; + // printf("Display value is (fraction): %f%%\n", doubleValue); + value = (collected_number)(doubleValue * COLLECTED_NUMBER_PRECISION); + break; + + default: + return 0; + } + + return rrddim_set_by_pointer(st, rd, value); +} + +/* +double perflibCalculateValue(RAW_DATA *current, RAW_DATA *previous) { + ULONGLONG numerator = 0; + LONGLONG denominator = 0; + double doubleValue = 0.0; + DWORD dwordValue = 0; + + if (NULL == previous) { + // Return error if the counter type requires two samples to calculate the value. + switch (current->CounterType) { + default: + if (PERF_DELTA_COUNTER != (current->CounterType & PERF_DELTA_COUNTER)) + break; + __fallthrough; + // fallthrough + + case PERF_AVERAGE_TIMER: // Special case. + case PERF_AVERAGE_BULK: // Special case. + // printf(" > The counter type requires two samples but only one sample was provided.\n"); + return NAN; + } + } + else { + if (current->CounterType != previous->CounterType) { + // printf(" > The samples have inconsistent counter types.\n"); + return NAN; + } + + // Check for integer overflow or bad data from provider (the data from + // sample 2 must be greater than the data from sample 1). + if (current->Data < previous->Data) + { + // Can happen for various reasons. Commonly occurs with the Process counterset when + // multiple processes have the same name and one of them starts or stops. + // Normally you'll just drop the older sample and continue. 
+ // printf("> current (%llu) is smaller than previous (%llu).\n", current->Data, previous->Data); + return NAN; + } + } + + switch (current->CounterType) { + case PERF_COUNTER_COUNTER: + case PERF_SAMPLE_COUNTER: + case PERF_COUNTER_BULK_COUNT: + // (N1 - N0) / ((D1 - D0) / F) + numerator = current->Data - previous->Data; + denominator = current->Time - previous->Time; + dwordValue = (DWORD)(numerator / ((double)denominator / current->Frequency)); + //printf("Display value is (counter): %lu%s\n", (unsigned long)dwordValue, + // (previous->CounterType == PERF_SAMPLE_COUNTER) ? "" : "/sec"); + return (double)dwordValue; + + case PERF_COUNTER_QUEUELEN_TYPE: + case PERF_COUNTER_100NS_QUEUELEN_TYPE: + case PERF_COUNTER_OBJ_TIME_QUEUELEN_TYPE: + case PERF_COUNTER_LARGE_QUEUELEN_TYPE: + case PERF_AVERAGE_BULK: // normally not displayed + // (N1 - N0) / (D1 - D0) + numerator = current->Data - previous->Data; + denominator = current->Time - previous->Time; + doubleValue = (double)numerator / denominator; + if (previous->CounterType != PERF_AVERAGE_BULK) { + // printf("Display value is (queuelen): %f\n", doubleValue); + return doubleValue; + } + return NAN; + + case PERF_OBJ_TIME_TIMER: + case PERF_COUNTER_TIMER: + case PERF_100NSEC_TIMER: + case PERF_PRECISION_SYSTEM_TIMER: + case PERF_PRECISION_100NS_TIMER: + case PERF_PRECISION_OBJECT_TIMER: + case PERF_SAMPLE_FRACTION: + // 100 * (N1 - N0) / (D1 - D0) + numerator = current->Data - previous->Data; + denominator = current->Time - previous->Time; + doubleValue = (double)(100 * numerator) / denominator; + // printf("Display value is (timer): %f%%\n", doubleValue); + return doubleValue; + + case PERF_COUNTER_TIMER_INV: + // 100 * (1 - ((N1 - N0) / (D1 - D0))) + numerator = current->Data - previous->Data; + denominator = current->Time - previous->Time; + doubleValue = 100 * (1 - ((double)numerator / denominator)); + // printf("Display value is (timer-inv): %f%%\n", doubleValue); + return doubleValue; + + case 
PERF_100NSEC_TIMER_INV: + // 100 * (1- (N1 - N0) / (D1 - D0)) + numerator = current->Data - previous->Data; + denominator = current->Time - previous->Time; + doubleValue = 100 * (1 - (double)numerator / denominator); + // printf("Display value is (100ns-timer-inv): %f%%\n", doubleValue); + return doubleValue; + + case PERF_COUNTER_MULTI_TIMER: + // 100 * ((N1 - N0) / ((D1 - D0) / TB)) / B1 + numerator = current->Data - previous->Data; + denominator = current->Time - previous->Time; + denominator /= current->Frequency; + doubleValue = 100 * ((double)numerator / denominator) / current->MultiCounterData; + // printf("Display value is (multi-timer): %f%%\n", doubleValue); + return doubleValue; + + case PERF_100NSEC_MULTI_TIMER: + // 100 * ((N1 - N0) / (D1 - D0)) / B1 + numerator = current->Data - previous->Data; + denominator = current->Time - previous->Time; + doubleValue = 100 * ((double)numerator / (double)denominator) / (double)current->MultiCounterData; + // printf("Display value is (100ns multi-timer): %f%%\n", doubleValue); + return doubleValue; + + case PERF_COUNTER_MULTI_TIMER_INV: + case PERF_100NSEC_MULTI_TIMER_INV: + // 100 * (B1 - ((N1 - N0) / (D1 - D0))) + numerator = current->Data - previous->Data; + denominator = current->Time - previous->Time; + doubleValue = 100.0 * ((double)current->MultiCounterData - ((double)numerator / (double)denominator)); + // printf("Display value is (multi-timer-inv): %f%%\n", doubleValue); + return doubleValue; + + case PERF_COUNTER_RAWCOUNT: + case PERF_COUNTER_LARGE_RAWCOUNT: + // N as decimal + // printf("Display value is (rawcount): %llu\n", current->Data); + return (double)current->Data; + + case PERF_COUNTER_RAWCOUNT_HEX: + case PERF_COUNTER_LARGE_RAWCOUNT_HEX: + // N as hexadecimal + // printf("Display value is (hex): 0x%llx\n", current->Data); + return (double)current->Data; + + case PERF_COUNTER_DELTA: + case PERF_COUNTER_LARGE_DELTA: + // N1 - N0 + // printf("Display value is (delta): %llu\n", current->Data - 
previous->Data); + return (double)(current->Data - previous->Data); + + case PERF_RAW_FRACTION: + case PERF_LARGE_RAW_FRACTION: + // 100 * N / B + doubleValue = 100.0 * (double)current->Data / (double)current->Time; + // printf("Display value is (fraction): %f%%\n", doubleValue); + return doubleValue; + + case PERF_AVERAGE_TIMER: + // ((N1 - N0) / TB) / (B1 - B0) + numerator = current->Data - previous->Data; + denominator = current->Time - previous->Time; + doubleValue = (double)numerator / (double)current->Frequency / (double)denominator; + // printf("Display value is (average timer): %f seconds\n", doubleValue); + return doubleValue; + + case PERF_ELAPSED_TIME: + // (D0 - N0) / F + doubleValue = (double)(current->Time - current->Data) / (double)current->Frequency; + // printf("Display value is (elapsed time): %f seconds\n", doubleValue); + return doubleValue; + + case PERF_COUNTER_TEXT: + case PERF_SAMPLE_BASE: + case PERF_AVERAGE_BASE: + case PERF_COUNTER_MULTI_BASE: + case PERF_RAW_BASE: + case PERF_COUNTER_NODATA: + case PERF_PRECISION_TIMESTAMP: + // printf(" > Non-printing counter type: 0x%08x\n", current->CounterType); + return NAN; + break; + + default: + // printf(" > Unrecognized counter type: 0x%08x\n", current->CounterType); + return NAN; + break; + } +} +*/ diff --git a/src/collectors/windows.plugin/perflib-rrd.h b/src/collectors/windows.plugin/perflib-rrd.h index 0b91de2ec384ad..2347c5b1d8d395 100644 --- a/src/collectors/windows.plugin/perflib-rrd.h +++ b/src/collectors/windows.plugin/perflib-rrd.h @@ -1,12 +1,11 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#ifndef NETDATA_PERFLIB_RRD_H -#define NETDATA_PERFLIB_RRD_H - -#include "perflib.h" -#include "database/rrd.h" - -RRDDIM *perflib_rrddim_add(RRDSET *st, const char *id, const char *name, collected_number multiplier, collected_number divider, COUNTER_DATA *cd); -collected_number perflib_rrddim_set_by_pointer(RRDSET *st, RRDDIM *rd, COUNTER_DATA *cd); - -#endif //NETDATA_PERFLIB_RRD_H +// 
SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_PERFLIB_RRD_H +#define NETDATA_PERFLIB_RRD_H + +#include "database/rrd.h" + +RRDDIM *perflib_rrddim_add(RRDSET *st, const char *id, const char *name, collected_number multiplier, collected_number divider, COUNTER_DATA *cd); +collected_number perflib_rrddim_set_by_pointer(RRDSET *st, RRDDIM *rd, COUNTER_DATA *cd); + +#endif //NETDATA_PERFLIB_RRD_H diff --git a/src/collectors/windows.plugin/perflib-storage.c b/src/collectors/windows.plugin/perflib-storage.c index d3b80052f97bb4..d7df3c959961a9 100644 --- a/src/collectors/windows.plugin/perflib-storage.c +++ b/src/collectors/windows.plugin/perflib-storage.c @@ -1,317 +1,317 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "windows_plugin.h" -#include "windows-internals.h" - -#define _COMMON_PLUGIN_NAME PLUGIN_WINDOWS_NAME -#define _COMMON_PLUGIN_MODULE_NAME "PerflibStorage" -#include "../common-contexts/common-contexts.h" - -struct logical_disk { - bool collected_metadata; - - STRING *filesystem; - - RRDSET *st_disk_space; - RRDDIM *rd_disk_space_used; - RRDDIM *rd_disk_space_free; - - COUNTER_DATA percentDiskFree; - // COUNTER_DATA freeMegabytes; -}; - -struct physical_disk { - bool collected_metadata; - - STRING *device; - STRING *mount_point; - - ND_DISK_IO disk_io; - COUNTER_DATA diskReadBytesPerSec; - COUNTER_DATA diskWriteBytesPerSec; - - COUNTER_DATA percentIdleTime; - COUNTER_DATA percentDiskTime; - COUNTER_DATA percentDiskReadTime; - COUNTER_DATA percentDiskWriteTime; - COUNTER_DATA currentDiskQueueLength; - COUNTER_DATA averageDiskQueueLength; - COUNTER_DATA averageDiskReadQueueLength; - COUNTER_DATA averageDiskWriteQueueLength; - COUNTER_DATA averageDiskSecondsPerTransfer; - COUNTER_DATA averageDiskSecondsPerRead; - COUNTER_DATA averageDiskSecondsPerWrite; - COUNTER_DATA diskTransfersPerSec; - COUNTER_DATA diskReadsPerSec; - COUNTER_DATA diskWritesPerSec; - COUNTER_DATA diskBytesPerSec; - COUNTER_DATA averageDiskBytesPerTransfer; - 
COUNTER_DATA averageDiskBytesPerRead; - COUNTER_DATA averageDiskBytesPerWrite; - COUNTER_DATA splitIoPerSec; -}; - -struct physical_disk system_physical_total = { - .collected_metadata = true, -}; - -void dict_logical_disk_insert_cb(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { - struct logical_disk *ld = value; - - ld->percentDiskFree.key = "% Free Space"; - // ld->freeMegabytes.key = "Free Megabytes"; -} - -void initialize_physical_disk(struct physical_disk *pd) { - pd->percentIdleTime.key = "% Idle Time"; - pd->percentDiskTime.key = "% Disk Time"; - pd->percentDiskReadTime.key = "% Disk Read Time"; - pd->percentDiskWriteTime.key = "% Disk Write Time"; - pd->currentDiskQueueLength.key = "Current Disk Queue Length"; - pd->averageDiskQueueLength.key = "Avg. Disk Queue Length"; - pd->averageDiskReadQueueLength.key = "Avg. Disk Read Queue Length"; - pd->averageDiskWriteQueueLength.key = "Avg. Disk Write Queue Length"; - pd->averageDiskSecondsPerTransfer.key = "Avg. Disk sec/Transfer"; - pd->averageDiskSecondsPerRead.key = "Avg. Disk sec/Read"; - pd->averageDiskSecondsPerWrite.key = "Avg. Disk sec/Write"; - pd->diskTransfersPerSec.key = "Disk Transfers/sec"; - pd->diskReadsPerSec.key = "Disk Reads/sec"; - pd->diskWritesPerSec.key = "Disk Writes/sec"; - pd->diskBytesPerSec.key = "Disk Bytes/sec"; - pd->diskReadBytesPerSec.key = "Disk Read Bytes/sec"; - pd->diskWriteBytesPerSec.key = "Disk Write Bytes/sec"; - pd->averageDiskBytesPerTransfer.key = "Avg. Disk Bytes/Transfer"; - pd->averageDiskBytesPerRead.key = "Avg. Disk Bytes/Read"; - pd->averageDiskBytesPerWrite.key = "Avg. 
Disk Bytes/Write"; - pd->splitIoPerSec.key = "Split IO/Sec"; -} - -void dict_physical_disk_insert_cb(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { - struct physical_disk *pd = value; - initialize_physical_disk(pd); -} - -static DICTIONARY *logicalDisks = NULL, *physicalDisks = NULL; -static void initialize(void) { - initialize_physical_disk(&system_physical_total); - - logicalDisks = dictionary_create_advanced(DICT_OPTION_DONT_OVERWRITE_VALUE | - DICT_OPTION_FIXED_SIZE, NULL, sizeof(struct logical_disk)); - - dictionary_register_insert_callback(logicalDisks, dict_logical_disk_insert_cb, NULL); - - physicalDisks = dictionary_create_advanced(DICT_OPTION_DONT_OVERWRITE_VALUE | - DICT_OPTION_FIXED_SIZE, NULL, sizeof(struct physical_disk)); - - dictionary_register_insert_callback(physicalDisks, dict_physical_disk_insert_cb, NULL); -} - -static STRING *getFileSystemType(const char* diskName) { - if (!diskName || !*diskName) return NULL; - - char fileSystemNameBuffer[128] = {0}; // Buffer for file system name - char pathBuffer[256] = {0}; // Path buffer to accommodate different formats - DWORD serialNumber = 0; - DWORD maxComponentLength = 0; - DWORD fileSystemFlags = 0; - BOOL success; - - // Check if the input is likely a drive letter (e.g., "C:") - if (isalpha((uint8_t)diskName[0]) && diskName[1] == ':' && diskName[2] == '\0') - snprintf(pathBuffer, sizeof(pathBuffer), "%s\\", diskName); // Format as "C:\\" - else - // Assume it's a Volume GUID path or a device path - snprintf(pathBuffer, sizeof(pathBuffer), "\\\\.\\%s", diskName); // Format as "\\.\HarddiskVolume1" - - // Attempt to get the volume information - success = GetVolumeInformation( - pathBuffer, // Path to the disk - NULL, // We don't need the volume name - 0, // Size of volume name buffer is 0 - &serialNumber, // Volume serial number - &maxComponentLength, // Maximum component length - &fileSystemFlags, // File system flags - fileSystemNameBuffer, // File system name 
buffer - sizeof(fileSystemNameBuffer) // Size of file system name buffer - ); - - if (success && fileSystemNameBuffer[0]) { - char *s = fileSystemNameBuffer; - while(*s) { *s = tolower((uint8_t)*s); s++; } - return string_strdupz(fileSystemNameBuffer); // Duplicate the file system name - } - else - return NULL; -} - -static bool do_logical_disk(PERF_DATA_BLOCK *pDataBlock, int update_every) { - DICTIONARY *dict = logicalDisks; - - PERF_OBJECT_TYPE *pObjectType = perflibFindObjectTypeByName(pDataBlock, "LogicalDisk"); - if(!pObjectType) return false; - - PERF_INSTANCE_DEFINITION *pi = NULL; - for(LONG i = 0; i < pObjectType->NumInstances ; i++) { - pi = perflibForEachInstance(pDataBlock, pObjectType, pi); - if(!pi) break; - - if(!getInstanceName(pDataBlock, pObjectType, pi, windows_shared_buffer, sizeof(windows_shared_buffer))) - strncpyz(windows_shared_buffer, "[unknown]", sizeof(windows_shared_buffer) - 1); - - if(strcasecmp(windows_shared_buffer, "_Total") == 0) - continue; - - struct logical_disk *d = dictionary_set(dict, windows_shared_buffer, NULL, sizeof(*d)); - - if(!d->collected_metadata) { - d->filesystem = getFileSystemType(windows_shared_buffer); - d->collected_metadata = true; - } - - perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->percentDiskFree); - // perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->freeMegabytes); - - if(!d->st_disk_space) { - d->st_disk_space = rrdset_create_localhost( - "disk_space" - , windows_shared_buffer, NULL - , windows_shared_buffer, "disk.space" - , "Disk Space Usage" - , "GiB" - , PLUGIN_WINDOWS_NAME - , "PerflibStorage" - , NETDATA_CHART_PRIO_DISKSPACE_SPACE - , update_every - , RRDSET_TYPE_STACKED - ); - - rrdlabels_add(d->st_disk_space->rrdlabels, "mount_point", windows_shared_buffer, RRDLABEL_SRC_AUTO); - // rrdlabels_add(d->st->rrdlabels, "mount_root", name, RRDLABEL_SRC_AUTO); - - if(d->filesystem) - rrdlabels_add(d->st_disk_space->rrdlabels, "filesystem", string2str(d->filesystem), 
RRDLABEL_SRC_AUTO); - - d->rd_disk_space_free = rrddim_add(d->st_disk_space, "avail", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); - d->rd_disk_space_used = rrddim_add(d->st_disk_space, "used", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); - } - - // percentDiskFree has the free space in Data and the size of the disk in Time, in MiB. - rrddim_set_by_pointer(d->st_disk_space, d->rd_disk_space_free, (collected_number)d->percentDiskFree.current.Data); - rrddim_set_by_pointer(d->st_disk_space, d->rd_disk_space_used, (collected_number)(d->percentDiskFree.current.Time - d->percentDiskFree.current.Data)); - rrdset_done(d->st_disk_space); - } - - return true; -} - -static void physical_disk_labels(RRDSET *st, void *data) { - struct physical_disk *d = data; - - if(d->device) - rrdlabels_add(st->rrdlabels, "device", string2str(d->device), RRDLABEL_SRC_AUTO); - - if (d->mount_point) - rrdlabels_add(st->rrdlabels, "mount_point", string2str(d->mount_point), RRDLABEL_SRC_AUTO); -} - -static bool do_physical_disk(PERF_DATA_BLOCK *pDataBlock, int update_every) { - DICTIONARY *dict = physicalDisks; - - PERF_OBJECT_TYPE *pObjectType = perflibFindObjectTypeByName(pDataBlock, "PhysicalDisk"); - if(!pObjectType) return false; - - PERF_INSTANCE_DEFINITION *pi = NULL; - for (LONG i = 0; i < pObjectType->NumInstances; i++) { - pi = perflibForEachInstance(pDataBlock, pObjectType, pi); - if (!pi) - break; - - if (!getInstanceName(pDataBlock, pObjectType, pi, windows_shared_buffer, sizeof(windows_shared_buffer))) - strncpyz(windows_shared_buffer, "[unknown]", sizeof(windows_shared_buffer) - 1); - - char *device = windows_shared_buffer; - char *mount_point = NULL; - - if((mount_point = strchr(device, ' '))) { - *mount_point = '\0'; - mount_point++; - } - - struct physical_disk *d; - bool is_system; - if (strcasecmp(windows_shared_buffer, "_Total") == 0) { - d = &system_physical_total; - is_system = true; - } - else { - d = dictionary_set(dict, device, NULL, sizeof(*d)); - is_system = false; - } - - if 
(!d->collected_metadata) { - // TODO collect metadata - device_type, serial, id - d->device = string_strdupz(device); - d->mount_point = string_strdupz(mount_point); - d->collected_metadata = true; - } - - if (perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->diskReadBytesPerSec) && - perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->diskWriteBytesPerSec)) { - if(is_system) - common_system_io(d->diskReadBytesPerSec.current.Data, d->diskWriteBytesPerSec.current.Data, update_every); - else - common_disk_io( - &d->disk_io, - device, - NULL, - d->diskReadBytesPerSec.current.Data, - d->diskWriteBytesPerSec.current.Data, - update_every, - physical_disk_labels, - d); - } - - perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->percentIdleTime); - perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->percentDiskTime); - perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->percentDiskReadTime); - perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->percentDiskWriteTime); - perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->currentDiskQueueLength); - perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->averageDiskQueueLength); - perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->averageDiskReadQueueLength); - perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->averageDiskWriteQueueLength); - perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->averageDiskSecondsPerTransfer); - perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->averageDiskSecondsPerRead); - perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->averageDiskSecondsPerWrite); - perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->diskTransfersPerSec); - perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->diskReadsPerSec); - perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->diskWritesPerSec); - perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->diskBytesPerSec); - 
perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->averageDiskBytesPerTransfer); - perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->averageDiskBytesPerRead); - perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->averageDiskBytesPerWrite); - perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->splitIoPerSec); - } - - return true; -} - -int do_PerflibStorage(int update_every, usec_t dt __maybe_unused) { - static bool initialized = false; - - if(unlikely(!initialized)) { - initialize(); - initialized = true; - } - - DWORD id = RegistryFindIDByName("LogicalDisk"); - if(id == PERFLIB_REGISTRY_NAME_NOT_FOUND) - return -1; - - PERF_DATA_BLOCK *pDataBlock = perflibGetPerformanceData(id); - if(!pDataBlock) return -1; - - do_logical_disk(pDataBlock, update_every); - do_physical_disk(pDataBlock, update_every); - - return 0; -} +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "windows_plugin.h" +#include "windows-internals.h" + +#define _COMMON_PLUGIN_NAME PLUGIN_WINDOWS_NAME +#define _COMMON_PLUGIN_MODULE_NAME "PerflibStorage" +#include "../common-contexts/common-contexts.h" + +struct logical_disk { + bool collected_metadata; + + STRING *filesystem; + + RRDSET *st_disk_space; + RRDDIM *rd_disk_space_used; + RRDDIM *rd_disk_space_free; + + COUNTER_DATA percentDiskFree; + // COUNTER_DATA freeMegabytes; +}; + +struct physical_disk { + bool collected_metadata; + + STRING *device; + STRING *mount_point; + + ND_DISK_IO disk_io; + COUNTER_DATA diskReadBytesPerSec; + COUNTER_DATA diskWriteBytesPerSec; + + COUNTER_DATA percentIdleTime; + COUNTER_DATA percentDiskTime; + COUNTER_DATA percentDiskReadTime; + COUNTER_DATA percentDiskWriteTime; + COUNTER_DATA currentDiskQueueLength; + COUNTER_DATA averageDiskQueueLength; + COUNTER_DATA averageDiskReadQueueLength; + COUNTER_DATA averageDiskWriteQueueLength; + COUNTER_DATA averageDiskSecondsPerTransfer; + COUNTER_DATA averageDiskSecondsPerRead; + COUNTER_DATA averageDiskSecondsPerWrite; + COUNTER_DATA 
diskTransfersPerSec; + COUNTER_DATA diskReadsPerSec; + COUNTER_DATA diskWritesPerSec; + COUNTER_DATA diskBytesPerSec; + COUNTER_DATA averageDiskBytesPerTransfer; + COUNTER_DATA averageDiskBytesPerRead; + COUNTER_DATA averageDiskBytesPerWrite; + COUNTER_DATA splitIoPerSec; +}; + +struct physical_disk system_physical_total = { + .collected_metadata = true, +}; + +void dict_logical_disk_insert_cb(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { + struct logical_disk *ld = value; + + ld->percentDiskFree.key = "% Free Space"; + // ld->freeMegabytes.key = "Free Megabytes"; +} + +void initialize_physical_disk(struct physical_disk *pd) { + pd->percentIdleTime.key = "% Idle Time"; + pd->percentDiskTime.key = "% Disk Time"; + pd->percentDiskReadTime.key = "% Disk Read Time"; + pd->percentDiskWriteTime.key = "% Disk Write Time"; + pd->currentDiskQueueLength.key = "Current Disk Queue Length"; + pd->averageDiskQueueLength.key = "Avg. Disk Queue Length"; + pd->averageDiskReadQueueLength.key = "Avg. Disk Read Queue Length"; + pd->averageDiskWriteQueueLength.key = "Avg. Disk Write Queue Length"; + pd->averageDiskSecondsPerTransfer.key = "Avg. Disk sec/Transfer"; + pd->averageDiskSecondsPerRead.key = "Avg. Disk sec/Read"; + pd->averageDiskSecondsPerWrite.key = "Avg. Disk sec/Write"; + pd->diskTransfersPerSec.key = "Disk Transfers/sec"; + pd->diskReadsPerSec.key = "Disk Reads/sec"; + pd->diskWritesPerSec.key = "Disk Writes/sec"; + pd->diskBytesPerSec.key = "Disk Bytes/sec"; + pd->diskReadBytesPerSec.key = "Disk Read Bytes/sec"; + pd->diskWriteBytesPerSec.key = "Disk Write Bytes/sec"; + pd->averageDiskBytesPerTransfer.key = "Avg. Disk Bytes/Transfer"; + pd->averageDiskBytesPerRead.key = "Avg. Disk Bytes/Read"; + pd->averageDiskBytesPerWrite.key = "Avg. 
Disk Bytes/Write"; + pd->splitIoPerSec.key = "Split IO/Sec"; +} + +void dict_physical_disk_insert_cb(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { + struct physical_disk *pd = value; + initialize_physical_disk(pd); +} + +static DICTIONARY *logicalDisks = NULL, *physicalDisks = NULL; +static void initialize(void) { + initialize_physical_disk(&system_physical_total); + + logicalDisks = dictionary_create_advanced(DICT_OPTION_DONT_OVERWRITE_VALUE | + DICT_OPTION_FIXED_SIZE, NULL, sizeof(struct logical_disk)); + + dictionary_register_insert_callback(logicalDisks, dict_logical_disk_insert_cb, NULL); + + physicalDisks = dictionary_create_advanced(DICT_OPTION_DONT_OVERWRITE_VALUE | + DICT_OPTION_FIXED_SIZE, NULL, sizeof(struct physical_disk)); + + dictionary_register_insert_callback(physicalDisks, dict_physical_disk_insert_cb, NULL); +} + +static STRING *getFileSystemType(const char* diskName) { + if (!diskName || !*diskName) return NULL; + + char fileSystemNameBuffer[128] = {0}; // Buffer for file system name + char pathBuffer[256] = {0}; // Path buffer to accommodate different formats + DWORD serialNumber = 0; + DWORD maxComponentLength = 0; + DWORD fileSystemFlags = 0; + BOOL success; + + // Check if the input is likely a drive letter (e.g., "C:") + if (isalpha((uint8_t)diskName[0]) && diskName[1] == ':' && diskName[2] == '\0') + snprintf(pathBuffer, sizeof(pathBuffer), "%s\\", diskName); // Format as "C:\\" + else + // Assume it's a Volume GUID path or a device path + snprintf(pathBuffer, sizeof(pathBuffer), "\\\\.\\%s", diskName); // Format as "\\.\HarddiskVolume1" + + // Attempt to get the volume information + success = GetVolumeInformation( + pathBuffer, // Path to the disk + NULL, // We don't need the volume name + 0, // Size of volume name buffer is 0 + &serialNumber, // Volume serial number + &maxComponentLength, // Maximum component length + &fileSystemFlags, // File system flags + fileSystemNameBuffer, // File system name 
buffer + sizeof(fileSystemNameBuffer) // Size of file system name buffer + ); + + if (success && fileSystemNameBuffer[0]) { + char *s = fileSystemNameBuffer; + while(*s) { *s = tolower((uint8_t)*s); s++; } + return string_strdupz(fileSystemNameBuffer); // Duplicate the file system name + } + else + return NULL; +} + +static bool do_logical_disk(PERF_DATA_BLOCK *pDataBlock, int update_every) { + DICTIONARY *dict = logicalDisks; + + PERF_OBJECT_TYPE *pObjectType = perflibFindObjectTypeByName(pDataBlock, "LogicalDisk"); + if(!pObjectType) return false; + + PERF_INSTANCE_DEFINITION *pi = NULL; + for(LONG i = 0; i < pObjectType->NumInstances ; i++) { + pi = perflibForEachInstance(pDataBlock, pObjectType, pi); + if(!pi) break; + + if(!getInstanceName(pDataBlock, pObjectType, pi, windows_shared_buffer, sizeof(windows_shared_buffer))) + strncpyz(windows_shared_buffer, "[unknown]", sizeof(windows_shared_buffer) - 1); + + if(strcasecmp(windows_shared_buffer, "_Total") == 0) + continue; + + struct logical_disk *d = dictionary_set(dict, windows_shared_buffer, NULL, sizeof(*d)); + + if(!d->collected_metadata) { + d->filesystem = getFileSystemType(windows_shared_buffer); + d->collected_metadata = true; + } + + perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->percentDiskFree); + // perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->freeMegabytes); + + if(!d->st_disk_space) { + d->st_disk_space = rrdset_create_localhost( + "disk_space" + , windows_shared_buffer, NULL + , windows_shared_buffer, "disk.space" + , "Disk Space Usage" + , "GiB" + , PLUGIN_WINDOWS_NAME + , "PerflibStorage" + , NETDATA_CHART_PRIO_DISKSPACE_SPACE + , update_every + , RRDSET_TYPE_STACKED + ); + + rrdlabels_add(d->st_disk_space->rrdlabels, "mount_point", windows_shared_buffer, RRDLABEL_SRC_AUTO); + // rrdlabels_add(d->st->rrdlabels, "mount_root", name, RRDLABEL_SRC_AUTO); + + if(d->filesystem) + rrdlabels_add(d->st_disk_space->rrdlabels, "filesystem", string2str(d->filesystem), 
RRDLABEL_SRC_AUTO); + + d->rd_disk_space_free = rrddim_add(d->st_disk_space, "avail", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + d->rd_disk_space_used = rrddim_add(d->st_disk_space, "used", NULL, 1, 1024, RRD_ALGORITHM_ABSOLUTE); + } + + // percentDiskFree has the free space in Data and the size of the disk in Time, in MiB. + rrddim_set_by_pointer(d->st_disk_space, d->rd_disk_space_free, (collected_number)d->percentDiskFree.current.Data); + rrddim_set_by_pointer(d->st_disk_space, d->rd_disk_space_used, (collected_number)(d->percentDiskFree.current.Time - d->percentDiskFree.current.Data)); + rrdset_done(d->st_disk_space); + } + + return true; +} + +static void physical_disk_labels(RRDSET *st, void *data) { + struct physical_disk *d = data; + + if(d->device) + rrdlabels_add(st->rrdlabels, "device", string2str(d->device), RRDLABEL_SRC_AUTO); + + if (d->mount_point) + rrdlabels_add(st->rrdlabels, "mount_point", string2str(d->mount_point), RRDLABEL_SRC_AUTO); +} + +static bool do_physical_disk(PERF_DATA_BLOCK *pDataBlock, int update_every) { + DICTIONARY *dict = physicalDisks; + + PERF_OBJECT_TYPE *pObjectType = perflibFindObjectTypeByName(pDataBlock, "PhysicalDisk"); + if(!pObjectType) return false; + + PERF_INSTANCE_DEFINITION *pi = NULL; + for (LONG i = 0; i < pObjectType->NumInstances; i++) { + pi = perflibForEachInstance(pDataBlock, pObjectType, pi); + if (!pi) + break; + + if (!getInstanceName(pDataBlock, pObjectType, pi, windows_shared_buffer, sizeof(windows_shared_buffer))) + strncpyz(windows_shared_buffer, "[unknown]", sizeof(windows_shared_buffer) - 1); + + char *device = windows_shared_buffer; + char *mount_point = NULL; + + if((mount_point = strchr(device, ' '))) { + *mount_point = '\0'; + mount_point++; + } + + struct physical_disk *d; + bool is_system; + if (strcasecmp(windows_shared_buffer, "_Total") == 0) { + d = &system_physical_total; + is_system = true; + } + else { + d = dictionary_set(dict, device, NULL, sizeof(*d)); + is_system = false; + } + + if 
(!d->collected_metadata) { + // TODO collect metadata - device_type, serial, id + d->device = string_strdupz(device); + d->mount_point = string_strdupz(mount_point); + d->collected_metadata = true; + } + + if (perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->diskReadBytesPerSec) && + perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->diskWriteBytesPerSec)) { + if(is_system) + common_system_io(d->diskReadBytesPerSec.current.Data, d->diskWriteBytesPerSec.current.Data, update_every); + else + common_disk_io( + &d->disk_io, + device, + NULL, + d->diskReadBytesPerSec.current.Data, + d->diskWriteBytesPerSec.current.Data, + update_every, + physical_disk_labels, + d); + } + + perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->percentIdleTime); + perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->percentDiskTime); + perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->percentDiskReadTime); + perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->percentDiskWriteTime); + perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->currentDiskQueueLength); + perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->averageDiskQueueLength); + perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->averageDiskReadQueueLength); + perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->averageDiskWriteQueueLength); + perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->averageDiskSecondsPerTransfer); + perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->averageDiskSecondsPerRead); + perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->averageDiskSecondsPerWrite); + perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->diskTransfersPerSec); + perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->diskReadsPerSec); + perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->diskWritesPerSec); + perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->diskBytesPerSec); + 
perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->averageDiskBytesPerTransfer); + perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->averageDiskBytesPerRead); + perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->averageDiskBytesPerWrite); + perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->splitIoPerSec); + } + + return true; +} + +int do_PerflibStorage(int update_every, usec_t dt __maybe_unused) { + static bool initialized = false; + + if(unlikely(!initialized)) { + initialize(); + initialized = true; + } + + DWORD id = RegistryFindIDByName("LogicalDisk"); + if(id == PERFLIB_REGISTRY_NAME_NOT_FOUND) + return -1; + + PERF_DATA_BLOCK *pDataBlock = perflibGetPerformanceData(id); + if(!pDataBlock) return -1; + + do_logical_disk(pDataBlock, update_every); + do_physical_disk(pDataBlock, update_every); + + return 0; +} diff --git a/src/collectors/windows.plugin/perflib-thermalzone.c b/src/collectors/windows.plugin/perflib-thermalzone.c index 608baafe1b0677..66d25743ee4e98 100644 --- a/src/collectors/windows.plugin/perflib-thermalzone.c +++ b/src/collectors/windows.plugin/perflib-thermalzone.c @@ -1,105 +1,105 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "windows_plugin.h" -#include "windows-internals.h" - -typedef struct thermal_zone { - RRDSET *st; - RRDDIM *rd; - - COUNTER_DATA thermalZoneTemperature; -}; - -static inline void initialize_thermal_zone_keys(struct thermal_zone *p) { - p->thermalZoneTemperature.key = "Temperature"; -} - -void dict_thermal_zone_insert_cb(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { - struct thermal_zone *p = value; - initialize_thermal_zone_keys(p); -} - -static DICTIONARY *thermal_zones = NULL; - -static void initialize(void) { - thermal_zones = dictionary_create_advanced(DICT_OPTION_DONT_OVERWRITE_VALUE | - DICT_OPTION_FIXED_SIZE, NULL, sizeof(struct thermal_zone)); - - dictionary_register_insert_callback(thermal_zones, dict_thermal_zone_insert_cb, 
NULL); -} - -static bool do_thermal_zones(PERF_DATA_BLOCK *pDataBlock, int update_every) { - PERF_OBJECT_TYPE *pObjectType = perflibFindObjectTypeByName(pDataBlock, "Thermal Zone Information"); - if(!pObjectType) return false; - - PERF_INSTANCE_DEFINITION *pi = NULL; - for(LONG i = 0; i < pObjectType->NumInstances ; i++) { - pi = perflibForEachInstance(pDataBlock, pObjectType, pi); - if(!pi) break; - - if(!getInstanceName(pDataBlock, pObjectType, pi, windows_shared_buffer, sizeof(windows_shared_buffer))) - strncpyz(windows_shared_buffer, "[unknown]", sizeof(windows_shared_buffer) - 1); - - netdata_fix_chart_name(windows_shared_buffer); - struct thermal_zone *p = dictionary_set(thermal_zones, windows_shared_buffer, NULL, sizeof(*p)); - - perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &p->thermalZoneTemperature); - - // https://learn.microsoft.com/en-us/windows-hardware/design/device-experiences/design-guide - if(!p->st) { - char id[RRD_ID_LENGTH_MAX + 1]; - snprintfz(id, RRD_ID_LENGTH_MAX, "thermalzone_%s_temperature", windows_shared_buffer); - p->st = rrdset_create_localhost( - "system" - , id, NULL - , "thermalzone" - , "system.thermalzone_temperature" - , "Thermal zone temperature" - , "Celsius" - , PLUGIN_WINDOWS_NAME - , "ThermalZone" - , NETDATA_CHART_PRIO_WINDOWS_THERMAL_ZONES - , update_every - , RRDSET_TYPE_LINE - ); - - p->rd = rrddim_add(p->st, - id, - "temperature", - 1, - 1, - RRD_ALGORITHM_ABSOLUTE); - - rrdlabels_add(p->st->rrdlabels, "thermalzone", windows_shared_buffer, RRDLABEL_SRC_AUTO); - } - - // Convert to Celsius before to plot - NETDATA_DOUBLE kTemperature = (NETDATA_DOUBLE)p->thermalZoneTemperature.current.Data; - kTemperature -= 273.15; - - rrddim_set_by_pointer(p->st, p->rd, (collected_number)kTemperature); - rrdset_done(p->st); - } - - return true; -} - -int do_PerflibThermalZone(int update_every, usec_t dt __maybe_unused) { - static bool initialized = false; - - if(unlikely(!initialized)) { - initialize(); - initialized = true; - 
} - - DWORD id = RegistryFindIDByName("Thermal Zone Information"); - if(id == PERFLIB_REGISTRY_NAME_NOT_FOUND) - return -1; - - PERF_DATA_BLOCK *pDataBlock = perflibGetPerformanceData(id); - if(!pDataBlock) return -1; - - do_thermal_zones(pDataBlock, update_every); - - return 0; -} +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "windows_plugin.h" +#include "windows-internals.h" + +struct thermal_zone { + RRDSET *st; + RRDDIM *rd; + + COUNTER_DATA thermalZoneTemperature; +}; + +static inline void initialize_thermal_zone_keys(struct thermal_zone *p) { + p->thermalZoneTemperature.key = "Temperature"; +} + +void dict_thermal_zone_insert_cb(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { + struct thermal_zone *p = value; + initialize_thermal_zone_keys(p); +} + +static DICTIONARY *thermal_zones = NULL; + +static void initialize(void) { + thermal_zones = dictionary_create_advanced(DICT_OPTION_DONT_OVERWRITE_VALUE | + DICT_OPTION_FIXED_SIZE, NULL, sizeof(struct thermal_zone)); + + dictionary_register_insert_callback(thermal_zones, dict_thermal_zone_insert_cb, NULL); +} + +static bool do_thermal_zones(PERF_DATA_BLOCK *pDataBlock, int update_every) { + PERF_OBJECT_TYPE *pObjectType = perflibFindObjectTypeByName(pDataBlock, "Thermal Zone Information"); + if(!pObjectType) return false; + + PERF_INSTANCE_DEFINITION *pi = NULL; + for(LONG i = 0; i < pObjectType->NumInstances ; i++) { + pi = perflibForEachInstance(pDataBlock, pObjectType, pi); + if(!pi) break; + + if(!getInstanceName(pDataBlock, pObjectType, pi, windows_shared_buffer, sizeof(windows_shared_buffer))) + strncpyz(windows_shared_buffer, "[unknown]", sizeof(windows_shared_buffer) - 1); + + netdata_fix_chart_name(windows_shared_buffer); + struct thermal_zone *p = dictionary_set(thermal_zones, windows_shared_buffer, NULL, sizeof(*p)); + + perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &p->thermalZoneTemperature); + + // 
https://learn.microsoft.com/en-us/windows-hardware/design/device-experiences/design-guide + if(!p->st) { + char id[RRD_ID_LENGTH_MAX + 1]; + snprintfz(id, RRD_ID_LENGTH_MAX, "thermalzone_%s_temperature", windows_shared_buffer); + p->st = rrdset_create_localhost( + "system" + , id, NULL + , "thermalzone" + , "system.thermalzone_temperature" + , "Thermal zone temperature" + , "Celsius" + , PLUGIN_WINDOWS_NAME + , "ThermalZone" + , NETDATA_CHART_PRIO_WINDOWS_THERMAL_ZONES + , update_every + , RRDSET_TYPE_LINE + ); + + p->rd = rrddim_add(p->st, + id, + "temperature", + 1, + 1, + RRD_ALGORITHM_ABSOLUTE); + + rrdlabels_add(p->st->rrdlabels, "thermalzone", windows_shared_buffer, RRDLABEL_SRC_AUTO); + } + + // Convert to Celsius before to plot + NETDATA_DOUBLE kTemperature = (NETDATA_DOUBLE)p->thermalZoneTemperature.current.Data; + kTemperature -= 273.15; + + rrddim_set_by_pointer(p->st, p->rd, (collected_number)kTemperature); + rrdset_done(p->st); + } + + return true; +} + +int do_PerflibThermalZone(int update_every, usec_t dt __maybe_unused) { + static bool initialized = false; + + if(unlikely(!initialized)) { + initialize(); + initialized = true; + } + + DWORD id = RegistryFindIDByName("Thermal Zone Information"); + if(id == PERFLIB_REGISTRY_NAME_NOT_FOUND) + return -1; + + PERF_DATA_BLOCK *pDataBlock = perflibGetPerformanceData(id); + if(!pDataBlock) return -1; + + do_thermal_zones(pDataBlock, update_every); + + return 0; +} diff --git a/src/collectors/windows.plugin/windows-internals.h b/src/collectors/windows.plugin/windows-internals.h index 1b7cccc730e725..b135fa4701ac00 100644 --- a/src/collectors/windows.plugin/windows-internals.h +++ b/src/collectors/windows.plugin/windows-internals.h @@ -1,18 +1,17 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#ifndef NETDATA_WINDOWS_INTERNALS_H -#define NETDATA_WINDOWS_INTERNALS_H - -#include - -static inline ULONGLONG FileTimeToULL(FILETIME ft) { - ULARGE_INTEGER ul; - ul.LowPart = ft.dwLowDateTime; - ul.HighPart = 
ft.dwHighDateTime; - return ul.QuadPart; -} - -#include "perflib.h" -#include "perflib-rrd.h" - -#endif //NETDATA_WINDOWS_INTERNALS_H +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_WINDOWS_INTERNALS_H +#define NETDATA_WINDOWS_INTERNALS_H + +#include + +static inline ULONGLONG FileTimeToULL(FILETIME ft) { + ULARGE_INTEGER ul; + ul.LowPart = ft.dwLowDateTime; + ul.HighPart = ft.dwHighDateTime; + return ul.QuadPart; +} + +#include "perflib-rrd.h" + +#endif //NETDATA_WINDOWS_INTERNALS_H diff --git a/src/collectors/windows.plugin/windows_plugin.h b/src/collectors/windows.plugin/windows_plugin.h index 1d3b13fe4ab9a7..0a3c7d4fbca44e 100644 --- a/src/collectors/windows.plugin/windows_plugin.h +++ b/src/collectors/windows.plugin/windows_plugin.h @@ -26,6 +26,4 @@ int do_PerflibMemory(int update_every, usec_t dt); int do_PerflibObjects(int update_every, usec_t dt); int do_PerflibThermalZone(int update_every, usec_t dt); -#include "perflib.h" - #endif //NETDATA_WINDOWS_PLUGIN_H diff --git a/src/libnetdata/libnetdata.c b/src/libnetdata/libnetdata.c index 84aa6e339f4f67..fad00c3a7fe2d4 100644 --- a/src/libnetdata/libnetdata.c +++ b/src/libnetdata/libnetdata.c @@ -478,536 +478,6 @@ void json_fix_string(char *s) { } } -unsigned char netdata_map_chart_names[256] = { - [0] = '\0', // - [1] = '_', // - [2] = '_', // - [3] = '_', // - [4] = '_', // - [5] = '_', // - [6] = '_', // - [7] = '_', // - [8] = '_', // - [9] = '_', // - [10] = '_', // - [11] = '_', // - [12] = '_', // - [13] = '_', // - [14] = '_', // - [15] = '_', // - [16] = '_', // - [17] = '_', // - [18] = '_', // - [19] = '_', // - [20] = '_', // - [21] = '_', // - [22] = '_', // - [23] = '_', // - [24] = '_', // - [25] = '_', // - [26] = '_', // - [27] = '_', // - [28] = '_', // - [29] = '_', // - [30] = '_', // - [31] = '_', // - [32] = '_', // - [33] = '_', // ! 
- [34] = '_', // " - [35] = '_', // # - [36] = '_', // $ - [37] = '_', // % - [38] = '_', // & - [39] = '_', // ' - [40] = '_', // ( - [41] = '_', // ) - [42] = '_', // * - [43] = '_', // + - [44] = '.', // , - [45] = '-', // - - [46] = '.', // . - [47] = '/', // / - [48] = '0', // 0 - [49] = '1', // 1 - [50] = '2', // 2 - [51] = '3', // 3 - [52] = '4', // 4 - [53] = '5', // 5 - [54] = '6', // 6 - [55] = '7', // 7 - [56] = '8', // 8 - [57] = '9', // 9 - [58] = '_', // : - [59] = '_', // ; - [60] = '_', // < - [61] = '_', // = - [62] = '_', // > - [63] = '_', // ? - [64] = '_', // @ - [65] = 'a', // A - [66] = 'b', // B - [67] = 'c', // C - [68] = 'd', // D - [69] = 'e', // E - [70] = 'f', // F - [71] = 'g', // G - [72] = 'h', // H - [73] = 'i', // I - [74] = 'j', // J - [75] = 'k', // K - [76] = 'l', // L - [77] = 'm', // M - [78] = 'n', // N - [79] = 'o', // O - [80] = 'p', // P - [81] = 'q', // Q - [82] = 'r', // R - [83] = 's', // S - [84] = 't', // T - [85] = 'u', // U - [86] = 'v', // V - [87] = 'w', // W - [88] = 'x', // X - [89] = 'y', // Y - [90] = 'z', // Z - [91] = '_', // [ - [92] = '/', // backslash - [93] = '_', // ] - [94] = '_', // ^ - [95] = '_', // _ - [96] = '_', // ` - [97] = 'a', // a - [98] = 'b', // b - [99] = 'c', // c - [100] = 'd', // d - [101] = 'e', // e - [102] = 'f', // f - [103] = 'g', // g - [104] = 'h', // h - [105] = 'i', // i - [106] = 'j', // j - [107] = 'k', // k - [108] = 'l', // l - [109] = 'm', // m - [110] = 'n', // n - [111] = 'o', // o - [112] = 'p', // p - [113] = 'q', // q - [114] = 'r', // r - [115] = 's', // s - [116] = 't', // t - [117] = 'u', // u - [118] = 'v', // v - [119] = 'w', // w - [120] = 'x', // x - [121] = 'y', // y - [122] = 'z', // z - [123] = '_', // { - [124] = '_', // | - [125] = '_', // } - [126] = '_', // ~ - [127] = '_', // - [128] = '_', // - [129] = '_', // - [130] = '_', // - [131] = '_', // - [132] = '_', // - [133] = '_', // - [134] = '_', // - [135] = '_', // - [136] = '_', // - [137] = '_', // 
- [138] = '_', // - [139] = '_', // - [140] = '_', // - [141] = '_', // - [142] = '_', // - [143] = '_', // - [144] = '_', // - [145] = '_', // - [146] = '_', // - [147] = '_', // - [148] = '_', // - [149] = '_', // - [150] = '_', // - [151] = '_', // - [152] = '_', // - [153] = '_', // - [154] = '_', // - [155] = '_', // - [156] = '_', // - [157] = '_', // - [158] = '_', // - [159] = '_', // - [160] = '_', // - [161] = '_', // - [162] = '_', // - [163] = '_', // - [164] = '_', // - [165] = '_', // - [166] = '_', // - [167] = '_', // - [168] = '_', // - [169] = '_', // - [170] = '_', // - [171] = '_', // - [172] = '_', // - [173] = '_', // - [174] = '_', // - [175] = '_', // - [176] = '_', // - [177] = '_', // - [178] = '_', // - [179] = '_', // - [180] = '_', // - [181] = '_', // - [182] = '_', // - [183] = '_', // - [184] = '_', // - [185] = '_', // - [186] = '_', // - [187] = '_', // - [188] = '_', // - [189] = '_', // - [190] = '_', // - [191] = '_', // - [192] = '_', // - [193] = '_', // - [194] = '_', // - [195] = '_', // - [196] = '_', // - [197] = '_', // - [198] = '_', // - [199] = '_', // - [200] = '_', // - [201] = '_', // - [202] = '_', // - [203] = '_', // - [204] = '_', // - [205] = '_', // - [206] = '_', // - [207] = '_', // - [208] = '_', // - [209] = '_', // - [210] = '_', // - [211] = '_', // - [212] = '_', // - [213] = '_', // - [214] = '_', // - [215] = '_', // - [216] = '_', // - [217] = '_', // - [218] = '_', // - [219] = '_', // - [220] = '_', // - [221] = '_', // - [222] = '_', // - [223] = '_', // - [224] = '_', // - [225] = '_', // - [226] = '_', // - [227] = '_', // - [228] = '_', // - [229] = '_', // - [230] = '_', // - [231] = '_', // - [232] = '_', // - [233] = '_', // - [234] = '_', // - [235] = '_', // - [236] = '_', // - [237] = '_', // - [238] = '_', // - [239] = '_', // - [240] = '_', // - [241] = '_', // - [242] = '_', // - [243] = '_', // - [244] = '_', // - [245] = '_', // - [246] = '_', // - [247] = '_', // - [248] = '_', // - 
[249] = '_', // - [250] = '_', // - [251] = '_', // - [252] = '_', // - [253] = '_', // - [254] = '_', // - [255] = '_' // -}; - -// make sure the supplied string -// is good for a netdata chart/dimension ID/NAME -void netdata_fix_chart_name(char *s) { - while ((*s = netdata_map_chart_names[(unsigned char) *s])) s++; -} - -unsigned char netdata_map_chart_ids[256] = { - [0] = '\0', // - [1] = '_', // - [2] = '_', // - [3] = '_', // - [4] = '_', // - [5] = '_', // - [6] = '_', // - [7] = '_', // - [8] = '_', // - [9] = '_', // - [10] = '_', // - [11] = '_', // - [12] = '_', // - [13] = '_', // - [14] = '_', // - [15] = '_', // - [16] = '_', // - [17] = '_', // - [18] = '_', // - [19] = '_', // - [20] = '_', // - [21] = '_', // - [22] = '_', // - [23] = '_', // - [24] = '_', // - [25] = '_', // - [26] = '_', // - [27] = '_', // - [28] = '_', // - [29] = '_', // - [30] = '_', // - [31] = '_', // - [32] = '_', // - [33] = '_', // ! - [34] = '_', // " - [35] = '_', // # - [36] = '_', // $ - [37] = '_', // % - [38] = '_', // & - [39] = '_', // ' - [40] = '_', // ( - [41] = '_', // ) - [42] = '_', // * - [43] = '_', // + - [44] = '.', // , - [45] = '-', // - - [46] = '.', // . - [47] = '_', // / - [48] = '0', // 0 - [49] = '1', // 1 - [50] = '2', // 2 - [51] = '3', // 3 - [52] = '4', // 4 - [53] = '5', // 5 - [54] = '6', // 6 - [55] = '7', // 7 - [56] = '8', // 8 - [57] = '9', // 9 - [58] = '_', // : - [59] = '_', // ; - [60] = '_', // < - [61] = '_', // = - [62] = '_', // > - [63] = '_', // ? 
- [64] = '_', // @ - [65] = 'a', // A - [66] = 'b', // B - [67] = 'c', // C - [68] = 'd', // D - [69] = 'e', // E - [70] = 'f', // F - [71] = 'g', // G - [72] = 'h', // H - [73] = 'i', // I - [74] = 'j', // J - [75] = 'k', // K - [76] = 'l', // L - [77] = 'm', // M - [78] = 'n', // N - [79] = 'o', // O - [80] = 'p', // P - [81] = 'q', // Q - [82] = 'r', // R - [83] = 's', // S - [84] = 't', // T - [85] = 'u', // U - [86] = 'v', // V - [87] = 'w', // W - [88] = 'x', // X - [89] = 'y', // Y - [90] = 'z', // Z - [91] = '_', // [ - [92] = '_', // backslash - [93] = '_', // ] - [94] = '_', // ^ - [95] = '_', // _ - [96] = '_', // ` - [97] = 'a', // a - [98] = 'b', // b - [99] = 'c', // c - [100] = 'd', // d - [101] = 'e', // e - [102] = 'f', // f - [103] = 'g', // g - [104] = 'h', // h - [105] = 'i', // i - [106] = 'j', // j - [107] = 'k', // k - [108] = 'l', // l - [109] = 'm', // m - [110] = 'n', // n - [111] = 'o', // o - [112] = 'p', // p - [113] = 'q', // q - [114] = 'r', // r - [115] = 's', // s - [116] = 't', // t - [117] = 'u', // u - [118] = 'v', // v - [119] = 'w', // w - [120] = 'x', // x - [121] = 'y', // y - [122] = 'z', // z - [123] = '_', // { - [124] = '_', // | - [125] = '_', // } - [126] = '_', // ~ - [127] = '_', // - [128] = '_', // - [129] = '_', // - [130] = '_', // - [131] = '_', // - [132] = '_', // - [133] = '_', // - [134] = '_', // - [135] = '_', // - [136] = '_', // - [137] = '_', // - [138] = '_', // - [139] = '_', // - [140] = '_', // - [141] = '_', // - [142] = '_', // - [143] = '_', // - [144] = '_', // - [145] = '_', // - [146] = '_', // - [147] = '_', // - [148] = '_', // - [149] = '_', // - [150] = '_', // - [151] = '_', // - [152] = '_', // - [153] = '_', // - [154] = '_', // - [155] = '_', // - [156] = '_', // - [157] = '_', // - [158] = '_', // - [159] = '_', // - [160] = '_', // - [161] = '_', // - [162] = '_', // - [163] = '_', // - [164] = '_', // - [165] = '_', // - [166] = '_', // - [167] = '_', // - [168] = '_', // - [169] = 
'_', // - [170] = '_', // - [171] = '_', // - [172] = '_', // - [173] = '_', // - [174] = '_', // - [175] = '_', // - [176] = '_', // - [177] = '_', // - [178] = '_', // - [179] = '_', // - [180] = '_', // - [181] = '_', // - [182] = '_', // - [183] = '_', // - [184] = '_', // - [185] = '_', // - [186] = '_', // - [187] = '_', // - [188] = '_', // - [189] = '_', // - [190] = '_', // - [191] = '_', // - [192] = '_', // - [193] = '_', // - [194] = '_', // - [195] = '_', // - [196] = '_', // - [197] = '_', // - [198] = '_', // - [199] = '_', // - [200] = '_', // - [201] = '_', // - [202] = '_', // - [203] = '_', // - [204] = '_', // - [205] = '_', // - [206] = '_', // - [207] = '_', // - [208] = '_', // - [209] = '_', // - [210] = '_', // - [211] = '_', // - [212] = '_', // - [213] = '_', // - [214] = '_', // - [215] = '_', // - [216] = '_', // - [217] = '_', // - [218] = '_', // - [219] = '_', // - [220] = '_', // - [221] = '_', // - [222] = '_', // - [223] = '_', // - [224] = '_', // - [225] = '_', // - [226] = '_', // - [227] = '_', // - [228] = '_', // - [229] = '_', // - [230] = '_', // - [231] = '_', // - [232] = '_', // - [233] = '_', // - [234] = '_', // - [235] = '_', // - [236] = '_', // - [237] = '_', // - [238] = '_', // - [239] = '_', // - [240] = '_', // - [241] = '_', // - [242] = '_', // - [243] = '_', // - [244] = '_', // - [245] = '_', // - [246] = '_', // - [247] = '_', // - [248] = '_', // - [249] = '_', // - [250] = '_', // - [251] = '_', // - [252] = '_', // - [253] = '_', // - [254] = '_', // - [255] = '_' // -}; - -// make sure the supplied string -// is good for a netdata chart/dimension ID/NAME -void netdata_fix_chart_id(char *s) { - while ((*s = netdata_map_chart_ids[(unsigned char) *s])) s++; -} - static int memory_file_open(const char *filename, size_t size) { // netdata_log_info("memory_file_open('%s', %zu", filename, size); diff --git a/src/libnetdata/libnetdata.h b/src/libnetdata/libnetdata.h index d27d33e5a88959..ba796062c2a4ed 100644 
--- a/src/libnetdata/libnetdata.h +++ b/src/libnetdata/libnetdata.h @@ -327,9 +327,6 @@ typedef uint32_t uid_t; #include "storage-point.h" #include "paths/paths.h" -void netdata_fix_chart_id(char *s); -void netdata_fix_chart_name(char *s); - int madvise_sequential(void *mem, size_t len); int madvise_random(void *mem, size_t len); int madvise_dontfork(void *mem, size_t len); @@ -475,6 +472,8 @@ extern const char *netdata_configured_host_prefix; #include "string/string.h" #include "dictionary/dictionary.h" #include "dictionary/thread-cache.h" +#include "sanitizers/chart_id_and_name.h" + #if defined(HAVE_LIBBPF) && !defined(__cplusplus) #include "ebpf/ebpf.h" #endif diff --git a/src/libnetdata/os/get_system_cpus.c b/src/libnetdata/os/get_system_cpus.c index 5a76d8aa5d4d8d..79acb188b1f640 100644 --- a/src/libnetdata/os/get_system_cpus.c +++ b/src/libnetdata/os/get_system_cpus.c @@ -82,7 +82,14 @@ long os_get_system_cpus_cached(bool cache, bool for_netdata) { SYSTEM_INFO sysInfo; GetSystemInfo(&sysInfo); - return (long) sysInfo.dwNumberOfProcessors; + processors[index] = sysInfo.dwNumberOfProcessors; + + if(processors[index] < 1) { + processors[index] = 1; + netdata_log_error("Assuming system has %ld processors.", processors[index]); + } + + return processors[index]; #else diff --git a/src/libnetdata/os/os.h b/src/libnetdata/os/os.h index b87b35c96abbd7..04bbaf7d1300ff 100644 --- a/src/libnetdata/os/os.h +++ b/src/libnetdata/os/os.h @@ -23,6 +23,7 @@ #include "os-freebsd-wrappers.h" #include "os-macos-wrappers.h" #include "os-windows-wrappers.h" +#include "windows-perflib/perflib.h" // ===================================================================================================================== // common defs for Apple/FreeBSD/Linux diff --git a/src/collectors/windows.plugin/perflib-dump.c b/src/libnetdata/os/windows-perflib/perflib-dump.c similarity index 97% rename from src/collectors/windows.plugin/perflib-dump.c rename to 
src/libnetdata/os/windows-perflib/perflib-dump.c index e01813a49ffcdf..6f0ee8de6ecb84 100644 --- a/src/collectors/windows.plugin/perflib-dump.c +++ b/src/libnetdata/os/windows-perflib/perflib-dump.c @@ -1,529 +1,533 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "perflib.h" -#include "windows-internals.h" - -static const char *getCounterType(DWORD CounterType) { - switch (CounterType) { - case PERF_COUNTER_COUNTER: - return "PERF_COUNTER_COUNTER"; - - case PERF_COUNTER_TIMER: - return "PERF_COUNTER_TIMER"; - - case PERF_COUNTER_QUEUELEN_TYPE: - return "PERF_COUNTER_QUEUELEN_TYPE"; - - case PERF_COUNTER_LARGE_QUEUELEN_TYPE: - return "PERF_COUNTER_LARGE_QUEUELEN_TYPE"; - - case PERF_COUNTER_100NS_QUEUELEN_TYPE: - return "PERF_COUNTER_100NS_QUEUELEN_TYPE"; - - case PERF_COUNTER_OBJ_TIME_QUEUELEN_TYPE: - return "PERF_COUNTER_OBJ_TIME_QUEUELEN_TYPE"; - - case PERF_COUNTER_BULK_COUNT: - return "PERF_COUNTER_BULK_COUNT"; - - case PERF_COUNTER_TEXT: - return "PERF_COUNTER_TEXT"; - - case PERF_COUNTER_RAWCOUNT: - return "PERF_COUNTER_RAWCOUNT"; - - case PERF_COUNTER_LARGE_RAWCOUNT: - return "PERF_COUNTER_LARGE_RAWCOUNT"; - - case PERF_COUNTER_RAWCOUNT_HEX: - return "PERF_COUNTER_RAWCOUNT_HEX"; - - case PERF_COUNTER_LARGE_RAWCOUNT_HEX: - return "PERF_COUNTER_LARGE_RAWCOUNT_HEX"; - - case PERF_SAMPLE_FRACTION: - return "PERF_SAMPLE_FRACTION"; - - case PERF_SAMPLE_COUNTER: - return "PERF_SAMPLE_COUNTER"; - - case PERF_COUNTER_NODATA: - return "PERF_COUNTER_NODATA"; - - case PERF_COUNTER_TIMER_INV: - return "PERF_COUNTER_TIMER_INV"; - - case PERF_SAMPLE_BASE: - return "PERF_SAMPLE_BASE"; - - case PERF_AVERAGE_TIMER: - return "PERF_AVERAGE_TIMER"; - - case PERF_AVERAGE_BASE: - return "PERF_AVERAGE_BASE"; - - case PERF_AVERAGE_BULK: - return "PERF_AVERAGE_BULK"; - - case PERF_OBJ_TIME_TIMER: - return "PERF_OBJ_TIME_TIMER"; - - case PERF_100NSEC_TIMER: - return "PERF_100NSEC_TIMER"; - - case PERF_100NSEC_TIMER_INV: - return "PERF_100NSEC_TIMER_INV"; - - case 
PERF_COUNTER_MULTI_TIMER: - return "PERF_COUNTER_MULTI_TIMER"; - - case PERF_COUNTER_MULTI_TIMER_INV: - return "PERF_COUNTER_MULTI_TIMER_INV"; - - case PERF_COUNTER_MULTI_BASE: - return "PERF_COUNTER_MULTI_BASE"; - - case PERF_100NSEC_MULTI_TIMER: - return "PERF_100NSEC_MULTI_TIMER"; - - case PERF_100NSEC_MULTI_TIMER_INV: - return "PERF_100NSEC_MULTI_TIMER_INV"; - - case PERF_RAW_FRACTION: - return "PERF_RAW_FRACTION"; - - case PERF_LARGE_RAW_FRACTION: - return "PERF_LARGE_RAW_FRACTION"; - - case PERF_RAW_BASE: - return "PERF_RAW_BASE"; - - case PERF_LARGE_RAW_BASE: - return "PERF_LARGE_RAW_BASE"; - - case PERF_ELAPSED_TIME: - return "PERF_ELAPSED_TIME"; - - case PERF_COUNTER_HISTOGRAM_TYPE: - return "PERF_COUNTER_HISTOGRAM_TYPE"; - - case PERF_COUNTER_DELTA: - return "PERF_COUNTER_DELTA"; - - case PERF_COUNTER_LARGE_DELTA: - return "PERF_COUNTER_LARGE_DELTA"; - - case PERF_PRECISION_SYSTEM_TIMER: - return "PERF_PRECISION_SYSTEM_TIMER"; - - case PERF_PRECISION_100NS_TIMER: - return "PERF_PRECISION_100NS_TIMER"; - - case PERF_PRECISION_OBJECT_TIMER: - return "PERF_PRECISION_OBJECT_TIMER"; - - default: - return "UNKNOWN_COUNTER_TYPE"; - } -} - -static const char *getCounterDescription(DWORD CounterType) { - switch (CounterType) { - case PERF_COUNTER_COUNTER: - return "32-bit Counter. Divide delta by delta time. Display suffix: \"/sec\""; - - case PERF_COUNTER_TIMER: - return "64-bit Timer. Divide delta by delta time. Display suffix: \"%\""; - - case PERF_COUNTER_QUEUELEN_TYPE: - case PERF_COUNTER_LARGE_QUEUELEN_TYPE: - return "Queue Length Space-Time Product. Divide delta by delta time. No Display Suffix"; - - case PERF_COUNTER_100NS_QUEUELEN_TYPE: - return "Queue Length Space-Time Product using 100 Ns timebase. Divide delta by delta time. No Display Suffix"; - - case PERF_COUNTER_OBJ_TIME_QUEUELEN_TYPE: - return "Queue Length Space-Time Product using Object specific timebase. Divide delta by delta time. 
No Display Suffix."; - - case PERF_COUNTER_BULK_COUNT: - return "64-bit Counter. Divide delta by delta time. Display Suffix: \"/sec\""; - - case PERF_COUNTER_TEXT: - return "Unicode text Display as text."; - - case PERF_COUNTER_RAWCOUNT: - case PERF_COUNTER_LARGE_RAWCOUNT: - return "A counter which should not be time averaged on display (such as an error counter on a serial line). Display as is. No Display Suffix."; - - case PERF_COUNTER_RAWCOUNT_HEX: - case PERF_COUNTER_LARGE_RAWCOUNT_HEX: - return "Special case for RAWCOUNT which should be displayed in hex. A counter which should not be time averaged on display (such as an error counter on a serial line). Display as is. No Display Suffix."; - - case PERF_SAMPLE_FRACTION: - return "A count which is either 1 or 0 on each sampling interrupt (% busy). Divide delta by delta base. Display Suffix: \"%\""; - - case PERF_SAMPLE_COUNTER: - return "A count which is sampled on each sampling interrupt (queue length). Divide delta by delta time. No Display Suffix."; - - case PERF_COUNTER_NODATA: - return "A label: no data is associated with this counter (it has 0 length). Do not display."; - - case PERF_COUNTER_TIMER_INV: - return "64-bit Timer inverse (e.g., idle is measured, but display busy %). Display 100 - delta divided by delta time. Display suffix: \"%\""; - - case PERF_SAMPLE_BASE: - return "The divisor for a sample, used with the previous counter to form a sampled %. You must check for >0 before dividing by this! This counter will directly follow the numerator counter. It should not be displayed to the user."; - - case PERF_AVERAGE_TIMER: - return "A timer which, when divided by an average base, produces a time in seconds which is the average time of some operation. This timer times total operations, and the base is the number of operations. Display Suffix: \"sec\""; - - case PERF_AVERAGE_BASE: - return "Used as the denominator in the computation of time or count averages. Must directly follow the numerator counter. 
Not displayed to the user."; - - case PERF_AVERAGE_BULK: - return "A bulk count which, when divided (typically) by the number of operations, gives (typically) the number of bytes per operation. No Display Suffix."; - - case PERF_OBJ_TIME_TIMER: - return "64-bit Timer in object specific units. Display delta divided by delta time as returned in the object type header structure. Display suffix: \"%\""; - - case PERF_100NSEC_TIMER: - return "64-bit Timer in 100 nsec units. Display delta divided by delta time. Display suffix: \"%\""; - - case PERF_100NSEC_TIMER_INV: - return "64-bit Timer inverse (e.g., idle is measured, but display busy %). Display 100 - delta divided by delta time. Display suffix: \"%\""; - - case PERF_COUNTER_MULTI_TIMER: - return "64-bit Timer. Divide delta by delta time. Display suffix: \"%\". Timer for multiple instances, so result can exceed 100%."; - - case PERF_COUNTER_MULTI_TIMER_INV: - return "64-bit Timer inverse (e.g., idle is measured, but display busy %). Display 100 * _MULTI_BASE - delta divided by delta time. Display suffix: \"%\" Timer for multiple instances, so result can exceed 100%. Followed by a counter of type _MULTI_BASE."; - - case PERF_COUNTER_MULTI_BASE: - return "Number of instances to which the preceding _MULTI_..._INV counter applies. Used as a factor to get the percentage."; - - case PERF_100NSEC_MULTI_TIMER: - return "64-bit Timer in 100 nsec units. Display delta divided by delta time. Display suffix: \"%\" Timer for multiple instances, so result can exceed 100%."; - - case PERF_100NSEC_MULTI_TIMER_INV: - return "64-bit Timer inverse (e.g., idle is measured, but display busy %). Display 100 * _MULTI_BASE - delta divided by delta time. Display suffix: \"%\" Timer for multiple instances, so result can exceed 100%. 
Followed by a counter of type _MULTI_BASE."; - - case PERF_LARGE_RAW_FRACTION: - case PERF_RAW_FRACTION: - return "Indicates the data is a fraction of the following counter which should not be time averaged on display (such as free space over total space.) Display as is. Display the quotient as \"%\""; - - case PERF_RAW_BASE: - case PERF_LARGE_RAW_BASE: - return "Indicates the data is a base for the preceding counter which should not be time averaged on display (such as free space over total space.)"; - - case PERF_ELAPSED_TIME: - return "The data collected in this counter is actually the start time of the item being measured. For display, this data is subtracted from the sample time to yield the elapsed time as the difference between the two. In the definition below, the PerfTime field of the Object contains the sample time as indicated by the PERF_OBJECT_TIMER bit and the difference is scaled by the PerfFreq of the Object to convert the time units into seconds."; - - case PERF_COUNTER_HISTOGRAM_TYPE: - return "Counter type can be used with the preceding types to define a range of values to be displayed in a histogram."; - - case PERF_COUNTER_DELTA: - case PERF_COUNTER_LARGE_DELTA: - return "This counter is used to display the difference from one sample to the next. The counter value is a constantly increasing number and the value displayed is the difference between the current value and the previous value. Negative numbers are not allowed which shouldn't be a problem as long as the counter value is increasing or unchanged."; - - case PERF_PRECISION_SYSTEM_TIMER: - return "The precision counters are timers that consist of two counter values:\r\n\t1) the count of elapsed time of the event being monitored\r\n\t2) the \"clock\" time in the same units\r\nthe precision timers are used where the standard system timers are not precise enough for accurate readings. 
It's assumed that the service providing the data is also providing a timestamp at the same time which will eliminate any error that may occur since some small and variable time elapses between the time the system timestamp is captured and when the data is collected from the performance DLL. Only in extreme cases has this been observed to be problematic.\r\nwhen using this type of timer, the definition of the PERF_PRECISION_TIMESTAMP counter must immediately follow the definition of the PERF_PRECISION_*_TIMER in the Object header\r\nThe timer used has the same frequency as the System Performance Timer"; - - case PERF_PRECISION_100NS_TIMER: - return "The precision counters are timers that consist of two counter values:\r\n\t1) the count of elapsed time of the event being monitored\r\n\t2) the \"clock\" time in the same units\r\nthe precision timers are used where the standard system timers are not precise enough for accurate readings. It's assumed that the service providing the data is also providing a timestamp at the same time which will eliminate any error that may occur since some small and variable time elapses between the time the system timestamp is captured and when the data is collected from the performance DLL. Only in extreme cases has this been observed to be problematic.\r\nwhen using this type of timer, the definition of the PERF_PRECISION_TIMESTAMP counter must immediately follow the definition of the PERF_PRECISION_*_TIMER in the Object header\r\nThe timer used has the same frequency as the 100 NanoSecond Timer"; - - case PERF_PRECISION_OBJECT_TIMER: - return "The precision counters are timers that consist of two counter values:\r\n\t1) the count of elapsed time of the event being monitored\r\n\t2) the \"clock\" time in the same units\r\nthe precision timers are used where the standard system timers are not precise enough for accurate readings. 
It's assumed that the service providing the data is also providing a timestamp at the same time which will eliminate any error that may occur since some small and variable time elapses between the time the system timestamp is captured and when the data is collected from the performance DLL. Only in extreme cases has this been observed to be problematic.\r\nwhen using this type of timer, the definition of the PERF_PRECISION_TIMESTAMP counter must immediately follow the definition of the PERF_PRECISION_*_TIMER in the Object header\r\nThe timer used is of the frequency specified in the Object header's. PerfFreq field (PerfTime is ignored)"; - - default: - return ""; - } -} - -static const char *getCounterAlgorithm(DWORD CounterType) { - switch (CounterType) - { - case PERF_COUNTER_COUNTER: - case PERF_SAMPLE_COUNTER: - case PERF_COUNTER_BULK_COUNT: - return "(data1 - data0) / ((time1 - time0) / frequency)"; - - case PERF_COUNTER_QUEUELEN_TYPE: - case PERF_COUNTER_100NS_QUEUELEN_TYPE: - case PERF_COUNTER_OBJ_TIME_QUEUELEN_TYPE: - case PERF_COUNTER_LARGE_QUEUELEN_TYPE: - case PERF_AVERAGE_BULK: // normally not displayed - return "(data1 - data0) / (time1 - time0)"; - - case PERF_OBJ_TIME_TIMER: - case PERF_COUNTER_TIMER: - case PERF_100NSEC_TIMER: - case PERF_PRECISION_SYSTEM_TIMER: - case PERF_PRECISION_100NS_TIMER: - case PERF_PRECISION_OBJECT_TIMER: - case PERF_SAMPLE_FRACTION: - return "100 * (data1 - data0) / (time1 - time0)"; - - case PERF_COUNTER_TIMER_INV: - return "100 * (1 - ((data1 - data0) / (time1 - time0)))"; - - case PERF_100NSEC_TIMER_INV: - return "100 * (1- (data1 - data0) / (time1 - time0))"; - - case PERF_COUNTER_MULTI_TIMER: - return "100 * ((data1 - data0) / ((time1 - time0) / frequency1)) / multi1"; - - case PERF_100NSEC_MULTI_TIMER: - return "100 * ((data1 - data0) / (time1 - time0)) / multi1"; - - case PERF_COUNTER_MULTI_TIMER_INV: - case PERF_100NSEC_MULTI_TIMER_INV: - return "100 * (multi1 - ((data1 - data0) / (time1 - time0)))"; - - case 
PERF_COUNTER_RAWCOUNT: - case PERF_COUNTER_LARGE_RAWCOUNT: - return "data0"; - - case PERF_COUNTER_RAWCOUNT_HEX: - case PERF_COUNTER_LARGE_RAWCOUNT_HEX: - return "hex(data0)"; - - case PERF_COUNTER_DELTA: - case PERF_COUNTER_LARGE_DELTA: - return "data1 - data0"; - - case PERF_RAW_FRACTION: - case PERF_LARGE_RAW_FRACTION: - return "100 * data0 / time0"; - - case PERF_AVERAGE_TIMER: - return "((data1 - data0) / frequency1) / (time1 - time0)"; - - case PERF_ELAPSED_TIME: - return "(time0 - data0) / frequency0"; - - case PERF_COUNTER_TEXT: - case PERF_SAMPLE_BASE: - case PERF_AVERAGE_BASE: - case PERF_COUNTER_MULTI_BASE: - case PERF_RAW_BASE: - case PERF_COUNTER_NODATA: - case PERF_PRECISION_TIMESTAMP: - default: - return ""; - } -} - -void dumpSystemTime(BUFFER *wb, SYSTEMTIME *st) { - buffer_json_member_add_uint64(wb, "Year", st->wYear); - buffer_json_member_add_uint64(wb, "Month", st->wMonth); - buffer_json_member_add_uint64(wb, "DayOfWeek", st->wDayOfWeek); - buffer_json_member_add_uint64(wb, "Day", st->wDay); - buffer_json_member_add_uint64(wb, "Hour", st->wHour); - buffer_json_member_add_uint64(wb, "Minute", st->wMinute); - buffer_json_member_add_uint64(wb, "Second", st->wSecond); - buffer_json_member_add_uint64(wb, "Milliseconds", st->wMilliseconds); -} - -bool dumpDataCb(PERF_DATA_BLOCK *pDataBlock, void *data) { - char name[4096]; - if(!getSystemName(pDataBlock, name, sizeof(name))) - strncpyz(name, "[failed]", sizeof(name) - 1); - - BUFFER *wb = data; - buffer_json_member_add_string(wb, "SystemName", name); - - // Number of types of objects being reported - // Type: DWORD - buffer_json_member_add_int64(wb, "NumObjectTypes", pDataBlock->NumObjectTypes); - - buffer_json_member_add_int64(wb, "LittleEndian", pDataBlock->LittleEndian); - - // Version and Revision of these data structures. - // Version starts at 1. - // Revision starts at 0 for each Version. 
- // Type: DWORD - buffer_json_member_add_int64(wb, "Version", pDataBlock->Version); - buffer_json_member_add_int64(wb, "Revision", pDataBlock->Revision); - - // Object Title Index of default object to display when data from this system is retrieved - // (-1 = none, but this is not expected to be used) - // Type: LONG - buffer_json_member_add_int64(wb, "DefaultObject", pDataBlock->DefaultObject); - - // Performance counter frequency at the system under measurement - // Type: LARGE_INTEGER - buffer_json_member_add_int64(wb, "PerfFreq", pDataBlock->PerfFreq.QuadPart); - - // Performance counter value at the system under measurement - // Type: LARGE_INTEGER - buffer_json_member_add_int64(wb, "PerfTime", pDataBlock->PerfTime.QuadPart); - - // Performance counter time in 100 nsec units at the system under measurement - // Type: LARGE_INTEGER - buffer_json_member_add_int64(wb, "PerfTime100nSec", pDataBlock->PerfTime100nSec.QuadPart); - - // Time at the system under measurement in UTC - // Type: SYSTEMTIME - buffer_json_member_add_object(wb, "SystemTime"); - dumpSystemTime(wb, &pDataBlock->SystemTime); - buffer_json_object_close(wb); - - if(pDataBlock->NumObjectTypes) - buffer_json_member_add_array(wb, "Objects"); - - return true; -} - -static const char *GetDetailLevel(DWORD num) { - switch (num) { - case 100: - return "Novice (100)"; - case 200: - return "Advanced (200)"; - case 300: - return "Expert (300)"; - case 400: - return "Wizard (400)"; - - default: - return "Unknown"; - } -} - -bool dumpObjectCb(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, void *data) { - (void)pDataBlock; - BUFFER *wb = data; - if(!pObjectType) { - buffer_json_array_close(wb); // instances or counters - buffer_json_object_close(wb); // objectType - return true; - } - - buffer_json_add_array_item_object(wb); // objectType - buffer_json_member_add_int64(wb, "NameId", pObjectType->ObjectNameTitleIndex); - buffer_json_member_add_string(wb, "Name", 
RegistryFindNameByID(pObjectType->ObjectNameTitleIndex)); - buffer_json_member_add_int64(wb, "HelpId", pObjectType->ObjectHelpTitleIndex); - buffer_json_member_add_string(wb, "Help", RegistryFindHelpByID(pObjectType->ObjectHelpTitleIndex)); - buffer_json_member_add_int64(wb, "NumInstances", pObjectType->NumInstances); - buffer_json_member_add_int64(wb, "NumCounters", pObjectType->NumCounters); - buffer_json_member_add_int64(wb, "PerfTime", pObjectType->PerfTime.QuadPart); - buffer_json_member_add_int64(wb, "PerfFreq", pObjectType->PerfFreq.QuadPart); - buffer_json_member_add_int64(wb, "CodePage", pObjectType->CodePage); - buffer_json_member_add_int64(wb, "DefaultCounter", pObjectType->DefaultCounter); - buffer_json_member_add_string(wb, "DetailLevel", GetDetailLevel(pObjectType->DetailLevel)); - - if(ObjectTypeHasInstances(pDataBlock, pObjectType)) - buffer_json_member_add_array(wb, "Instances"); - else - buffer_json_member_add_array(wb, "Counters"); - - return true; -} - -bool dumpInstanceCb(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, PERF_INSTANCE_DEFINITION *pInstance, void *data) { - (void)pDataBlock; - BUFFER *wb = data; - if(!pInstance) { - buffer_json_array_close(wb); // counters - buffer_json_object_close(wb); // instance - return true; - } - - char name[4096]; - if(!getInstanceName(pDataBlock, pObjectType, pInstance, name, sizeof(name))) - strncpyz(name, "[failed]", sizeof(name) - 1); - - buffer_json_add_array_item_object(wb); - buffer_json_member_add_string(wb, "Instance", name); - buffer_json_member_add_int64(wb, "UniqueID", pInstance->UniqueID); - buffer_json_member_add_array(wb, "Labels"); - { - buffer_json_add_array_item_object(wb); - { - buffer_json_member_add_string(wb, "key", RegistryFindNameByID(pObjectType->ObjectNameTitleIndex)); - buffer_json_member_add_string(wb, "value", name); - } - buffer_json_object_close(wb); - - if(pInstance->ParentObjectTitleIndex) { - PERF_INSTANCE_DEFINITION *pi = pInstance; - 
while(pi->ParentObjectTitleIndex) { - PERF_OBJECT_TYPE *po = getObjectTypeByIndex(pDataBlock, pInstance->ParentObjectTitleIndex); - pi = getInstanceByPosition(pDataBlock, po, pi->ParentObjectInstance); - - if(!getInstanceName(pDataBlock, po, pi, name, sizeof(name))) - strncpyz(name, "[failed]", sizeof(name) - 1); - - buffer_json_add_array_item_object(wb); - { - buffer_json_member_add_string(wb, "key", RegistryFindNameByID(po->ObjectNameTitleIndex)); - buffer_json_member_add_string(wb, "value", name); - } - buffer_json_object_close(wb); - } - } - } - buffer_json_array_close(wb); // rrdlabels - - buffer_json_member_add_array(wb, "Counters"); - return true; -} - -void dumpSample(BUFFER *wb, RAW_DATA *d) { - buffer_json_member_add_object(wb, "Value"); - buffer_json_member_add_uint64(wb, "data", d->Data); - buffer_json_member_add_int64(wb, "time", d->Time); - buffer_json_member_add_uint64(wb, "type", d->CounterType); - buffer_json_member_add_int64(wb, "multi", d->MultiCounterData); - buffer_json_member_add_int64(wb, "frequency", d->Frequency); - buffer_json_object_close(wb); -} - -bool dumpCounterCb(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, PERF_COUNTER_DEFINITION *pCounter, RAW_DATA *sample, void *data) { - (void)pDataBlock; - (void)pObjectType; - BUFFER *wb = data; - buffer_json_add_array_item_object(wb); - buffer_json_member_add_string(wb, "Counter", RegistryFindNameByID(pCounter->CounterNameTitleIndex)); - dumpSample(wb, sample); - buffer_json_member_add_string(wb, "Help", RegistryFindHelpByID(pCounter->CounterHelpTitleIndex)); - buffer_json_member_add_string(wb, "Type", getCounterType(pCounter->CounterType)); - buffer_json_member_add_string(wb, "Algorithm", getCounterAlgorithm(pCounter->CounterType)); - buffer_json_member_add_string(wb, "Description", getCounterDescription(pCounter->CounterType)); - buffer_json_object_close(wb); - return true; -} - -bool dumpInstanceCounterCb(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, 
PERF_INSTANCE_DEFINITION *pInstance, PERF_COUNTER_DEFINITION *pCounter, RAW_DATA *sample, void *data) { - (void)pInstance; - return dumpCounterCb(pDataBlock, pObjectType, pCounter, sample, data); -} - - -int windows_perflib_dump(const char *key) { - if(key && !*key) - key = NULL; - - PerflibNamesRegistryInitialize(); - - DWORD id = 0; - if(key) { - id = RegistryFindIDByName(key); - if(id == PERFLIB_REGISTRY_NAME_NOT_FOUND) { - fprintf(stderr, "Cannot find key '%s' in Windows Performance Counters Registry.\n", key); - exit(1); - } - } - - CLEAN_BUFFER *wb = buffer_create(0, NULL); - buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_MINIFY); - - perflibQueryAndTraverse(id, dumpDataCb, dumpObjectCb, dumpInstanceCb, dumpInstanceCounterCb, dumpCounterCb, wb); - - buffer_json_finalize(wb); - printf("\n%s\n", buffer_tostring(wb)); - - perflibFreePerformanceData(); - - return 0; -} +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "perflib.h" + +#if defined(OS_WINDOWS) +#include + +static const char *getCounterType(DWORD CounterType) { + switch (CounterType) { + case PERF_COUNTER_COUNTER: + return "PERF_COUNTER_COUNTER"; + + case PERF_COUNTER_TIMER: + return "PERF_COUNTER_TIMER"; + + case PERF_COUNTER_QUEUELEN_TYPE: + return "PERF_COUNTER_QUEUELEN_TYPE"; + + case PERF_COUNTER_LARGE_QUEUELEN_TYPE: + return "PERF_COUNTER_LARGE_QUEUELEN_TYPE"; + + case PERF_COUNTER_100NS_QUEUELEN_TYPE: + return "PERF_COUNTER_100NS_QUEUELEN_TYPE"; + + case PERF_COUNTER_OBJ_TIME_QUEUELEN_TYPE: + return "PERF_COUNTER_OBJ_TIME_QUEUELEN_TYPE"; + + case PERF_COUNTER_BULK_COUNT: + return "PERF_COUNTER_BULK_COUNT"; + + case PERF_COUNTER_TEXT: + return "PERF_COUNTER_TEXT"; + + case PERF_COUNTER_RAWCOUNT: + return "PERF_COUNTER_RAWCOUNT"; + + case PERF_COUNTER_LARGE_RAWCOUNT: + return "PERF_COUNTER_LARGE_RAWCOUNT"; + + case PERF_COUNTER_RAWCOUNT_HEX: + return "PERF_COUNTER_RAWCOUNT_HEX"; + + case PERF_COUNTER_LARGE_RAWCOUNT_HEX: + return "PERF_COUNTER_LARGE_RAWCOUNT_HEX"; + + 
case PERF_SAMPLE_FRACTION: + return "PERF_SAMPLE_FRACTION"; + + case PERF_SAMPLE_COUNTER: + return "PERF_SAMPLE_COUNTER"; + + case PERF_COUNTER_NODATA: + return "PERF_COUNTER_NODATA"; + + case PERF_COUNTER_TIMER_INV: + return "PERF_COUNTER_TIMER_INV"; + + case PERF_SAMPLE_BASE: + return "PERF_SAMPLE_BASE"; + + case PERF_AVERAGE_TIMER: + return "PERF_AVERAGE_TIMER"; + + case PERF_AVERAGE_BASE: + return "PERF_AVERAGE_BASE"; + + case PERF_AVERAGE_BULK: + return "PERF_AVERAGE_BULK"; + + case PERF_OBJ_TIME_TIMER: + return "PERF_OBJ_TIME_TIMER"; + + case PERF_100NSEC_TIMER: + return "PERF_100NSEC_TIMER"; + + case PERF_100NSEC_TIMER_INV: + return "PERF_100NSEC_TIMER_INV"; + + case PERF_COUNTER_MULTI_TIMER: + return "PERF_COUNTER_MULTI_TIMER"; + + case PERF_COUNTER_MULTI_TIMER_INV: + return "PERF_COUNTER_MULTI_TIMER_INV"; + + case PERF_COUNTER_MULTI_BASE: + return "PERF_COUNTER_MULTI_BASE"; + + case PERF_100NSEC_MULTI_TIMER: + return "PERF_100NSEC_MULTI_TIMER"; + + case PERF_100NSEC_MULTI_TIMER_INV: + return "PERF_100NSEC_MULTI_TIMER_INV"; + + case PERF_RAW_FRACTION: + return "PERF_RAW_FRACTION"; + + case PERF_LARGE_RAW_FRACTION: + return "PERF_LARGE_RAW_FRACTION"; + + case PERF_RAW_BASE: + return "PERF_RAW_BASE"; + + case PERF_LARGE_RAW_BASE: + return "PERF_LARGE_RAW_BASE"; + + case PERF_ELAPSED_TIME: + return "PERF_ELAPSED_TIME"; + + case PERF_COUNTER_HISTOGRAM_TYPE: + return "PERF_COUNTER_HISTOGRAM_TYPE"; + + case PERF_COUNTER_DELTA: + return "PERF_COUNTER_DELTA"; + + case PERF_COUNTER_LARGE_DELTA: + return "PERF_COUNTER_LARGE_DELTA"; + + case PERF_PRECISION_SYSTEM_TIMER: + return "PERF_PRECISION_SYSTEM_TIMER"; + + case PERF_PRECISION_100NS_TIMER: + return "PERF_PRECISION_100NS_TIMER"; + + case PERF_PRECISION_OBJECT_TIMER: + return "PERF_PRECISION_OBJECT_TIMER"; + + default: + return "UNKNOWN_COUNTER_TYPE"; + } +} + +static const char *getCounterDescription(DWORD CounterType) { + switch (CounterType) { + case PERF_COUNTER_COUNTER: + return "32-bit Counter. 
Divide delta by delta time. Display suffix: \"/sec\""; + + case PERF_COUNTER_TIMER: + return "64-bit Timer. Divide delta by delta time. Display suffix: \"%\""; + + case PERF_COUNTER_QUEUELEN_TYPE: + case PERF_COUNTER_LARGE_QUEUELEN_TYPE: + return "Queue Length Space-Time Product. Divide delta by delta time. No Display Suffix"; + + case PERF_COUNTER_100NS_QUEUELEN_TYPE: + return "Queue Length Space-Time Product using 100 Ns timebase. Divide delta by delta time. No Display Suffix"; + + case PERF_COUNTER_OBJ_TIME_QUEUELEN_TYPE: + return "Queue Length Space-Time Product using Object specific timebase. Divide delta by delta time. No Display Suffix."; + + case PERF_COUNTER_BULK_COUNT: + return "64-bit Counter. Divide delta by delta time. Display Suffix: \"/sec\""; + + case PERF_COUNTER_TEXT: + return "Unicode text Display as text."; + + case PERF_COUNTER_RAWCOUNT: + case PERF_COUNTER_LARGE_RAWCOUNT: + return "A counter which should not be time averaged on display (such as an error counter on a serial line). Display as is. No Display Suffix."; + + case PERF_COUNTER_RAWCOUNT_HEX: + case PERF_COUNTER_LARGE_RAWCOUNT_HEX: + return "Special case for RAWCOUNT which should be displayed in hex. A counter which should not be time averaged on display (such as an error counter on a serial line). Display as is. No Display Suffix."; + + case PERF_SAMPLE_FRACTION: + return "A count which is either 1 or 0 on each sampling interrupt (% busy). Divide delta by delta base. Display Suffix: \"%\""; + + case PERF_SAMPLE_COUNTER: + return "A count which is sampled on each sampling interrupt (queue length). Divide delta by delta time. No Display Suffix."; + + case PERF_COUNTER_NODATA: + return "A label: no data is associated with this counter (it has 0 length). Do not display."; + + case PERF_COUNTER_TIMER_INV: + return "64-bit Timer inverse (e.g., idle is measured, but display busy %). Display 100 - delta divided by delta time. 
Display suffix: \"%\""; + + case PERF_SAMPLE_BASE: + return "The divisor for a sample, used with the previous counter to form a sampled %. You must check for >0 before dividing by this! This counter will directly follow the numerator counter. It should not be displayed to the user."; + + case PERF_AVERAGE_TIMER: + return "A timer which, when divided by an average base, produces a time in seconds which is the average time of some operation. This timer times total operations, and the base is the number of operations. Display Suffix: \"sec\""; + + case PERF_AVERAGE_BASE: + return "Used as the denominator in the computation of time or count averages. Must directly follow the numerator counter. Not displayed to the user."; + + case PERF_AVERAGE_BULK: + return "A bulk count which, when divided (typically) by the number of operations, gives (typically) the number of bytes per operation. No Display Suffix."; + + case PERF_OBJ_TIME_TIMER: + return "64-bit Timer in object specific units. Display delta divided by delta time as returned in the object type header structure. Display suffix: \"%\""; + + case PERF_100NSEC_TIMER: + return "64-bit Timer in 100 nsec units. Display delta divided by delta time. Display suffix: \"%\""; + + case PERF_100NSEC_TIMER_INV: + return "64-bit Timer inverse (e.g., idle is measured, but display busy %). Display 100 - delta divided by delta time. Display suffix: \"%\""; + + case PERF_COUNTER_MULTI_TIMER: + return "64-bit Timer. Divide delta by delta time. Display suffix: \"%\". Timer for multiple instances, so result can exceed 100%."; + + case PERF_COUNTER_MULTI_TIMER_INV: + return "64-bit Timer inverse (e.g., idle is measured, but display busy %). Display 100 * _MULTI_BASE - delta divided by delta time. Display suffix: \"%\" Timer for multiple instances, so result can exceed 100%. Followed by a counter of type _MULTI_BASE."; + + case PERF_COUNTER_MULTI_BASE: + return "Number of instances to which the preceding _MULTI_..._INV counter applies. 
Used as a factor to get the percentage."; + + case PERF_100NSEC_MULTI_TIMER: + return "64-bit Timer in 100 nsec units. Display delta divided by delta time. Display suffix: \"%\" Timer for multiple instances, so result can exceed 100%."; + + case PERF_100NSEC_MULTI_TIMER_INV: + return "64-bit Timer inverse (e.g., idle is measured, but display busy %). Display 100 * _MULTI_BASE - delta divided by delta time. Display suffix: \"%\" Timer for multiple instances, so result can exceed 100%. Followed by a counter of type _MULTI_BASE."; + + case PERF_LARGE_RAW_FRACTION: + case PERF_RAW_FRACTION: + return "Indicates the data is a fraction of the following counter which should not be time averaged on display (such as free space over total space.) Display as is. Display the quotient as \"%\""; + + case PERF_RAW_BASE: + case PERF_LARGE_RAW_BASE: + return "Indicates the data is a base for the preceding counter which should not be time averaged on display (such as free space over total space.)"; + + case PERF_ELAPSED_TIME: + return "The data collected in this counter is actually the start time of the item being measured. For display, this data is subtracted from the sample time to yield the elapsed time as the difference between the two. In the definition below, the PerfTime field of the Object contains the sample time as indicated by the PERF_OBJECT_TIMER bit and the difference is scaled by the PerfFreq of the Object to convert the time units into seconds."; + + case PERF_COUNTER_HISTOGRAM_TYPE: + return "Counter type can be used with the preceding types to define a range of values to be displayed in a histogram."; + + case PERF_COUNTER_DELTA: + case PERF_COUNTER_LARGE_DELTA: + return "This counter is used to display the difference from one sample to the next. The counter value is a constantly increasing number and the value displayed is the difference between the current value and the previous value. 
Negative numbers are not allowed which shouldn't be a problem as long as the counter value is increasing or unchanged."; + + case PERF_PRECISION_SYSTEM_TIMER: + return "The precision counters are timers that consist of two counter values:\r\n\t1) the count of elapsed time of the event being monitored\r\n\t2) the \"clock\" time in the same units\r\nthe precision timers are used where the standard system timers are not precise enough for accurate readings. It's assumed that the service providing the data is also providing a timestamp at the same time which will eliminate any error that may occur since some small and variable time elapses between the time the system timestamp is captured and when the data is collected from the performance DLL. Only in extreme cases has this been observed to be problematic.\r\nwhen using this type of timer, the definition of the PERF_PRECISION_TIMESTAMP counter must immediately follow the definition of the PERF_PRECISION_*_TIMER in the Object header\r\nThe timer used has the same frequency as the System Performance Timer"; + + case PERF_PRECISION_100NS_TIMER: + return "The precision counters are timers that consist of two counter values:\r\n\t1) the count of elapsed time of the event being monitored\r\n\t2) the \"clock\" time in the same units\r\nthe precision timers are used where the standard system timers are not precise enough for accurate readings. It's assumed that the service providing the data is also providing a timestamp at the same time which will eliminate any error that may occur since some small and variable time elapses between the time the system timestamp is captured and when the data is collected from the performance DLL. 
Only in extreme cases has this been observed to be problematic.\r\nwhen using this type of timer, the definition of the PERF_PRECISION_TIMESTAMP counter must immediately follow the definition of the PERF_PRECISION_*_TIMER in the Object header\r\nThe timer used has the same frequency as the 100 NanoSecond Timer"; + + case PERF_PRECISION_OBJECT_TIMER: + return "The precision counters are timers that consist of two counter values:\r\n\t1) the count of elapsed time of the event being monitored\r\n\t2) the \"clock\" time in the same units\r\nthe precision timers are used where the standard system timers are not precise enough for accurate readings. It's assumed that the service providing the data is also providing a timestamp at the same time which will eliminate any error that may occur since some small and variable time elapses between the time the system timestamp is captured and when the data is collected from the performance DLL. Only in extreme cases has this been observed to be problematic.\r\nwhen using this type of timer, the definition of the PERF_PRECISION_TIMESTAMP counter must immediately follow the definition of the PERF_PRECISION_*_TIMER in the Object header\r\nThe timer used is of the frequency specified in the Object header's. 
PerfFreq field (PerfTime is ignored)"; + + default: + return ""; + } +} + +static const char *getCounterAlgorithm(DWORD CounterType) { + switch (CounterType) + { + case PERF_COUNTER_COUNTER: + case PERF_SAMPLE_COUNTER: + case PERF_COUNTER_BULK_COUNT: + return "(data1 - data0) / ((time1 - time0) / frequency)"; + + case PERF_COUNTER_QUEUELEN_TYPE: + case PERF_COUNTER_100NS_QUEUELEN_TYPE: + case PERF_COUNTER_OBJ_TIME_QUEUELEN_TYPE: + case PERF_COUNTER_LARGE_QUEUELEN_TYPE: + case PERF_AVERAGE_BULK: // normally not displayed + return "(data1 - data0) / (time1 - time0)"; + + case PERF_OBJ_TIME_TIMER: + case PERF_COUNTER_TIMER: + case PERF_100NSEC_TIMER: + case PERF_PRECISION_SYSTEM_TIMER: + case PERF_PRECISION_100NS_TIMER: + case PERF_PRECISION_OBJECT_TIMER: + case PERF_SAMPLE_FRACTION: + return "100 * (data1 - data0) / (time1 - time0)"; + + case PERF_COUNTER_TIMER_INV: + return "100 * (1 - ((data1 - data0) / (time1 - time0)))"; + + case PERF_100NSEC_TIMER_INV: + return "100 * (1- (data1 - data0) / (time1 - time0))"; + + case PERF_COUNTER_MULTI_TIMER: + return "100 * ((data1 - data0) / ((time1 - time0) / frequency1)) / multi1"; + + case PERF_100NSEC_MULTI_TIMER: + return "100 * ((data1 - data0) / (time1 - time0)) / multi1"; + + case PERF_COUNTER_MULTI_TIMER_INV: + case PERF_100NSEC_MULTI_TIMER_INV: + return "100 * (multi1 - ((data1 - data0) / (time1 - time0)))"; + + case PERF_COUNTER_RAWCOUNT: + case PERF_COUNTER_LARGE_RAWCOUNT: + return "data0"; + + case PERF_COUNTER_RAWCOUNT_HEX: + case PERF_COUNTER_LARGE_RAWCOUNT_HEX: + return "hex(data0)"; + + case PERF_COUNTER_DELTA: + case PERF_COUNTER_LARGE_DELTA: + return "data1 - data0"; + + case PERF_RAW_FRACTION: + case PERF_LARGE_RAW_FRACTION: + return "100 * data0 / time0"; + + case PERF_AVERAGE_TIMER: + return "((data1 - data0) / frequency1) / (time1 - time0)"; + + case PERF_ELAPSED_TIME: + return "(time0 - data0) / frequency0"; + + case PERF_COUNTER_TEXT: + case PERF_SAMPLE_BASE: + case PERF_AVERAGE_BASE: + case 
PERF_COUNTER_MULTI_BASE: + case PERF_RAW_BASE: + case PERF_COUNTER_NODATA: + case PERF_PRECISION_TIMESTAMP: + default: + return ""; + } +} + +void dumpSystemTime(BUFFER *wb, SYSTEMTIME *st) { + buffer_json_member_add_uint64(wb, "Year", st->wYear); + buffer_json_member_add_uint64(wb, "Month", st->wMonth); + buffer_json_member_add_uint64(wb, "DayOfWeek", st->wDayOfWeek); + buffer_json_member_add_uint64(wb, "Day", st->wDay); + buffer_json_member_add_uint64(wb, "Hour", st->wHour); + buffer_json_member_add_uint64(wb, "Minute", st->wMinute); + buffer_json_member_add_uint64(wb, "Second", st->wSecond); + buffer_json_member_add_uint64(wb, "Milliseconds", st->wMilliseconds); +} + +bool dumpDataCb(PERF_DATA_BLOCK *pDataBlock, void *data) { + char name[4096]; + if(!getSystemName(pDataBlock, name, sizeof(name))) + strncpyz(name, "[failed]", sizeof(name) - 1); + + BUFFER *wb = data; + buffer_json_member_add_string(wb, "SystemName", name); + + // Number of types of objects being reported + // Type: DWORD + buffer_json_member_add_int64(wb, "NumObjectTypes", pDataBlock->NumObjectTypes); + + buffer_json_member_add_int64(wb, "LittleEndian", pDataBlock->LittleEndian); + + // Version and Revision of these data structures. + // Version starts at 1. + // Revision starts at 0 for each Version. 
+ // Type: DWORD + buffer_json_member_add_int64(wb, "Version", pDataBlock->Version); + buffer_json_member_add_int64(wb, "Revision", pDataBlock->Revision); + + // Object Title Index of default object to display when data from this system is retrieved + // (-1 = none, but this is not expected to be used) + // Type: LONG + buffer_json_member_add_int64(wb, "DefaultObject", pDataBlock->DefaultObject); + + // Performance counter frequency at the system under measurement + // Type: LARGE_INTEGER + buffer_json_member_add_int64(wb, "PerfFreq", pDataBlock->PerfFreq.QuadPart); + + // Performance counter value at the system under measurement + // Type: LARGE_INTEGER + buffer_json_member_add_int64(wb, "PerfTime", pDataBlock->PerfTime.QuadPart); + + // Performance counter time in 100 nsec units at the system under measurement + // Type: LARGE_INTEGER + buffer_json_member_add_int64(wb, "PerfTime100nSec", pDataBlock->PerfTime100nSec.QuadPart); + + // Time at the system under measurement in UTC + // Type: SYSTEMTIME + buffer_json_member_add_object(wb, "SystemTime"); + dumpSystemTime(wb, &pDataBlock->SystemTime); + buffer_json_object_close(wb); + + if(pDataBlock->NumObjectTypes) + buffer_json_member_add_array(wb, "Objects"); + + return true; +} + +static const char *GetDetailLevel(DWORD num) { + switch (num) { + case 100: + return "Novice (100)"; + case 200: + return "Advanced (200)"; + case 300: + return "Expert (300)"; + case 400: + return "Wizard (400)"; + + default: + return "Unknown"; + } +} + +bool dumpObjectCb(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, void *data) { + (void)pDataBlock; + BUFFER *wb = data; + if(!pObjectType) { + buffer_json_array_close(wb); // instances or counters + buffer_json_object_close(wb); // objectType + return true; + } + + buffer_json_add_array_item_object(wb); // objectType + buffer_json_member_add_int64(wb, "NameId", pObjectType->ObjectNameTitleIndex); + buffer_json_member_add_string(wb, "Name", 
RegistryFindNameByID(pObjectType->ObjectNameTitleIndex)); + buffer_json_member_add_int64(wb, "HelpId", pObjectType->ObjectHelpTitleIndex); + buffer_json_member_add_string(wb, "Help", RegistryFindHelpByID(pObjectType->ObjectHelpTitleIndex)); + buffer_json_member_add_int64(wb, "NumInstances", pObjectType->NumInstances); + buffer_json_member_add_int64(wb, "NumCounters", pObjectType->NumCounters); + buffer_json_member_add_int64(wb, "PerfTime", pObjectType->PerfTime.QuadPart); + buffer_json_member_add_int64(wb, "PerfFreq", pObjectType->PerfFreq.QuadPart); + buffer_json_member_add_int64(wb, "CodePage", pObjectType->CodePage); + buffer_json_member_add_int64(wb, "DefaultCounter", pObjectType->DefaultCounter); + buffer_json_member_add_string(wb, "DetailLevel", GetDetailLevel(pObjectType->DetailLevel)); + + if(ObjectTypeHasInstances(pDataBlock, pObjectType)) + buffer_json_member_add_array(wb, "Instances"); + else + buffer_json_member_add_array(wb, "Counters"); + + return true; +} + +bool dumpInstanceCb(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, PERF_INSTANCE_DEFINITION *pInstance, void *data) { + (void)pDataBlock; + BUFFER *wb = data; + if(!pInstance) { + buffer_json_array_close(wb); // counters + buffer_json_object_close(wb); // instance + return true; + } + + char name[4096]; + if(!getInstanceName(pDataBlock, pObjectType, pInstance, name, sizeof(name))) + strncpyz(name, "[failed]", sizeof(name) - 1); + + buffer_json_add_array_item_object(wb); + buffer_json_member_add_string(wb, "Instance", name); + buffer_json_member_add_int64(wb, "UniqueID", pInstance->UniqueID); + buffer_json_member_add_array(wb, "Labels"); + { + buffer_json_add_array_item_object(wb); + { + buffer_json_member_add_string(wb, "key", RegistryFindNameByID(pObjectType->ObjectNameTitleIndex)); + buffer_json_member_add_string(wb, "value", name); + } + buffer_json_object_close(wb); + + if(pInstance->ParentObjectTitleIndex) { + PERF_INSTANCE_DEFINITION *pi = pInstance; + 
while(pi->ParentObjectTitleIndex) { + PERF_OBJECT_TYPE *po = getObjectTypeByIndex(pDataBlock, pInstance->ParentObjectTitleIndex); + pi = getInstanceByPosition(pDataBlock, po, pi->ParentObjectInstance); + + if(!getInstanceName(pDataBlock, po, pi, name, sizeof(name))) + strncpyz(name, "[failed]", sizeof(name) - 1); + + buffer_json_add_array_item_object(wb); + { + buffer_json_member_add_string(wb, "key", RegistryFindNameByID(po->ObjectNameTitleIndex)); + buffer_json_member_add_string(wb, "value", name); + } + buffer_json_object_close(wb); + } + } + } + buffer_json_array_close(wb); // rrdlabels + + buffer_json_member_add_array(wb, "Counters"); + return true; +} + +void dumpSample(BUFFER *wb, RAW_DATA *d) { + buffer_json_member_add_object(wb, "Value"); + buffer_json_member_add_uint64(wb, "data", d->Data); + buffer_json_member_add_int64(wb, "time", d->Time); + buffer_json_member_add_uint64(wb, "type", d->CounterType); + buffer_json_member_add_int64(wb, "multi", d->MultiCounterData); + buffer_json_member_add_int64(wb, "frequency", d->Frequency); + buffer_json_object_close(wb); +} + +bool dumpCounterCb(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, PERF_COUNTER_DEFINITION *pCounter, RAW_DATA *sample, void *data) { + (void)pDataBlock; + (void)pObjectType; + BUFFER *wb = data; + buffer_json_add_array_item_object(wb); + buffer_json_member_add_string(wb, "Counter", RegistryFindNameByID(pCounter->CounterNameTitleIndex)); + dumpSample(wb, sample); + buffer_json_member_add_string(wb, "Help", RegistryFindHelpByID(pCounter->CounterHelpTitleIndex)); + buffer_json_member_add_string(wb, "Type", getCounterType(pCounter->CounterType)); + buffer_json_member_add_string(wb, "Algorithm", getCounterAlgorithm(pCounter->CounterType)); + buffer_json_member_add_string(wb, "Description", getCounterDescription(pCounter->CounterType)); + buffer_json_object_close(wb); + return true; +} + +bool dumpInstanceCounterCb(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, 
PERF_INSTANCE_DEFINITION *pInstance, PERF_COUNTER_DEFINITION *pCounter, RAW_DATA *sample, void *data) { + (void)pInstance; + return dumpCounterCb(pDataBlock, pObjectType, pCounter, sample, data); +} + + +int windows_perflib_dump(const char *key) { + if(key && !*key) + key = NULL; + + PerflibNamesRegistryInitialize(); + + DWORD id = 0; + if(key) { + id = RegistryFindIDByName(key); + if(id == PERFLIB_REGISTRY_NAME_NOT_FOUND) { + fprintf(stderr, "Cannot find key '%s' in Windows Performance Counters Registry.\n", key); + exit(1); + } + } + + CLEAN_BUFFER *wb = buffer_create(0, NULL); + buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_MINIFY); + + perflibQueryAndTraverse(id, dumpDataCb, dumpObjectCb, dumpInstanceCb, dumpInstanceCounterCb, dumpCounterCb, wb); + + buffer_json_finalize(wb); + printf("\n%s\n", buffer_tostring(wb)); + + perflibFreePerformanceData(); + + return 0; +} + +#endif // OS_WINDOWS \ No newline at end of file diff --git a/src/collectors/windows.plugin/perflib-names.c b/src/libnetdata/os/windows-perflib/perflib-names.c similarity index 96% rename from src/collectors/windows.plugin/perflib-names.c rename to src/libnetdata/os/windows-perflib/perflib-names.c index 5b47cbce79fe36..b4404e1ea22ad4 100644 --- a/src/collectors/windows.plugin/perflib-names.c +++ b/src/libnetdata/os/windows-perflib/perflib-names.c @@ -1,242 +1,247 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "perflib.h" - -#define REGISTRY_KEY "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\Perflib\\009" - -typedef struct perflib_registry { - DWORD id; - char *key; - char *help; -} perfLibRegistryEntry; - -static inline bool compare_perfLibRegistryEntry(const char *k1, const char *k2) { - return strcmp(k1, k2) == 0; -} - -static inline const char *value2key_perfLibRegistryEntry(perfLibRegistryEntry *entry) { - return entry->key; -} - -#define SIMPLE_HASHTABLE_COMPARE_KEYS_FUNCTION compare_perfLibRegistryEntry -#define SIMPLE_HASHTABLE_VALUE2KEY_FUNCTION 
value2key_perfLibRegistryEntry -#define SIMPLE_HASHTABLE_KEY_TYPE const char -#define SIMPLE_HASHTABLE_VALUE_TYPE perfLibRegistryEntry -#define SIMPLE_HASHTABLE_NAME _PERFLIB -#include "libnetdata/simple_hashtable.h" - -static struct { - SPINLOCK spinlock; - size_t size; - perfLibRegistryEntry **array; - struct simple_hashtable_PERFLIB hashtable; - FILETIME lastWriteTime; -} names_globals = { - .spinlock = NETDATA_SPINLOCK_INITIALIZER, - .size = 0, - .array = NULL, -}; - -DWORD RegistryFindIDByName(const char *name) { - DWORD rc = PERFLIB_REGISTRY_NAME_NOT_FOUND; - - spinlock_lock(&names_globals.spinlock); - XXH64_hash_t hash = XXH3_64bits((void *)name, strlen(name)); - SIMPLE_HASHTABLE_SLOT_PERFLIB *sl = simple_hashtable_get_slot_PERFLIB(&names_globals.hashtable, hash, name, false); - perfLibRegistryEntry *e = SIMPLE_HASHTABLE_SLOT_DATA(sl); - if(e) rc = e->id; - spinlock_unlock(&names_globals.spinlock); - - return rc; -} - -static inline void RegistryAddToHashTable_unsafe(perfLibRegistryEntry *entry) { - XXH64_hash_t hash = XXH3_64bits((void *)entry->key, strlen(entry->key)); - SIMPLE_HASHTABLE_SLOT_PERFLIB *sl = simple_hashtable_get_slot_PERFLIB(&names_globals.hashtable, hash, entry->key, true); - perfLibRegistryEntry *e = SIMPLE_HASHTABLE_SLOT_DATA(sl); - if(!e || e->id > entry->id) - simple_hashtable_set_slot_PERFLIB(&names_globals.hashtable, sl, hash, entry); -} - -static void RegistrySetData_unsafe(DWORD id, const char *key, const char *help) { - if(id >= names_globals.size) { - // increase the size of the array - - size_t old_size = names_globals.size; - - if(!names_globals.size) - names_globals.size = 20000; - else - names_globals.size *= 2; - - names_globals.array = reallocz(names_globals.array, names_globals.size * sizeof(perfLibRegistryEntry *)); - - memset(names_globals.array + old_size, 0, (names_globals.size - old_size) * sizeof(perfLibRegistryEntry *)); - } - - perfLibRegistryEntry *entry = names_globals.array[id]; - if(!entry) - entry = 
names_globals.array[id] = (perfLibRegistryEntry *)calloc(1, sizeof(perfLibRegistryEntry)); - - bool add_to_hash = false; - if(key && !entry->key) { - entry->key = strdup(key); - add_to_hash = true; - } - - if(help && !entry->help) - entry->help = strdup(help); - - entry->id = id; - - if(add_to_hash) - RegistryAddToHashTable_unsafe(entry); -} - -const char *RegistryFindNameByID(DWORD id) { - const char *s = ""; - spinlock_lock(&names_globals.spinlock); - - if(id < names_globals.size) { - perfLibRegistryEntry *titleEntry = names_globals.array[id]; - if(titleEntry && titleEntry->key) - s = titleEntry->key; - } - - spinlock_unlock(&names_globals.spinlock); - return s; -} - -const char *RegistryFindHelpByID(DWORD id) { - const char *s = ""; - spinlock_lock(&names_globals.spinlock); - - if(id < names_globals.size) { - perfLibRegistryEntry *titleEntry = names_globals.array[id]; - if(titleEntry && titleEntry->help) - s = titleEntry->help; - } - - spinlock_unlock(&names_globals.spinlock); - return s; -} - -// ---------------------------------------------------------- - -static inline void readRegistryKeys_unsafe(BOOL helps) { - TCHAR *pData = NULL; - - HKEY hKey; - DWORD dwType; - DWORD dwSize = 0; - LONG lStatus; - - LPCSTR valueName; - if(helps) - valueName = TEXT("help"); - else - valueName = TEXT("CounterDefinition"); - - // Open the key for the English counters - lStatus = RegOpenKeyEx(HKEY_LOCAL_MACHINE, TEXT(REGISTRY_KEY), 0, KEY_READ, &hKey); - if (lStatus != ERROR_SUCCESS) { - nd_log(NDLS_COLLECTORS, NDLP_ERR, - "Failed to open registry key HKEY_LOCAL_MACHINE, subkey '%s', error %ld\n", REGISTRY_KEY, (long)lStatus); - return; - } - - // Get the size of the 'Counters' data - lStatus = RegQueryValueEx(hKey, valueName, NULL, &dwType, NULL, &dwSize); - if (lStatus != ERROR_SUCCESS) { - nd_log(NDLS_COLLECTORS, NDLP_ERR, - "Failed to get registry key HKEY_LOCAL_MACHINE, subkey '%s', value '%s', size of data, error %ld\n", - REGISTRY_KEY, (const char *)valueName, 
(long)lStatus); - goto cleanup; - } - - // Allocate memory for the data - pData = mallocz(dwSize); - - // Read the 'Counters' data - lStatus = RegQueryValueEx(hKey, valueName, NULL, &dwType, (LPBYTE)pData, &dwSize); - if (lStatus != ERROR_SUCCESS || dwType != REG_MULTI_SZ) { - nd_log(NDLS_COLLECTORS, NDLP_ERR, - "Failed to get registry key HKEY_LOCAL_MACHINE, subkey '%s', value '%s', data, error %ld\n", - REGISTRY_KEY, (const char *)valueName, (long)lStatus); - goto cleanup; - } - - // Process the counter data - TCHAR *ptr = pData; - while (*ptr) { - TCHAR *sid = ptr; // First string is the ID - ptr += lstrlen(ptr) + 1; // Move to the next string - TCHAR *name = ptr; // Second string is the name - ptr += lstrlen(ptr) + 1; // Move to the next pair - - DWORD id = strtoul(sid, NULL, 10); - - if(helps) - RegistrySetData_unsafe(id, NULL, name); - else - RegistrySetData_unsafe(id, name, NULL); - } - -cleanup: - if(pData) freez(pData); - RegCloseKey(hKey); -} - -static BOOL RegistryKeyModification(FILETIME *lastWriteTime) { - HKEY hKey; - LONG lResult; - BOOL ret = FALSE; - - // Open the registry key - lResult = RegOpenKeyEx(HKEY_LOCAL_MACHINE, TEXT(REGISTRY_KEY), 0, KEY_READ, &hKey); - if (lResult != ERROR_SUCCESS) { - nd_log(NDLS_COLLECTORS, NDLP_ERR, - "Failed to open registry key HKEY_LOCAL_MACHINE, subkey '%s', error %ld\n", REGISTRY_KEY, (long)lResult); - return FALSE; - } - - // Get the last write time - lResult = RegQueryInfoKey(hKey, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, lastWriteTime); - if (lResult != ERROR_SUCCESS) { - nd_log(NDLS_COLLECTORS, NDLP_ERR, - "Failed to query registry key HKEY_LOCAL_MACHINE, subkey '%s', last write time, error %ld\n", REGISTRY_KEY, (long)lResult); - ret = FALSE; - } - else - ret = TRUE; - - RegCloseKey(hKey); - return ret; -} - -static inline void RegistryFetchAll_unsafe(void) { - readRegistryKeys_unsafe(FALSE); - readRegistryKeys_unsafe(TRUE); -} - -void PerflibNamesRegistryInitialize(void) { - 
spinlock_lock(&names_globals.spinlock); - simple_hashtable_init_PERFLIB(&names_globals.hashtable, 20000); - RegistryKeyModification(&names_globals.lastWriteTime); - RegistryFetchAll_unsafe(); - spinlock_unlock(&names_globals.spinlock); -} - -void PerflibNamesRegistryUpdate(void) { - FILETIME lastWriteTime = { 0 }; - RegistryKeyModification(&lastWriteTime); - - if(CompareFileTime(&lastWriteTime, &names_globals.lastWriteTime) > 0) { - spinlock_lock(&names_globals.spinlock); - if(CompareFileTime(&lastWriteTime, &names_globals.lastWriteTime) > 0) { - names_globals.lastWriteTime = lastWriteTime; - RegistryFetchAll_unsafe(); - } - spinlock_unlock(&names_globals.spinlock); - } -} +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "perflib.h" + +#if defined(OS_WINDOWS) +#include + +#define REGISTRY_KEY "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion\\Perflib\\009" + +typedef struct perflib_registry { + DWORD id; + char *key; + char *help; +} perfLibRegistryEntry; + +static inline bool compare_perfLibRegistryEntry(const char *k1, const char *k2) { + return strcmp(k1, k2) == 0; +} + +static inline const char *value2key_perfLibRegistryEntry(perfLibRegistryEntry *entry) { + return entry->key; +} + +#define SIMPLE_HASHTABLE_COMPARE_KEYS_FUNCTION compare_perfLibRegistryEntry +#define SIMPLE_HASHTABLE_VALUE2KEY_FUNCTION value2key_perfLibRegistryEntry +#define SIMPLE_HASHTABLE_KEY_TYPE const char +#define SIMPLE_HASHTABLE_VALUE_TYPE perfLibRegistryEntry +#define SIMPLE_HASHTABLE_NAME _PERFLIB +#include "libnetdata/simple_hashtable.h" + +static struct { + SPINLOCK spinlock; + size_t size; + perfLibRegistryEntry **array; + struct simple_hashtable_PERFLIB hashtable; + FILETIME lastWriteTime; +} names_globals = { + .spinlock = NETDATA_SPINLOCK_INITIALIZER, + .size = 0, + .array = NULL, +}; + +DWORD RegistryFindIDByName(const char *name) { + DWORD rc = PERFLIB_REGISTRY_NAME_NOT_FOUND; + + spinlock_lock(&names_globals.spinlock); + XXH64_hash_t hash = XXH3_64bits((void *)name, 
strlen(name)); + SIMPLE_HASHTABLE_SLOT_PERFLIB *sl = simple_hashtable_get_slot_PERFLIB(&names_globals.hashtable, hash, name, false); + perfLibRegistryEntry *e = SIMPLE_HASHTABLE_SLOT_DATA(sl); + if(e) rc = e->id; + spinlock_unlock(&names_globals.spinlock); + + return rc; +} + +static inline void RegistryAddToHashTable_unsafe(perfLibRegistryEntry *entry) { + XXH64_hash_t hash = XXH3_64bits((void *)entry->key, strlen(entry->key)); + SIMPLE_HASHTABLE_SLOT_PERFLIB *sl = simple_hashtable_get_slot_PERFLIB(&names_globals.hashtable, hash, entry->key, true); + perfLibRegistryEntry *e = SIMPLE_HASHTABLE_SLOT_DATA(sl); + if(!e || e->id > entry->id) + simple_hashtable_set_slot_PERFLIB(&names_globals.hashtable, sl, hash, entry); +} + +static void RegistrySetData_unsafe(DWORD id, const char *key, const char *help) { + if(id >= names_globals.size) { + // increase the size of the array + + size_t old_size = names_globals.size; + + if(!names_globals.size) + names_globals.size = 20000; + else + names_globals.size *= 2; + + names_globals.array = reallocz(names_globals.array, names_globals.size * sizeof(perfLibRegistryEntry *)); + + memset(names_globals.array + old_size, 0, (names_globals.size - old_size) * sizeof(perfLibRegistryEntry *)); + } + + perfLibRegistryEntry *entry = names_globals.array[id]; + if(!entry) + entry = names_globals.array[id] = (perfLibRegistryEntry *)calloc(1, sizeof(perfLibRegistryEntry)); + + bool add_to_hash = false; + if(key && !entry->key) { + entry->key = strdup(key); + add_to_hash = true; + } + + if(help && !entry->help) + entry->help = strdup(help); + + entry->id = id; + + if(add_to_hash) + RegistryAddToHashTable_unsafe(entry); +} + +const char *RegistryFindNameByID(DWORD id) { + const char *s = ""; + spinlock_lock(&names_globals.spinlock); + + if(id < names_globals.size) { + perfLibRegistryEntry *titleEntry = names_globals.array[id]; + if(titleEntry && titleEntry->key) + s = titleEntry->key; + } + + spinlock_unlock(&names_globals.spinlock); + return s; 
+} + +const char *RegistryFindHelpByID(DWORD id) { + const char *s = ""; + spinlock_lock(&names_globals.spinlock); + + if(id < names_globals.size) { + perfLibRegistryEntry *titleEntry = names_globals.array[id]; + if(titleEntry && titleEntry->help) + s = titleEntry->help; + } + + spinlock_unlock(&names_globals.spinlock); + return s; +} + +// ---------------------------------------------------------- + +static inline void readRegistryKeys_unsafe(BOOL helps) { + TCHAR *pData = NULL; + + HKEY hKey; + DWORD dwType; + DWORD dwSize = 0; + LONG lStatus; + + LPCSTR valueName; + if(helps) + valueName = TEXT("help"); + else + valueName = TEXT("CounterDefinition"); + + // Open the key for the English counters + lStatus = RegOpenKeyEx(HKEY_LOCAL_MACHINE, TEXT(REGISTRY_KEY), 0, KEY_READ, &hKey); + if (lStatus != ERROR_SUCCESS) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, + "Failed to open registry key HKEY_LOCAL_MACHINE, subkey '%s', error %ld\n", REGISTRY_KEY, (long)lStatus); + return; + } + + // Get the size of the 'Counters' data + lStatus = RegQueryValueEx(hKey, valueName, NULL, &dwType, NULL, &dwSize); + if (lStatus != ERROR_SUCCESS) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, + "Failed to get registry key HKEY_LOCAL_MACHINE, subkey '%s', value '%s', size of data, error %ld\n", + REGISTRY_KEY, (const char *)valueName, (long)lStatus); + goto cleanup; + } + + // Allocate memory for the data + pData = mallocz(dwSize); + + // Read the 'Counters' data + lStatus = RegQueryValueEx(hKey, valueName, NULL, &dwType, (LPBYTE)pData, &dwSize); + if (lStatus != ERROR_SUCCESS || dwType != REG_MULTI_SZ) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, + "Failed to get registry key HKEY_LOCAL_MACHINE, subkey '%s', value '%s', data, error %ld\n", + REGISTRY_KEY, (const char *)valueName, (long)lStatus); + goto cleanup; + } + + // Process the counter data + TCHAR *ptr = pData; + while (*ptr) { + TCHAR *sid = ptr; // First string is the ID + ptr += lstrlen(ptr) + 1; // Move to the next string + TCHAR *name = ptr; // 
Second string is the name + ptr += lstrlen(ptr) + 1; // Move to the next pair + + DWORD id = strtoul(sid, NULL, 10); + + if(helps) + RegistrySetData_unsafe(id, NULL, name); + else + RegistrySetData_unsafe(id, name, NULL); + } + +cleanup: + if(pData) freez(pData); + RegCloseKey(hKey); +} + +static BOOL RegistryKeyModification(FILETIME *lastWriteTime) { + HKEY hKey; + LONG lResult; + BOOL ret = FALSE; + + // Open the registry key + lResult = RegOpenKeyEx(HKEY_LOCAL_MACHINE, TEXT(REGISTRY_KEY), 0, KEY_READ, &hKey); + if (lResult != ERROR_SUCCESS) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, + "Failed to open registry key HKEY_LOCAL_MACHINE, subkey '%s', error %ld\n", REGISTRY_KEY, (long)lResult); + return FALSE; + } + + // Get the last write time + lResult = RegQueryInfoKey(hKey, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, lastWriteTime); + if (lResult != ERROR_SUCCESS) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, + "Failed to query registry key HKEY_LOCAL_MACHINE, subkey '%s', last write time, error %ld\n", REGISTRY_KEY, (long)lResult); + ret = FALSE; + } + else + ret = TRUE; + + RegCloseKey(hKey); + return ret; +} + +static inline void RegistryFetchAll_unsafe(void) { + readRegistryKeys_unsafe(FALSE); + readRegistryKeys_unsafe(TRUE); +} + +void PerflibNamesRegistryInitialize(void) { + spinlock_lock(&names_globals.spinlock); + simple_hashtable_init_PERFLIB(&names_globals.hashtable, 20000); + RegistryKeyModification(&names_globals.lastWriteTime); + RegistryFetchAll_unsafe(); + spinlock_unlock(&names_globals.spinlock); +} + +void PerflibNamesRegistryUpdate(void) { + FILETIME lastWriteTime = { 0 }; + RegistryKeyModification(&lastWriteTime); + + if(CompareFileTime(&lastWriteTime, &names_globals.lastWriteTime) > 0) { + spinlock_lock(&names_globals.spinlock); + if(CompareFileTime(&lastWriteTime, &names_globals.lastWriteTime) > 0) { + names_globals.lastWriteTime = lastWriteTime; + RegistryFetchAll_unsafe(); + } + spinlock_unlock(&names_globals.spinlock); + } +} + +#endif 
// OS_WINDOWS diff --git a/src/collectors/windows.plugin/perflib.c b/src/libnetdata/os/windows-perflib/perflib.c similarity index 97% rename from src/collectors/windows.plugin/perflib.c rename to src/libnetdata/os/windows-perflib/perflib.c index 4df48acfbfa946..940b3c6e6076d1 100644 --- a/src/collectors/windows.plugin/perflib.c +++ b/src/libnetdata/os/windows-perflib/perflib.c @@ -1,671 +1,676 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "perflib.h" - -// -------------------------------------------------------------------------------- - -// Retrieve a buffer that contains the specified performance data. -// The pwszSource parameter determines the data that GetRegistryBuffer returns. -// -// Typically, when calling RegQueryValueEx, you can specify zero for the size of the buffer -// and the RegQueryValueEx will set your size variable to the required buffer size. However, -// if the source is "Global" or one or more object index values, you will need to increment -// the buffer size in a loop until RegQueryValueEx does not return ERROR_MORE_DATA. 
-static LPBYTE getPerformanceData(const char *pwszSource) { - static __thread DWORD size = 0; - static __thread LPBYTE buffer = NULL; - - if(pwszSource == (const char *)0x01) { - freez(buffer); - buffer = NULL; - size = 0; - return NULL; - } - - if(!size) { - size = 32 * 1024; - buffer = mallocz(size); - } - - LONG status = ERROR_SUCCESS; - while ((status = RegQueryValueEx(HKEY_PERFORMANCE_DATA, pwszSource, - NULL, NULL, buffer, &size)) == ERROR_MORE_DATA) { - size *= 2; - buffer = reallocz(buffer, size); - } - - if (status != ERROR_SUCCESS) { - nd_log(NDLS_COLLECTORS, NDLP_ERR, "RegQueryValueEx failed with 0x%x.\n", status); - return NULL; - } - - return buffer; -} - -void perflibFreePerformanceData(void) { - getPerformanceData((const char *)0x01); -} - -// -------------------------------------------------------------------------------------------------------------------- - -// Retrieve the raw counter value and any supporting data needed to calculate -// a displayable counter value. Use the counter type to determine the information -// needed to calculate the value. - -static BOOL getCounterData( - PERF_DATA_BLOCK *pDataBlock, - PERF_OBJECT_TYPE* pObject, - PERF_COUNTER_DEFINITION* pCounter, - PERF_COUNTER_BLOCK* pCounterDataBlock, - PRAW_DATA pRawData) -{ - PVOID pData = NULL; - UNALIGNED ULONGLONG* pullData = NULL; - PERF_COUNTER_DEFINITION* pBaseCounter = NULL; - BOOL fSuccess = TRUE; - - //Point to the raw counter data. - pData = (PVOID)((LPBYTE)pCounterDataBlock + pCounter->CounterOffset); - - //Now use the PERF_COUNTER_DEFINITION.CounterType value to figure out what - //other information you need to calculate a displayable value. 
- switch (pCounter->CounterType) { - - case PERF_COUNTER_COUNTER: - case PERF_COUNTER_QUEUELEN_TYPE: - case PERF_SAMPLE_COUNTER: - pRawData->Data = (ULONGLONG)(*(DWORD*)pData); - pRawData->Time = pDataBlock->PerfTime.QuadPart; - if (PERF_COUNTER_COUNTER == pCounter->CounterType || PERF_SAMPLE_COUNTER == pCounter->CounterType) - pRawData->Frequency = pDataBlock->PerfFreq.QuadPart; - break; - - case PERF_OBJ_TIME_TIMER: - pRawData->Data = (ULONGLONG)(*(DWORD*)pData); - pRawData->Time = pObject->PerfTime.QuadPart; - break; - - case PERF_COUNTER_100NS_QUEUELEN_TYPE: - pRawData->Data = *(UNALIGNED ULONGLONG *)pData; - pRawData->Time = pDataBlock->PerfTime100nSec.QuadPart; - break; - - case PERF_COUNTER_OBJ_TIME_QUEUELEN_TYPE: - pRawData->Data = *(UNALIGNED ULONGLONG *)pData; - pRawData->Time = pObject->PerfTime.QuadPart; - break; - - case PERF_COUNTER_TIMER: - case PERF_COUNTER_TIMER_INV: - case PERF_COUNTER_BULK_COUNT: - case PERF_COUNTER_LARGE_QUEUELEN_TYPE: - pullData = (UNALIGNED ULONGLONG *)pData; - pRawData->Data = *pullData; - pRawData->Time = pDataBlock->PerfTime.QuadPart; - if (pCounter->CounterType == PERF_COUNTER_BULK_COUNT) - pRawData->Frequency = pDataBlock->PerfFreq.QuadPart; - break; - - case PERF_COUNTER_MULTI_TIMER: - case PERF_COUNTER_MULTI_TIMER_INV: - pullData = (UNALIGNED ULONGLONG *)pData; - pRawData->Data = *pullData; - pRawData->Frequency = pDataBlock->PerfFreq.QuadPart; - pRawData->Time = pDataBlock->PerfTime.QuadPart; - - //These counter types have a second counter value that is adjacent to - //this counter value in the counter data block. The value is needed for - //the calculation. - if ((pCounter->CounterType & PERF_MULTI_COUNTER) == PERF_MULTI_COUNTER) { - ++pullData; - pRawData->MultiCounterData = *(DWORD*)pullData; - } - break; - - //These counters do not use any time reference. 
- case PERF_COUNTER_RAWCOUNT: - case PERF_COUNTER_RAWCOUNT_HEX: - case PERF_COUNTER_DELTA: - // some counters in these categories, have CounterSize = sizeof(ULONGLONG) - // but the official documentation always uses them as sizeof(DWORD) - pRawData->Data = (ULONGLONG)(*(DWORD*)pData); - pRawData->Time = 0; - break; - - case PERF_COUNTER_LARGE_RAWCOUNT: - case PERF_COUNTER_LARGE_RAWCOUNT_HEX: - case PERF_COUNTER_LARGE_DELTA: - pRawData->Data = *(UNALIGNED ULONGLONG*)pData; - pRawData->Time = 0; - break; - - //These counters use the 100ns time base in their calculation. - case PERF_100NSEC_TIMER: - case PERF_100NSEC_TIMER_INV: - case PERF_100NSEC_MULTI_TIMER: - case PERF_100NSEC_MULTI_TIMER_INV: - pullData = (UNALIGNED ULONGLONG*)pData; - pRawData->Data = *pullData; - pRawData->Time = pDataBlock->PerfTime100nSec.QuadPart; - - //These counter types have a second counter value that is adjacent to - //this counter value in the counter data block. The value is needed for - //the calculation. - if ((pCounter->CounterType & PERF_MULTI_COUNTER) == PERF_MULTI_COUNTER) { - ++pullData; - pRawData->MultiCounterData = *(DWORD*)pullData; - } - break; - - //These counters use two data points, this value and one from this counter's - //base counter. The base counter should be the next counter in the object's - //list of counters. 
- case PERF_SAMPLE_FRACTION: - case PERF_RAW_FRACTION: - pRawData->Data = (ULONGLONG)(*(DWORD*)pData); - pBaseCounter = pCounter + 1; //Get base counter - if ((pBaseCounter->CounterType & PERF_COUNTER_BASE) == PERF_COUNTER_BASE) { - pData = (PVOID)((LPBYTE)pCounterDataBlock + pBaseCounter->CounterOffset); - pRawData->Time = (LONGLONG)(*(DWORD*)pData); - } - else - fSuccess = FALSE; - break; - - case PERF_LARGE_RAW_FRACTION: - case PERF_PRECISION_SYSTEM_TIMER: - case PERF_PRECISION_100NS_TIMER: - case PERF_PRECISION_OBJECT_TIMER: - pRawData->Data = *(UNALIGNED ULONGLONG*)pData; - pBaseCounter = pCounter + 1; - if ((pBaseCounter->CounterType & PERF_COUNTER_BASE) == PERF_COUNTER_BASE) { - pData = (PVOID)((LPBYTE)pCounterDataBlock + pBaseCounter->CounterOffset); - pRawData->Time = *(LONGLONG*)pData; - } - else - fSuccess = FALSE; - break; - - case PERF_AVERAGE_TIMER: - case PERF_AVERAGE_BULK: - pRawData->Data = *(UNALIGNED ULONGLONG*)pData; - pBaseCounter = pCounter+1; - if ((pBaseCounter->CounterType & PERF_COUNTER_BASE) == PERF_COUNTER_BASE) { - pData = (PVOID)((LPBYTE)pCounterDataBlock + pBaseCounter->CounterOffset); - pRawData->Time = *(DWORD*)pData; - } - else - fSuccess = FALSE; - - if (pCounter->CounterType == PERF_AVERAGE_TIMER) - pRawData->Frequency = pDataBlock->PerfFreq.QuadPart; - break; - - //These are base counters and are used in calculations for other counters. - //This case should never be entered. - case PERF_SAMPLE_BASE: - case PERF_AVERAGE_BASE: - case PERF_COUNTER_MULTI_BASE: - case PERF_RAW_BASE: - case PERF_LARGE_RAW_BASE: - pRawData->Data = 0; - pRawData->Time = 0; - fSuccess = FALSE; - break; - - case PERF_ELAPSED_TIME: - pRawData->Data = *(UNALIGNED ULONGLONG*)pData; - pRawData->Time = pObject->PerfTime.QuadPart; - pRawData->Frequency = pObject->PerfFreq.QuadPart; - break; - - //These counters are currently not supported. 
- case PERF_COUNTER_TEXT: - case PERF_COUNTER_NODATA: - case PERF_COUNTER_HISTOGRAM_TYPE: - default: // unknown counter types - pRawData->Data = 0; - pRawData->Time = 0; - fSuccess = FALSE; - break; - } - - return fSuccess; -} - -// -------------------------------------------------------------------------------------------------------------------- - -static inline BOOL isValidPointer(PERF_DATA_BLOCK *pDataBlock __maybe_unused, void *ptr __maybe_unused) { -#ifdef NETDATA_INTERNAL_CHECKS - return (PBYTE)ptr >= (PBYTE)pDataBlock + pDataBlock->TotalByteLength ? FALSE : TRUE; -#else - return TRUE; -#endif -} - -static inline BOOL isValidStructure(PERF_DATA_BLOCK *pDataBlock __maybe_unused, void *ptr __maybe_unused, size_t length __maybe_unused) { -#ifdef NETDATA_INTERNAL_CHECKS - return (PBYTE)ptr + length > (PBYTE)pDataBlock + pDataBlock->TotalByteLength ? FALSE : TRUE; -#else - return TRUE; -#endif -} - -static inline PERF_DATA_BLOCK *getDataBlock(BYTE *pBuffer) { - PERF_DATA_BLOCK *pDataBlock = (PERF_DATA_BLOCK *)pBuffer; - - static WCHAR signature[] = { 'P', 'E', 'R', 'F' }; - - if(memcmp(pDataBlock->Signature, signature, sizeof(signature)) != 0) { - nd_log(NDLS_COLLECTORS, NDLP_ERR, - "WINDOWS: PERFLIB: Invalid data block signature."); - return NULL; - } - - if(!isValidPointer(pDataBlock, (PBYTE)pDataBlock + pDataBlock->SystemNameOffset) || - !isValidStructure(pDataBlock, (PBYTE)pDataBlock + pDataBlock->SystemNameOffset, pDataBlock->SystemNameLength)) { - nd_log(NDLS_COLLECTORS, NDLP_ERR, - "WINDOWS: PERFLIB: Invalid system name array."); - return NULL; - } - - return pDataBlock; -} - -static inline PERF_OBJECT_TYPE *getObjectType(PERF_DATA_BLOCK* pDataBlock, PERF_OBJECT_TYPE *lastObjectType) { - PERF_OBJECT_TYPE* pObjectType = NULL; - - if(!lastObjectType) - pObjectType = (PERF_OBJECT_TYPE *)((PBYTE)pDataBlock + pDataBlock->HeaderLength); - else if (lastObjectType->TotalByteLength != 0) - pObjectType = (PERF_OBJECT_TYPE *)((PBYTE)lastObjectType + 
lastObjectType->TotalByteLength); - - if(pObjectType && (!isValidPointer(pDataBlock, pObjectType) || !isValidStructure(pDataBlock, pObjectType, pObjectType->TotalByteLength))) { - nd_log(NDLS_COLLECTORS, NDLP_ERR, - "WINDOWS: PERFLIB: Invalid ObjectType!"); - pObjectType = NULL; - } - - return pObjectType; -} - -inline PERF_OBJECT_TYPE *getObjectTypeByIndex(PERF_DATA_BLOCK *pDataBlock, DWORD ObjectNameTitleIndex) { - PERF_OBJECT_TYPE *po = NULL; - for(DWORD o = 0; o < pDataBlock->NumObjectTypes ; o++) { - po = getObjectType(pDataBlock, po); - if(po->ObjectNameTitleIndex == ObjectNameTitleIndex) - return po; - } - - return NULL; -} - -static inline PERF_INSTANCE_DEFINITION *getInstance( - PERF_DATA_BLOCK *pDataBlock, - PERF_OBJECT_TYPE *pObjectType, - PERF_COUNTER_BLOCK *lastCounterBlock -) { - PERF_INSTANCE_DEFINITION *pInstance; - - if(!lastCounterBlock) - pInstance = (PERF_INSTANCE_DEFINITION *)((PBYTE)pObjectType + pObjectType->DefinitionLength); - else - pInstance = (PERF_INSTANCE_DEFINITION *)((PBYTE)lastCounterBlock + lastCounterBlock->ByteLength); - - if(pInstance && (!isValidPointer(pDataBlock, pInstance) || !isValidStructure(pDataBlock, pInstance, pInstance->ByteLength))) { - nd_log(NDLS_COLLECTORS, NDLP_ERR, - "WINDOWS: PERFLIB: Invalid Instance Definition!"); - pInstance = NULL; - } - - return pInstance; -} - -static inline PERF_COUNTER_BLOCK *getObjectTypeCounterBlock( - PERF_DATA_BLOCK *pDataBlock, - PERF_OBJECT_TYPE *pObjectType -) { - PERF_COUNTER_BLOCK *pCounterBlock = (PERF_COUNTER_BLOCK *)((PBYTE)pObjectType + pObjectType->DefinitionLength); - - if(pCounterBlock && (!isValidPointer(pDataBlock, pCounterBlock) || !isValidStructure(pDataBlock, pCounterBlock, pCounterBlock->ByteLength))) { - nd_log(NDLS_COLLECTORS, NDLP_ERR, - "WINDOWS: PERFLIB: Invalid ObjectType CounterBlock!"); - pCounterBlock = NULL; - } - - return pCounterBlock; -} - -static inline PERF_COUNTER_BLOCK *getInstanceCounterBlock( - PERF_DATA_BLOCK *pDataBlock, - PERF_OBJECT_TYPE 
*pObjectType, - PERF_INSTANCE_DEFINITION *pInstance -) { - (void)pObjectType; - PERF_COUNTER_BLOCK *pCounterBlock = (PERF_COUNTER_BLOCK *)((PBYTE)pInstance + pInstance->ByteLength); - - if(pCounterBlock && (!isValidPointer(pDataBlock, pCounterBlock) || !isValidStructure(pDataBlock, pCounterBlock, pCounterBlock->ByteLength))) { - nd_log(NDLS_COLLECTORS, NDLP_ERR, - "WINDOWS: PERFLIB: Invalid Instance CounterBlock!"); - pCounterBlock = NULL; - } - - return pCounterBlock; -} - -inline PERF_INSTANCE_DEFINITION *getInstanceByPosition(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, DWORD instancePosition) { - PERF_INSTANCE_DEFINITION *pi = NULL; - PERF_COUNTER_BLOCK *pc = NULL; - for(DWORD i = 0; i <= instancePosition ;i++) { - pi = getInstance(pDataBlock, pObjectType, pc); - pc = getInstanceCounterBlock(pDataBlock, pObjectType, pi); - } - return pi; -} - -static inline PERF_COUNTER_DEFINITION *getCounterDefinition(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, PERF_COUNTER_DEFINITION *lastCounterDefinition) { - PERF_COUNTER_DEFINITION *pCounterDefinition = NULL; - - if(!lastCounterDefinition) - pCounterDefinition = (PERF_COUNTER_DEFINITION *)((PBYTE)pObjectType + pObjectType->HeaderLength); - else - pCounterDefinition = (PERF_COUNTER_DEFINITION *)((PBYTE)lastCounterDefinition + lastCounterDefinition->ByteLength); - - if(pCounterDefinition && (!isValidPointer(pDataBlock, pCounterDefinition) || !isValidStructure(pDataBlock, pCounterDefinition, pCounterDefinition->ByteLength))) { - nd_log(NDLS_COLLECTORS, NDLP_ERR, - "WINDOWS: PERFLIB: Invalid Counter Definition!"); - pCounterDefinition = NULL; - } - - return pCounterDefinition; -} - -// -------------------------------------------------------------------------------------------------------------------- - -static inline BOOL getEncodedStringToUTF8(char *dst, size_t dst_len, DWORD CodePage, char *start, DWORD length) { - WCHAR *tempBuffer; // Temporary buffer for Unicode data - DWORD charsCopied = 0; 
- BOOL free_tempBuffer; - - if (CodePage == 0) { - // Input is already Unicode (UTF-16) - tempBuffer = (WCHAR *)start; - charsCopied = length / sizeof(WCHAR); // Convert byte length to number of WCHARs - free_tempBuffer = FALSE; - } - else { - // Convert the multi-byte instance name to Unicode (UTF-16) - // Calculate maximum possible characters in UTF-16 - - int charCount = MultiByteToWideChar(CodePage, 0, start, (int)length, NULL, 0); - tempBuffer = (WCHAR *)malloc(charCount * sizeof(WCHAR)); - if (!tempBuffer) return FALSE; - - charsCopied = MultiByteToWideChar(CodePage, 0, start, (int)length, tempBuffer, charCount); - if (charsCopied == 0) { - free(tempBuffer); - dst[0] = '\0'; - return FALSE; - } - - free_tempBuffer = TRUE; - } - - // Now convert from Unicode (UTF-16) to UTF-8 - int bytesCopied = WideCharToMultiByte(CP_UTF8, 0, tempBuffer, (int)charsCopied, dst, (int)dst_len, NULL, NULL); - if (bytesCopied == 0) { - if (free_tempBuffer) free(tempBuffer); - dst[0] = '\0'; // Ensure the buffer is null-terminated even on failure - return FALSE; - } - - dst[bytesCopied] = '\0'; // Ensure buffer is null-terminated - if (free_tempBuffer) free(tempBuffer); // Free temporary buffer if used - return TRUE; -} - -inline BOOL getInstanceName(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, PERF_INSTANCE_DEFINITION *pInstance, - char *buffer, size_t bufferLen) { - (void)pDataBlock; - if (!pInstance || !buffer || !bufferLen) return FALSE; - - return getEncodedStringToUTF8(buffer, bufferLen, pObjectType->CodePage, - ((char *)pInstance + pInstance->NameOffset), pInstance->NameLength); -} - -inline BOOL getSystemName(PERF_DATA_BLOCK *pDataBlock, char *buffer, size_t bufferLen) { - return getEncodedStringToUTF8(buffer, bufferLen, 0, - ((char *)pDataBlock + pDataBlock->SystemNameOffset), pDataBlock->SystemNameLength); -} - -inline bool ObjectTypeHasInstances(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType) { - (void)pDataBlock; - return 
pObjectType->NumInstances != PERF_NO_INSTANCES && pObjectType->NumInstances > 0; -} - -PERF_OBJECT_TYPE *perflibFindObjectTypeByName(PERF_DATA_BLOCK *pDataBlock, const char *name) { - PERF_OBJECT_TYPE* pObjectType = NULL; - for(DWORD o = 0; o < pDataBlock->NumObjectTypes; o++) { - pObjectType = getObjectType(pDataBlock, pObjectType); - if(strcmp(name, RegistryFindNameByID(pObjectType->ObjectNameTitleIndex)) == 0) - return pObjectType; - } - - return NULL; -} - -PERF_INSTANCE_DEFINITION *perflibForEachInstance(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, PERF_INSTANCE_DEFINITION *lastInstance) { - if(!ObjectTypeHasInstances(pDataBlock, pObjectType)) - return NULL; - - return getInstance(pDataBlock, pObjectType, - lastInstance ? - getInstanceCounterBlock(pDataBlock, pObjectType, lastInstance) : - NULL ); -} - -bool perflibGetInstanceCounter(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, PERF_INSTANCE_DEFINITION *pInstance, COUNTER_DATA *cd) { - PERF_COUNTER_DEFINITION *pCounterDefinition = NULL; - for(DWORD c = 0; c < pObjectType->NumCounters ;c++) { - pCounterDefinition = getCounterDefinition(pDataBlock, pObjectType, pCounterDefinition); - if(!pCounterDefinition) { - nd_log(NDLS_COLLECTORS, NDLP_ERR, - "WINDOWS: PERFLIB: Cannot read counter definition No %u (out of %u)", - c, pObjectType->NumCounters); - break; - } - - if(cd->id) { - if(cd->id != pCounterDefinition->CounterNameTitleIndex) - continue; - } - else { - if(strcmp(RegistryFindNameByID(pCounterDefinition->CounterNameTitleIndex), cd->key) != 0) - continue; - - cd->id = pCounterDefinition->CounterNameTitleIndex; - } - - cd->current.CounterType = cd->OverwriteCounterType ? 
cd->OverwriteCounterType : pCounterDefinition->CounterType; - PERF_COUNTER_BLOCK *pCounterBlock = getInstanceCounterBlock(pDataBlock, pObjectType, pInstance); - - cd->previous = cd->current; - cd->updated = getCounterData(pDataBlock, pObjectType, pCounterDefinition, pCounterBlock, &cd->current); - return cd->updated; - } - - cd->previous = cd->current; - cd->current = RAW_DATA_EMPTY; - cd->updated = false; - return false; -} - -bool perflibGetObjectCounter(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, COUNTER_DATA *cd) { - PERF_COUNTER_DEFINITION *pCounterDefinition = NULL; - for(DWORD c = 0; c < pObjectType->NumCounters ;c++) { - pCounterDefinition = getCounterDefinition(pDataBlock, pObjectType, pCounterDefinition); - if(!pCounterDefinition) { - nd_log(NDLS_COLLECTORS, NDLP_ERR, - "WINDOWS: PERFLIB: Cannot read counter definition No %u (out of %u)", - c, pObjectType->NumCounters); - break; - } - - if(cd->id) { - if(cd->id != pCounterDefinition->CounterNameTitleIndex) - continue; - } - else { - if(strcmp(RegistryFindNameByID(pCounterDefinition->CounterNameTitleIndex), cd->key) != 0) - continue; - - cd->id = pCounterDefinition->CounterNameTitleIndex; - } - - cd->current.CounterType = cd->OverwriteCounterType ? cd->OverwriteCounterType : pCounterDefinition->CounterType; - PERF_COUNTER_BLOCK *pCounterBlock = getObjectTypeCounterBlock(pDataBlock, pObjectType); - - cd->previous = cd->current; - cd->updated = getCounterData(pDataBlock, pObjectType, pCounterDefinition, pCounterBlock, &cd->current); - return cd->updated; - } - - cd->previous = cd->current; - cd->current = RAW_DATA_EMPTY; - cd->updated = false; - return false; -} - -PERF_DATA_BLOCK *perflibGetPerformanceData(DWORD id) { - char source[24]; - snprintfz(source, sizeof(source), "%u", id); - - LPBYTE pData = (LPBYTE)getPerformanceData((id > 0) ? 
source : NULL); - if (!pData) return NULL; - - PERF_DATA_BLOCK *pDataBlock = getDataBlock(pData); - if(!pDataBlock) return NULL; - - return pDataBlock; -} - -int perflibQueryAndTraverse(DWORD id, - perflib_data_cb dataCb, - perflib_object_cb objectCb, - perflib_instance_cb instanceCb, - perflib_instance_counter_cb instanceCounterCb, - perflib_counter_cb counterCb, - void *data) { - int counters = -1; - - PERF_DATA_BLOCK *pDataBlock = perflibGetPerformanceData(id); - if(!pDataBlock) goto cleanup; - - bool do_data = true; - if(dataCb) - do_data = dataCb(pDataBlock, data); - - PERF_OBJECT_TYPE* pObjectType = NULL; - for(DWORD o = 0; do_data && o < pDataBlock->NumObjectTypes; o++) { - pObjectType = getObjectType(pDataBlock, pObjectType); - if(!pObjectType) { - nd_log(NDLS_COLLECTORS, NDLP_ERR, - "WINDOWS: PERFLIB: Cannot read object type No %d (out of %d)", - o, pDataBlock->NumObjectTypes); - break; - } - - bool do_object = true; - if(objectCb) - do_object = objectCb(pDataBlock, pObjectType, data); - - if(!do_object) - continue; - - if(ObjectTypeHasInstances(pDataBlock, pObjectType)) { - PERF_INSTANCE_DEFINITION *pInstance = NULL; - PERF_COUNTER_BLOCK *pCounterBlock = NULL; - for(LONG i = 0; i < pObjectType->NumInstances ;i++) { - pInstance = getInstance(pDataBlock, pObjectType, pCounterBlock); - if(!pInstance) { - nd_log(NDLS_COLLECTORS, NDLP_ERR, - "WINDOWS: PERFLIB: Cannot read Instance No %d (out of %d)", - i, pObjectType->NumInstances); - break; - } - - pCounterBlock = getInstanceCounterBlock(pDataBlock, pObjectType, pInstance); - if(!pCounterBlock) { - nd_log(NDLS_COLLECTORS, NDLP_ERR, - "WINDOWS: PERFLIB: Cannot read CounterBlock of instance No %d (out of %d)", - i, pObjectType->NumInstances); - break; - } - - bool do_instance = true; - if(instanceCb) - do_instance = instanceCb(pDataBlock, pObjectType, pInstance, data); - - if(!do_instance) - continue; - - PERF_COUNTER_DEFINITION *pCounterDefinition = NULL; - for(DWORD c = 0; c < pObjectType->NumCounters ;c++) { 
- pCounterDefinition = getCounterDefinition(pDataBlock, pObjectType, pCounterDefinition); - if(!pCounterDefinition) { - nd_log(NDLS_COLLECTORS, NDLP_ERR, - "WINDOWS: PERFLIB: Cannot read counter definition No %u (out of %u)", - c, pObjectType->NumCounters); - break; - } - - RAW_DATA sample = { - .CounterType = pCounterDefinition->CounterType, - }; - if(getCounterData(pDataBlock, pObjectType, pCounterDefinition, pCounterBlock, &sample)) { - // DisplayCalculatedValue(&sample, &sample); - - if(instanceCounterCb) { - instanceCounterCb(pDataBlock, pObjectType, pInstance, pCounterDefinition, &sample, data); - counters++; - } - } - } - - if(instanceCb) - instanceCb(pDataBlock, pObjectType, NULL, data); - } - } - else { - PERF_COUNTER_BLOCK *pCounterBlock = getObjectTypeCounterBlock(pDataBlock, pObjectType); - PERF_COUNTER_DEFINITION *pCounterDefinition = NULL; - for(DWORD c = 0; c < pObjectType->NumCounters ;c++) { - pCounterDefinition = getCounterDefinition(pDataBlock, pObjectType, pCounterDefinition); - if(!pCounterDefinition) { - nd_log(NDLS_COLLECTORS, NDLP_ERR, - "WINDOWS: PERFLIB: Cannot read counter definition No %u (out of %u)", - c, pObjectType->NumCounters); - break; - } - - RAW_DATA sample = { - .CounterType = pCounterDefinition->CounterType, - }; - if(getCounterData(pDataBlock, pObjectType, pCounterDefinition, pCounterBlock, &sample)) { - // DisplayCalculatedValue(&sample, &sample); - - if(counterCb) { - counterCb(pDataBlock, pObjectType, pCounterDefinition, &sample, data); - counters++; - } - } - } - } - - if(objectCb) - objectCb(pDataBlock, NULL, data); - } - -cleanup: - return counters; -} +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "perflib.h" + +#if defined(OS_WINDOWS) +#include + +// -------------------------------------------------------------------------------- + +// Retrieve a buffer that contains the specified performance data. +// The pwszSource parameter determines the data that GetRegistryBuffer returns. 
+// +// Typically, when calling RegQueryValueEx, you can specify zero for the size of the buffer +// and the RegQueryValueEx will set your size variable to the required buffer size. However, +// if the source is "Global" or one or more object index values, you will need to increment +// the buffer size in a loop until RegQueryValueEx does not return ERROR_MORE_DATA. +static LPBYTE getPerformanceData(const char *pwszSource) { + static __thread DWORD size = 0; + static __thread LPBYTE buffer = NULL; + + if(pwszSource == (const char *)0x01) { + freez(buffer); + buffer = NULL; + size = 0; + return NULL; + } + + if(!size) { + size = 32 * 1024; + buffer = mallocz(size); + } + + LONG status = ERROR_SUCCESS; + while ((status = RegQueryValueEx(HKEY_PERFORMANCE_DATA, pwszSource, + NULL, NULL, buffer, &size)) == ERROR_MORE_DATA) { + size *= 2; + buffer = reallocz(buffer, size); + } + + if (status != ERROR_SUCCESS) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, "RegQueryValueEx failed with 0x%x.\n", status); + return NULL; + } + + return buffer; +} + +void perflibFreePerformanceData(void) { + getPerformanceData((const char *)0x01); +} + +// -------------------------------------------------------------------------------------------------------------------- + +// Retrieve the raw counter value and any supporting data needed to calculate +// a displayable counter value. Use the counter type to determine the information +// needed to calculate the value. + +static BOOL getCounterData( + PERF_DATA_BLOCK *pDataBlock, + PERF_OBJECT_TYPE* pObject, + PERF_COUNTER_DEFINITION* pCounter, + PERF_COUNTER_BLOCK* pCounterDataBlock, + PRAW_DATA pRawData) +{ + PVOID pData = NULL; + UNALIGNED ULONGLONG* pullData = NULL; + PERF_COUNTER_DEFINITION* pBaseCounter = NULL; + BOOL fSuccess = TRUE; + + //Point to the raw counter data. 
+ pData = (PVOID)((LPBYTE)pCounterDataBlock + pCounter->CounterOffset); + + //Now use the PERF_COUNTER_DEFINITION.CounterType value to figure out what + //other information you need to calculate a displayable value. + switch (pCounter->CounterType) { + + case PERF_COUNTER_COUNTER: + case PERF_COUNTER_QUEUELEN_TYPE: + case PERF_SAMPLE_COUNTER: + pRawData->Data = (ULONGLONG)(*(DWORD*)pData); + pRawData->Time = pDataBlock->PerfTime.QuadPart; + if (PERF_COUNTER_COUNTER == pCounter->CounterType || PERF_SAMPLE_COUNTER == pCounter->CounterType) + pRawData->Frequency = pDataBlock->PerfFreq.QuadPart; + break; + + case PERF_OBJ_TIME_TIMER: + pRawData->Data = (ULONGLONG)(*(DWORD*)pData); + pRawData->Time = pObject->PerfTime.QuadPart; + break; + + case PERF_COUNTER_100NS_QUEUELEN_TYPE: + pRawData->Data = *(UNALIGNED ULONGLONG *)pData; + pRawData->Time = pDataBlock->PerfTime100nSec.QuadPart; + break; + + case PERF_COUNTER_OBJ_TIME_QUEUELEN_TYPE: + pRawData->Data = *(UNALIGNED ULONGLONG *)pData; + pRawData->Time = pObject->PerfTime.QuadPart; + break; + + case PERF_COUNTER_TIMER: + case PERF_COUNTER_TIMER_INV: + case PERF_COUNTER_BULK_COUNT: + case PERF_COUNTER_LARGE_QUEUELEN_TYPE: + pullData = (UNALIGNED ULONGLONG *)pData; + pRawData->Data = *pullData; + pRawData->Time = pDataBlock->PerfTime.QuadPart; + if (pCounter->CounterType == PERF_COUNTER_BULK_COUNT) + pRawData->Frequency = pDataBlock->PerfFreq.QuadPart; + break; + + case PERF_COUNTER_MULTI_TIMER: + case PERF_COUNTER_MULTI_TIMER_INV: + pullData = (UNALIGNED ULONGLONG *)pData; + pRawData->Data = *pullData; + pRawData->Frequency = pDataBlock->PerfFreq.QuadPart; + pRawData->Time = pDataBlock->PerfTime.QuadPart; + + //These counter types have a second counter value that is adjacent to + //this counter value in the counter data block. The value is needed for + //the calculation. 
+ if ((pCounter->CounterType & PERF_MULTI_COUNTER) == PERF_MULTI_COUNTER) { + ++pullData; + pRawData->MultiCounterData = *(DWORD*)pullData; + } + break; + + //These counters do not use any time reference. + case PERF_COUNTER_RAWCOUNT: + case PERF_COUNTER_RAWCOUNT_HEX: + case PERF_COUNTER_DELTA: + // some counters in these categories, have CounterSize = sizeof(ULONGLONG) + // but the official documentation always uses them as sizeof(DWORD) + pRawData->Data = (ULONGLONG)(*(DWORD*)pData); + pRawData->Time = 0; + break; + + case PERF_COUNTER_LARGE_RAWCOUNT: + case PERF_COUNTER_LARGE_RAWCOUNT_HEX: + case PERF_COUNTER_LARGE_DELTA: + pRawData->Data = *(UNALIGNED ULONGLONG*)pData; + pRawData->Time = 0; + break; + + //These counters use the 100ns time base in their calculation. + case PERF_100NSEC_TIMER: + case PERF_100NSEC_TIMER_INV: + case PERF_100NSEC_MULTI_TIMER: + case PERF_100NSEC_MULTI_TIMER_INV: + pullData = (UNALIGNED ULONGLONG*)pData; + pRawData->Data = *pullData; + pRawData->Time = pDataBlock->PerfTime100nSec.QuadPart; + + //These counter types have a second counter value that is adjacent to + //this counter value in the counter data block. The value is needed for + //the calculation. + if ((pCounter->CounterType & PERF_MULTI_COUNTER) == PERF_MULTI_COUNTER) { + ++pullData; + pRawData->MultiCounterData = *(DWORD*)pullData; + } + break; + + //These counters use two data points, this value and one from this counter's + //base counter. The base counter should be the next counter in the object's + //list of counters. 
+ case PERF_SAMPLE_FRACTION: + case PERF_RAW_FRACTION: + pRawData->Data = (ULONGLONG)(*(DWORD*)pData); + pBaseCounter = pCounter + 1; //Get base counter + if ((pBaseCounter->CounterType & PERF_COUNTER_BASE) == PERF_COUNTER_BASE) { + pData = (PVOID)((LPBYTE)pCounterDataBlock + pBaseCounter->CounterOffset); + pRawData->Time = (LONGLONG)(*(DWORD*)pData); + } + else + fSuccess = FALSE; + break; + + case PERF_LARGE_RAW_FRACTION: + case PERF_PRECISION_SYSTEM_TIMER: + case PERF_PRECISION_100NS_TIMER: + case PERF_PRECISION_OBJECT_TIMER: + pRawData->Data = *(UNALIGNED ULONGLONG*)pData; + pBaseCounter = pCounter + 1; + if ((pBaseCounter->CounterType & PERF_COUNTER_BASE) == PERF_COUNTER_BASE) { + pData = (PVOID)((LPBYTE)pCounterDataBlock + pBaseCounter->CounterOffset); + pRawData->Time = *(LONGLONG*)pData; + } + else + fSuccess = FALSE; + break; + + case PERF_AVERAGE_TIMER: + case PERF_AVERAGE_BULK: + pRawData->Data = *(UNALIGNED ULONGLONG*)pData; + pBaseCounter = pCounter+1; + if ((pBaseCounter->CounterType & PERF_COUNTER_BASE) == PERF_COUNTER_BASE) { + pData = (PVOID)((LPBYTE)pCounterDataBlock + pBaseCounter->CounterOffset); + pRawData->Time = *(DWORD*)pData; + } + else + fSuccess = FALSE; + + if (pCounter->CounterType == PERF_AVERAGE_TIMER) + pRawData->Frequency = pDataBlock->PerfFreq.QuadPart; + break; + + //These are base counters and are used in calculations for other counters. + //This case should never be entered. + case PERF_SAMPLE_BASE: + case PERF_AVERAGE_BASE: + case PERF_COUNTER_MULTI_BASE: + case PERF_RAW_BASE: + case PERF_LARGE_RAW_BASE: + pRawData->Data = 0; + pRawData->Time = 0; + fSuccess = FALSE; + break; + + case PERF_ELAPSED_TIME: + pRawData->Data = *(UNALIGNED ULONGLONG*)pData; + pRawData->Time = pObject->PerfTime.QuadPart; + pRawData->Frequency = pObject->PerfFreq.QuadPart; + break; + + //These counters are currently not supported. 
+ case PERF_COUNTER_TEXT: + case PERF_COUNTER_NODATA: + case PERF_COUNTER_HISTOGRAM_TYPE: + default: // unknown counter types + pRawData->Data = 0; + pRawData->Time = 0; + fSuccess = FALSE; + break; + } + + return fSuccess; +} + +// -------------------------------------------------------------------------------------------------------------------- + +static inline BOOL isValidPointer(PERF_DATA_BLOCK *pDataBlock __maybe_unused, void *ptr __maybe_unused) { +#ifdef NETDATA_INTERNAL_CHECKS + return (PBYTE)ptr >= (PBYTE)pDataBlock + pDataBlock->TotalByteLength ? FALSE : TRUE; +#else + return TRUE; +#endif +} + +static inline BOOL isValidStructure(PERF_DATA_BLOCK *pDataBlock __maybe_unused, void *ptr __maybe_unused, size_t length __maybe_unused) { +#ifdef NETDATA_INTERNAL_CHECKS + return (PBYTE)ptr + length > (PBYTE)pDataBlock + pDataBlock->TotalByteLength ? FALSE : TRUE; +#else + return TRUE; +#endif +} + +static inline PERF_DATA_BLOCK *getDataBlock(BYTE *pBuffer) { + PERF_DATA_BLOCK *pDataBlock = (PERF_DATA_BLOCK *)pBuffer; + + static WCHAR signature[] = { 'P', 'E', 'R', 'F' }; + + if(memcmp(pDataBlock->Signature, signature, sizeof(signature)) != 0) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, + "WINDOWS: PERFLIB: Invalid data block signature."); + return NULL; + } + + if(!isValidPointer(pDataBlock, (PBYTE)pDataBlock + pDataBlock->SystemNameOffset) || + !isValidStructure(pDataBlock, (PBYTE)pDataBlock + pDataBlock->SystemNameOffset, pDataBlock->SystemNameLength)) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, + "WINDOWS: PERFLIB: Invalid system name array."); + return NULL; + } + + return pDataBlock; +} + +static inline PERF_OBJECT_TYPE *getObjectType(PERF_DATA_BLOCK* pDataBlock, PERF_OBJECT_TYPE *lastObjectType) { + PERF_OBJECT_TYPE* pObjectType = NULL; + + if(!lastObjectType) + pObjectType = (PERF_OBJECT_TYPE *)((PBYTE)pDataBlock + pDataBlock->HeaderLength); + else if (lastObjectType->TotalByteLength != 0) + pObjectType = (PERF_OBJECT_TYPE *)((PBYTE)lastObjectType + 
lastObjectType->TotalByteLength); + + if(pObjectType && (!isValidPointer(pDataBlock, pObjectType) || !isValidStructure(pDataBlock, pObjectType, pObjectType->TotalByteLength))) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, + "WINDOWS: PERFLIB: Invalid ObjectType!"); + pObjectType = NULL; + } + + return pObjectType; +} + +inline PERF_OBJECT_TYPE *getObjectTypeByIndex(PERF_DATA_BLOCK *pDataBlock, DWORD ObjectNameTitleIndex) { + PERF_OBJECT_TYPE *po = NULL; + for(DWORD o = 0; o < pDataBlock->NumObjectTypes ; o++) { + po = getObjectType(pDataBlock, po); + if(po->ObjectNameTitleIndex == ObjectNameTitleIndex) + return po; + } + + return NULL; +} + +static inline PERF_INSTANCE_DEFINITION *getInstance( + PERF_DATA_BLOCK *pDataBlock, + PERF_OBJECT_TYPE *pObjectType, + PERF_COUNTER_BLOCK *lastCounterBlock +) { + PERF_INSTANCE_DEFINITION *pInstance; + + if(!lastCounterBlock) + pInstance = (PERF_INSTANCE_DEFINITION *)((PBYTE)pObjectType + pObjectType->DefinitionLength); + else + pInstance = (PERF_INSTANCE_DEFINITION *)((PBYTE)lastCounterBlock + lastCounterBlock->ByteLength); + + if(pInstance && (!isValidPointer(pDataBlock, pInstance) || !isValidStructure(pDataBlock, pInstance, pInstance->ByteLength))) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, + "WINDOWS: PERFLIB: Invalid Instance Definition!"); + pInstance = NULL; + } + + return pInstance; +} + +static inline PERF_COUNTER_BLOCK *getObjectTypeCounterBlock( + PERF_DATA_BLOCK *pDataBlock, + PERF_OBJECT_TYPE *pObjectType +) { + PERF_COUNTER_BLOCK *pCounterBlock = (PERF_COUNTER_BLOCK *)((PBYTE)pObjectType + pObjectType->DefinitionLength); + + if(pCounterBlock && (!isValidPointer(pDataBlock, pCounterBlock) || !isValidStructure(pDataBlock, pCounterBlock, pCounterBlock->ByteLength))) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, + "WINDOWS: PERFLIB: Invalid ObjectType CounterBlock!"); + pCounterBlock = NULL; + } + + return pCounterBlock; +} + +static inline PERF_COUNTER_BLOCK *getInstanceCounterBlock( + PERF_DATA_BLOCK *pDataBlock, + PERF_OBJECT_TYPE 
*pObjectType, + PERF_INSTANCE_DEFINITION *pInstance +) { + (void)pObjectType; + PERF_COUNTER_BLOCK *pCounterBlock = (PERF_COUNTER_BLOCK *)((PBYTE)pInstance + pInstance->ByteLength); + + if(pCounterBlock && (!isValidPointer(pDataBlock, pCounterBlock) || !isValidStructure(pDataBlock, pCounterBlock, pCounterBlock->ByteLength))) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, + "WINDOWS: PERFLIB: Invalid Instance CounterBlock!"); + pCounterBlock = NULL; + } + + return pCounterBlock; +} + +inline PERF_INSTANCE_DEFINITION *getInstanceByPosition(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, DWORD instancePosition) { + PERF_INSTANCE_DEFINITION *pi = NULL; + PERF_COUNTER_BLOCK *pc = NULL; + for(DWORD i = 0; i <= instancePosition ;i++) { + pi = getInstance(pDataBlock, pObjectType, pc); + pc = getInstanceCounterBlock(pDataBlock, pObjectType, pi); + } + return pi; +} + +static inline PERF_COUNTER_DEFINITION *getCounterDefinition(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, PERF_COUNTER_DEFINITION *lastCounterDefinition) { + PERF_COUNTER_DEFINITION *pCounterDefinition = NULL; + + if(!lastCounterDefinition) + pCounterDefinition = (PERF_COUNTER_DEFINITION *)((PBYTE)pObjectType + pObjectType->HeaderLength); + else + pCounterDefinition = (PERF_COUNTER_DEFINITION *)((PBYTE)lastCounterDefinition + lastCounterDefinition->ByteLength); + + if(pCounterDefinition && (!isValidPointer(pDataBlock, pCounterDefinition) || !isValidStructure(pDataBlock, pCounterDefinition, pCounterDefinition->ByteLength))) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, + "WINDOWS: PERFLIB: Invalid Counter Definition!"); + pCounterDefinition = NULL; + } + + return pCounterDefinition; +} + +// -------------------------------------------------------------------------------------------------------------------- + +static inline BOOL getEncodedStringToUTF8(char *dst, size_t dst_len, DWORD CodePage, char *start, DWORD length) { + WCHAR *tempBuffer; // Temporary buffer for Unicode data + DWORD charsCopied = 0; 
+ BOOL free_tempBuffer; + + if (CodePage == 0) { + // Input is already Unicode (UTF-16) + tempBuffer = (WCHAR *)start; + charsCopied = length / sizeof(WCHAR); // Convert byte length to number of WCHARs + free_tempBuffer = FALSE; + } + else { + // Convert the multi-byte instance name to Unicode (UTF-16) + // Calculate maximum possible characters in UTF-16 + + int charCount = MultiByteToWideChar(CodePage, 0, start, (int)length, NULL, 0); + tempBuffer = (WCHAR *)malloc(charCount * sizeof(WCHAR)); + if (!tempBuffer) return FALSE; + + charsCopied = MultiByteToWideChar(CodePage, 0, start, (int)length, tempBuffer, charCount); + if (charsCopied == 0) { + free(tempBuffer); + dst[0] = '\0'; + return FALSE; + } + + free_tempBuffer = TRUE; + } + + // Now convert from Unicode (UTF-16) to UTF-8 + int bytesCopied = WideCharToMultiByte(CP_UTF8, 0, tempBuffer, (int)charsCopied, dst, (int)dst_len, NULL, NULL); + if (bytesCopied == 0) { + if (free_tempBuffer) free(tempBuffer); + dst[0] = '\0'; // Ensure the buffer is null-terminated even on failure + return FALSE; + } + + dst[bytesCopied] = '\0'; // Ensure buffer is null-terminated + if (free_tempBuffer) free(tempBuffer); // Free temporary buffer if used + return TRUE; +} + +inline BOOL getInstanceName(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, PERF_INSTANCE_DEFINITION *pInstance, + char *buffer, size_t bufferLen) { + (void)pDataBlock; + if (!pInstance || !buffer || !bufferLen) return FALSE; + + return getEncodedStringToUTF8(buffer, bufferLen, pObjectType->CodePage, + ((char *)pInstance + pInstance->NameOffset), pInstance->NameLength); +} + +inline BOOL getSystemName(PERF_DATA_BLOCK *pDataBlock, char *buffer, size_t bufferLen) { + return getEncodedStringToUTF8(buffer, bufferLen, 0, + ((char *)pDataBlock + pDataBlock->SystemNameOffset), pDataBlock->SystemNameLength); +} + +inline bool ObjectTypeHasInstances(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType) { + (void)pDataBlock; + return 
pObjectType->NumInstances != PERF_NO_INSTANCES && pObjectType->NumInstances > 0; +} + +PERF_OBJECT_TYPE *perflibFindObjectTypeByName(PERF_DATA_BLOCK *pDataBlock, const char *name) { + PERF_OBJECT_TYPE* pObjectType = NULL; + for(DWORD o = 0; o < pDataBlock->NumObjectTypes; o++) { + pObjectType = getObjectType(pDataBlock, pObjectType); + if(strcmp(name, RegistryFindNameByID(pObjectType->ObjectNameTitleIndex)) == 0) + return pObjectType; + } + + return NULL; +} + +PERF_INSTANCE_DEFINITION *perflibForEachInstance(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, PERF_INSTANCE_DEFINITION *lastInstance) { + if(!ObjectTypeHasInstances(pDataBlock, pObjectType)) + return NULL; + + return getInstance(pDataBlock, pObjectType, + lastInstance ? + getInstanceCounterBlock(pDataBlock, pObjectType, lastInstance) : + NULL ); +} + +bool perflibGetInstanceCounter(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, PERF_INSTANCE_DEFINITION *pInstance, COUNTER_DATA *cd) { + PERF_COUNTER_DEFINITION *pCounterDefinition = NULL; + for(DWORD c = 0; c < pObjectType->NumCounters ;c++) { + pCounterDefinition = getCounterDefinition(pDataBlock, pObjectType, pCounterDefinition); + if(!pCounterDefinition) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, + "WINDOWS: PERFLIB: Cannot read counter definition No %u (out of %u)", + c, pObjectType->NumCounters); + break; + } + + if(cd->id) { + if(cd->id != pCounterDefinition->CounterNameTitleIndex) + continue; + } + else { + if(strcmp(RegistryFindNameByID(pCounterDefinition->CounterNameTitleIndex), cd->key) != 0) + continue; + + cd->id = pCounterDefinition->CounterNameTitleIndex; + } + + cd->current.CounterType = cd->OverwriteCounterType ? 
cd->OverwriteCounterType : pCounterDefinition->CounterType; + PERF_COUNTER_BLOCK *pCounterBlock = getInstanceCounterBlock(pDataBlock, pObjectType, pInstance); + + cd->previous = cd->current; + cd->updated = getCounterData(pDataBlock, pObjectType, pCounterDefinition, pCounterBlock, &cd->current); + return cd->updated; + } + + cd->previous = cd->current; + cd->current = RAW_DATA_EMPTY; + cd->updated = false; + return false; +} + +bool perflibGetObjectCounter(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, COUNTER_DATA *cd) { + PERF_COUNTER_DEFINITION *pCounterDefinition = NULL; + for(DWORD c = 0; c < pObjectType->NumCounters ;c++) { + pCounterDefinition = getCounterDefinition(pDataBlock, pObjectType, pCounterDefinition); + if(!pCounterDefinition) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, + "WINDOWS: PERFLIB: Cannot read counter definition No %u (out of %u)", + c, pObjectType->NumCounters); + break; + } + + if(cd->id) { + if(cd->id != pCounterDefinition->CounterNameTitleIndex) + continue; + } + else { + if(strcmp(RegistryFindNameByID(pCounterDefinition->CounterNameTitleIndex), cd->key) != 0) + continue; + + cd->id = pCounterDefinition->CounterNameTitleIndex; + } + + cd->current.CounterType = cd->OverwriteCounterType ? cd->OverwriteCounterType : pCounterDefinition->CounterType; + PERF_COUNTER_BLOCK *pCounterBlock = getObjectTypeCounterBlock(pDataBlock, pObjectType); + + cd->previous = cd->current; + cd->updated = getCounterData(pDataBlock, pObjectType, pCounterDefinition, pCounterBlock, &cd->current); + return cd->updated; + } + + cd->previous = cd->current; + cd->current = RAW_DATA_EMPTY; + cd->updated = false; + return false; +} + +PERF_DATA_BLOCK *perflibGetPerformanceData(DWORD id) { + char source[24]; + snprintfz(source, sizeof(source), "%u", id); + + LPBYTE pData = (LPBYTE)getPerformanceData((id > 0) ? 
source : NULL); + if (!pData) return NULL; + + PERF_DATA_BLOCK *pDataBlock = getDataBlock(pData); + if(!pDataBlock) return NULL; + + return pDataBlock; +} + +int perflibQueryAndTraverse(DWORD id, + perflib_data_cb dataCb, + perflib_object_cb objectCb, + perflib_instance_cb instanceCb, + perflib_instance_counter_cb instanceCounterCb, + perflib_counter_cb counterCb, + void *data) { + int counters = -1; + + PERF_DATA_BLOCK *pDataBlock = perflibGetPerformanceData(id); + if(!pDataBlock) goto cleanup; + + bool do_data = true; + if(dataCb) + do_data = dataCb(pDataBlock, data); + + PERF_OBJECT_TYPE* pObjectType = NULL; + for(DWORD o = 0; do_data && o < pDataBlock->NumObjectTypes; o++) { + pObjectType = getObjectType(pDataBlock, pObjectType); + if(!pObjectType) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, + "WINDOWS: PERFLIB: Cannot read object type No %d (out of %d)", + o, pDataBlock->NumObjectTypes); + break; + } + + bool do_object = true; + if(objectCb) + do_object = objectCb(pDataBlock, pObjectType, data); + + if(!do_object) + continue; + + if(ObjectTypeHasInstances(pDataBlock, pObjectType)) { + PERF_INSTANCE_DEFINITION *pInstance = NULL; + PERF_COUNTER_BLOCK *pCounterBlock = NULL; + for(LONG i = 0; i < pObjectType->NumInstances ;i++) { + pInstance = getInstance(pDataBlock, pObjectType, pCounterBlock); + if(!pInstance) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, + "WINDOWS: PERFLIB: Cannot read Instance No %d (out of %d)", + i, pObjectType->NumInstances); + break; + } + + pCounterBlock = getInstanceCounterBlock(pDataBlock, pObjectType, pInstance); + if(!pCounterBlock) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, + "WINDOWS: PERFLIB: Cannot read CounterBlock of instance No %d (out of %d)", + i, pObjectType->NumInstances); + break; + } + + bool do_instance = true; + if(instanceCb) + do_instance = instanceCb(pDataBlock, pObjectType, pInstance, data); + + if(!do_instance) + continue; + + PERF_COUNTER_DEFINITION *pCounterDefinition = NULL; + for(DWORD c = 0; c < pObjectType->NumCounters ;c++) { 
+ pCounterDefinition = getCounterDefinition(pDataBlock, pObjectType, pCounterDefinition); + if(!pCounterDefinition) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, + "WINDOWS: PERFLIB: Cannot read counter definition No %u (out of %u)", + c, pObjectType->NumCounters); + break; + } + + RAW_DATA sample = { + .CounterType = pCounterDefinition->CounterType, + }; + if(getCounterData(pDataBlock, pObjectType, pCounterDefinition, pCounterBlock, &sample)) { + // DisplayCalculatedValue(&sample, &sample); + + if(instanceCounterCb) { + instanceCounterCb(pDataBlock, pObjectType, pInstance, pCounterDefinition, &sample, data); + counters++; + } + } + } + + if(instanceCb) + instanceCb(pDataBlock, pObjectType, NULL, data); + } + } + else { + PERF_COUNTER_BLOCK *pCounterBlock = getObjectTypeCounterBlock(pDataBlock, pObjectType); + PERF_COUNTER_DEFINITION *pCounterDefinition = NULL; + for(DWORD c = 0; c < pObjectType->NumCounters ;c++) { + pCounterDefinition = getCounterDefinition(pDataBlock, pObjectType, pCounterDefinition); + if(!pCounterDefinition) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, + "WINDOWS: PERFLIB: Cannot read counter definition No %u (out of %u)", + c, pObjectType->NumCounters); + break; + } + + RAW_DATA sample = { + .CounterType = pCounterDefinition->CounterType, + }; + if(getCounterData(pDataBlock, pObjectType, pCounterDefinition, pCounterBlock, &sample)) { + // DisplayCalculatedValue(&sample, &sample); + + if(counterCb) { + counterCb(pDataBlock, pObjectType, pCounterDefinition, &sample, data); + counters++; + } + } + } + } + + if(objectCb) + objectCb(pDataBlock, NULL, data); + } + +cleanup: + return counters; +} + +#endif // OS_WINDOWS \ No newline at end of file diff --git a/src/collectors/windows.plugin/perflib.h b/src/libnetdata/os/windows-perflib/perflib.h similarity index 86% rename from src/collectors/windows.plugin/perflib.h rename to src/libnetdata/os/windows-perflib/perflib.h index deba4e9a320822..0d853edcc3e07f 100644 --- a/src/collectors/windows.plugin/perflib.h +++ 
b/src/libnetdata/os/windows-perflib/perflib.h @@ -1,72 +1,88 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#ifndef NETDATA_PERFLIB_H -#define NETDATA_PERFLIB_H - -#include "libnetdata/libnetdata.h" -#include - -const char *RegistryFindNameByID(DWORD id); -const char *RegistryFindHelpByID(DWORD id); -DWORD RegistryFindIDByName(const char *name); -#define PERFLIB_REGISTRY_NAME_NOT_FOUND (DWORD)-1 - -PERF_DATA_BLOCK *perflibGetPerformanceData(DWORD id); -void perflibFreePerformanceData(void); -PERF_OBJECT_TYPE *perflibFindObjectTypeByName(PERF_DATA_BLOCK *pDataBlock, const char *name); -PERF_INSTANCE_DEFINITION *perflibForEachInstance(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, PERF_INSTANCE_DEFINITION *lastInstance); - -typedef struct _rawdata { - DWORD CounterType; - DWORD MultiCounterData; // Second raw counter value for multi-valued counters - ULONGLONG Data; // Raw counter data - LONGLONG Time; // Is a time value or a base value - LONGLONG Frequency; -} RAW_DATA, *PRAW_DATA; - -typedef struct _counterdata { - DWORD id; - bool updated; - const char *key; - DWORD OverwriteCounterType; // if set, the counter type will be overwritten once read - RAW_DATA current; - RAW_DATA previous; -} COUNTER_DATA; - -#define RAW_DATA_EMPTY (RAW_DATA){ 0 } - -bool perflibGetInstanceCounter(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, PERF_INSTANCE_DEFINITION *pInstance, COUNTER_DATA *cd); -bool perflibGetObjectCounter(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, COUNTER_DATA *cd); - -typedef bool (*perflib_data_cb)(PERF_DATA_BLOCK *pDataBlock, void *data); -typedef bool (*perflib_object_cb)(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, void *data); -typedef bool (*perflib_instance_cb)(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, PERF_INSTANCE_DEFINITION *pInstance, void *data); -typedef bool (*perflib_instance_counter_cb)(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, 
PERF_INSTANCE_DEFINITION *pInstance, PERF_COUNTER_DEFINITION *pCounter, RAW_DATA *sample, void *data); -typedef bool (*perflib_counter_cb)(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, PERF_COUNTER_DEFINITION *pCounter, RAW_DATA *sample, void *data); - -int perflibQueryAndTraverse(DWORD id, - perflib_data_cb dataCb, - perflib_object_cb objectCb, - perflib_instance_cb instanceCb, - perflib_instance_counter_cb instanceCounterCb, - perflib_counter_cb counterCb, - void *data); - -bool ObjectTypeHasInstances(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType); - -BOOL getInstanceName(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, PERF_INSTANCE_DEFINITION *pInstance, - char *buffer, size_t bufferLen); - -BOOL getSystemName(PERF_DATA_BLOCK *pDataBlock, char *buffer, size_t bufferLen); - -PERF_OBJECT_TYPE *getObjectTypeByIndex(PERF_DATA_BLOCK *pDataBlock, DWORD ObjectNameTitleIndex); - -PERF_INSTANCE_DEFINITION *getInstanceByPosition( - PERF_DATA_BLOCK *pDataBlock, - PERF_OBJECT_TYPE *pObjectType, - DWORD instancePosition); - -void PerflibNamesRegistryInitialize(void); -void PerflibNamesRegistryUpdate(void); - -#endif //NETDATA_PERFLIB_H +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_PERFLIB_H +#define NETDATA_PERFLIB_H + +#include "libnetdata/libnetdata.h" + +#if defined(OS_WINDOWS) + +typedef uint32_t DWORD; +typedef long long LONGLONG; +typedef unsigned long long ULONGLONG; +typedef int BOOL; + +struct _PERF_DATA_BLOCK; +typedef struct _PERF_DATA_BLOCK PERF_DATA_BLOCK; +struct _PERF_OBJECT_TYPE; +typedef struct _PERF_OBJECT_TYPE PERF_OBJECT_TYPE; +struct _PERF_INSTANCE_DEFINITION; +typedef struct _PERF_INSTANCE_DEFINITION PERF_INSTANCE_DEFINITION; +struct _PERF_COUNTER_DEFINITION; +typedef struct _PERF_COUNTER_DEFINITION PERF_COUNTER_DEFINITION; + +const char *RegistryFindNameByID(DWORD id); +const char *RegistryFindHelpByID(DWORD id); +DWORD RegistryFindIDByName(const char *name); +#define 
PERFLIB_REGISTRY_NAME_NOT_FOUND (DWORD)-1 + +PERF_DATA_BLOCK *perflibGetPerformanceData(DWORD id); +void perflibFreePerformanceData(void); +PERF_OBJECT_TYPE *perflibFindObjectTypeByName(PERF_DATA_BLOCK *pDataBlock, const char *name); +PERF_INSTANCE_DEFINITION *perflibForEachInstance(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, PERF_INSTANCE_DEFINITION *lastInstance); + +typedef struct _rawdata { + DWORD CounterType; + DWORD MultiCounterData; // Second raw counter value for multi-valued counters + ULONGLONG Data; // Raw counter data + LONGLONG Time; // Is a time value or a base value + LONGLONG Frequency; +} RAW_DATA, *PRAW_DATA; + +typedef struct _counterdata { + DWORD id; + bool updated; + const char *key; + DWORD OverwriteCounterType; // if set, the counter type will be overwritten once read + RAW_DATA current; + RAW_DATA previous; +} COUNTER_DATA; + +#define RAW_DATA_EMPTY (RAW_DATA){ 0 } + +bool perflibGetInstanceCounter(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, PERF_INSTANCE_DEFINITION *pInstance, COUNTER_DATA *cd); +bool perflibGetObjectCounter(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, COUNTER_DATA *cd); + +typedef bool (*perflib_data_cb)(PERF_DATA_BLOCK *pDataBlock, void *data); +typedef bool (*perflib_object_cb)(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, void *data); +typedef bool (*perflib_instance_cb)(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, PERF_INSTANCE_DEFINITION *pInstance, void *data); +typedef bool (*perflib_instance_counter_cb)(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, PERF_INSTANCE_DEFINITION *pInstance, PERF_COUNTER_DEFINITION *pCounter, RAW_DATA *sample, void *data); +typedef bool (*perflib_counter_cb)(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, PERF_COUNTER_DEFINITION *pCounter, RAW_DATA *sample, void *data); + +int perflibQueryAndTraverse(DWORD id, + perflib_data_cb dataCb, + perflib_object_cb objectCb, + perflib_instance_cb 
instanceCb, + perflib_instance_counter_cb instanceCounterCb, + perflib_counter_cb counterCb, + void *data); + +bool ObjectTypeHasInstances(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType); + +BOOL getInstanceName(PERF_DATA_BLOCK *pDataBlock, PERF_OBJECT_TYPE *pObjectType, PERF_INSTANCE_DEFINITION *pInstance, + char *buffer, size_t bufferLen); + +BOOL getSystemName(PERF_DATA_BLOCK *pDataBlock, char *buffer, size_t bufferLen); + +PERF_OBJECT_TYPE *getObjectTypeByIndex(PERF_DATA_BLOCK *pDataBlock, DWORD ObjectNameTitleIndex); + +PERF_INSTANCE_DEFINITION *getInstanceByPosition( + PERF_DATA_BLOCK *pDataBlock, + PERF_OBJECT_TYPE *pObjectType, + DWORD instancePosition); + +void PerflibNamesRegistryInitialize(void); +void PerflibNamesRegistryUpdate(void); + +#endif // OS_WINDOWS +#endif //NETDATA_PERFLIB_H diff --git a/src/libnetdata/procfile/procfile.c b/src/libnetdata/procfile/procfile.c index 2b7eeeb5619def..f360666d01fd73 100644 --- a/src/libnetdata/procfile/procfile.c +++ b/src/libnetdata/procfile/procfile.c @@ -230,8 +230,12 @@ static void procfile_parser(procfile *ff) { } else if(likely(ct == PF_CHAR_IS_OPEN)) { if(s == t) { + if(!opened) + t = ++s; + else + ++s; + opened++; - t = ++s; } else if(opened) { opened++; diff --git a/src/libnetdata/sanitizers/chart_id_and_name.c b/src/libnetdata/sanitizers/chart_id_and_name.c new file mode 100644 index 00000000000000..089463111f4ed7 --- /dev/null +++ b/src/libnetdata/sanitizers/chart_id_and_name.c @@ -0,0 +1,618 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../libnetdata.h" + +static uint8_t netdata_map_chart_names[256] = { + [0] = '\0', // + [1] = '_', // + [2] = '_', // + [3] = '_', // + [4] = '_', // + [5] = '_', // + [6] = '_', // + [7] = '_', // + [8] = '_', // + [9] = '_', // + [10] = '_', // + [11] = '_', // + [12] = '_', // + [13] = '_', // + [14] = '_', // + [15] = '_', // + [16] = '_', // + [17] = '_', // + [18] = '_', // + [19] = '_', // + [20] = '_', // + [21] = '_', // + [22] = '_', 
// + [23] = '_', // + [24] = '_', // + [25] = '_', // + [26] = '_', // + [27] = '_', // + [28] = '_', // + [29] = '_', // + [30] = '_', // + [31] = '_', // + [32] = '_', // + [33] = '_', // ! + [34] = '_', // " + [35] = '_', // # + [36] = '_', // $ + [37] = '_', // % + [38] = '_', // & + [39] = '_', // ' + [40] = '_', // ( + [41] = '_', // ) + [42] = '_', // * + [43] = '_', // + + [44] = '.', // , + [45] = '-', // - + [46] = '.', // . + [47] = '/', // / + [48] = '0', // 0 + [49] = '1', // 1 + [50] = '2', // 2 + [51] = '3', // 3 + [52] = '4', // 4 + [53] = '5', // 5 + [54] = '6', // 6 + [55] = '7', // 7 + [56] = '8', // 8 + [57] = '9', // 9 + [58] = '_', // : + [59] = '_', // ; + [60] = '_', // < + [61] = '_', // = + [62] = '_', // > + [63] = '_', // ? + [64] = '_', // @ + [65] = 'a', // A + [66] = 'b', // B + [67] = 'c', // C + [68] = 'd', // D + [69] = 'e', // E + [70] = 'f', // F + [71] = 'g', // G + [72] = 'h', // H + [73] = 'i', // I + [74] = 'j', // J + [75] = 'k', // K + [76] = 'l', // L + [77] = 'm', // M + [78] = 'n', // N + [79] = 'o', // O + [80] = 'p', // P + [81] = 'q', // Q + [82] = 'r', // R + [83] = 's', // S + [84] = 't', // T + [85] = 'u', // U + [86] = 'v', // V + [87] = 'w', // W + [88] = 'x', // X + [89] = 'y', // Y + [90] = 'z', // Z + [91] = '_', // [ + [92] = '/', // backslash + [93] = '_', // ] + [94] = '_', // ^ + [95] = '_', // _ + [96] = '_', // ` + [97] = 'a', // a + [98] = 'b', // b + [99] = 'c', // c + [100] = 'd', // d + [101] = 'e', // e + [102] = 'f', // f + [103] = 'g', // g + [104] = 'h', // h + [105] = 'i', // i + [106] = 'j', // j + [107] = 'k', // k + [108] = 'l', // l + [109] = 'm', // m + [110] = 'n', // n + [111] = 'o', // o + [112] = 'p', // p + [113] = 'q', // q + [114] = 'r', // r + [115] = 's', // s + [116] = 't', // t + [117] = 'u', // u + [118] = 'v', // v + [119] = 'w', // w + [120] = 'x', // x + [121] = 'y', // y + [122] = 'z', // z + [123] = '_', // { + [124] = '_', // | + [125] = '_', // } + [126] = '_', // ~ + 
[127] = '_', // + [128] = '_', // + [129] = '_', // + [130] = '_', // + [131] = '_', // + [132] = '_', // + [133] = '_', // + [134] = '_', // + [135] = '_', // + [136] = '_', // + [137] = '_', // + [138] = '_', // + [139] = '_', // + [140] = '_', // + [141] = '_', // + [142] = '_', // + [143] = '_', // + [144] = '_', // + [145] = '_', // + [146] = '_', // + [147] = '_', // + [148] = '_', // + [149] = '_', // + [150] = '_', // + [151] = '_', // + [152] = '_', // + [153] = '_', // + [154] = '_', // + [155] = '_', // + [156] = '_', // + [157] = '_', // + [158] = '_', // + [159] = '_', // + [160] = '_', // + [161] = '_', // + [162] = '_', // + [163] = '_', // + [164] = '_', // + [165] = '_', // + [166] = '_', // + [167] = '_', // + [168] = '_', // + [169] = '_', // + [170] = '_', // + [171] = '_', // + [172] = '_', // + [173] = '_', // + [174] = '_', // + [175] = '_', // + [176] = '_', // + [177] = '_', // + [178] = '_', // + [179] = '_', // + [180] = '_', // + [181] = '_', // + [182] = '_', // + [183] = '_', // + [184] = '_', // + [185] = '_', // + [186] = '_', // + [187] = '_', // + [188] = '_', // + [189] = '_', // + [190] = '_', // + [191] = '_', // + [192] = '_', // + [193] = '_', // + [194] = '_', // + [195] = '_', // + [196] = '_', // + [197] = '_', // + [198] = '_', // + [199] = '_', // + [200] = '_', // + [201] = '_', // + [202] = '_', // + [203] = '_', // + [204] = '_', // + [205] = '_', // + [206] = '_', // + [207] = '_', // + [208] = '_', // + [209] = '_', // + [210] = '_', // + [211] = '_', // + [212] = '_', // + [213] = '_', // + [214] = '_', // + [215] = '_', // + [216] = '_', // + [217] = '_', // + [218] = '_', // + [219] = '_', // + [220] = '_', // + [221] = '_', // + [222] = '_', // + [223] = '_', // + [224] = '_', // + [225] = '_', // + [226] = '_', // + [227] = '_', // + [228] = '_', // + [229] = '_', // + [230] = '_', // + [231] = '_', // + [232] = '_', // + [233] = '_', // + [234] = '_', // + [235] = '_', // + [236] = '_', // + [237] = '_', // + 
[238] = '_', // + [239] = '_', // + [240] = '_', // + [241] = '_', // + [242] = '_', // + [243] = '_', // + [244] = '_', // + [245] = '_', // + [246] = '_', // + [247] = '_', // + [248] = '_', // + [249] = '_', // + [250] = '_', // + [251] = '_', // + [252] = '_', // + [253] = '_', // + [254] = '_', // + [255] = '_' // +}; + +// make sure the supplied string +// is good for a netdata chart/dimension ID/NAME +void netdata_fix_chart_name(char *s) { + while ((*s = netdata_map_chart_names[(uint8_t)*s])) s++; +} + +static uint8_t netdata_map_chart_ids[256] = { + [0] = '\0', // + [1] = '_', // + [2] = '_', // + [3] = '_', // + [4] = '_', // + [5] = '_', // + [6] = '_', // + [7] = '_', // + [8] = '_', // + [9] = '_', // + [10] = '_', // + [11] = '_', // + [12] = '_', // + [13] = '_', // + [14] = '_', // + [15] = '_', // + [16] = '_', // + [17] = '_', // + [18] = '_', // + [19] = '_', // + [20] = '_', // + [21] = '_', // + [22] = '_', // + [23] = '_', // + [24] = '_', // + [25] = '_', // + [26] = '_', // + [27] = '_', // + [28] = '_', // + [29] = '_', // + [30] = '_', // + [31] = '_', // + [32] = '_', // + [33] = '_', // ! + [34] = '_', // " + [35] = '_', // # + [36] = '_', // $ + [37] = '_', // % + [38] = '_', // & + [39] = '_', // ' + [40] = '_', // ( + [41] = '_', // ) + [42] = '_', // * + [43] = '_', // + + [44] = '.', // , + [45] = '-', // - + [46] = '.', // . + [47] = '_', // / + [48] = '0', // 0 + [49] = '1', // 1 + [50] = '2', // 2 + [51] = '3', // 3 + [52] = '4', // 4 + [53] = '5', // 5 + [54] = '6', // 6 + [55] = '7', // 7 + [56] = '8', // 8 + [57] = '9', // 9 + [58] = '_', // : + [59] = '_', // ; + [60] = '_', // < + [61] = '_', // = + [62] = '_', // > + [63] = '_', // ? 
+ [64] = '_', // @ + [65] = 'a', // A + [66] = 'b', // B + [67] = 'c', // C + [68] = 'd', // D + [69] = 'e', // E + [70] = 'f', // F + [71] = 'g', // G + [72] = 'h', // H + [73] = 'i', // I + [74] = 'j', // J + [75] = 'k', // K + [76] = 'l', // L + [77] = 'm', // M + [78] = 'n', // N + [79] = 'o', // O + [80] = 'p', // P + [81] = 'q', // Q + [82] = 'r', // R + [83] = 's', // S + [84] = 't', // T + [85] = 'u', // U + [86] = 'v', // V + [87] = 'w', // W + [88] = 'x', // X + [89] = 'y', // Y + [90] = 'z', // Z + [91] = '_', // [ + [92] = '_', // backslash + [93] = '_', // ] + [94] = '_', // ^ + [95] = '_', // _ + [96] = '_', // ` + [97] = 'a', // a + [98] = 'b', // b + [99] = 'c', // c + [100] = 'd', // d + [101] = 'e', // e + [102] = 'f', // f + [103] = 'g', // g + [104] = 'h', // h + [105] = 'i', // i + [106] = 'j', // j + [107] = 'k', // k + [108] = 'l', // l + [109] = 'm', // m + [110] = 'n', // n + [111] = 'o', // o + [112] = 'p', // p + [113] = 'q', // q + [114] = 'r', // r + [115] = 's', // s + [116] = 't', // t + [117] = 'u', // u + [118] = 'v', // v + [119] = 'w', // w + [120] = 'x', // x + [121] = 'y', // y + [122] = 'z', // z + [123] = '_', // { + [124] = '_', // | + [125] = '_', // } + [126] = '_', // ~ + [127] = '_', // + [128] = '_', // + [129] = '_', // + [130] = '_', // + [131] = '_', // + [132] = '_', // + [133] = '_', // + [134] = '_', // + [135] = '_', // + [136] = '_', // + [137] = '_', // + [138] = '_', // + [139] = '_', // + [140] = '_', // + [141] = '_', // + [142] = '_', // + [143] = '_', // + [144] = '_', // + [145] = '_', // + [146] = '_', // + [147] = '_', // + [148] = '_', // + [149] = '_', // + [150] = '_', // + [151] = '_', // + [152] = '_', // + [153] = '_', // + [154] = '_', // + [155] = '_', // + [156] = '_', // + [157] = '_', // + [158] = '_', // + [159] = '_', // + [160] = '_', // + [161] = '_', // + [162] = '_', // + [163] = '_', // + [164] = '_', // + [165] = '_', // + [166] = '_', // + [167] = '_', // + [168] = '_', // + [169] = 
'_', // + [170] = '_', // + [171] = '_', // + [172] = '_', // + [173] = '_', // + [174] = '_', // + [175] = '_', // + [176] = '_', // + [177] = '_', // + [178] = '_', // + [179] = '_', // + [180] = '_', // + [181] = '_', // + [182] = '_', // + [183] = '_', // + [184] = '_', // + [185] = '_', // + [186] = '_', // + [187] = '_', // + [188] = '_', // + [189] = '_', // + [190] = '_', // + [191] = '_', // + [192] = '_', // + [193] = '_', // + [194] = '_', // + [195] = '_', // + [196] = '_', // + [197] = '_', // + [198] = '_', // + [199] = '_', // + [200] = '_', // + [201] = '_', // + [202] = '_', // + [203] = '_', // + [204] = '_', // + [205] = '_', // + [206] = '_', // + [207] = '_', // + [208] = '_', // + [209] = '_', // + [210] = '_', // + [211] = '_', // + [212] = '_', // + [213] = '_', // + [214] = '_', // + [215] = '_', // + [216] = '_', // + [217] = '_', // + [218] = '_', // + [219] = '_', // + [220] = '_', // + [221] = '_', // + [222] = '_', // + [223] = '_', // + [224] = '_', // + [225] = '_', // + [226] = '_', // + [227] = '_', // + [228] = '_', // + [229] = '_', // + [230] = '_', // + [231] = '_', // + [232] = '_', // + [233] = '_', // + [234] = '_', // + [235] = '_', // + [236] = '_', // + [237] = '_', // + [238] = '_', // + [239] = '_', // + [240] = '_', // + [241] = '_', // + [242] = '_', // + [243] = '_', // + [244] = '_', // + [245] = '_', // + [246] = '_', // + [247] = '_', // + [248] = '_', // + [249] = '_', // + [250] = '_', // + [251] = '_', // + [252] = '_', // + [253] = '_', // + [254] = '_', // + [255] = '_' // +}; + +// make sure the supplied string +// is good for a netdata chart/dimension ID/NAME +void netdata_fix_chart_id(char *s) { + while ((*s = netdata_map_chart_ids[(uint8_t) *s])) s++; +} + +// -------------------------------------------------------------------------------------------------------------------- + +/* + * Sanitize Chart Meta + * + * We should only remove characters that somehow influence the functionality of Netdata. 
+ * + * The following ASCII characters are replaced with underscores: + * + * comma and pipe Used in HTTP GET as list separators + * colon Used in Labels as value list separators + * space Used in HTTP GET as parameter separators + * asterisk Used in simple patterns as wildcard + * exclamation mark Used in simple patterns for negative matches + * Control characters and not-printable characters (<= ' ') + * + * Non-ASCII UTF8 characters are retained as-is. + * + * When multiple consecutive characters are converted to underscores + * only 1 underscore is copied to the output buffer. + * + */ +void sanitize_chart_meta(char *buf) { + char *s = buf, *d = buf; + bool last_was_underscore = false; + + // Process the input buffer + while (*s) { + if (!IS_UTF8_BYTE(*s)) { // ASCII character + if (*s == '_') { + if(!last_was_underscore) + *d++ = *s++; + else + s++; + + last_was_underscore = true; + } + else if (*s <= ' ' || // all non-printable and control characters, including the space + *s == ',' || // HTTP GET list separator + *s == ':' || // labels value list separator + *s == '|' || // HTTP GET list separator + *s == '*' || // simple pattern wildcard + *s == '!' 
// simple pattern negative match + ) { + if (!last_was_underscore) + *d++ = '_'; + + s++; + last_was_underscore = true; + } + else { + *d++ = *s++; + last_was_underscore = false; + } + } + else if (IS_UTF8_STARTBYTE(*s)) { + // copy the start byte + *d++ = *s++; + + // copy the rest of the wide character + while (IS_UTF8_CONTBYTE(*s)) + *d++ = *s++; + + last_was_underscore = false; + } + else + // an invalid UTF8 continuation byte - skip it + s++; + } + + // Null-terminate the cleaned string + *d = '\0'; + + // Remove a trailing underscore (if any) + if (d > buf && *(d - 1) == '_') + *(d - 1) = '\0'; +} + +STRING *sanitize_chart_meta_string(STRING *s) { + char buf[string_strlen(s) + 1]; + memcpy(buf, string2str(s), sizeof(buf)); + sanitize_chart_meta(buf); + return string_strdupz(buf); +} diff --git a/src/libnetdata/sanitizers/chart_id_and_name.h b/src/libnetdata/sanitizers/chart_id_and_name.h new file mode 100644 index 00000000000000..55bd667b720055 --- /dev/null +++ b/src/libnetdata/sanitizers/chart_id_and_name.h @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_CHART_ID_AND_NAME_H +#define NETDATA_CHART_ID_AND_NAME_H + +#include "../libnetdata.h" + +void netdata_fix_chart_id(char *s); +void netdata_fix_chart_name(char *s); + +void sanitize_chart_meta(char *buf); +STRING *sanitize_chart_meta_string(STRING *s); + +#endif //NETDATA_CHART_ID_AND_NAME_H diff --git a/src/libnetdata/string/string.c b/src/libnetdata/string/string.c index 257a3cc4b91fbf..107c7eea5bae32 100644 --- a/src/libnetdata/string/string.c +++ b/src/libnetdata/string/string.c @@ -347,16 +347,34 @@ void string_freez(STRING *string) { string_stats_atomic_increment(partition, releases); } -inline size_t string_strlen(STRING *string) { +inline size_t string_strlen(const STRING *string) { if(unlikely(!string)) return 0; return string->length - 1; } -inline const char *string2str(STRING *string) { +inline const char *string2str(const STRING *string) { if(unlikely(!string)) 
return ""; return string->str; } +bool string_ends_with_string(const STRING *whole, const STRING *end) { + if(whole == end) return true; + if(!whole || !end) return false; + if(end->length > whole->length) return false; + if(end->length == whole->length) return strcmp(string2str(whole), string2str(end)) == 0; + const char *we = string2str(whole); + we = &we[string_strlen(whole) - string_strlen(end)]; + return strncmp(we, end->str, string_strlen(end)) == 0; +} + +bool string_starts_with_string(const STRING *whole, const STRING *end) { + if(whole == end) return true; + if(!whole || !end) return false; + if(end->length > whole->length) return false; + if(end->length == whole->length) return strcmp(string2str(whole), string2str(end)) == 0; + return strncmp(string2str(whole), string2str(end), string_strlen(end)) == 0; +} + STRING *string_2way_merge(STRING *a, STRING *b) { static STRING *X = NULL; diff --git a/src/libnetdata/string/string.h b/src/libnetdata/string/string.h index 1d5e5164a6b4c0..e86ac6fb5cb8a1 100644 --- a/src/libnetdata/string/string.h +++ b/src/libnetdata/string/string.h @@ -14,8 +14,10 @@ STRING *string_strndupz(const char *str, size_t len); STRING *string_dup(STRING *string); void string_freez(STRING *string); -size_t string_strlen(STRING *string); -const char *string2str(STRING *string) NEVERNULL; +size_t string_strlen(const STRING *string); +const char *string2str(const STRING *string) NEVERNULL; +bool string_ends_with_string(const STRING *whole, const STRING *end); +bool string_starts_with_string(const STRING *whole, const STRING *end); // keep common prefix/suffix and replace everything else with [x] STRING *string_2way_merge(STRING *a, STRING *b); diff --git a/src/libnetdata/string/utf8.h b/src/libnetdata/string/utf8.h index 3e6c8c28834308..a7beaeb63341c3 100644 --- a/src/libnetdata/string/utf8.h +++ b/src/libnetdata/string/utf8.h @@ -3,7 +3,8 @@ #ifndef NETDATA_STRING_UTF8_H #define NETDATA_STRING_UTF8_H 1 -#define IS_UTF8_BYTE(x) ((x) & 0x80) 
-#define IS_UTF8_STARTBYTE(x) (IS_UTF8_BYTE(x)&&((x) & 0x40)) +#define IS_UTF8_BYTE(x) ((uint8_t)(x) & (uint8_t)0x80) +#define IS_UTF8_STARTBYTE(x) (IS_UTF8_BYTE(x) && ((uint8_t)(x) & (uint8_t)0x40)) +#define IS_UTF8_CONTBYTE(x) (IS_UTF8_BYTE(x) && !IS_UTF8_STARTBYTE(x)) #endif /* NETDATA_STRING_UTF8_H */ From 23067a7cb6ebfeade923a5c44abf6946266785fe Mon Sep 17 00:00:00 2001 From: Fotis Voutsas Date: Mon, 30 Sep 2024 17:40:20 +0300 Subject: [PATCH 08/23] Port the OpenLDAP collector from Python to Go (#18625) Co-authored-by: ilyam8 --- src/go/go.mod | 3 + src/go/go.sum | 48 +++ src/go/plugin/go.d/README.md | 2 + src/go/plugin/go.d/config/go.d.conf | 3 +- src/go/plugin/go.d/config/go.d/openldap.conf | 8 + .../go.d/config/go.d/sd/net_listeners.conf | 7 + src/go/plugin/go.d/modules/init.go | 1 + src/go/plugin/go.d/modules/openldap/charts.go | 141 +++++++ src/go/plugin/go.d/modules/openldap/client.go | 83 ++++ .../plugin/go.d/modules/openldap/collect.go | 55 +++ .../modules/openldap/collect_mon_counters.go | 63 +++ .../modules/openldap/collect_operations.go | 71 ++++ .../go.d/modules/openldap/config_schema.json | 110 ++++++ .../go.d/modules/openldap/metadata.yaml | 192 +++++++++ .../plugin/go.d/modules/openldap/openldap.go | 115 ++++++ .../go.d/modules/openldap/openldap_test.go | 363 ++++++++++++++++++ .../modules/openldap/testdata/config.json | 11 + .../modules/openldap/testdata/config.yaml | 8 + 18 files changed, 1283 insertions(+), 1 deletion(-) create mode 100644 src/go/plugin/go.d/config/go.d/openldap.conf create mode 100644 src/go/plugin/go.d/modules/openldap/charts.go create mode 100644 src/go/plugin/go.d/modules/openldap/client.go create mode 100644 src/go/plugin/go.d/modules/openldap/collect.go create mode 100644 src/go/plugin/go.d/modules/openldap/collect_mon_counters.go create mode 100644 src/go/plugin/go.d/modules/openldap/collect_operations.go create mode 100644 src/go/plugin/go.d/modules/openldap/config_schema.json create mode 100644 
src/go/plugin/go.d/modules/openldap/metadata.yaml create mode 100644 src/go/plugin/go.d/modules/openldap/openldap.go create mode 100644 src/go/plugin/go.d/modules/openldap/openldap_test.go create mode 100644 src/go/plugin/go.d/modules/openldap/testdata/config.json create mode 100644 src/go/plugin/go.d/modules/openldap/testdata/config.yaml diff --git a/src/go/go.mod b/src/go/go.mod index 0e7b207b5e4946..7a372d9440840f 100644 --- a/src/go/go.mod +++ b/src/go/go.mod @@ -19,6 +19,7 @@ require ( github.com/docker/docker v27.3.1+incompatible github.com/facebook/time v0.0.0-20240626113945-18207c5d8ddc github.com/fsnotify/fsnotify v1.7.0 + github.com/go-ldap/ldap/v3 v3.4.8 github.com/go-sql-driver/mysql v1.8.1 github.com/godbus/dbus/v5 v5.1.0 github.com/gofrs/flock v0.12.1 @@ -62,6 +63,7 @@ require ( dario.cat/mergo v1.0.1 // indirect filippo.io/edwards25519 v1.1.0 // indirect github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 // indirect + github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 // indirect github.com/Masterminds/goutils v1.1.1 // indirect github.com/Masterminds/semver/v3 v3.3.0 // indirect github.com/Microsoft/go-winio v0.6.1 // indirect @@ -75,6 +77,7 @@ require ( github.com/emicklei/go-restful/v3 v3.11.0 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/fxamacker/cbor/v2 v2.7.0 // indirect + github.com/go-asn1-ber/asn1-ber v1.5.5 // indirect github.com/go-logr/logr v1.4.2 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-openapi/jsonpointer v0.20.0 // indirect diff --git a/src/go/go.sum b/src/go/go.sum index c4220579a46958..6bca992d1a7e12 100644 --- a/src/go/go.sum +++ b/src/go/go.sum @@ -4,6 +4,8 @@ filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA= filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 h1:UQHMgLO+TxOElx5B5HZ4hJQsoJ/PvUvKRhJHDQXO8P8= 
github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= +github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358 h1:mFRzDkZVAjdal+s7s0MwaRv9igoPqLRdzOLzw/8Xvq8= +github.com/Azure/go-ntlmssp v0.0.0-20221128193559-754e69321358/go.mod h1:chxPXzSsl7ZWRAuOIE23GDNzjWuZquvFlgA8xmpunjU= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7OputlJIzU= github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU= @@ -18,6 +20,8 @@ github.com/Microsoft/go-winio v0.6.1 h1:9/kr64B9VUZrLm5YYwbGtUJnMgqWVOdUAXu6Migc github.com/Microsoft/go-winio v0.6.1/go.mod h1:LRdKpFKfdobln8UmuiYcKPot9D2v6svN5+sAH+4kjUM= github.com/Wing924/ltsv v0.3.1 h1:hbjzQ6YuS/sOm7nQJG7ddT9ua1yYmcH25Q8lsuiQE0A= github.com/Wing924/ltsv v0.3.1/go.mod h1:zl47wq7H23LocdDHg7yJAH/Qdc4MWHXu1Evx9Ahilmo= +github.com/alexbrainman/sspi v0.0.0-20231016080023-1a75b4708caa h1:LHTHcTQiSGT7VVbI0o4wBRNQIgn917usHWOd6VAffYI= +github.com/alexbrainman/sspi v0.0.0-20231016080023-1a75b4708caa/go.mod h1:cEWa1LVoE5KvSD9ONXsZrj0z6KqySlCCNKHlLzbqAt4= github.com/apparentlymart/go-cidr v1.1.0 h1:2mAhrMoF+nhXqxTzSZMUzDHkLjmIHC+Zzn4tdgBZjnU= github.com/apparentlymart/go-cidr v1.1.0/go.mod h1:EBcsNrHc3zQeuaeCeCtQruQm+n9/YjEn/vI25Lg7Gwc= github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de h1:FxWPpzIjnTlhPwqqXc4/vE0f7GvRjuAsbW+HOIe8KnA= @@ -83,7 +87,11 @@ github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nos github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= +github.com/go-asn1-ber/asn1-ber v1.5.5 h1:MNHlNMBDgEKD4TcKr36vQN68BA00aDfjIt3/bD50WnA= +github.com/go-asn1-ber/asn1-ber 
v1.5.5/go.mod h1:hEBeB/ic+5LoWskz+yKT7vGhhPYkProFKoKdwZRWMe0= github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= +github.com/go-ldap/ldap/v3 v3.4.8 h1:loKJyspcRezt2Q3ZRMq2p/0v8iOurlmeXDPw6fikSvQ= +github.com/go-ldap/ldap/v3 v3.4.8/go.mod h1:qS3Sjlu76eHfHGpUdWkAXQTw4beih+cHsco2jXlIXrk= github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= @@ -135,6 +143,8 @@ github.com/google/pprof v0.0.0-20240525223248-4bfdf5a9a2af/go.mod h1:K1liHPHnj73 github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/gorilla/securecookie v1.1.1/go.mod h1:ra0sb63/xPlUeL+yeDciTfxMRAA+MP+HVt/4epWDjd4= +github.com/gorilla/sessions v1.2.1/go.mod h1:dk2InVEVJ0sfLlnXv9EAgkf6ecYs/i80K/zI+bUmuGM= github.com/gosnmp/gosnmp v1.38.0 h1:I5ZOMR8kb0DXAFg/88ACurnuwGwYkXWq3eLpJPHMEYc= github.com/gosnmp/gosnmp v1.38.0/go.mod h1:FE+PEZvKrFz9afP9ii1W3cprXuVZ17ypCcyyfYuu5LY= github.com/grafana/regexp v0.0.0-20221122212121-6b5c0a4cb7fd h1:PpuIBO5P3e9hpqBD0O/HjhShYuM6XE0i/lbE6J94kww= @@ -144,6 +154,9 @@ github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.0 h1:RtRsiaGvWxcwd8y3BiRZxsylPT8 github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.0/go.mod h1:TzP6duP4Py2pHLVPPQp42aoYI92+PCrVotyR5e8Vqlk= github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed h1:5upAirOpQc1Q53c0bnx2ufif5kANL7bfZWcc6VJWJd8= github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed/go.mod h1:tMWxXQ9wFIaZeTI9F+hmhFiGpFmhOHzyShyFUhRm0H4= +github.com/hashicorp/go-uuid v1.0.2/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= +github.com/hashicorp/go-uuid v1.0.3 
h1:2gKiV6YVmrJ1i2CKKa9obLvRieoRGviZFL26PcT/Co8= +github.com/hashicorp/go-uuid v1.0.3/go.mod h1:6SBZvOh/SIDV7/2o3Jml5SYk/TvGqwFJ/bN7x4byOro= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI= github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE= @@ -202,6 +215,18 @@ github.com/jackc/puddle v0.0.0-20190608224051-11cab39313c9/go.mod h1:m4B5Dj62Y0f github.com/jackc/puddle v1.1.3/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dvjSWk= github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo= github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= +github.com/jcmturner/aescts/v2 v2.0.0 h1:9YKLH6ey7H4eDBXW8khjYslgyqG2xZikXP0EQFKrle8= +github.com/jcmturner/aescts/v2 v2.0.0/go.mod h1:AiaICIRyfYg35RUkr8yESTqvSy7csK90qZ5xfvvsoNs= +github.com/jcmturner/dnsutils/v2 v2.0.0 h1:lltnkeZGL0wILNvrNiVCR6Ro5PGU/SeBvVO/8c/iPbo= +github.com/jcmturner/dnsutils/v2 v2.0.0/go.mod h1:b0TnjGOvI/n42bZa+hmXL+kFJZsFT7G4t3HTlQ184QM= +github.com/jcmturner/gofork v1.7.6 h1:QH0l3hzAU1tfT3rZCnW5zXl+orbkNMMRGJfdJjHVETg= +github.com/jcmturner/gofork v1.7.6/go.mod h1:1622LH6i/EZqLloHfE7IeZ0uEJwMSUyQ/nDd82IeqRo= +github.com/jcmturner/goidentity/v6 v6.0.1 h1:VKnZd2oEIMorCTsFBnJWbExfNN7yZr3EhJAxwOkZg6o= +github.com/jcmturner/goidentity/v6 v6.0.1/go.mod h1:X1YW3bgtvwAXju7V3LCIMpY0Gbxyjn/mY9zx4tFonSg= +github.com/jcmturner/gokrb5/v8 v8.4.4 h1:x1Sv4HaTpepFkXbt2IkL29DXRf8sOfZXo8eRKh687T8= +github.com/jcmturner/gokrb5/v8 v8.4.4/go.mod h1:1btQEpgT6k+unzCwX1KdWMEwPPkkgBtP+F6aCACiMrs= +github.com/jcmturner/rpc/v2 v2.0.3 h1:7FXXj8Ti1IaVFpSAziCZWNzbNuZmnvw/i6CqLNdWfZY= +github.com/jcmturner/rpc/v2 v2.0.3/go.mod h1:VUJYCIDm3PVOEHw8sgt091/20OJjskO/YJki3ELg/Hc= github.com/jessevdk/go-flags v1.6.1 h1:Cvu5U8UGrLay1rZfv/zP7iLpSHGUZ/Ou68T0iX1bBK4= github.com/jessevdk/go-flags v1.6.1/go.mod 
h1:Mk8T1hIAWpOiJiHa9rJASDK2UGWji0EuPGBnNLMooyc= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= @@ -421,6 +446,9 @@ golang.org/x/crypto v0.0.0-20201203163018-be400aefbc4c/go.mod h1:jdWPYTVW3xRLrWP golang.org/x/crypto v0.0.0-20210616213533-5ff15b29337e/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.6.0/go.mod h1:OFC/31mSvZgRz0V1QTNCzfAI1aIRzbiufJtkMIlEp58= +golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= +golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs= golang.org/x/crypto v0.27.0 h1:GXm2NjJrPaiv/h1tb2UH8QfgC/hOf/+z0p6PT8o1w7A= golang.org/x/crypto v0.27.0/go.mod h1:1Xngt8kV6Dvbssa53Ziq6Eqn0HqbZi5Z6R0ZpwQzt70= golang.org/x/exp v0.0.0-20240119083558-1b970713d09a h1:Q8/wZp0KX97QFTc2ywcOE0YRjZPVIx+MXInMzdvQqcA= @@ -432,6 +460,7 @@ golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.18.0 h1:5+9lSbEzPSdWkH32vYPBwEpX8KwDbM52Ud9xBUvNlb0= golang.org/x/mod v0.18.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -439,11 +468,17 @@ golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= +golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= +golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= golang.org/x/net v0.29.0 h1:5ORfpBpCs4HzDYoodCDBbwHzdR5UrLBZ3sOnUJmFoHo= golang.org/x/net v0.29.0/go.mod h1:gLkgy8jTGERgjzMic6DS9+SP0ajcu6Xu3Orq/SpETg0= golang.org/x/oauth2 v0.22.0 h1:BzDx2FehcG7jJwgWLELCdmLuxk2i+x9UDpSiss2u0ZA= @@ -454,6 +489,7 @@ golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 
golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -477,12 +513,20 @@ golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34= golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= +golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= +golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= +golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58= golang.org/x/term v0.24.0 h1:Mh5cbb+Zk2hqqXNO7S1iTjEphVL+jb8ZWaqh/g+JWkM= golang.org/x/term v0.24.0/go.mod 
h1:lOBK/LVxemqiMij05LGJ0tzNr8xlmwBRJ81PX6wVLH8= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -492,6 +536,9 @@ golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/text v0.18.0 h1:XvMDiNzPAl0jr17s6W9lcaIhGUfUORdGCNsuLmPG224= golang.org/x/text v0.18.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk= @@ -509,6 +556,7 @@ golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roY golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/tools v0.22.0 h1:gqSGLZqv+AI9lIQzniJ0nZDRG5GBPsSi+DRNHWNz6yA= golang.org/x/tools v0.22.0/go.mod h1:aCwcsjqvq7Yqt6TNyX7QMU2enbQ/Gt0bo6krSeEri+c= golang.org/x/xerrors v0.0.0-20190410155217-1f06c39b4373/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/src/go/plugin/go.d/README.md b/src/go/plugin/go.d/README.md index 9a1e922b3ecc6a..ebd187a5c3bede 100644 --- a/src/go/plugin/go.d/README.md +++ b/src/go/plugin/go.d/README.md @@ -111,7 +111,9 @@ see the appropriate collector readme. 
| [nginxvts](https://github.com/netdata/netdata/tree/master/src/go/plugin/go.d/modules/nginxvts) | NGINX VTS | | [nsd](https://github.com/netdata/netdata/tree/master/src/go/plugin/go.d/modules/nsd) | NSD (NLnet Labs) | | [ntpd](https://github.com/netdata/netdata/tree/master/src/go/plugin/go.d/modules/ntpd) | NTP daemon | +| [nvidia_smi](https://github.com/netdata/netdata/tree/master/src/go/plugin/go.d/modules/nvidia_smi) | Nvidia SMI | | [nvme](https://github.com/netdata/netdata/tree/master/src/go/plugin/go.d/modules/nvme) | NVMe devices | +| [openldap](https://github.com/netdata/netdata/tree/master/src/go/plugin/go.d/modules/openldap) | OpenLDAP | | [openvpn](https://github.com/netdata/netdata/tree/master/src/go/plugin/go.d/modules/openvpn) | OpenVPN | | [openvpn_status_log](https://github.com/netdata/netdata/tree/master/src/go/plugin/go.d/modules/openvpn_status_log) | OpenVPN | | [pgbouncer](https://github.com/netdata/netdata/tree/master/src/go/plugin/go.d/modules/pgbouncer) | PgBouncer | diff --git a/src/go/plugin/go.d/config/go.d.conf b/src/go/plugin/go.d/config/go.d.conf index 91a1e0eebd6a02..2a055885fa8979 100644 --- a/src/go/plugin/go.d/config/go.d.conf +++ b/src/go/plugin/go.d/config/go.d.conf @@ -75,8 +75,9 @@ modules: # nginxvts: yes # nsd: yes # ntpd: yes -# nvme: yes # nvidia_smi: no +# nvme: yes +# openldap: yes # openvpn: no # openvpn_status_log: yes # ping: yes diff --git a/src/go/plugin/go.d/config/go.d/openldap.conf b/src/go/plugin/go.d/config/go.d/openldap.conf new file mode 100644 index 00000000000000..6d10059439cd67 --- /dev/null +++ b/src/go/plugin/go.d/config/go.d/openldap.conf @@ -0,0 +1,8 @@ +## All available configuration options, their descriptions and default values: +## https://github.com/netdata/netdata/tree/master/src/go/plugin/go.d/modules/openldap#readme + +#jobs: +# - name: local +# url: ldap://127.0.0.1:389 +# username: cn=user,dc=example,dc=com +# password: password diff --git a/src/go/plugin/go.d/config/go.d/sd/net_listeners.conf 
b/src/go/plugin/go.d/config/go.d/sd/net_listeners.conf index 7bd39559c10167..387da22783daee 100644 --- a/src/go/plugin/go.d/config/go.d/sd/net_listeners.conf +++ b/src/go/plugin/go.d/config/go.d/sd/net_listeners.conf @@ -96,6 +96,8 @@ classify: expr: '{{ and (eq .Port "8000") (eq .Comm "unit") }}' - tags: "ntpd" expr: '{{ or (eq .Port "123") (eq .Comm "ntpd") }}' + - tags: "openldap" + expr: '{{ eq .Comm "slapd" }}' - tags: "openvpn" expr: '{{ and (eq .Port "7505") (eq .Comm "openvpn") }}' - tags: "pgbouncer" @@ -405,6 +407,11 @@ compose: name: local address: {{.Address}} collect_peers: no + - selector: "openldap" + template: | + module: openldap + name: local + url: ldap://{{.Address}} - selector: "openvpn" template: | module: openvpn diff --git a/src/go/plugin/go.d/modules/init.go b/src/go/plugin/go.d/modules/init.go index 8544f235cfe626..ca86c49728194d 100644 --- a/src/go/plugin/go.d/modules/init.go +++ b/src/go/plugin/go.d/modules/init.go @@ -68,6 +68,7 @@ import ( _ "github.com/netdata/netdata/go/plugins/plugin/go.d/modules/ntpd" _ "github.com/netdata/netdata/go/plugins/plugin/go.d/modules/nvidia_smi" _ "github.com/netdata/netdata/go/plugins/plugin/go.d/modules/nvme" + _ "github.com/netdata/netdata/go/plugins/plugin/go.d/modules/openldap" _ "github.com/netdata/netdata/go/plugins/plugin/go.d/modules/openvpn" _ "github.com/netdata/netdata/go/plugins/plugin/go.d/modules/openvpn_status_log" _ "github.com/netdata/netdata/go/plugins/plugin/go.d/modules/pgbouncer" diff --git a/src/go/plugin/go.d/modules/openldap/charts.go b/src/go/plugin/go.d/modules/openldap/charts.go new file mode 100644 index 00000000000000..0dc6a17c73763d --- /dev/null +++ b/src/go/plugin/go.d/modules/openldap/charts.go @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package openldap + +import ( + "github.com/netdata/netdata/go/plugins/plugin/go.d/agent/module" +) + +const ( + prioCurrentConnections = module.Priority + iota + prioTotalConnections + prioBytesSent + prioEntries + 
prioReferrals + prioOperations + prioOperationsByType + prioWaiters +) + +var charts = module.Charts{ + currentConnectionsChart.Copy(), + connectionsChart.Copy(), + + bytesSentChart.Copy(), + referralsSentChart.Copy(), + entriesSentChart.Copy(), + + operationsChart.Copy(), + operationsByTypeChart.Copy(), + + waitersChart.Copy(), +} + +var ( + currentConnectionsChart = module.Chart{ + ID: "current_connections", + Title: "Current Connections", + Units: "connections", + Fam: "connections", + Ctx: "openldap.current_connections", + Priority: prioCurrentConnections, + Type: module.Line, + Dims: module.Dims{ + {ID: "current_connections", Name: "active"}, + }, + } + connectionsChart = module.Chart{ + ID: "connections", + Title: "Connections", + Units: "connections/s", + Fam: "connections", + Ctx: "openldap.connections", + Priority: prioTotalConnections, + Type: module.Line, + Dims: module.Dims{ + {ID: "total_connections", Name: "connections", Algo: module.Incremental}, + }, + } + + bytesSentChart = module.Chart{ + ID: "bytes_sent", + Title: "Traffic", + Units: "bytes/s", + Fam: "activity", + Ctx: "openldap.traffic", + Priority: prioBytesSent, + Type: module.Area, + Dims: module.Dims{ + {ID: "bytes_sent", Name: "sent", Algo: module.Incremental}, + }, + } + entriesSentChart = module.Chart{ + ID: "entries_sent", + Title: "Entries", + Units: "entries/s", + Fam: "activity", + Ctx: "openldap.entries", + Priority: prioEntries, + Type: module.Line, + Dims: module.Dims{ + {ID: "entries_sent", Name: "sent", Algo: module.Incremental}, + }, + } + referralsSentChart = module.Chart{ + ID: "referrals_sent", + Title: "Referrals", + Units: "referrals/s", + Fam: "activity", + Ctx: "openldap.referrals", + Priority: prioReferrals, + Type: module.Line, + Dims: module.Dims{ + {ID: "referrals_sent", Name: "sent", Algo: module.Incremental}, + }, + } + + operationsChart = module.Chart{ + ID: "operations", + Title: "Operations", + Units: "operations/s", + Fam: "operations", + Ctx: 
"openldap.operations", + Priority: prioOperations, + Type: module.Line, + Dims: module.Dims{ + {ID: "completed_operations", Name: "completed", Algo: module.Incremental}, + {ID: "initiated_operations", Name: "initiated", Algo: module.Incremental}, + }, + } + operationsByTypeChart = module.Chart{ + ID: "operations_by_type", + Title: "Operations by Type", + Units: "operations/s", + Fam: "operations", + Ctx: "openldap.operations_by_type", + Priority: prioOperationsByType, + Type: module.Stacked, + Dims: module.Dims{ + {ID: "completed_bind_operations", Name: "bind", Algo: module.Incremental}, + {ID: "completed_search_operations", Name: "search", Algo: module.Incremental}, + {ID: "completed_unbind_operations", Name: "unbind", Algo: module.Incremental}, + {ID: "completed_add_operations", Name: "add", Algo: module.Incremental}, + {ID: "completed_delete_operations", Name: "delete", Algo: module.Incremental}, + {ID: "completed_modify_operations", Name: "modify", Algo: module.Incremental}, + {ID: "completed_compare_operations", Name: "compare", Algo: module.Incremental}, + }, + } + waitersChart = module.Chart{ + ID: "waiters", + Title: "Waiters", + Units: "waiters/s", + Fam: "operations", + Ctx: "openldap.waiters", + Priority: prioWaiters, + Type: module.Line, + Dims: module.Dims{ + {ID: "read_waiters", Name: "read", Algo: module.Incremental}, + {ID: "write_waiters", Name: "write", Algo: module.Incremental}, + }, + } +) diff --git a/src/go/plugin/go.d/modules/openldap/client.go b/src/go/plugin/go.d/modules/openldap/client.go new file mode 100644 index 00000000000000..4af5f5fb4d07ec --- /dev/null +++ b/src/go/plugin/go.d/modules/openldap/client.go @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package openldap + +import ( + "net" + + "github.com/go-ldap/ldap/v3" + + "github.com/netdata/netdata/go/plugins/plugin/go.d/pkg/tlscfg" +) + +type ldapConn interface { + connect() error + disconnect() error + search(*ldap.SearchRequest) (*ldap.SearchResult, error) +} 
+ +func newLdapConn(cfg Config) ldapConn { + return &ldapClient{Config: cfg} +} + +type ldapClient struct { + Config + + conn *ldap.Conn +} + +func (c *ldapClient) search(req *ldap.SearchRequest) (*ldap.SearchResult, error) { + return c.conn.Search(req) +} + +func (c *ldapClient) connect() error { + opts, err := c.connectOpts() + if err != nil { + return err + } + + conn, err := ldap.DialURL(c.URL, opts...) + if err != nil { + return err + } + + if c.Password == "" { + err = conn.UnauthenticatedBind(c.Username) + } else { + err = conn.Bind(c.Username, c.Password) + } + if err != nil { + _ = conn.Close() + return err + } + + c.conn = conn + + return nil +} + +func (c *ldapClient) connectOpts() ([]ldap.DialOpt, error) { + d := &net.Dialer{ + Timeout: c.Timeout.Duration(), + } + + opts := []ldap.DialOpt{ldap.DialWithDialer(d)} + + tlsConf, err := tlscfg.NewTLSConfig(c.TLSConfig) + if err != nil { + return nil, err + } + if tlsConf != nil { + opts = append(opts, ldap.DialWithTLSConfig(tlsConf)) + } + + return opts, nil +} + +func (c *ldapClient) disconnect() error { + defer func() { c.conn = nil }() + if c.conn != nil { + return c.conn.Close() + } + return nil +} diff --git a/src/go/plugin/go.d/modules/openldap/collect.go b/src/go/plugin/go.d/modules/openldap/collect.go new file mode 100644 index 00000000000000..de8ac41a89e578 --- /dev/null +++ b/src/go/plugin/go.d/modules/openldap/collect.go @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package openldap + +import ( + "github.com/go-ldap/ldap/v3" +) + +func (l *OpenLDAP) collect() (map[string]int64, error) { + if l.conn == nil { + conn, err := l.establishConn() + if err != nil { + return nil, err + } + l.conn = conn + } + + mx := make(map[string]int64) + + if err := l.collectMonitorCounters(mx); err != nil { + l.Cleanup() + return nil, err + } + if err := l.collectOperations(mx); err != nil { + l.Cleanup() + return nil, err + } + + return mx, nil +} + +func (l *OpenLDAP) doSearchRequest(req 
*ldap.SearchRequest, fn func(*ldap.Entry)) error { + resp, err := l.conn.search(req) + if err != nil { + return err + } + + for _, entry := range resp.Entries { + if len(entry.Attributes) != 0 { + fn(entry) + } + } + + return nil +} + +func (l *OpenLDAP) establishConn() (ldapConn, error) { + conn := l.newConn(l.Config) + + if err := conn.connect(); err != nil { + return nil, err + } + + return conn, nil +} diff --git a/src/go/plugin/go.d/modules/openldap/collect_mon_counters.go b/src/go/plugin/go.d/modules/openldap/collect_mon_counters.go new file mode 100644 index 00000000000000..be96b3c0350c21 --- /dev/null +++ b/src/go/plugin/go.d/modules/openldap/collect_mon_counters.go @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package openldap + +import ( + "strconv" + + "github.com/go-ldap/ldap/v3" +) + +const ( + attrMonitorCounter = "monitorCounter" +) + +func (l *OpenLDAP) collectMonitorCounters(mx map[string]int64) error { + req := newLdapMonitorCountersSearchRequest() + + dnMetricMap := map[string]string{ + "cn=Current,cn=Connections,cn=Monitor": "current_connections", + "cn=Total,cn=Connections,cn=Monitor": "total_connections", + "cn=Bytes,cn=Statistics,cn=Monitor": "bytes_sent", + "cn=Referrals,cn=Statistics,cn=Monitor": "referrals_sent", + "cn=Entries,cn=Statistics,cn=Monitor": "entries_sent", + "cn=Write,cn=Waiters,cn=Monitor": "write_waiters", + "cn=Read,cn=Waiters,cn=Monitor": "read_waiters", + } + + return l.doSearchRequest(req, func(entry *ldap.Entry) { + metric := dnMetricMap[entry.DN] + if metric == "" { + l.Debugf("skipping entry '%s'", entry.DN) + return + } + + s := entry.GetAttributeValue(attrMonitorCounter) + if s == "" { + l.Debugf("entry '%s' does not have attribute '%s'", entry.DN, attrMonitorCounter) + return + } + + v, err := strconv.ParseInt(s, 10, 64) + if err != nil { + l.Debugf("failed to parse entry '%s' value '%s': %v", entry.DN, s, err) + return + } + + mx[metric] = v + }) +} + +func newLdapMonitorCountersSearchRequest() 
*ldap.SearchRequest { + return ldap.NewSearchRequest( + "cn=Monitor", + ldap.ScopeWholeSubtree, + ldap.NeverDerefAliases, + 0, + 0, + false, + "(objectclass=monitorCounterObject)", + []string{attrMonitorCounter}, + nil, + ) +} diff --git a/src/go/plugin/go.d/modules/openldap/collect_operations.go b/src/go/plugin/go.d/modules/openldap/collect_operations.go new file mode 100644 index 00000000000000..09593d84208103 --- /dev/null +++ b/src/go/plugin/go.d/modules/openldap/collect_operations.go @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package openldap + +import ( + "strconv" + + "github.com/go-ldap/ldap/v3" +) + +const ( + attrMonitorOpInitiated = "monitorOpInitiated" + attrMonitorOpCompleted = "monitorOpCompleted" +) + +func (l *OpenLDAP) collectOperations(mx map[string]int64) error { + req := newLdapOperationsSearchRequest() + + dnMetricMap := map[string]string{ + "cn=Bind,cn=Operations,cn=Monitor": "bind_operations", + "cn=Unbind,cn=Operations,cn=Monitor": "unbind_operations", + "cn=Add,cn=Operations,cn=Monitor": "add_operations", + "cn=Delete,cn=Operations,cn=Monitor": "delete_operations", + "cn=Modify,cn=Operations,cn=Monitor": "modify_operations", + "cn=Compare,cn=Operations,cn=Monitor": "compare_operations", + "cn=Search,cn=Operations,cn=Monitor": "search_operations", + } + + return l.doSearchRequest(req, func(entry *ldap.Entry) { + metric := dnMetricMap[entry.DN] + if metric == "" { + l.Debugf("skipping entry '%s'", entry.DN) + return + } + + attrs := map[string]string{ + "initiated": attrMonitorOpInitiated, + "completed": attrMonitorOpCompleted, + } + + for prefix, attr := range attrs { + s := entry.GetAttributeValue(attr) + if s == "" { + l.Debugf("entry '%s' does not have attribute '%s'", entry.DN, attr) + continue + } + v, err := strconv.ParseInt(s, 10, 64) + if err != nil { + l.Debugf("failed to parse entry '%s' value '%s': %v", entry.DN, s, err) + continue + } + + mx[prefix+"_"+metric] = v + mx[prefix+"_operations"] += v + } + }) +} 
+ +func newLdapOperationsSearchRequest() *ldap.SearchRequest { + return ldap.NewSearchRequest( + "cn=Operations,cn=Monitor", + ldap.ScopeWholeSubtree, + ldap.NeverDerefAliases, + 0, + 0, + false, + "(objectclass=monitorOperation)", + []string{attrMonitorOpInitiated, attrMonitorOpCompleted}, + nil, + ) +} diff --git a/src/go/plugin/go.d/modules/openldap/config_schema.json b/src/go/plugin/go.d/modules/openldap/config_schema.json new file mode 100644 index 00000000000000..e6724a827467c5 --- /dev/null +++ b/src/go/plugin/go.d/modules/openldap/config_schema.json @@ -0,0 +1,110 @@ +{ + "jsonSchema": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "OpenLDAP collector configuration.", + "type": "object", + "properties": { + "update_every": { + "title": "Update every", + "description": "Data collection interval, measured in seconds.", + "type": "integer", + "minimum": 1, + "default": 1 + }, + "timeout": { + "title": "Timeout", + "description": "Timeout for establishing a connection and communication (reading and writing) in seconds.", + "type": "number", + "minimum": 0.5, + "default": 2 + }, + "url": { + "title": "URL", + "description": "LDAP server URL.", + "type": "string", + "default": "ldap://127.0.0.1:389" + }, + "username": { + "title": "DN", + "description": "The distinguished name (DN) of the user authorized to view the monitor database.", + "type": "string", + "default": "" + }, + "password": { + "title": "Password", + "description": "The password associated with the user identified by the DN.", + "type": "string", + "default": "" + }, + "tls_skip_verify": { + "title": "Skip TLS verification", + "description": "If set, TLS certificate verification will be skipped.", + "type": "boolean" + }, + "tls_ca": { + "title": "TLS CA", + "description": "The path to the CA certificate file for TLS verification.", + "type": "string", + "pattern": "^$|^/" + }, + "tls_cert": { + "title": "TLS certificate", + "description": "The path to the client certificate 
file for TLS authentication.", + "type": "string", + "pattern": "^$|^/" + }, + "tls_key": { + "title": "TLS key", + "description": "The path to the client key file for TLS authentication.", + "type": "string", + "pattern": "^$|^/" + } + }, + "required": [ + "url", + "username" + ], + "additionalProperties": false, + "patternProperties": { + "^name$": {} + } + }, + "uiSchema": { + "uiOptions": { + "fullPage": true + }, + "ui:flavour": "tabs", + "ui:options": { + "tabs": [ + { + "title": "Base", + "fields": [ + "update_every", + "url", + "timeout", + "username", + "password" + ] + }, + { + "title": "TLS", + "fields": [ + "tls_skip_verify", + "tls_ca", + "tls_cert", + "tls_key" + ] + } + ] + }, + "timeout": { + "ui:help": "Accepts decimals for precise control (e.g., type 1.5 for 1.5 seconds)." + }, + "username": { + "ui:placeholder": "cn=admin,dc=example,dc=com" + }, + "password": { + "ui:widget": "password" + } + } +} diff --git a/src/go/plugin/go.d/modules/openldap/metadata.yaml b/src/go/plugin/go.d/modules/openldap/metadata.yaml new file mode 100644 index 00000000000000..8d5b9c1a516c62 --- /dev/null +++ b/src/go/plugin/go.d/modules/openldap/metadata.yaml @@ -0,0 +1,192 @@ +plugin_name: go.d.plugin +modules: + - meta: + plugin_name: go.d.plugin + module_name: openldap + monitored_instance: + name: OpenLDAP + link: https://www.openldap.org/ + categories: + - data-collection.authentication-and-authorization + icon_filename: openldap.svg + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - openldap + - RBAC + - Directory access + most_popular: false + overview: + data_collection: + metrics_description: | + This collector monitors OpenLDAP metrics about connections, operations, referrals and more. 
+ method_description: | + It gathers the metrics using the [go-ldap](https://github.com/go-ldap/ldap) module and the [Monitor backend](https://www.openldap.org/doc/admin24/monitoringslapd.html) of OpenLDAP. + supported_platforms: + include: ["Linux"] + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: "The collector cannot auto-detect OpenLDAP instances, because credential configuration is required." + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: Enable the OpenLDAP Monitor Backend. + description: | + Follow instructions from https://www.openldap.org/doc/admin24/monitoringslapd.html to activate the monitoring interface. + configuration: + file: + name: go.d/openldap.conf + options: + description: | + The following options can be defined globally: update_every. + folding: + title: Config options + enabled: true + list: + - name: update_every + description: Data collection frequency. + default_value: 1 + required: false + - name: timeout + description: Timeout for establishing a connection and communication (reading and writing) in seconds. + default_value: 2 + required: false + - name: url + description: LDAP server URL. + default_value: ldap://127.0.0.1:389 + required: true + - name: username + description: The distinguished name (DN) of the user authorized to view the monitor database. + default_value: "" + required: true + - name: password + description: The password associated with the user identified by the DN. + default_value: "" + required: true + - name: tls_skip_verify + description: Server certificate chain and hostname validation policy. Controls whether the client performs this check. + default_value: false + required: false + - name: tls_ca + description: Certification authority that the client uses when verifying the server's certificates.
+ default_value: "" + required: false + - name: tls_cert + description: Client TLS certificate. + default_value: "" + required: false + - name: tls_key + description: Client TLS key. + default_value: "" + required: false + examples: + folding: + title: "" + enabled: false + list: + - name: Basic + description: A basic example configuration. + config: | + jobs: + - name: local + url: ldap://localhost:389 + username: cn=netdata,dc=example,dc=com + password: secret + - name: Multi-instance + description: | + > **Note**: When you define multiple jobs, their names must be unique. + + Collecting metrics from local and remote instances. + config: | + jobs: + - name: local + url: ldap://localhost:389 + username: cn=netdata,dc=example,dc=com + password: secret + + - name: remote + url: ldap://192.0.2.1:389 + username: cn=netdata,dc=example,dc=com + password: secret + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: "These metrics refer to the entire monitored application." 
+ labels: [] + metrics: + - name: openldap.current_connections + description: Current Connections + unit: "connections" + chart_type: line + dimensions: + - name: active + - name: openldap.connections + description: Connections + unit: "connections/s" + chart_type: line + dimensions: + - name: connections + - name: openldap.traffic + description: Traffic + unit: "bytes/s" + chart_type: area + dimensions: + - name: sent + - name: openldap.entries + description: Entries + unit: "entries/s" + chart_type: line + dimensions: + - name: sent + - name: openldap.referrals + description: Referrals + unit: "referrals/s" + chart_type: line + dimensions: + - name: sent + - name: openldap.operations + description: Operations + unit: "operations/s" + chart_type: line + dimensions: + - name: completed + - name: initiated + - name: openldap.operations_by_type + description: Operations by Type + unit: "operations/s" + chart_type: stacked + dimensions: + - name: bind + - name: search + - name: unbind + - name: add + - name: delete + - name: modify + - name: compare + - name: openldap.waiters + description: Waiters + unit: "waiters/s" + chart_type: line + dimensions: + - name: write + - name: read diff --git a/src/go/plugin/go.d/modules/openldap/openldap.go b/src/go/plugin/go.d/modules/openldap/openldap.go new file mode 100644 index 00000000000000..2e85054d16bcfd --- /dev/null +++ b/src/go/plugin/go.d/modules/openldap/openldap.go @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package openldap + +import ( + _ "embed" + "errors" + "time" + + "github.com/netdata/netdata/go/plugins/plugin/go.d/agent/module" + "github.com/netdata/netdata/go/plugins/plugin/go.d/pkg/confopt" + "github.com/netdata/netdata/go/plugins/plugin/go.d/pkg/tlscfg" +) + +//go:embed "config_schema.json" +var configSchema string + +func init() { + module.Register("openldap", module.Creator{ + JobConfigSchema: configSchema, + Defaults: module.Defaults{ + UpdateEvery: 1, + }, + Create: func()
module.Module { return New() }, + Config: func() any { return &Config{} }, + }) +} + +func New() *OpenLDAP { + return &OpenLDAP{ + Config: Config{ + URL: "ldap://127.0.0.1:389", + Timeout: confopt.Duration(time.Second * 2), + }, + + newConn: newLdapConn, + + charts: charts.Copy(), + } + +} + +type Config struct { + UpdateEvery int `yaml:"update_every,omitempty" json:"update_every"` + URL string `yaml:"url" json:"url"` + Timeout confopt.Duration `yaml:"timeout,omitempty" json:"timeout"` + Username string `yaml:"username" json:"username"` + Password string `yaml:"password" json:"password"` + tlscfg.TLSConfig `yaml:",inline" json:""` +} + +type OpenLDAP struct { + module.Base + Config `yaml:",inline" json:""` + + charts *module.Charts + + conn ldapConn + newConn func(Config) ldapConn +} + +func (l *OpenLDAP) Configuration() any { + return l.Config +} + +func (l *OpenLDAP) Init() error { + if l.URL == "" { + return errors.New("empty LDAP server url") + } + if l.Username == "" { + return errors.New("empty LDAP username") + } + + return nil +} + +func (l *OpenLDAP) Check() error { + mx, err := l.collect() + if err != nil { + l.Error(err) + return err + } + + if len(mx) == 0 { + return errors.New("no metrics collected") + } + + return nil +} + +func (l *OpenLDAP) Charts() *module.Charts { + return l.charts +} + +func (l *OpenLDAP) Collect() map[string]int64 { + mx, err := l.collect() + if err != nil { + l.Error(err) + } + + if len(mx) == 0 { + return nil + } + + return mx +} + +func (l *OpenLDAP) Cleanup() { + if l.conn != nil { + if err := l.conn.disconnect(); err != nil { + l.Warningf("error disconnecting ldap client: %v", err) + } + l.conn = nil + } +} diff --git a/src/go/plugin/go.d/modules/openldap/openldap_test.go b/src/go/plugin/go.d/modules/openldap/openldap_test.go new file mode 100644 index 00000000000000..aa624bfda5bbda --- /dev/null +++ b/src/go/plugin/go.d/modules/openldap/openldap_test.go @@ -0,0 +1,363 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + 
+package openldap + +import ( + "errors" + "fmt" + "os" + "testing" + + "github.com/go-ldap/ldap/v3" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/netdata/netdata/go/plugins/plugin/go.d/agent/module" +) + +var ( + dataConfigJSON, _ = os.ReadFile("testdata/config.json") + dataConfigYAML, _ = os.ReadFile("testdata/config.yaml") +) + +func Test_testDataIsValid(t *testing.T) { + for name, data := range map[string][]byte{ + "dataConfigJSON": dataConfigJSON, + "dataConfigYAML": dataConfigYAML, + } { + assert.NotNil(t, data, name) + } +} + +func TestOpenLDAP_ConfigurationSerialize(t *testing.T) { + module.TestConfigurationSerialize(t, &OpenLDAP{}, dataConfigJSON, dataConfigYAML) +} + +func TestOpenLDAP_Init(t *testing.T) { + tests := map[string]struct { + config Config + wantFail bool + }{ + "fails with default config": { + wantFail: true, + config: New().Config, + }, + "fails if URL not set": { + wantFail: true, + config: func() Config { + conf := New().Config + conf.URL = "" + return conf + }(), + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + oldap := New() + oldap.Config = test.config + + if test.wantFail { + assert.Error(t, oldap.Init()) + } else { + assert.NoError(t, oldap.Init()) + } + }) + } +} + +func TestOpenLDAP_Cleanup(t *testing.T) { + tests := map[string]struct { + prepare func() *OpenLDAP + }{ + "not initialized": { + prepare: func() *OpenLDAP { + return New() + }, + }, + "after check": { + prepare: func() *OpenLDAP { + oldap := New() + oldap.newConn = func(Config) ldapConn { return prepareMockOk() } + _ = oldap.Check() + return oldap + }, + }, + "after collect": { + prepare: func() *OpenLDAP { + oldap := New() + oldap.newConn = func(Config) ldapConn { return prepareMockOk() } + _ = oldap.Collect() + return oldap + }, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + oldap := test.prepare() + + assert.NotPanics(t, oldap.Cleanup) + }) + } +} + 
+func TestOpenLDAP_Charts(t *testing.T) { + assert.NotNil(t, New().Charts()) +} + +func TestOpenLDAP_Check(t *testing.T) { + tests := map[string]struct { + prepareMock func() *mockOpenLDAPConn + wantFail bool + }{ + "success case": { + wantFail: false, + prepareMock: prepareMockOk, + }, + "err on connect": { + wantFail: true, + prepareMock: prepareMockErrOnConnect, + }, + "err on search": { + wantFail: true, + prepareMock: prepareMockErrOnSearch, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + oldap := New() + mock := test.prepareMock() + oldap.newConn = func(Config) ldapConn { return mock } + + if test.wantFail { + assert.Error(t, oldap.Check()) + } else { + assert.NoError(t, oldap.Check()) + } + }) + } +} + +func TestOpenLDAP_Collect(t *testing.T) { + tests := map[string]struct { + prepareMock func() *mockOpenLDAPConn + wantMetrics map[string]int64 + disconnectBeforeCleanup bool + disconnectAfterCleanup bool + }{ + "success case": { + prepareMock: prepareMockOk, + disconnectBeforeCleanup: false, + disconnectAfterCleanup: true, + wantMetrics: map[string]int64{ + "bytes_sent": 1, + "completed_add_operations": 1, + "completed_bind_operations": 1, + "completed_compare_operations": 1, + "completed_delete_operations": 1, + "completed_modify_operations": 1, + "completed_operations": 7, + "completed_search_operations": 1, + "completed_unbind_operations": 1, + "current_connections": 1, + "entries_sent": 1, + "initiated_add_operations": 1, + "initiated_bind_operations": 1, + "initiated_compare_operations": 1, + "initiated_delete_operations": 1, + "initiated_modify_operations": 1, + "initiated_operations": 7, + "initiated_search_operations": 1, + "initiated_unbind_operations": 1, + "read_waiters": 1, + "referrals_sent": 1, + "total_connections": 1, + "write_waiters": 1, + }, + }, + "err on connect": { + prepareMock: prepareMockErrOnConnect, + disconnectBeforeCleanup: false, + disconnectAfterCleanup: false, + }, + "err on search": { + 
prepareMock: prepareMockErrOnSearch, + disconnectBeforeCleanup: true, + disconnectAfterCleanup: true, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + oldap := New() + mock := test.prepareMock() + oldap.newConn = func(Config) ldapConn { return mock } + + mx := oldap.Collect() + + require.Equal(t, test.wantMetrics, mx) + + if len(test.wantMetrics) > 0 { + module.TestMetricsHasAllChartsDims(t, oldap.Charts(), mx) + } + + assert.Equal(t, test.disconnectBeforeCleanup, mock.disconnectCalled, "disconnect before cleanup") + oldap.Cleanup() + assert.Equal(t, test.disconnectAfterCleanup, mock.disconnectCalled, "disconnect after cleanup") + }) + } +} + +func prepareMockOk() *mockOpenLDAPConn { + return &mockOpenLDAPConn{ + dataSearchMonCounters: &ldap.SearchResult{ + Entries: []*ldap.Entry{ + { + DN: "cn=Current,cn=Connections,cn=Monitor", + Attributes: []*ldap.EntryAttribute{ + {Name: attrMonitorCounter, Values: []string{"1"}}, + }, + }, + { + DN: "cn=Total,cn=Connections,cn=Monitor", + Attributes: []*ldap.EntryAttribute{ + {Name: attrMonitorCounter, Values: []string{"1"}}, + }, + }, + { + DN: "cn=Bytes,cn=Statistics,cn=Monitor", + Attributes: []*ldap.EntryAttribute{ + {Name: attrMonitorCounter, Values: []string{"1"}}, + }, + }, + { + DN: "cn=Referrals,cn=Statistics,cn=Monitor", + Attributes: []*ldap.EntryAttribute{ + {Name: attrMonitorCounter, Values: []string{"1"}}, + }, + }, + { + DN: "cn=Entries,cn=Statistics,cn=Monitor", + Attributes: []*ldap.EntryAttribute{ + {Name: attrMonitorCounter, Values: []string{"1"}}, + }, + }, + { + DN: "cn=Write,cn=Waiters,cn=Monitor", + Attributes: []*ldap.EntryAttribute{ + {Name: attrMonitorCounter, Values: []string{"1"}}, + }, + }, + { + DN: "cn=Read,cn=Waiters,cn=Monitor", + Attributes: []*ldap.EntryAttribute{ + {Name: attrMonitorCounter, Values: []string{"1"}}, + }, + }, + }, + }, + dataSearchMonOperations: &ldap.SearchResult{ + Entries: []*ldap.Entry{ + { + DN: "cn=Bind,cn=Operations,cn=Monitor", + 
Attributes: []*ldap.EntryAttribute{ + {Name: attrMonitorOpInitiated, Values: []string{"1"}}, + {Name: attrMonitorOpCompleted, Values: []string{"1"}}, + }, + }, + { + DN: "cn=Unbind,cn=Operations,cn=Monitor", + Attributes: []*ldap.EntryAttribute{ + {Name: attrMonitorOpInitiated, Values: []string{"1"}}, + {Name: attrMonitorOpCompleted, Values: []string{"1"}}, + }, + }, + { + DN: "cn=Add,cn=Operations,cn=Monitor", + Attributes: []*ldap.EntryAttribute{ + {Name: attrMonitorOpInitiated, Values: []string{"1"}}, + {Name: attrMonitorOpCompleted, Values: []string{"1"}}, + }, + }, + { + DN: "cn=Delete,cn=Operations,cn=Monitor", + Attributes: []*ldap.EntryAttribute{ + {Name: attrMonitorOpInitiated, Values: []string{"1"}}, + {Name: attrMonitorOpCompleted, Values: []string{"1"}}, + }, + }, + { + DN: "cn=Modify,cn=Operations,cn=Monitor", + Attributes: []*ldap.EntryAttribute{ + {Name: attrMonitorOpInitiated, Values: []string{"1"}}, + {Name: attrMonitorOpCompleted, Values: []string{"1"}}, + }, + }, + { + DN: "cn=Compare,cn=Operations,cn=Monitor", + Attributes: []*ldap.EntryAttribute{ + {Name: attrMonitorOpInitiated, Values: []string{"1"}}, + {Name: attrMonitorOpCompleted, Values: []string{"1"}}, + }, + }, + { + DN: "cn=Search,cn=Operations,cn=Monitor", + Attributes: []*ldap.EntryAttribute{ + {Name: attrMonitorOpInitiated, Values: []string{"1"}}, + {Name: attrMonitorOpCompleted, Values: []string{"1"}}, + }, + }, + }, + }, + } +} + +func prepareMockErrOnConnect() *mockOpenLDAPConn { + return &mockOpenLDAPConn{ + errOnConnect: true, + } +} + +func prepareMockErrOnSearch() *mockOpenLDAPConn { + return &mockOpenLDAPConn{ + errOnSearch: true, + } +} + +type mockOpenLDAPConn struct { + errOnConnect bool + disconnectCalled bool + + dataSearchMonCounters *ldap.SearchResult + dataSearchMonOperations *ldap.SearchResult + errOnSearch bool +} + +func (m *mockOpenLDAPConn) connect() error { + if m.errOnConnect { + return errors.New("mock.connect() error") + } + return nil +} + +func (m 
*mockOpenLDAPConn) disconnect() error { + m.disconnectCalled = true + return nil +} + +func (m *mockOpenLDAPConn) search(req *ldap.SearchRequest) (*ldap.SearchResult, error) { + if m.errOnSearch { + return nil, errors.New("mock.search() error") + } + + switch req.BaseDN { + case "cn=Monitor": + return m.dataSearchMonCounters, nil + case "cn=Operations,cn=Monitor": + return m.dataSearchMonOperations, nil + default: + return nil, fmt.Errorf("mock.search(): unknown BaseDSN: %s", req.BaseDN) + } +} diff --git a/src/go/plugin/go.d/modules/openldap/testdata/config.json b/src/go/plugin/go.d/modules/openldap/testdata/config.json new file mode 100644 index 00000000000000..2c69135fdfc2be --- /dev/null +++ b/src/go/plugin/go.d/modules/openldap/testdata/config.json @@ -0,0 +1,11 @@ +{ + "update_every": 123, + "url": "ok", + "timeout": 123.123, + "username": "ok", + "password": "ok", + "tls_ca": "ok", + "tls_cert": "ok", + "tls_key": "ok", + "tls_skip_verify": false +} diff --git a/src/go/plugin/go.d/modules/openldap/testdata/config.yaml b/src/go/plugin/go.d/modules/openldap/testdata/config.yaml new file mode 100644 index 00000000000000..97014066dae35f --- /dev/null +++ b/src/go/plugin/go.d/modules/openldap/testdata/config.yaml @@ -0,0 +1,8 @@ +update_every: 123 +url: "ok" +timeout: 123.123 +username: "ok" +password: "ok" +tls_ca: "ok" +tls_cert: "ok" +tls_key: "ok" From b8c8e44bc4af69f90678c77e5d63233020ac69fa Mon Sep 17 00:00:00 2001 From: Netdata bot <43409846+netdatabot@users.noreply.github.com> Date: Mon, 30 Sep 2024 10:52:31 -0400 Subject: [PATCH 09/23] Regenerate integrations.js (#18647) Co-authored-by: ilyam8 <22274335+ilyam8@users.noreply.github.com> --- integrations/integrations.js | 76 +++--- integrations/integrations.json | 76 +++--- src/collectors/COLLECTORS.md | 2 +- src/go/plugin/go.d/modules/openldap/README.md | 1 + .../modules/openldap/integrations/openldap.md | 228 ++++++++++++++++++ 5 files changed, 306 insertions(+), 77 deletions(-) create mode 120000 
src/go/plugin/go.d/modules/openldap/README.md create mode 100644 src/go/plugin/go.d/modules/openldap/integrations/openldap.md diff --git a/integrations/integrations.js b/integrations/integrations.js index fc915f264f0e06..acc63bfa5fa07b 100644 --- a/integrations/integrations.js +++ b/integrations/integrations.js @@ -5738,6 +5738,43 @@ export const integrations = [ "edit_link": "https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/nvme/metadata.yaml", "related_resources": "" }, + { + "meta": { + "plugin_name": "go.d.plugin", + "module_name": "openldap", + "monitored_instance": { + "name": "OpenLDAP", + "link": "https://www.openldap.org/", + "categories": [ + "data-collection.authentication-and-authorization" + ], + "icon_filename": "openldap.svg" + }, + "related_resources": { + "integrations": { + "list": [] + } + }, + "info_provided_to_referring_integrations": { + "description": "" + }, + "keywords": [ + "openldap", + "RBAC", + "Directory access" + ], + "most_popular": false + }, + "overview": "# OpenLDAP\n\nPlugin: go.d.plugin\nModule: openldap\n\n## Overview\n\nThis collector monitors OpenLDAP metrics about connections, operations, referrals and more.\n\n\nIt gathers the metrics using the [go-ldap](https://github.com/go-ldap/ldap) module and the [Monitor backend](https://www.openldap.org/doc/admin24/monitoringslapd.html) of OpenLDAP.\n\n\nThis collector is only supported on the following platforms:\n\n- Linux\n\nThis collector supports collecting metrics from multiple instances of this integration, including remote instances.\n\n\n### Default Behavior\n\n#### Auto-Detection\n\nThe collector cannot auto-detect OpenLDAP instances, because credential configuration is required.\n\n#### Limits\n\nThe default configuration for this integration does not impose any limits on data collection.\n\n#### Performance Impact\n\nThe default configuration for this integration is not expected to impose a significant performance impact on the system.\n", + 
"setup": "## Setup\n\n### Prerequisites\n\n#### Enable the openLDAP Monitor Backend.\n\nFollow instructions from https://www.openldap.org/doc/admin24/monitoringslapd.html to activate monitoring interface.\n\n\n\n### Configuration\n\n#### File\n\nThe configuration file name for this integration is `go.d/openldap.conf`.\n\n\nYou can edit the configuration file using the `edit-config` script from the\nNetdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory).\n\n```bash\ncd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata\nsudo ./edit-config go.d/openldap.conf\n```\n#### Options\n\nThe following options can be defined globally: update_every.\n\n\n{% details open=true summary=\"Config options\" %}\n| Name | Description | Default | Required |\n|:----|:-----------|:-------|:--------:|\n| update_every | Data collection frequency. | 1 | no |\n| timeout | Timeout for establishing a connection and communication (reading and writing) in seconds. | 2 | no |\n| url | LDAP server URL. | ldap://127.0.0.1:389 | yes |\n| username | The distinguished name (DN) of the user authorized to view the monitor database. | | yes |\n| password | The password associated with the user identified by the DN. | | yes |\n| tls_skip_verify | Server certificate chain and hostname validation policy. Controls whether the client performs this check. | no | no |\n| tls_ca | Certification authority that the client uses when verifying the server's certificates. | | no |\n| tls_cert | Client TLS certificate. | | no |\n| tls_key | Client TLS key. 
| | no |\n\n{% /details %}\n#### Examples\n\n##### Basic\n\nA basic example configuration.\n\n```yaml\njobs:\n - name: local\n url: ldap://localhost:389\n username: cn=netdata,dc=example,dc=com \n password: secret\n\n```\n##### Multi-instance\n\n> **Note**: When you define multiple jobs, their names must be unique.\n\nCollecting metrics from local and remote instances.\n\n\n```yaml\njobs:\n - name: local\n url: ldap://localhost:389\n username: cn=netdata,dc=example,dc=com \n password: secret\n\n - name: remote\n url: ldap://192.0.2.1:389\n username: cn=netdata,dc=example,dc=com \n password: secret\n\n```\n", + "troubleshooting": "## Troubleshooting\n\n### Debug Mode\n\n**Important**: Debug mode is not supported for data collection jobs created via the UI using the Dyncfg feature.\n\nTo troubleshoot issues with the `openldap` collector, run the `go.d.plugin` with the debug option enabled. The output\nshould give you clues as to why the collector isn't working.\n\n- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on\n your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.\n\n ```bash\n cd /usr/libexec/netdata/plugins.d/\n ```\n\n- Switch to the `netdata` user.\n\n ```bash\n sudo -u netdata -s\n ```\n\n- Run the `go.d.plugin` to debug the collector:\n\n ```bash\n ./go.d.plugin -d -m openldap\n ```\n\n### Getting Logs\n\nIf you're encountering problems with the `openldap` collector, follow these steps to retrieve logs and identify potential issues:\n\n- **Run the command** specific to your system (systemd, non-systemd, or Docker container).\n- **Examine the output** for any warnings or error messages that might indicate issues. 
These messages should provide clues about the root cause of the problem.\n\n#### System with systemd\n\nUse the following command to view logs generated since the last Netdata service restart:\n\n```bash\njournalctl _SYSTEMD_INVOCATION_ID=\"$(systemctl show --value --property=InvocationID netdata)\" --namespace=netdata --grep openldap\n```\n\n#### System without systemd\n\nLocate the collector log file, typically at `/var/log/netdata/collector.log`, and use `grep` to filter for collector's name:\n\n```bash\ngrep openldap /var/log/netdata/collector.log\n```\n\n**Note**: This method shows logs from all restarts. Focus on the **latest entries** for troubleshooting current issues.\n\n#### Docker Container\n\nIf your Netdata runs in a Docker container named \"netdata\" (replace if different), use this command:\n\n```bash\ndocker logs netdata 2>&1 | grep openldap\n```\n\n", + "alerts": "## Alerts\n\nThere are no alerts configured by default for this integration.\n", + "metrics": "## Metrics\n\nMetrics grouped by *scope*.\n\nThe scope defines the instance that the metric belongs to. 
An instance is uniquely identified by a set of labels.\n\n\n\n### Per OpenLDAP instance\n\nThese metrics refer to the entire monitored application.\n\nThis scope has no labels.\n\nMetrics:\n\n| Metric | Dimensions | Unit |\n|:------|:----------|:----|\n| openldap.current_connections | active | connections |\n| openldap.connections | connections | connections/s |\n| openldap.traffic | sent | bytes/s |\n| openldap.entries | sent | entries/s |\n| openldap.referrals | sent | referrals/s |\n| openldap.operations | completed, initiated | operations/s |\n| openldap.operations_by_type | bind, search, unbind, add, delete, modify, compare | operations/s |\n| openldap.waiters | write, read | waiters/s |\n\n", + "integration_type": "collector", + "id": "go.d.plugin-openldap-OpenLDAP", + "edit_link": "https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/openldap/metadata.yaml", + "related_resources": "" + }, { "meta": { "id": "collector-go.d.plugin-openvpn", @@ -19257,7 +19294,7 @@ export const integrations = [ "most_popular": false }, "overview": "# Go applications (EXPVAR)\n\nPlugin: python.d.plugin\nModule: go_expvar\n\n## Overview\n\nThis collector monitors Go applications that expose their metrics with the use of the `expvar` package from the Go standard library. 
It produces charts for Go runtime memory statistics and optionally any number of custom charts.\n\nIt connects via http to gather the metrics exposed via the `expvar` package.\n\nThis collector is supported on all platforms.\n\nThis collector supports collecting metrics from multiple instances of this integration, including remote instances.\n\n\n### Default Behavior\n\n#### Auto-Detection\n\nThis integration doesn't support auto-detection.\n\n#### Limits\n\nThe default configuration for this integration does not impose any limits on data collection.\n\n#### Performance Impact\n\nThe default configuration for this integration is not expected to impose a significant performance impact on the system.\n", - "setup": "## Setup\n\n### Prerequisites\n\n#### Enable the go_expvar collector\n\nThe `go_expvar` collector is disabled by default. To enable it, use `edit-config` from the Netdata [config directory](/docs/netdata-agent/configuration/README.md), which is typically at `/etc/netdata`, to edit the `python.d.conf` file.\n\n```bash\ncd /etc/netdata # Replace this path with your Netdata config directory, if different\nsudo ./edit-config python.d.conf\n```\n\nChange the value of the `go_expvar` setting to `yes`. Save the file and restart the Netdata Agent with `sudo systemctl restart netdata`, or the [appropriate method](/packaging/installer/README.md#maintaining-a-netdata-agent-installation) for your system.\n\n\n#### Sample `expvar` usage in a Go application\n\nThe `expvar` package exposes metrics over HTTP and is very easy to use.\nConsider this minimal sample below:\n\n```go\npackage main\n\nimport (\n _ \"expvar\"\n \"net/http\"\n)\n\nfunc main() {\n http.ListenAndServe(\"127.0.0.1:8080\", nil)\n}\n```\n\nWhen imported this way, the `expvar` package registers a HTTP handler at `/debug/vars` that\nexposes Go runtime's memory statistics in JSON format. 
You can inspect the output by opening\nthe URL in your browser (or by using `wget` or `curl`).\n\nSample output:\n\n```json\n{\n\"cmdline\": [\"./expvar-demo-binary\"],\n\"memstats\": {\"Alloc\":630856,\"TotalAlloc\":630856,\"Sys\":3346432,\"Lookups\":27, }\n}\n```\n\nYou can of course expose and monitor your own variables as well.\nHere is a sample Go application that exposes a few custom variables:\n\n```go\npackage main\n\nimport (\n \"expvar\"\n \"net/http\"\n \"runtime\"\n \"time\"\n)\n\nfunc main() {\n\n tick := time.NewTicker(1 * time.Second)\n num_go := expvar.NewInt(\"runtime.goroutines\")\n counters := expvar.NewMap(\"counters\")\n counters.Set(\"cnt1\", new(expvar.Int))\n counters.Set(\"cnt2\", new(expvar.Float))\n\n go http.ListenAndServe(\":8080\", nil)\n\n for {\n select {\n case <- tick.C:\n num_go.Set(int64(runtime.NumGoroutine()))\n counters.Add(\"cnt1\", 1)\n counters.AddFloat(\"cnt2\", 1.452)\n }\n }\n}\n```\n\nApart from the runtime memory stats, this application publishes two counters and the\nnumber of currently running Goroutines and updates these stats every second.\n\n\n\n### Configuration\n\n#### File\n\nThe configuration file name for this integration is `python.d/go_expvar.conf`.\n\n\nYou can edit the configuration file using the `edit-config` script from the\nNetdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory).\n\n```bash\ncd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata\nsudo ./edit-config python.d/go_expvar.conf\n```\n#### Options\n\nThere are 2 sections:\n\n* Global variables\n* One or more JOBS that can define multiple different instances to monitor.\n\nThe following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.\n\nAdditionally, the following collapsed table contains all the options that can be configured inside a JOB definition.\n\nEvery configuration JOB starts with a 
`job_name` value which will appear in the dashboard, unless a `name` parameter is specified. Each JOB can be used to monitor a different Go application.\n\n\n{% details open=true summary=\"Config options\" %}\n| Name | Description | Default | Required |\n|:----|:-----------|:-------|:--------:|\n| update_every | Sets the default data collection frequency. | 5 | no |\n| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |\n| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |\n| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |\n| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |\n| url | the URL and port of the expvar endpoint. Please include the whole path of the endpoint, as the expvar handler can be installed in a non-standard location. | | yes |\n| user | If the URL is password protected, this is the username to use. | | no |\n| pass | If the URL is password protected, this is the password to use. | | no |\n| collect_memstats | Enables charts for Go runtime's memory statistics. | | no |\n| extra_charts | Defines extra data/charts to monitor, please see the example below. | | no |\n\n{% /details %}\n#### Examples\n\n##### Monitor a Go app1 application\n\nThe example below sets a configuration for a Go application, called `app1`. 
Besides the `memstats`, the application also exposes two counters and the number of currently running Goroutines and updates these stats every second.\n\nThe `go_expvar` collector can monitor these as well with the use of the `extra_charts` configuration variable.\n\nThe `extra_charts` variable is a YaML list of Netdata chart definitions.\nEach chart definition has the following keys:\n\n```\nid: Netdata chart ID\noptions: a key-value mapping of chart options\nlines: a list of line definitions\n```\n\n**Note: please do not use dots in the chart or line ID field.\nSee [this issue](https://github.com/netdata/netdata/pull/1902#issuecomment-284494195) for explanation.**\n\nPlease see these two links to the official Netdata documentation for more information about the values:\n\n- [External plugins - charts](/src/collectors/plugins.d/README.md#chart)\n- [Chart variables](/src/collectors/python.d.plugin/README.md#global-variables-order-and-chart)\n\n**Line definitions**\n\nEach chart can define multiple lines (dimensions).\nA line definition is a key-value mapping of line options.\nEach line can have the following options:\n\n```\n# mandatory\nexpvar_key: the name of the expvar as present in the JSON output of /debug/vars endpoint\nexpvar_type: value type; supported are \"float\" or \"int\"\nid: the id of this line/dimension in Netdata\n\n# optional - Netdata defaults are used if these options are not defined\nname: ''\nalgorithm: absolute\nmultiplier: 1\ndivisor: 100 if expvar_type == float, 1 if expvar_type == int\nhidden: False\n```\n\nPlease see the following link for more information about the options and their default values:\n[External plugins - dimensions](/src/collectors/plugins.d/README.md#dimension)\n\nApart from top-level expvars, this plugin can also parse expvars stored in a multi-level map;\nAll dicts in the resulting JSON document are then flattened to one level.\nExpvar names are joined together with '.' 
when flattening.\n\nExample:\n\n```\n{\n \"counters\": {\"cnt1\": 1042, \"cnt2\": 1512.9839999999983},\n \"runtime.goroutines\": 5\n}\n```\n\nIn the above case, the exported variables will be available under `runtime.goroutines`,\n`counters.cnt1` and `counters.cnt2` expvar_keys. If the flattening results in a key collision,\nthe first defined key wins and all subsequent keys with the same name are ignored.\n\n\n```yaml\napp1:\n name : 'app1'\n url : 'http://127.0.0.1:8080/debug/vars'\n collect_memstats: true\n extra_charts:\n - id: \"runtime_goroutines\"\n options:\n name: num_goroutines\n title: \"runtime: number of goroutines\"\n units: goroutines\n family: runtime\n context: expvar.runtime.goroutines\n chart_type: line\n lines:\n - {expvar_key: 'runtime.goroutines', expvar_type: int, id: runtime_goroutines}\n - id: \"foo_counters\"\n options:\n name: counters\n title: \"some random counters\"\n units: awesomeness\n family: counters\n context: expvar.foo.counters\n chart_type: line\n lines:\n - {expvar_key: 'counters.cnt1', expvar_type: int, id: counters_cnt1}\n - {expvar_key: 'counters.cnt2', expvar_type: float, id: counters_cnt2}\n\n```\n", + "setup": "## Setup\n\n### Prerequisites\n\n#### Enable the go_expvar collector\n\nThe `go_expvar` collector is disabled by default. To enable it, use `edit-config` from the Netdata [config directory](/docs/netdata-agent/configuration/README.md), which is typically at `/etc/netdata`, to edit the `python.d.conf` file.\n\n```bash\ncd /etc/netdata # Replace this path with your Netdata config directory, if different\nsudo ./edit-config python.d.conf\n```\n\nChange the value of the `go_expvar` setting to `yes`. 
Save the file and restart the Netdata Agent with `sudo systemctl restart netdata`, or the [appropriate method](/packaging/installer/README.md#maintaining-a-netdata-agent-installation) for your system.\n\n\n#### Sample `expvar` usage in a Go application\n\nThe `expvar` package exposes metrics over HTTP and is very easy to use.\nConsider this minimal sample below:\n\n```go\npackage main\n\nimport (\n _ \"expvar\"\n \"net/http\"\n)\n\nfunc main() {\n http.ListenAndServe(\"127.0.0.1:8080\", nil)\n}\n```\n\nWhen imported this way, the `expvar` package registers a HTTP handler at `/debug/vars` that\nexposes Go runtime's memory statistics in JSON format. You can inspect the output by opening\nthe URL in your browser (or by using `wget` or `curl`).\n\nSample output:\n\n```json\n{\n\"cmdline\": [\"./expvar-demo-binary\"],\n\"memstats\": {\"Alloc\":630856,\"TotalAlloc\":630856,\"Sys\":3346432,\"Lookups\":27, }\n}\n```\n\nYou can of course expose and monitor your own variables as well.\nHere is a sample Go application that exposes a few custom variables:\n\n```go\npackage main\n\nimport (\n \"expvar\"\n \"net/http\"\n \"runtime\"\n \"time\"\n)\n\nfunc main() {\n\n tick := time.NewTicker(1 * time.Second)\n num_go := expvar.NewInt(\"runtime.goroutines\")\n counters := expvar.NewMap(\"counters\")\n counters.Set(\"cnt1\", new(expvar.Int))\n counters.Set(\"cnt2\", new(expvar.Float))\n\n go http.ListenAndServe(\":8080\", nil)\n\n for {\n select {\n case <- tick.C:\n num_go.Set(int64(runtime.NumGoroutine()))\n counters.Add(\"cnt1\", 1)\n counters.AddFloat(\"cnt2\", 1.452)\n }\n }\n}\n```\n\nApart from the runtime memory stats, this application publishes two counters and the\nnumber of currently running Goroutines and updates these stats every second.\n\n\n\n### Configuration\n\n#### File\n\nThe configuration file name for this integration is `python.d/go_expvar.conf`.\n\n\nYou can edit the configuration file using the `edit-config` script from the\nNetdata [config 
directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory).\n\n```bash\ncd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata\nsudo ./edit-config python.d/go_expvar.conf\n```\n#### Options\n\nThere are 2 sections:\n\n* Global variables\n* One or more JOBS that can define multiple different instances to monitor.\n\nThe following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.\n\nAdditionally, the following collapsed table contains all the options that can be configured inside a JOB definition.\n\nEvery configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. Each JOB can be used to monitor a different Go application.\n\n\n{% details open=true summary=\"Config options\" %}\n| Name | Description | Default | Required |\n|:----|:-----------|:-------|:--------:|\n| update_every | Sets the default data collection frequency. | 5 | no |\n| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |\n| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |\n| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |\n| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |\n| url | the URL and port of the expvar endpoint. Please include the whole path of the endpoint, as the expvar handler can be installed in a non-standard location. | | yes |\n| user | If the URL is password protected, this is the username to use. | | no |\n| pass | If the URL is password protected, this is the password to use. | | no |\n| collect_memstats | Enables charts for Go runtime's memory statistics. 
| | no |\n| extra_charts | Defines extra data/charts to monitor, please see the example below. | | no |\n\n{% /details %}\n#### Examples\n\n##### Monitor a Go app1 application\n\nThe example below sets a configuration for a Go application, called `app1`. Besides the `memstats`, the application also exposes two counters and the number of currently running Goroutines and updates these stats every second.\n\nThe `go_expvar` collector can monitor these as well with the use of the `extra_charts` configuration variable.\n\nThe `extra_charts` variable is a YaML list of Netdata chart definitions.\nEach chart definition has the following keys:\n\n```\nid: Netdata chart ID\noptions: a key-value mapping of chart options\nlines: a list of line definitions\n```\n\n**Note: please do not use dots in the chart or line ID field.\nSee [this issue](https://github.com/netdata/netdata/pull/1902#issuecomment-284494195) for explanation.**\n\nPlease see these two links to the official Netdata documentation for more information about the values:\n\n- [External plugins - charts](/src/plugins.d/README.md#chart)\n- [Chart variables](/src/collectors/python.d.plugin/README.md#global-variables-order-and-chart)\n\n**Line definitions**\n\nEach chart can define multiple lines (dimensions).\nA line definition is a key-value mapping of line options.\nEach line can have the following options:\n\n```\n# mandatory\nexpvar_key: the name of the expvar as present in the JSON output of /debug/vars endpoint\nexpvar_type: value type; supported are \"float\" or \"int\"\nid: the id of this line/dimension in Netdata\n\n# optional - Netdata defaults are used if these options are not defined\nname: ''\nalgorithm: absolute\nmultiplier: 1\ndivisor: 100 if expvar_type == float, 1 if expvar_type == int\nhidden: False\n```\n\nPlease see the following link for more information about the options and their default values:\n[External plugins - dimensions](/src/plugins.d/README.md#dimension)\n\nApart from top-level expvars, 
this plugin can also parse expvars stored in a multi-level map;\nAll dicts in the resulting JSON document are then flattened to one level.\nExpvar names are joined together with '.' when flattening.\n\nExample:\n\n```\n{\n \"counters\": {\"cnt1\": 1042, \"cnt2\": 1512.9839999999983},\n \"runtime.goroutines\": 5\n}\n```\n\nIn the above case, the exported variables will be available under `runtime.goroutines`,\n`counters.cnt1` and `counters.cnt2` expvar_keys. If the flattening results in a key collision,\nthe first defined key wins and all subsequent keys with the same name are ignored.\n\n\n```yaml\napp1:\n name : 'app1'\n url : 'http://127.0.0.1:8080/debug/vars'\n collect_memstats: true\n extra_charts:\n - id: \"runtime_goroutines\"\n options:\n name: num_goroutines\n title: \"runtime: number of goroutines\"\n units: goroutines\n family: runtime\n context: expvar.runtime.goroutines\n chart_type: line\n lines:\n - {expvar_key: 'runtime.goroutines', expvar_type: int, id: runtime_goroutines}\n - id: \"foo_counters\"\n options:\n name: counters\n title: \"some random counters\"\n units: awesomeness\n family: counters\n context: expvar.foo.counters\n chart_type: line\n lines:\n - {expvar_key: 'counters.cnt1', expvar_type: int, id: counters_cnt1}\n - {expvar_key: 'counters.cnt2', expvar_type: float, id: counters_cnt2}\n\n```\n", "troubleshooting": "## Troubleshooting\n\n### Debug Mode\n\n\nTo troubleshoot issues with the `go_expvar` collector, run the `python.d.plugin` with the debug option enabled. The output\nshould give you clues as to why the collector isn't working.\n\n- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. 
If that's not the case on\n your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.\n\n ```bash\n cd /usr/libexec/netdata/plugins.d/\n ```\n\n- Switch to the `netdata` user.\n\n ```bash\n sudo -u netdata -s\n ```\n\n- Run the `python.d.plugin` to debug the collector:\n\n ```bash\n ./python.d.plugin go_expvar debug trace\n ```\n\n### Getting Logs\n\nIf you're encountering problems with the `go_expvar` collector, follow these steps to retrieve logs and identify potential issues:\n\n- **Run the command** specific to your system (systemd, non-systemd, or Docker container).\n- **Examine the output** for any warnings or error messages that might indicate issues. These messages should provide clues about the root cause of the problem.\n\n#### System with systemd\n\nUse the following command to view logs generated since the last Netdata service restart:\n\n```bash\njournalctl _SYSTEMD_INVOCATION_ID=\"$(systemctl show --value --property=InvocationID netdata)\" --namespace=netdata --grep go_expvar\n```\n\n#### System without systemd\n\nLocate the collector log file, typically at `/var/log/netdata/collector.log`, and use `grep` to filter for collector's name:\n\n```bash\ngrep go_expvar /var/log/netdata/collector.log\n```\n\n**Note**: This method shows logs from all restarts. Focus on the **latest entries** for troubleshooting current issues.\n\n#### Docker Container\n\nIf your Netdata runs in a Docker container named \"netdata\" (replace if different), use this command:\n\n```bash\ndocker logs netdata 2>&1 | grep go_expvar\n```\n\n", "alerts": "## Alerts\n\nThere are no alerts configured by default for this integration.\n", "metrics": "## Metrics\n\nMetrics grouped by *scope*.\n\nThe scope defines the instance that the metric belongs to. 
An instance is uniquely identified by a set of labels.\n\n\n\n### Per Go applications (EXPVAR) instance\n\nThese metrics refer to the entire monitored application.\n\nThis scope has no labels.\n\nMetrics:\n\n| Metric | Dimensions | Unit |\n|:------|:----------|:----|\n| expvar.memstats.heap | alloc, inuse | KiB |\n| expvar.memstats.stack | inuse | KiB |\n| expvar.memstats.mspan | inuse | KiB |\n| expvar.memstats.mcache | inuse | KiB |\n| expvar.memstats.live_objects | live | objects |\n| expvar.memstats.sys | sys | KiB |\n| expvar.memstats.gc_pauses | avg | ns |\n\n", @@ -19266,43 +19303,6 @@ export const integrations = [ "edit_link": "https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/go_expvar/metadata.yaml", "related_resources": "" }, - { - "meta": { - "plugin_name": "python.d.plugin", - "module_name": "openldap", - "monitored_instance": { - "name": "OpenLDAP", - "link": "https://www.openldap.org/", - "categories": [ - "data-collection.authentication-and-authorization" - ], - "icon_filename": "statsd.png" - }, - "related_resources": { - "integrations": { - "list": [] - } - }, - "info_provided_to_referring_integrations": { - "description": "" - }, - "keywords": [ - "openldap", - "RBAC", - "Directory access" - ], - "most_popular": false - }, - "overview": "# OpenLDAP\n\nPlugin: python.d.plugin\nModule: openldap\n\n## Overview\n\nThis collector monitors OpenLDAP metrics about connections, operations, referrals and more.\n\nStatistics are taken from the monitoring interface of a openLDAP (slapd) server\n\n\nThis collector is supported on all platforms.\n\nThis collector only supports collecting metrics from a single instance of this integration.\n\n\n### Default Behavior\n\n#### Auto-Detection\n\nThis collector doesn't work until all the prerequisites are checked.\n\n\n#### Limits\n\nThe default configuration for this integration does not impose any limits on data collection.\n\n#### Performance Impact\n\nThe default configuration for this 
integration is not expected to impose a significant performance impact on the system.\n", - "setup": "## Setup\n\n### Prerequisites\n\n#### Configure the openLDAP server to expose metrics to monitor it.\n\nFollow instructions from https://www.openldap.org/doc/admin24/monitoringslapd.html to activate monitoring interface.\n\n\n#### Install python-ldap module\n\nInstall python ldap module \n\n1. From pip package manager\n\n```bash\npip install ldap\n```\n\n2. With apt package manager (in most deb based distros)\n\n\n```bash\napt-get install python-ldap\n```\n\n\n3. With yum package manager (in most rpm based distros)\n\n\n```bash\nyum install python-ldap\n```\n\n\n#### Insert credentials for Netdata to access openLDAP server\n\nUse the `ldappasswd` utility to set a password for the username you will use.\n\n\n\n### Configuration\n\n#### File\n\nThe configuration file name for this integration is `python.d/openldap.conf`.\n\n\nYou can edit the configuration file using the `edit-config` script from the\nNetdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory).\n\n```bash\ncd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata\nsudo ./edit-config python.d/openldap.conf\n```\n#### Options\n\nThere are 2 sections:\n\n* Global variables\n* One or more JOBS that can define multiple different instances to monitor.\n\nThe following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.\n\nAdditionally, the following collapsed table contains all the options that can be configured inside a JOB definition.\n\nEvery configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.\n\n\n{% details open=true summary=\"Config options\" %}\n| Name | Description | Default | Required |\n|:----|:-----------|:-------|:--------:|\n| update_every | Sets the default data collection frequency. 
| 5 | no |\n| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |\n| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |\n| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |\n| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |\n| username | The bind user with right to access monitor statistics | | yes |\n| password | The password for the binded user | | yes |\n| server | The listening address of the LDAP server. In case of TLS, use the hostname which the certificate is published for. | | yes |\n| port | The listening port of the LDAP server. Change to 636 port in case of TLS connection. | 389 | yes |\n| use_tls | Make True if a TLS connection is used over ldaps:// | no | no |\n| use_start_tls | Make True if a TLS connection is used over ldap:// | no | no |\n| cert_check | False if you want to ignore certificate check | True | yes |\n| timeout | Seconds to timeout if no connection exist | | yes |\n\n{% /details %}\n#### Examples\n\n##### Basic\n\nA basic example configuration.\n\n```yaml\nusername: \"cn=admin\"\npassword: \"pass\"\nserver: \"localhost\"\nport: \"389\"\ncheck_cert: True\ntimeout: 1\n\n```\n", - "troubleshooting": "## Troubleshooting\n\n### Debug Mode\n\n\nTo troubleshoot issues with the `openldap` collector, run the `python.d.plugin` with the debug option enabled. The output\nshould give you clues as to why the collector isn't working.\n\n- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. 
If that's not the case on\n your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.\n\n ```bash\n cd /usr/libexec/netdata/plugins.d/\n ```\n\n- Switch to the `netdata` user.\n\n ```bash\n sudo -u netdata -s\n ```\n\n- Run the `python.d.plugin` to debug the collector:\n\n ```bash\n ./python.d.plugin openldap debug trace\n ```\n\n### Getting Logs\n\nIf you're encountering problems with the `openldap` collector, follow these steps to retrieve logs and identify potential issues:\n\n- **Run the command** specific to your system (systemd, non-systemd, or Docker container).\n- **Examine the output** for any warnings or error messages that might indicate issues. These messages should provide clues about the root cause of the problem.\n\n#### System with systemd\n\nUse the following command to view logs generated since the last Netdata service restart:\n\n```bash\njournalctl _SYSTEMD_INVOCATION_ID=\"$(systemctl show --value --property=InvocationID netdata)\" --namespace=netdata --grep openldap\n```\n\n#### System without systemd\n\nLocate the collector log file, typically at `/var/log/netdata/collector.log`, and use `grep` to filter for collector's name:\n\n```bash\ngrep openldap /var/log/netdata/collector.log\n```\n\n**Note**: This method shows logs from all restarts. Focus on the **latest entries** for troubleshooting current issues.\n\n#### Docker Container\n\nIf your Netdata runs in a Docker container named \"netdata\" (replace if different), use this command:\n\n```bash\ndocker logs netdata 2>&1 | grep openldap\n```\n\n", - "alerts": "## Alerts\n\nThere are no alerts configured by default for this integration.\n", - "metrics": "## Metrics\n\nMetrics grouped by *scope*.\n\nThe scope defines the instance that the metric belongs to. 
An instance is uniquely identified by a set of labels.\n\n\n\n### Per OpenLDAP instance\n\nThese metrics refer to the entire monitored application.\n\nThis scope has no labels.\n\nMetrics:\n\n| Metric | Dimensions | Unit |\n|:------|:----------|:----|\n| openldap.total_connections | connections | connections/s |\n| openldap.traffic_stats | sent | KiB/s |\n| openldap.operations_status | completed, initiated | ops/s |\n| openldap.referrals | sent | referrals/s |\n| openldap.entries | sent | entries/s |\n| openldap.ldap_operations | bind, search, unbind, add, delete, modify, compare | ops/s |\n| openldap.waiters | write, read | waiters/s |\n\n", - "integration_type": "collector", - "id": "python.d.plugin-openldap-OpenLDAP", - "edit_link": "https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/openldap/metadata.yaml", - "related_resources": "" - }, { "meta": { "plugin_name": "python.d.plugin", diff --git a/integrations/integrations.json b/integrations/integrations.json index a1d26ec9ab4ca5..9f3832d1aeb555 100644 --- a/integrations/integrations.json +++ b/integrations/integrations.json @@ -5736,6 +5736,43 @@ "edit_link": "https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/nvme/metadata.yaml", "related_resources": "" }, + { + "meta": { + "plugin_name": "go.d.plugin", + "module_name": "openldap", + "monitored_instance": { + "name": "OpenLDAP", + "link": "https://www.openldap.org/", + "categories": [ + "data-collection.authentication-and-authorization" + ], + "icon_filename": "openldap.svg" + }, + "related_resources": { + "integrations": { + "list": [] + } + }, + "info_provided_to_referring_integrations": { + "description": "" + }, + "keywords": [ + "openldap", + "RBAC", + "Directory access" + ], + "most_popular": false + }, + "overview": "# OpenLDAP\n\nPlugin: go.d.plugin\nModule: openldap\n\n## Overview\n\nThis collector monitors OpenLDAP metrics about connections, operations, referrals and more.\n\n\nIt gathers the metrics 
using the [go-ldap](https://github.com/go-ldap/ldap) module and the [Monitor backend](https://www.openldap.org/doc/admin24/monitoringslapd.html) of OpenLDAP.\n\n\nThis collector is only supported on the following platforms:\n\n- Linux\n\nThis collector supports collecting metrics from multiple instances of this integration, including remote instances.\n\n\n### Default Behavior\n\n#### Auto-Detection\n\nThe collector cannot auto-detect OpenLDAP instances, because credential configuration is required.\n\n#### Limits\n\nThe default configuration for this integration does not impose any limits on data collection.\n\n#### Performance Impact\n\nThe default configuration for this integration is not expected to impose a significant performance impact on the system.\n", + "setup": "## Setup\n\n### Prerequisites\n\n#### Enable the openLDAP Monitor Backend.\n\nFollow instructions from https://www.openldap.org/doc/admin24/monitoringslapd.html to activate monitoring interface.\n\n\n\n### Configuration\n\n#### File\n\nThe configuration file name for this integration is `go.d/openldap.conf`.\n\n\nYou can edit the configuration file using the `edit-config` script from the\nNetdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory).\n\n```bash\ncd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata\nsudo ./edit-config go.d/openldap.conf\n```\n#### Options\n\nThe following options can be defined globally: update_every.\n\n\n| Name | Description | Default | Required |\n|:----|:-----------|:-------|:--------:|\n| update_every | Data collection frequency. | 1 | no |\n| timeout | Timeout for establishing a connection and communication (reading and writing) in seconds. | 2 | no |\n| url | LDAP server URL. | ldap://127.0.0.1:389 | yes |\n| username | The distinguished name (DN) of the user authorized to view the monitor database. | | yes |\n| password | The password associated with the user identified by the DN. 
| | yes |\n| tls_skip_verify | Server certificate chain and hostname validation policy. Controls whether the client performs this check. | no | no |\n| tls_ca | Certification authority that the client uses when verifying the server's certificates. | | no |\n| tls_cert | Client TLS certificate. | | no |\n| tls_key | Client TLS key. | | no |\n\n#### Examples\n\n##### Basic\n\nA basic example configuration.\n\n```yaml\njobs:\n - name: local\n url: ldap://localhost:389\n username: cn=netdata,dc=example,dc=com \n password: secret\n\n```\n##### Multi-instance\n\n> **Note**: When you define multiple jobs, their names must be unique.\n\nCollecting metrics from local and remote instances.\n\n\n```yaml\njobs:\n - name: local\n url: ldap://localhost:389\n username: cn=netdata,dc=example,dc=com \n password: secret\n\n - name: remote\n url: ldap://192.0.2.1:389\n username: cn=netdata,dc=example,dc=com \n password: secret\n\n```\n", + "troubleshooting": "## Troubleshooting\n\n### Debug Mode\n\n**Important**: Debug mode is not supported for data collection jobs created via the UI using the Dyncfg feature.\n\nTo troubleshoot issues with the `openldap` collector, run the `go.d.plugin` with the debug option enabled. The output\nshould give you clues as to why the collector isn't working.\n\n- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. 
If that's not the case on\n your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.\n\n ```bash\n cd /usr/libexec/netdata/plugins.d/\n ```\n\n- Switch to the `netdata` user.\n\n ```bash\n sudo -u netdata -s\n ```\n\n- Run the `go.d.plugin` to debug the collector:\n\n ```bash\n ./go.d.plugin -d -m openldap\n ```\n\n### Getting Logs\n\nIf you're encountering problems with the `openldap` collector, follow these steps to retrieve logs and identify potential issues:\n\n- **Run the command** specific to your system (systemd, non-systemd, or Docker container).\n- **Examine the output** for any warnings or error messages that might indicate issues. These messages should provide clues about the root cause of the problem.\n\n#### System with systemd\n\nUse the following command to view logs generated since the last Netdata service restart:\n\n```bash\njournalctl _SYSTEMD_INVOCATION_ID=\"$(systemctl show --value --property=InvocationID netdata)\" --namespace=netdata --grep openldap\n```\n\n#### System without systemd\n\nLocate the collector log file, typically at `/var/log/netdata/collector.log`, and use `grep` to filter for collector's name:\n\n```bash\ngrep openldap /var/log/netdata/collector.log\n```\n\n**Note**: This method shows logs from all restarts. Focus on the **latest entries** for troubleshooting current issues.\n\n#### Docker Container\n\nIf your Netdata runs in a Docker container named \"netdata\" (replace if different), use this command:\n\n```bash\ndocker logs netdata 2>&1 | grep openldap\n```\n\n", + "alerts": "## Alerts\n\nThere are no alerts configured by default for this integration.\n", + "metrics": "## Metrics\n\nMetrics grouped by *scope*.\n\nThe scope defines the instance that the metric belongs to. 
An instance is uniquely identified by a set of labels.\n\n\n\n### Per OpenLDAP instance\n\nThese metrics refer to the entire monitored application.\n\nThis scope has no labels.\n\nMetrics:\n\n| Metric | Dimensions | Unit |\n|:------|:----------|:----|\n| openldap.current_connections | active | connections |\n| openldap.connections | connections | connections/s |\n| openldap.traffic | sent | bytes/s |\n| openldap.entries | sent | entries/s |\n| openldap.referrals | sent | referrals/s |\n| openldap.operations | completed, initiated | operations/s |\n| openldap.operations_by_type | bind, search, unbind, add, delete, modify, compare | operations/s |\n| openldap.waiters | write, read | waiters/s |\n\n", + "integration_type": "collector", + "id": "go.d.plugin-openldap-OpenLDAP", + "edit_link": "https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/openldap/metadata.yaml", + "related_resources": "" + }, { "meta": { "id": "collector-go.d.plugin-openvpn", @@ -19255,7 +19292,7 @@ "most_popular": false }, "overview": "# Go applications (EXPVAR)\n\nPlugin: python.d.plugin\nModule: go_expvar\n\n## Overview\n\nThis collector monitors Go applications that expose their metrics with the use of the `expvar` package from the Go standard library. 
It produces charts for Go runtime memory statistics and optionally any number of custom charts.\n\nIt connects via http to gather the metrics exposed via the `expvar` package.\n\nThis collector is supported on all platforms.\n\nThis collector supports collecting metrics from multiple instances of this integration, including remote instances.\n\n\n### Default Behavior\n\n#### Auto-Detection\n\nThis integration doesn't support auto-detection.\n\n#### Limits\n\nThe default configuration for this integration does not impose any limits on data collection.\n\n#### Performance Impact\n\nThe default configuration for this integration is not expected to impose a significant performance impact on the system.\n", - "setup": "## Setup\n\n### Prerequisites\n\n#### Enable the go_expvar collector\n\nThe `go_expvar` collector is disabled by default. To enable it, use `edit-config` from the Netdata [config directory](/docs/netdata-agent/configuration/README.md), which is typically at `/etc/netdata`, to edit the `python.d.conf` file.\n\n```bash\ncd /etc/netdata # Replace this path with your Netdata config directory, if different\nsudo ./edit-config python.d.conf\n```\n\nChange the value of the `go_expvar` setting to `yes`. Save the file and restart the Netdata Agent with `sudo systemctl restart netdata`, or the [appropriate method](/packaging/installer/README.md#maintaining-a-netdata-agent-installation) for your system.\n\n\n#### Sample `expvar` usage in a Go application\n\nThe `expvar` package exposes metrics over HTTP and is very easy to use.\nConsider this minimal sample below:\n\n```go\npackage main\n\nimport (\n _ \"expvar\"\n \"net/http\"\n)\n\nfunc main() {\n http.ListenAndServe(\"127.0.0.1:8080\", nil)\n}\n```\n\nWhen imported this way, the `expvar` package registers a HTTP handler at `/debug/vars` that\nexposes Go runtime's memory statistics in JSON format. 
You can inspect the output by opening\nthe URL in your browser (or by using `wget` or `curl`).\n\nSample output:\n\n```json\n{\n\"cmdline\": [\"./expvar-demo-binary\"],\n\"memstats\": {\"Alloc\":630856,\"TotalAlloc\":630856,\"Sys\":3346432,\"Lookups\":27, }\n}\n```\n\nYou can of course expose and monitor your own variables as well.\nHere is a sample Go application that exposes a few custom variables:\n\n```go\npackage main\n\nimport (\n \"expvar\"\n \"net/http\"\n \"runtime\"\n \"time\"\n)\n\nfunc main() {\n\n tick := time.NewTicker(1 * time.Second)\n num_go := expvar.NewInt(\"runtime.goroutines\")\n counters := expvar.NewMap(\"counters\")\n counters.Set(\"cnt1\", new(expvar.Int))\n counters.Set(\"cnt2\", new(expvar.Float))\n\n go http.ListenAndServe(\":8080\", nil)\n\n for {\n select {\n case <- tick.C:\n num_go.Set(int64(runtime.NumGoroutine()))\n counters.Add(\"cnt1\", 1)\n counters.AddFloat(\"cnt2\", 1.452)\n }\n }\n}\n```\n\nApart from the runtime memory stats, this application publishes two counters and the\nnumber of currently running Goroutines and updates these stats every second.\n\n\n\n### Configuration\n\n#### File\n\nThe configuration file name for this integration is `python.d/go_expvar.conf`.\n\n\nYou can edit the configuration file using the `edit-config` script from the\nNetdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory).\n\n```bash\ncd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata\nsudo ./edit-config python.d/go_expvar.conf\n```\n#### Options\n\nThere are 2 sections:\n\n* Global variables\n* One or more JOBS that can define multiple different instances to monitor.\n\nThe following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.\n\nAdditionally, the following collapsed table contains all the options that can be configured inside a JOB definition.\n\nEvery configuration JOB starts with a 
`job_name` value which will appear in the dashboard, unless a `name` parameter is specified. Each JOB can be used to monitor a different Go application.\n\n\n| Name | Description | Default | Required |\n|:----|:-----------|:-------|:--------:|\n| update_every | Sets the default data collection frequency. | 5 | no |\n| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |\n| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |\n| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |\n| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |\n| url | the URL and port of the expvar endpoint. Please include the whole path of the endpoint, as the expvar handler can be installed in a non-standard location. | | yes |\n| user | If the URL is password protected, this is the username to use. | | no |\n| pass | If the URL is password protected, this is the password to use. | | no |\n| collect_memstats | Enables charts for Go runtime's memory statistics. | | no |\n| extra_charts | Defines extra data/charts to monitor, please see the example below. | | no |\n\n#### Examples\n\n##### Monitor a Go app1 application\n\nThe example below sets a configuration for a Go application, called `app1`. 
Besides the `memstats`, the application also exposes two counters and the number of currently running Goroutines and updates these stats every second.\n\nThe `go_expvar` collector can monitor these as well with the use of the `extra_charts` configuration variable.\n\nThe `extra_charts` variable is a YaML list of Netdata chart definitions.\nEach chart definition has the following keys:\n\n```\nid: Netdata chart ID\noptions: a key-value mapping of chart options\nlines: a list of line definitions\n```\n\n**Note: please do not use dots in the chart or line ID field.\nSee [this issue](https://github.com/netdata/netdata/pull/1902#issuecomment-284494195) for explanation.**\n\nPlease see these two links to the official Netdata documentation for more information about the values:\n\n- [External plugins - charts](/src/collectors/plugins.d/README.md#chart)\n- [Chart variables](/src/collectors/python.d.plugin/README.md#global-variables-order-and-chart)\n\n**Line definitions**\n\nEach chart can define multiple lines (dimensions).\nA line definition is a key-value mapping of line options.\nEach line can have the following options:\n\n```\n# mandatory\nexpvar_key: the name of the expvar as present in the JSON output of /debug/vars endpoint\nexpvar_type: value type; supported are \"float\" or \"int\"\nid: the id of this line/dimension in Netdata\n\n# optional - Netdata defaults are used if these options are not defined\nname: ''\nalgorithm: absolute\nmultiplier: 1\ndivisor: 100 if expvar_type == float, 1 if expvar_type == int\nhidden: False\n```\n\nPlease see the following link for more information about the options and their default values:\n[External plugins - dimensions](/src/collectors/plugins.d/README.md#dimension)\n\nApart from top-level expvars, this plugin can also parse expvars stored in a multi-level map;\nAll dicts in the resulting JSON document are then flattened to one level.\nExpvar names are joined together with '.' 
when flattening.\n\nExample:\n\n```\n{\n \"counters\": {\"cnt1\": 1042, \"cnt2\": 1512.9839999999983},\n \"runtime.goroutines\": 5\n}\n```\n\nIn the above case, the exported variables will be available under `runtime.goroutines`,\n`counters.cnt1` and `counters.cnt2` expvar_keys. If the flattening results in a key collision,\nthe first defined key wins and all subsequent keys with the same name are ignored.\n\n\n```yaml\napp1:\n name : 'app1'\n url : 'http://127.0.0.1:8080/debug/vars'\n collect_memstats: true\n extra_charts:\n - id: \"runtime_goroutines\"\n options:\n name: num_goroutines\n title: \"runtime: number of goroutines\"\n units: goroutines\n family: runtime\n context: expvar.runtime.goroutines\n chart_type: line\n lines:\n - {expvar_key: 'runtime.goroutines', expvar_type: int, id: runtime_goroutines}\n - id: \"foo_counters\"\n options:\n name: counters\n title: \"some random counters\"\n units: awesomeness\n family: counters\n context: expvar.foo.counters\n chart_type: line\n lines:\n - {expvar_key: 'counters.cnt1', expvar_type: int, id: counters_cnt1}\n - {expvar_key: 'counters.cnt2', expvar_type: float, id: counters_cnt2}\n\n```\n", + "setup": "## Setup\n\n### Prerequisites\n\n#### Enable the go_expvar collector\n\nThe `go_expvar` collector is disabled by default. To enable it, use `edit-config` from the Netdata [config directory](/docs/netdata-agent/configuration/README.md), which is typically at `/etc/netdata`, to edit the `python.d.conf` file.\n\n```bash\ncd /etc/netdata # Replace this path with your Netdata config directory, if different\nsudo ./edit-config python.d.conf\n```\n\nChange the value of the `go_expvar` setting to `yes`. 
Save the file and restart the Netdata Agent with `sudo systemctl restart netdata`, or the [appropriate method](/packaging/installer/README.md#maintaining-a-netdata-agent-installation) for your system.\n\n\n#### Sample `expvar` usage in a Go application\n\nThe `expvar` package exposes metrics over HTTP and is very easy to use.\nConsider this minimal sample below:\n\n```go\npackage main\n\nimport (\n _ \"expvar\"\n \"net/http\"\n)\n\nfunc main() {\n http.ListenAndServe(\"127.0.0.1:8080\", nil)\n}\n```\n\nWhen imported this way, the `expvar` package registers a HTTP handler at `/debug/vars` that\nexposes Go runtime's memory statistics in JSON format. You can inspect the output by opening\nthe URL in your browser (or by using `wget` or `curl`).\n\nSample output:\n\n```json\n{\n\"cmdline\": [\"./expvar-demo-binary\"],\n\"memstats\": {\"Alloc\":630856,\"TotalAlloc\":630856,\"Sys\":3346432,\"Lookups\":27, }\n}\n```\n\nYou can of course expose and monitor your own variables as well.\nHere is a sample Go application that exposes a few custom variables:\n\n```go\npackage main\n\nimport (\n \"expvar\"\n \"net/http\"\n \"runtime\"\n \"time\"\n)\n\nfunc main() {\n\n tick := time.NewTicker(1 * time.Second)\n num_go := expvar.NewInt(\"runtime.goroutines\")\n counters := expvar.NewMap(\"counters\")\n counters.Set(\"cnt1\", new(expvar.Int))\n counters.Set(\"cnt2\", new(expvar.Float))\n\n go http.ListenAndServe(\":8080\", nil)\n\n for {\n select {\n case <- tick.C:\n num_go.Set(int64(runtime.NumGoroutine()))\n counters.Add(\"cnt1\", 1)\n counters.AddFloat(\"cnt2\", 1.452)\n }\n }\n}\n```\n\nApart from the runtime memory stats, this application publishes two counters and the\nnumber of currently running Goroutines and updates these stats every second.\n\n\n\n### Configuration\n\n#### File\n\nThe configuration file name for this integration is `python.d/go_expvar.conf`.\n\n\nYou can edit the configuration file using the `edit-config` script from the\nNetdata [config 
directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory).\n\n```bash\ncd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata\nsudo ./edit-config python.d/go_expvar.conf\n```\n#### Options\n\nThere are 2 sections:\n\n* Global variables\n* One or more JOBS that can define multiple different instances to monitor.\n\nThe following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.\n\nAdditionally, the following collapsed table contains all the options that can be configured inside a JOB definition.\n\nEvery configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. Each JOB can be used to monitor a different Go application.\n\n\n| Name | Description | Default | Required |\n|:----|:-----------|:-------|:--------:|\n| update_every | Sets the default data collection frequency. | 5 | no |\n| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |\n| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |\n| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |\n| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |\n| url | the URL and port of the expvar endpoint. Please include the whole path of the endpoint, as the expvar handler can be installed in a non-standard location. | | yes |\n| user | If the URL is password protected, this is the username to use. | | no |\n| pass | If the URL is password protected, this is the password to use. | | no |\n| collect_memstats | Enables charts for Go runtime's memory statistics. 
| | no |\n| extra_charts | Defines extra data/charts to monitor, please see the example below. | | no |\n\n#### Examples\n\n##### Monitor a Go app1 application\n\nThe example below sets a configuration for a Go application, called `app1`. Besides the `memstats`, the application also exposes two counters and the number of currently running Goroutines and updates these stats every second.\n\nThe `go_expvar` collector can monitor these as well with the use of the `extra_charts` configuration variable.\n\nThe `extra_charts` variable is a YaML list of Netdata chart definitions.\nEach chart definition has the following keys:\n\n```\nid: Netdata chart ID\noptions: a key-value mapping of chart options\nlines: a list of line definitions\n```\n\n**Note: please do not use dots in the chart or line ID field.\nSee [this issue](https://github.com/netdata/netdata/pull/1902#issuecomment-284494195) for explanation.**\n\nPlease see these two links to the official Netdata documentation for more information about the values:\n\n- [External plugins - charts](/src/plugins.d/README.md#chart)\n- [Chart variables](/src/collectors/python.d.plugin/README.md#global-variables-order-and-chart)\n\n**Line definitions**\n\nEach chart can define multiple lines (dimensions).\nA line definition is a key-value mapping of line options.\nEach line can have the following options:\n\n```\n# mandatory\nexpvar_key: the name of the expvar as present in the JSON output of /debug/vars endpoint\nexpvar_type: value type; supported are \"float\" or \"int\"\nid: the id of this line/dimension in Netdata\n\n# optional - Netdata defaults are used if these options are not defined\nname: ''\nalgorithm: absolute\nmultiplier: 1\ndivisor: 100 if expvar_type == float, 1 if expvar_type == int\nhidden: False\n```\n\nPlease see the following link for more information about the options and their default values:\n[External plugins - dimensions](/src/plugins.d/README.md#dimension)\n\nApart from top-level expvars, this plugin can 
also parse expvars stored in a multi-level map;\nAll dicts in the resulting JSON document are then flattened to one level.\nExpvar names are joined together with '.' when flattening.\n\nExample:\n\n```\n{\n \"counters\": {\"cnt1\": 1042, \"cnt2\": 1512.9839999999983},\n \"runtime.goroutines\": 5\n}\n```\n\nIn the above case, the exported variables will be available under `runtime.goroutines`,\n`counters.cnt1` and `counters.cnt2` expvar_keys. If the flattening results in a key collision,\nthe first defined key wins and all subsequent keys with the same name are ignored.\n\n\n```yaml\napp1:\n name : 'app1'\n url : 'http://127.0.0.1:8080/debug/vars'\n collect_memstats: true\n extra_charts:\n - id: \"runtime_goroutines\"\n options:\n name: num_goroutines\n title: \"runtime: number of goroutines\"\n units: goroutines\n family: runtime\n context: expvar.runtime.goroutines\n chart_type: line\n lines:\n - {expvar_key: 'runtime.goroutines', expvar_type: int, id: runtime_goroutines}\n - id: \"foo_counters\"\n options:\n name: counters\n title: \"some random counters\"\n units: awesomeness\n family: counters\n context: expvar.foo.counters\n chart_type: line\n lines:\n - {expvar_key: 'counters.cnt1', expvar_type: int, id: counters_cnt1}\n - {expvar_key: 'counters.cnt2', expvar_type: float, id: counters_cnt2}\n\n```\n", "troubleshooting": "## Troubleshooting\n\n### Debug Mode\n\n\nTo troubleshoot issues with the `go_expvar` collector, run the `python.d.plugin` with the debug option enabled. The output\nshould give you clues as to why the collector isn't working.\n\n- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. 
If that's not the case on\n your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.\n\n ```bash\n cd /usr/libexec/netdata/plugins.d/\n ```\n\n- Switch to the `netdata` user.\n\n ```bash\n sudo -u netdata -s\n ```\n\n- Run the `python.d.plugin` to debug the collector:\n\n ```bash\n ./python.d.plugin go_expvar debug trace\n ```\n\n### Getting Logs\n\nIf you're encountering problems with the `go_expvar` collector, follow these steps to retrieve logs and identify potential issues:\n\n- **Run the command** specific to your system (systemd, non-systemd, or Docker container).\n- **Examine the output** for any warnings or error messages that might indicate issues. These messages should provide clues about the root cause of the problem.\n\n#### System with systemd\n\nUse the following command to view logs generated since the last Netdata service restart:\n\n```bash\njournalctl _SYSTEMD_INVOCATION_ID=\"$(systemctl show --value --property=InvocationID netdata)\" --namespace=netdata --grep go_expvar\n```\n\n#### System without systemd\n\nLocate the collector log file, typically at `/var/log/netdata/collector.log`, and use `grep` to filter for collector's name:\n\n```bash\ngrep go_expvar /var/log/netdata/collector.log\n```\n\n**Note**: This method shows logs from all restarts. Focus on the **latest entries** for troubleshooting current issues.\n\n#### Docker Container\n\nIf your Netdata runs in a Docker container named \"netdata\" (replace if different), use this command:\n\n```bash\ndocker logs netdata 2>&1 | grep go_expvar\n```\n\n", "alerts": "## Alerts\n\nThere are no alerts configured by default for this integration.\n", "metrics": "## Metrics\n\nMetrics grouped by *scope*.\n\nThe scope defines the instance that the metric belongs to. 
An instance is uniquely identified by a set of labels.\n\n\n\n### Per Go applications (EXPVAR) instance\n\nThese metrics refer to the entire monitored application.\n\nThis scope has no labels.\n\nMetrics:\n\n| Metric | Dimensions | Unit |\n|:------|:----------|:----|\n| expvar.memstats.heap | alloc, inuse | KiB |\n| expvar.memstats.stack | inuse | KiB |\n| expvar.memstats.mspan | inuse | KiB |\n| expvar.memstats.mcache | inuse | KiB |\n| expvar.memstats.live_objects | live | objects |\n| expvar.memstats.sys | sys | KiB |\n| expvar.memstats.gc_pauses | avg | ns |\n\n", @@ -19264,43 +19301,6 @@ "edit_link": "https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/go_expvar/metadata.yaml", "related_resources": "" }, - { - "meta": { - "plugin_name": "python.d.plugin", - "module_name": "openldap", - "monitored_instance": { - "name": "OpenLDAP", - "link": "https://www.openldap.org/", - "categories": [ - "data-collection.authentication-and-authorization" - ], - "icon_filename": "statsd.png" - }, - "related_resources": { - "integrations": { - "list": [] - } - }, - "info_provided_to_referring_integrations": { - "description": "" - }, - "keywords": [ - "openldap", - "RBAC", - "Directory access" - ], - "most_popular": false - }, - "overview": "# OpenLDAP\n\nPlugin: python.d.plugin\nModule: openldap\n\n## Overview\n\nThis collector monitors OpenLDAP metrics about connections, operations, referrals and more.\n\nStatistics are taken from the monitoring interface of a openLDAP (slapd) server\n\n\nThis collector is supported on all platforms.\n\nThis collector only supports collecting metrics from a single instance of this integration.\n\n\n### Default Behavior\n\n#### Auto-Detection\n\nThis collector doesn't work until all the prerequisites are checked.\n\n\n#### Limits\n\nThe default configuration for this integration does not impose any limits on data collection.\n\n#### Performance Impact\n\nThe default configuration for this integration is not expected 
to impose a significant performance impact on the system.\n", - "setup": "## Setup\n\n### Prerequisites\n\n#### Configure the openLDAP server to expose metrics to monitor it.\n\nFollow instructions from https://www.openldap.org/doc/admin24/monitoringslapd.html to activate monitoring interface.\n\n\n#### Install python-ldap module\n\nInstall python ldap module \n\n1. From pip package manager\n\n```bash\npip install ldap\n```\n\n2. With apt package manager (in most deb based distros)\n\n\n```bash\napt-get install python-ldap\n```\n\n\n3. With yum package manager (in most rpm based distros)\n\n\n```bash\nyum install python-ldap\n```\n\n\n#### Insert credentials for Netdata to access openLDAP server\n\nUse the `ldappasswd` utility to set a password for the username you will use.\n\n\n\n### Configuration\n\n#### File\n\nThe configuration file name for this integration is `python.d/openldap.conf`.\n\n\nYou can edit the configuration file using the `edit-config` script from the\nNetdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory).\n\n```bash\ncd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata\nsudo ./edit-config python.d/openldap.conf\n```\n#### Options\n\nThere are 2 sections:\n\n* Global variables\n* One or more JOBS that can define multiple different instances to monitor.\n\nThe following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.\n\nAdditionally, the following collapsed table contains all the options that can be configured inside a JOB definition.\n\nEvery configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.\n\n\n| Name | Description | Default | Required |\n|:----|:-----------|:-------|:--------:|\n| update_every | Sets the default data collection frequency. | 5 | no |\n| priority | Controls the order of charts at the netdata dashboard. 
| 60000 | no |\n| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |\n| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |\n| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |\n| username | The bind user with right to access monitor statistics | | yes |\n| password | The password for the binded user | | yes |\n| server | The listening address of the LDAP server. In case of TLS, use the hostname which the certificate is published for. | | yes |\n| port | The listening port of the LDAP server. Change to 636 port in case of TLS connection. | 389 | yes |\n| use_tls | Make True if a TLS connection is used over ldaps:// | no | no |\n| use_start_tls | Make True if a TLS connection is used over ldap:// | no | no |\n| cert_check | False if you want to ignore certificate check | True | yes |\n| timeout | Seconds to timeout if no connection exist | | yes |\n\n#### Examples\n\n##### Basic\n\nA basic example configuration.\n\n```yaml\nusername: \"cn=admin\"\npassword: \"pass\"\nserver: \"localhost\"\nport: \"389\"\ncheck_cert: True\ntimeout: 1\n\n```\n", - "troubleshooting": "## Troubleshooting\n\n### Debug Mode\n\n\nTo troubleshoot issues with the `openldap` collector, run the `python.d.plugin` with the debug option enabled. The output\nshould give you clues as to why the collector isn't working.\n\n- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. 
If that's not the case on\n your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.\n\n ```bash\n cd /usr/libexec/netdata/plugins.d/\n ```\n\n- Switch to the `netdata` user.\n\n ```bash\n sudo -u netdata -s\n ```\n\n- Run the `python.d.plugin` to debug the collector:\n\n ```bash\n ./python.d.plugin openldap debug trace\n ```\n\n### Getting Logs\n\nIf you're encountering problems with the `openldap` collector, follow these steps to retrieve logs and identify potential issues:\n\n- **Run the command** specific to your system (systemd, non-systemd, or Docker container).\n- **Examine the output** for any warnings or error messages that might indicate issues. These messages should provide clues about the root cause of the problem.\n\n#### System with systemd\n\nUse the following command to view logs generated since the last Netdata service restart:\n\n```bash\njournalctl _SYSTEMD_INVOCATION_ID=\"$(systemctl show --value --property=InvocationID netdata)\" --namespace=netdata --grep openldap\n```\n\n#### System without systemd\n\nLocate the collector log file, typically at `/var/log/netdata/collector.log`, and use `grep` to filter for collector's name:\n\n```bash\ngrep openldap /var/log/netdata/collector.log\n```\n\n**Note**: This method shows logs from all restarts. Focus on the **latest entries** for troubleshooting current issues.\n\n#### Docker Container\n\nIf your Netdata runs in a Docker container named \"netdata\" (replace if different), use this command:\n\n```bash\ndocker logs netdata 2>&1 | grep openldap\n```\n\n", - "alerts": "## Alerts\n\nThere are no alerts configured by default for this integration.\n", - "metrics": "## Metrics\n\nMetrics grouped by *scope*.\n\nThe scope defines the instance that the metric belongs to. 
An instance is uniquely identified by a set of labels.\n\n\n\n### Per OpenLDAP instance\n\nThese metrics refer to the entire monitored application.\n\nThis scope has no labels.\n\nMetrics:\n\n| Metric | Dimensions | Unit |\n|:------|:----------|:----|\n| openldap.total_connections | connections | connections/s |\n| openldap.traffic_stats | sent | KiB/s |\n| openldap.operations_status | completed, initiated | ops/s |\n| openldap.referrals | sent | referrals/s |\n| openldap.entries | sent | entries/s |\n| openldap.ldap_operations | bind, search, unbind, add, delete, modify, compare | ops/s |\n| openldap.waiters | write, read | waiters/s |\n\n", - "integration_type": "collector", - "id": "python.d.plugin-openldap-OpenLDAP", - "edit_link": "https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/openldap/metadata.yaml", - "related_resources": "" - }, { "meta": { "plugin_name": "python.d.plugin", diff --git a/src/collectors/COLLECTORS.md b/src/collectors/COLLECTORS.md index f0d45de466da3f..528aeaa1d53308 100644 --- a/src/collectors/COLLECTORS.md +++ b/src/collectors/COLLECTORS.md @@ -77,7 +77,7 @@ If you don't see the app/service you'd like to monitor in this list: - [OpenLDAP (community)](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/openldap_community.md) -- [OpenLDAP](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/openldap/integrations/openldap.md) +- [OpenLDAP](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/openldap/integrations/openldap.md) - [RADIUS](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/radius.md) diff --git a/src/go/plugin/go.d/modules/openldap/README.md b/src/go/plugin/go.d/modules/openldap/README.md new file mode 120000 index 00000000000000..45f36b9b92ea6b --- /dev/null +++ b/src/go/plugin/go.d/modules/openldap/README.md @@ -0,0 +1 @@ +integrations/openldap.md \ No 
newline at end of file diff --git a/src/go/plugin/go.d/modules/openldap/integrations/openldap.md b/src/go/plugin/go.d/modules/openldap/integrations/openldap.md new file mode 100644 index 00000000000000..b2352f33772791 --- /dev/null +++ b/src/go/plugin/go.d/modules/openldap/integrations/openldap.md @@ -0,0 +1,228 @@ + + +# OpenLDAP + + + + + +Plugin: go.d.plugin +Module: openldap + + + +## Overview + +This collector monitors OpenLDAP metrics about connections, operations, referrals and more. + + +It gathers the metrics using the [go-ldap](https://github.com/go-ldap/ldap) module and the [Monitor backend](https://www.openldap.org/doc/admin24/monitoringslapd.html) of OpenLDAP. + + +This collector is only supported on the following platforms: + +- Linux + +This collector supports collecting metrics from multiple instances of this integration, including remote instances. + + +### Default Behavior + +#### Auto-Detection + +The collector cannot auto-detect OpenLDAP instances, because credential configuration is required. + +#### Limits + +The default configuration for this integration does not impose any limits on data collection. + +#### Performance Impact + +The default configuration for this integration is not expected to impose a significant performance impact on the system. + + +## Metrics + +Metrics grouped by *scope*. + +The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. + + + +### Per OpenLDAP instance + +These metrics refer to the entire monitored application. + +This scope has no labels. 
+ +Metrics: + +| Metric | Dimensions | Unit | +|:------|:----------|:----| +| openldap.current_connections | active | connections | +| openldap.connections | connections | connections/s | +| openldap.traffic | sent | bytes/s | +| openldap.entries | sent | entries/s | +| openldap.referrals | sent | referrals/s | +| openldap.operations | completed, initiated | operations/s | +| openldap.operations_by_type | bind, search, unbind, add, delete, modify, compare | operations/s | +| openldap.waiters | write, read | waiters/s | + + + +## Alerts + +There are no alerts configured by default for this integration. + + +## Setup + +### Prerequisites + +#### Enable the openLDAP Monitor Backend. + +Follow instructions from https://www.openldap.org/doc/admin24/monitoringslapd.html to activate monitoring interface. + + + +### Configuration + +#### File + +The configuration file name for this integration is `go.d/openldap.conf`. + + +You can edit the configuration file using the `edit-config` script from the +Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). + +```bash +cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata +sudo ./edit-config go.d/openldap.conf +``` +#### Options + +The following options can be defined globally: update_every. + + +
Config options + +| Name | Description | Default | Required | +|:----|:-----------|:-------|:--------:| +| update_every | Data collection frequency. | 1 | no | +| timeout | Timeout for establishing a connection and communication (reading and writing) in seconds. | 2 | no | +| url | LDAP server URL. | ldap://127.0.0.1:389 | yes | +| username | The distinguished name (DN) of the user authorized to view the monitor database. | | yes | +| password | The password associated with the user identified by the DN. | | yes | +| tls_skip_verify | Server certificate chain and hostname validation policy. Controls whether the client performs this check. | no | no | +| tls_ca | Certification authority that the client uses when verifying the server's certificates. | | no | +| tls_cert | Client TLS certificate. | | no | +| tls_key | Client TLS key. | | no | + +
+ +#### Examples + +##### Basic + +A basic example configuration. + +```yaml +jobs: + - name: local + url: ldap://localhost:389 + username: cn=netdata,dc=example,dc=com + password: secret + +``` +##### Multi-instance + +> **Note**: When you define multiple jobs, their names must be unique. + +Collecting metrics from local and remote instances. + + +```yaml +jobs: + - name: local + url: ldap://localhost:389 + username: cn=netdata,dc=example,dc=com + password: secret + + - name: remote + url: ldap://192.0.2.1:389 + username: cn=netdata,dc=example,dc=com + password: secret + +``` + + +## Troubleshooting + +### Debug Mode + +**Important**: Debug mode is not supported for data collection jobs created via the UI using the Dyncfg feature. + +To troubleshoot issues with the `openldap` collector, run the `go.d.plugin` with the debug option enabled. The output +should give you clues as to why the collector isn't working. + +- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on + your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. + + ```bash + cd /usr/libexec/netdata/plugins.d/ + ``` + +- Switch to the `netdata` user. + + ```bash + sudo -u netdata -s + ``` + +- Run the `go.d.plugin` to debug the collector: + + ```bash + ./go.d.plugin -d -m openldap + ``` + +### Getting Logs + +If you're encountering problems with the `openldap` collector, follow these steps to retrieve logs and identify potential issues: + +- **Run the command** specific to your system (systemd, non-systemd, or Docker container). +- **Examine the output** for any warnings or error messages that might indicate issues. These messages should provide clues about the root cause of the problem. 
+ +#### System with systemd + +Use the following command to view logs generated since the last Netdata service restart: + +```bash +journalctl _SYSTEMD_INVOCATION_ID="$(systemctl show --value --property=InvocationID netdata)" --namespace=netdata --grep openldap +``` + +#### System without systemd + +Locate the collector log file, typically at `/var/log/netdata/collector.log`, and use `grep` to filter for collector's name: + +```bash +grep openldap /var/log/netdata/collector.log +``` + +**Note**: This method shows logs from all restarts. Focus on the **latest entries** for troubleshooting current issues. + +#### Docker Container + +If your Netdata runs in a Docker container named "netdata" (replace if different), use this command: + +```bash +docker logs netdata 2>&1 | grep openldap +``` + + From 0689f2bf01a71b4ec1c5163798c39b2c03f67b11 Mon Sep 17 00:00:00 2001 From: Costa Tsaousis Date: Mon, 30 Sep 2024 18:18:42 +0300 Subject: [PATCH 10/23] fixed freebsd cpu calculation (#18648) --- src/collectors/apps.plugin/apps_os_freebsd.c | 8 ++++---- src/collectors/apps.plugin/apps_plugin.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/collectors/apps.plugin/apps_os_freebsd.c b/src/collectors/apps.plugin/apps_os_freebsd.c index bec6d0c4d9e2ce..f2480acfa0b694 100644 --- a/src/collectors/apps.plugin/apps_os_freebsd.c +++ b/src/collectors/apps.plugin/apps_os_freebsd.c @@ -281,10 +281,10 @@ bool apps_os_read_pid_stat_freebsd(struct pid_stat *p, void *ptr) { pid_incremental_rate(stat, PDF_CMINFLT, (kernel_uint_t)proc_info->ki_rusage_ch.ru_minflt); pid_incremental_rate(stat, PDF_MAJFLT, (kernel_uint_t)proc_info->ki_rusage.ru_majflt); pid_incremental_rate(stat, PDF_CMAJFLT, (kernel_uint_t)proc_info->ki_rusage_ch.ru_majflt); - pid_incremental_rate(stat, PDF_UTIME, (kernel_uint_t)proc_info->ki_rusage.ru_utime.tv_sec * NSEC_PER_SEC + proc_info->ki_rusage.ru_utime.tv_usec * NSEC_PER_USEC); - pid_incremental_rate(stat, PDF_STIME, 
(kernel_uint_t)proc_info->ki_rusage.ru_stime.tv_sec * NSEC_PER_SEC + proc_info->ki_rusage.ru_stime.tv_usec * NSEC_PER_USEC); - pid_incremental_rate(stat, PDF_CUTIME, (kernel_uint_t)proc_info->ki_rusage_ch.ru_utime.tv_sec * NSEC_PER_SEC + proc_info->ki_rusage_ch.ru_utime.tv_usec * NSEC_PER_USEC); - pid_incremental_rate(stat, PDF_CSTIME, (kernel_uint_t)proc_info->ki_rusage_ch.ru_stime.tv_sec * NSEC_PER_SEC + proc_info->ki_rusage_ch.ru_stime.tv_usec * NSEC_PER_USEC); + pid_incremental_cpu(stat, PDF_UTIME, (kernel_uint_t)proc_info->ki_rusage.ru_utime.tv_sec * NSEC_PER_SEC + proc_info->ki_rusage.ru_utime.tv_usec * NSEC_PER_USEC); + pid_incremental_cpu(stat, PDF_STIME, (kernel_uint_t)proc_info->ki_rusage.ru_stime.tv_sec * NSEC_PER_SEC + proc_info->ki_rusage.ru_stime.tv_usec * NSEC_PER_USEC); + pid_incremental_cpu(stat, PDF_CUTIME, (kernel_uint_t)proc_info->ki_rusage_ch.ru_utime.tv_sec * NSEC_PER_SEC + proc_info->ki_rusage_ch.ru_utime.tv_usec * NSEC_PER_USEC); + pid_incremental_cpu(stat, PDF_CSTIME, (kernel_uint_t)proc_info->ki_rusage_ch.ru_stime.tv_sec * NSEC_PER_SEC + proc_info->ki_rusage_ch.ru_stime.tv_usec * NSEC_PER_USEC); p->values[PDF_THREADS] = proc_info->ki_numthreads; diff --git a/src/collectors/apps.plugin/apps_plugin.h b/src/collectors/apps.plugin/apps_plugin.h index bf6e3924a7f63a..cc131c08fa542b 100644 --- a/src/collectors/apps.plugin/apps_plugin.h +++ b/src/collectors/apps.plugin/apps_plugin.h @@ -36,7 +36,7 @@ #define PROCESSES_HAVE_STATE 0 #define PPID_SHOULD_BE_RUNNING 1 #define INCREMENTAL_DATA_COLLECTION 1 -#define CPU_TO_NANOSECONDCORES (1000) // convert microseconds to nanoseconds +#define CPU_TO_NANOSECONDCORES (1) #define OS_FUNCTION(func) OS_FUNC_CONCAT(func, _freebsd) #elif defined(OS_MACOS) From 4ba63ac03bda214e96793455c4051439e7ab952e Mon Sep 17 00:00:00 2001 From: netdatabot Date: Tue, 1 Oct 2024 00:21:31 +0000 Subject: [PATCH 11/23] [ci skip] Update changelog and version for nightly build: v1.99.0-218-nightly. 
--- CHANGELOG.md | 13 +++++++------ packaging/version | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b887dcafbf026f..abf4536c76333c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,12 +6,18 @@ **Merged pull requests:** +- fixed freebsd cpu calculation [\#18648](https://github.com/netdata/netdata/pull/18648) ([ktsaou](https://github.com/ktsaou)) +- Regenerate integrations.js [\#18647](https://github.com/netdata/netdata/pull/18647) ([netdatabot](https://github.com/netdatabot)) +- Update file names. [\#18638](https://github.com/netdata/netdata/pull/18638) ([vkalintiris](https://github.com/vkalintiris)) +- Move plugins.d directory outside of collectors [\#18637](https://github.com/netdata/netdata/pull/18637) ([vkalintiris](https://github.com/vkalintiris)) - go.d/smartctl: fix exit status check in scan [\#18635](https://github.com/netdata/netdata/pull/18635) ([ilyam8](https://github.com/ilyam8)) - go.d pkg/socket: keep only one timeout option [\#18633](https://github.com/netdata/netdata/pull/18633) ([ilyam8](https://github.com/ilyam8)) - Regenerate integrations.js [\#18630](https://github.com/netdata/netdata/pull/18630) ([netdatabot](https://github.com/netdatabot)) - go.d/postgres: fix checkpoints query for postgres 17 [\#18629](https://github.com/netdata/netdata/pull/18629) ([ilyam8](https://github.com/ilyam8)) - go.d/ceph: fix leftovers after \#18582 [\#18628](https://github.com/netdata/netdata/pull/18628) ([ilyam8](https://github.com/ilyam8)) - Regenerate integrations.js [\#18627](https://github.com/netdata/netdata/pull/18627) ([netdatabot](https://github.com/netdatabot)) +- Remove Python OpenLDAP implementation [\#18626](https://github.com/netdata/netdata/pull/18626) ([Ancairon](https://github.com/Ancairon)) +- Port the OpenLDAP collector from Python to Go [\#18625](https://github.com/netdata/netdata/pull/18625) ([Ancairon](https://github.com/Ancairon)) - Change default pages per extent 
[\#18623](https://github.com/netdata/netdata/pull/18623) ([stelfrag](https://github.com/stelfrag)) - Misc mqtt related code cleanup [\#18622](https://github.com/netdata/netdata/pull/18622) ([stelfrag](https://github.com/stelfrag)) - Revert "Add ceph commands to ndsudo" [\#18620](https://github.com/netdata/netdata/pull/18620) ([ilyam8](https://github.com/ilyam8)) @@ -26,6 +32,7 @@ - Regenerate integrations.js [\#18598](https://github.com/netdata/netdata/pull/18598) ([netdatabot](https://github.com/netdatabot)) - go.d/sensors fix meta [\#18597](https://github.com/netdata/netdata/pull/18597) ([ilyam8](https://github.com/ilyam8)) - go.d/sensors update meta [\#18595](https://github.com/netdata/netdata/pull/18595) ([ilyam8](https://github.com/ilyam8)) +- apps.plugin for windows [\#18594](https://github.com/netdata/netdata/pull/18594) ([ktsaou](https://github.com/ktsaou)) - Regenerate integrations.js [\#18592](https://github.com/netdata/netdata/pull/18592) ([netdatabot](https://github.com/netdatabot)) - Add DLLs to CmakeLists.txt [\#18590](https://github.com/netdata/netdata/pull/18590) ([thiagoftsm](https://github.com/thiagoftsm)) - Bump go.mongodb.org/mongo-driver from 1.16.1 to 1.17.0 in /src/go [\#18589](https://github.com/netdata/netdata/pull/18589) ([dependabot[bot]](https://github.com/apps/dependabot)) @@ -415,12 +422,6 @@ - Port ipfs from python to Go [\#18070](https://github.com/netdata/netdata/pull/18070) ([Ancairon](https://github.com/Ancairon)) - update golang version in netdata.spec [\#18069](https://github.com/netdata/netdata/pull/18069) ([ilyam8](https://github.com/ilyam8)) - go.d set sensitive props to "password" widget [\#18068](https://github.com/netdata/netdata/pull/18068) ([ilyam8](https://github.com/ilyam8)) -- netdata.spec/plugin-go: added weak dependency for lm\_sensors [\#18067](https://github.com/netdata/netdata/pull/18067) ([k0ste](https://github.com/k0ste)) -- Disable health thread on windows 
[\#18066](https://github.com/netdata/netdata/pull/18066) ([stelfrag](https://github.com/stelfrag)) -- Remove hard-coded url from python.d puppet chart plugin [\#18064](https://github.com/netdata/netdata/pull/18064) ([Hufschmidt](https://github.com/Hufschmidt)) -- go.d postgres github.com/jackc/pgx/v5 [\#18062](https://github.com/netdata/netdata/pull/18062) ([ilyam8](https://github.com/ilyam8)) -- fix prometeus export: missing comma before "instance" label [\#18061](https://github.com/netdata/netdata/pull/18061) ([ilyam8](https://github.com/ilyam8)) -- go.d vsphere add update\_every ui:help [\#18060](https://github.com/netdata/netdata/pull/18060) ([ilyam8](https://github.com/ilyam8)) ## [v1.46.3](https://github.com/netdata/netdata/tree/v1.46.3) (2024-07-23) diff --git a/packaging/version b/packaging/version index 3da8b6ab413d46..8d140b61ad9805 100644 --- a/packaging/version +++ b/packaging/version @@ -1 +1 @@ -v1.99.0-210-nightly +v1.99.0-218-nightly From ff8feb3f74bf709daed0b5c5ae70bf250ccc0050 Mon Sep 17 00:00:00 2001 From: Ilya Mashchenko Date: Tue, 1 Oct 2024 10:52:58 +0300 Subject: [PATCH 12/23] go.d remove duplicate chart check in tests (#18650) --- src/go/plugin/go.d/agent/module/charts.go | 8 +++-- .../go.d/modules/apcupsd/apcupsd_test.go | 2 +- .../go.d/modules/couchbase/couchbase_test.go | 22 ++----------- .../go.d/modules/couchdb/couchdb_test.go | 24 +++----------- .../go.d/modules/dnsdist/dnsdist_test.go | 22 ++----------- .../go.d/modules/dnsmasq/dnsmasq_test.go | 22 ++----------- .../docker_engine/docker_engine_test.go | 22 +++---------- .../elasticsearch/elasticsearch_test.go | 29 +---------------- .../plugin/go.d/modules/envoy/envoy_test.go | 15 +-------- .../modules/freeradius/freeradius_test.go | 19 ++---------- .../go.d/modules/haproxy/haproxy_test.go | 22 ++----------- .../go.d/modules/isc_dhcpd/isc_dhcpd_test.go | 24 +++----------- .../go.d/modules/logstash/logstash_test.go | 15 +-------- .../plugin/go.d/modules/mysql/mysql_test.go | 18 
++++------- .../go.d/modules/nginxplus/nginxplus_test.go | 20 ++---------- .../go.d/modules/nginxvts/nginxvts_test.go | 22 ++----------- src/go/plugin/go.d/modules/pika/pika_test.go | 22 ++----------- .../modules/powerdns/authoritativens_test.go | 22 ++----------- .../powerdns_recursor/recursor_test.go | 22 ++----------- .../plugin/go.d/modules/pulsar/pulsar_test.go | 21 +++---------- .../plugin/go.d/modules/redis/redis_test.go | 25 +++------------ .../go.d/modules/rethinkdb/rethinkdb_test.go | 4 +-- .../modules/supervisord/supervisord_test.go | 31 ++----------------- .../modules/systemdunits/systemdunits_test.go | 18 +---------- .../go.d/modules/traefik/traefik_test.go | 24 +++----------- .../go.d/modules/unbound/unbound_test.go | 22 +++---------- .../go.d/modules/vsphere/vsphere_test.go | 19 ++---------- .../modules/whoisquery/whoisquery_test.go | 21 +++---------- .../go.d/modules/x509check/x509check_test.go | 19 ++---------- .../go.d/modules/zfspool/zfspool_test.go | 2 +- .../go.d/modules/zookeeper/zookeeper_test.go | 22 ++----------- 31 files changed, 94 insertions(+), 506 deletions(-) diff --git a/src/go/plugin/go.d/agent/module/charts.go b/src/go/plugin/go.d/agent/module/charts.go index 1b266e0b6454ad..70b02470269cc1 100644 --- a/src/go/plugin/go.d/agent/module/charts.go +++ b/src/go/plugin/go.d/agent/module/charts.go @@ -468,12 +468,16 @@ func TestMetricsHasAllChartsDims(t *testing.T, charts *Charts, mx map[string]int TestMetricsHasAllChartsDimsSkip(t, charts, mx, nil) } -func TestMetricsHasAllChartsDimsSkip(t *testing.T, charts *Charts, mx map[string]int64, skip func(chart *Chart) bool) { +func TestMetricsHasAllChartsDimsSkip(t *testing.T, charts *Charts, mx map[string]int64, skip func(chart *Chart, dim *Dim) bool) { for _, chart := range *charts { - if chart.Obsolete || (skip != nil && skip(chart)) { + if chart.Obsolete { continue } for _, dim := range chart.Dims { + if skip != nil && skip(chart, dim) { + continue + } + _, ok := mx[dim.ID] assert.Truef(t, 
ok, "missing data for dimension '%s' in chart '%s'", dim.ID, chart.ID) } diff --git a/src/go/plugin/go.d/modules/apcupsd/apcupsd_test.go b/src/go/plugin/go.d/modules/apcupsd/apcupsd_test.go index 747dd70696ecaa..aca6670aaf4d0b 100644 --- a/src/go/plugin/go.d/modules/apcupsd/apcupsd_test.go +++ b/src/go/plugin/go.d/modules/apcupsd/apcupsd_test.go @@ -221,7 +221,7 @@ func TestApcupsd_Collect(t *testing.T) { if len(test.wantCollected) > 0 { if strings.Contains(name, "commlost") { - module.TestMetricsHasAllChartsDimsSkip(t, apc.Charts(), mx, func(chart *module.Chart) bool { + module.TestMetricsHasAllChartsDimsSkip(t, apc.Charts(), mx, func(chart *module.Chart, _ *module.Dim) bool { return chart.ID != statusChart.ID }) } else { diff --git a/src/go/plugin/go.d/modules/couchbase/couchbase_test.go b/src/go/plugin/go.d/modules/couchbase/couchbase_test.go index 818d7105da5193..2877a59b83d451 100644 --- a/src/go/plugin/go.d/modules/couchbase/couchbase_test.go +++ b/src/go/plugin/go.d/modules/couchbase/couchbase_test.go @@ -166,10 +166,10 @@ func TestCouchbase_Collect(t *testing.T) { cb, cleanup := test.prepare(t) defer cleanup() - collected := cb.Collect() + mx := cb.Collect() - assert.Equal(t, test.wantCollected, collected) - ensureCollectedHasAllChartsDimsVarsIDs(t, cb, collected) + assert.Equal(t, test.wantCollected, mx) + module.TestMetricsHasAllChartsDims(t, cb.Charts(), mx) }) } } @@ -222,19 +222,3 @@ func prepareCouchbaseConnectionRefused(t *testing.T) (*Couchbase, func()) { return cb, func() {} } - -func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, cb *Couchbase, collected map[string]int64) { - for _, chart := range *cb.Charts() { - if chart.Obsolete { - continue - } - for _, dim := range chart.Dims { - _, ok := collected[dim.ID] - assert.Truef(t, ok, "chart '%s' dim '%s': no dim in collected", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := collected[v.ID] - assert.Truef(t, ok, "chart '%s' dim '%s': no dim in collected", v.ID, chart.ID) - } 
- } -} diff --git a/src/go/plugin/go.d/modules/couchdb/couchdb_test.go b/src/go/plugin/go.d/modules/couchdb/couchdb_test.go index b060f7b07af0d3..d4bd570ca66c61 100644 --- a/src/go/plugin/go.d/modules/couchdb/couchdb_test.go +++ b/src/go/plugin/go.d/modules/couchdb/couchdb_test.go @@ -357,35 +357,19 @@ func TestCouchDB_Collect(t *testing.T) { cdb, cleanup := prepareCouchDB(t, test.prepare) defer cleanup() - var collected map[string]int64 + var mx map[string]int64 for i := 0; i < 10; i++ { - collected = cdb.Collect() + mx = cdb.Collect() } - assert.Equal(t, test.wantCollected, collected) + assert.Equal(t, test.wantCollected, mx) if test.checkCharts { - ensureCollectedHasAllChartsDimsVarsIDs(t, cdb, collected) + module.TestMetricsHasAllChartsDims(t, cdb.Charts(), mx) } }) } } -func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, cdb *CouchDB, collected map[string]int64) { - for _, chart := range *cdb.Charts() { - if chart.Obsolete { - continue - } - for _, dim := range chart.Dims { - _, ok := collected[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := collected[v.ID] - assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) - } - } -} - func prepareCouchDB(t *testing.T, createCDB func() *CouchDB) (cdb *CouchDB, cleanup func()) { t.Helper() cdb = createCDB() diff --git a/src/go/plugin/go.d/modules/dnsdist/dnsdist_test.go b/src/go/plugin/go.d/modules/dnsdist/dnsdist_test.go index dcce4e9e5708a3..372ddbe4d0a731 100644 --- a/src/go/plugin/go.d/modules/dnsdist/dnsdist_test.go +++ b/src/go/plugin/go.d/modules/dnsdist/dnsdist_test.go @@ -189,32 +189,16 @@ func TestDNSdist_Collect(t *testing.T) { defer cleanup() require.NoError(t, dist.Init()) - collected := dist.Collect() + mx := dist.Collect() - assert.Equal(t, test.wantCollected, collected) + assert.Equal(t, test.wantCollected, mx) if len(test.wantCollected) > 0 { - 
ensureCollectedHasAllChartsDimsVarsIDs(t, dist, collected) + module.TestMetricsHasAllChartsDims(t, dist.Charts(), mx) } }) } } -func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, dist *DNSdist, collected map[string]int64) { - for _, chart := range *dist.Charts() { - if chart.Obsolete { - continue - } - for _, dim := range chart.Dims { - _, ok := collected[dim.ID] - assert.Truef(t, ok, "chart '%s' dim '%s': no dim in collected", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := collected[v.ID] - assert.Truef(t, ok, "chart '%s' dim '%s': no dim in collected", v.ID, chart.ID) - } - } -} - func preparePowerDNSdistV151() (*DNSdist, func()) { srv := preparePowerDNSDistEndpoint() ns := New() diff --git a/src/go/plugin/go.d/modules/dnsmasq/dnsmasq_test.go b/src/go/plugin/go.d/modules/dnsmasq/dnsmasq_test.go index b3d54ac9cfe6bb..423aa2daf50dda 100644 --- a/src/go/plugin/go.d/modules/dnsmasq/dnsmasq_test.go +++ b/src/go/plugin/go.d/modules/dnsmasq/dnsmasq_test.go @@ -152,32 +152,16 @@ func TestDnsmasq_Collect(t *testing.T) { dnsmasq := test.prepare() require.NoError(t, dnsmasq.Init()) - collected := dnsmasq.Collect() + mx := dnsmasq.Collect() - assert.Equal(t, test.wantCollected, collected) + assert.Equal(t, test.wantCollected, mx) if len(test.wantCollected) > 0 { - ensureCollectedHasAllChartsDimsVarsIDs(t, dnsmasq, collected) + module.TestMetricsHasAllChartsDims(t, dnsmasq.Charts(), mx) } }) } } -func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, dnsmasq *Dnsmasq, collected map[string]int64) { - for _, chart := range *dnsmasq.Charts() { - if chart.Obsolete { - continue - } - for _, dim := range chart.Dims { - _, ok := collected[dim.ID] - assert.Truef(t, ok, "chart '%s' dim '%s': no dim in collected", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := collected[v.ID] - assert.Truef(t, ok, "chart '%s' dim '%s': no dim in collected", v.ID, chart.ID) - } - } -} - func prepareOKDnsmasq() *Dnsmasq { dnsmasq := New() 
dnsmasq.newDNSClient = func(network string, timeout time.Duration) dnsClient { diff --git a/src/go/plugin/go.d/modules/docker_engine/docker_engine_test.go b/src/go/plugin/go.d/modules/docker_engine/docker_engine_test.go index 32027ebec5c98f..9c998df5b26512 100644 --- a/src/go/plugin/go.d/modules/docker_engine/docker_engine_test.go +++ b/src/go/plugin/go.d/modules/docker_engine/docker_engine_test.go @@ -253,29 +253,15 @@ func TestDockerEngine_Collect(t *testing.T) { for i := 0; i < 10; i++ { _ = pulsar.Collect() } - collected := pulsar.Collect() + mx := pulsar.Collect() - require.NotNil(t, collected) - require.Equal(t, test.expected, collected) - ensureCollectedHasAllChartsDimsVarsIDs(t, pulsar, collected) + require.NotNil(t, mx) + require.Equal(t, test.expected, mx) + module.TestMetricsHasAllChartsDims(t, pulsar.Charts(), mx) }) } } -func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, dockerEngine *DockerEngine, collected map[string]int64) { - t.Helper() - for _, chart := range *dockerEngine.Charts() { - for _, dim := range chart.Dims { - _, ok := collected[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := collected[v.ID] - assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) - } - } -} - func prepareClientServerV17050CE(t *testing.T) (*DockerEngine, *httptest.Server) { t.Helper() srv := httptest.NewServer(http.HandlerFunc( diff --git a/src/go/plugin/go.d/modules/elasticsearch/elasticsearch_test.go b/src/go/plugin/go.d/modules/elasticsearch/elasticsearch_test.go index f94e1e419bd001..315b46db8be47b 100644 --- a/src/go/plugin/go.d/modules/elasticsearch/elasticsearch_test.go +++ b/src/go/plugin/go.d/modules/elasticsearch/elasticsearch_test.go @@ -636,40 +636,13 @@ func TestElasticsearch_Collect(t *testing.T) { mx = es.Collect() } - //m := mx - //l := make([]string, 0) - //for k := range m { - // l = append(l, k) - //} - 
//sort.Strings(l) - //for _, value := range l { - // fmt.Println(fmt.Sprintf("\"%s\": %d,", value, m[value])) - //} - //return - assert.Equal(t, test.wantCollected, mx) assert.Len(t, *es.Charts(), test.wantCharts) - ensureCollectedHasAllChartsDimsVarsIDs(t, es, mx) + module.TestMetricsHasAllChartsDims(t, es.Charts(), mx) }) } } -func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, es *Elasticsearch, collected map[string]int64) { - for _, chart := range *es.Charts() { - if chart.Obsolete { - continue - } - for _, dim := range chart.Dims { - _, ok := collected[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := collected[v.ID] - assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) - } - } -} - func prepareElasticsearch(t *testing.T, createES func() *Elasticsearch) (es *Elasticsearch, cleanup func()) { t.Helper() srv := prepareElasticsearchEndpoint() diff --git a/src/go/plugin/go.d/modules/envoy/envoy_test.go b/src/go/plugin/go.d/modules/envoy/envoy_test.go index 664ee57c71322a..cfcafe10ab4052 100644 --- a/src/go/plugin/go.d/modules/envoy/envoy_test.go +++ b/src/go/plugin/go.d/modules/envoy/envoy_test.go @@ -504,24 +504,11 @@ func TestEnvoy_Collect(t *testing.T) { mx := envoy.Collect() require.Equal(t, test.wantMetrics, mx) - ensureCollectedHasAllChartsDimsVarsIDs(t, envoy, mx) + module.TestMetricsHasAllChartsDims(t, envoy.Charts(), mx) }) } } -func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, envoy *Envoy, mx map[string]int64) { - for _, chart := range *envoy.Charts() { - for _, dim := range chart.Dims { - _, ok := mx[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := mx[v.ID] - assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) - } - } -} - func 
prepareCaseEnvoyConsulDataplaneStats() (*Envoy, func()) { srv := httptest.NewServer(http.HandlerFunc( func(w http.ResponseWriter, r *http.Request) { diff --git a/src/go/plugin/go.d/modules/freeradius/freeradius_test.go b/src/go/plugin/go.d/modules/freeradius/freeradius_test.go index 58e2dce5979478..21ad25517afeb2 100644 --- a/src/go/plugin/go.d/modules/freeradius/freeradius_test.go +++ b/src/go/plugin/go.d/modules/freeradius/freeradius_test.go @@ -117,10 +117,10 @@ func TestFreeRADIUS_Collect(t *testing.T) { "proxy-acct-dropped-requests": 33, "proxy-acct-unknown-types": 34, } - collected := freeRADIUS.Collect() + mx := freeRADIUS.Collect() - assert.Equal(t, expected, collected) - ensureCollectedHasAllChartsDimsVarsIDs(t, freeRADIUS, collected) + assert.Equal(t, expected, mx) + module.TestMetricsHasAllChartsDims(t, freeRADIUS.Charts(), mx) } func TestFreeRADIUS_Collect_ReturnsNilIfClientStatusReturnsError(t *testing.T) { @@ -134,19 +134,6 @@ func TestFreeRADIUS_Cleanup(t *testing.T) { New().Cleanup() } -func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, f *FreeRADIUS, collected map[string]int64) { - for _, chart := range *f.Charts() { - for _, dim := range chart.Dims { - _, ok := collected[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := collected[v.ID] - assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) - } - } -} - func newOKMockClient() *mockClient { return &mockClient{} } diff --git a/src/go/plugin/go.d/modules/haproxy/haproxy_test.go b/src/go/plugin/go.d/modules/haproxy/haproxy_test.go index b264819a5114c7..1cf915cf0ffcd6 100644 --- a/src/go/plugin/go.d/modules/haproxy/haproxy_test.go +++ b/src/go/plugin/go.d/modules/haproxy/haproxy_test.go @@ -173,11 +173,11 @@ func TestHaproxy_Collect(t *testing.T) { h, cleanup := test.prepare(t) defer cleanup() - ms := h.Collect() + mx := h.Collect() - assert.Equal(t, 
test.wantCollected, ms) + assert.Equal(t, test.wantCollected, mx) if len(test.wantCollected) > 0 { - ensureCollectedHasAllChartsDimsVarsIDs(t, h, ms) + module.TestMetricsHasAllChartsDims(t, h.Charts(), mx) } }) } @@ -245,19 +245,3 @@ func prepareCaseConnectionRefused(t *testing.T) (*Haproxy, func()) { return h, func() {} } - -func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, h *Haproxy, ms map[string]int64) { - for _, chart := range *h.Charts() { - if chart.Obsolete { - continue - } - for _, dim := range chart.Dims { - _, ok := ms[dim.ID] - assert.Truef(t, ok, "chart '%s' dim '%s': no dim in collected", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := ms[v.ID] - assert.Truef(t, ok, "chart '%s' dim '%s': no dim in collected", v.ID, chart.ID) - } - } -} diff --git a/src/go/plugin/go.d/modules/isc_dhcpd/isc_dhcpd_test.go b/src/go/plugin/go.d/modules/isc_dhcpd/isc_dhcpd_test.go index 24540ea2fa2dec..3a28beaf9f4472 100644 --- a/src/go/plugin/go.d/modules/isc_dhcpd/isc_dhcpd_test.go +++ b/src/go/plugin/go.d/modules/isc_dhcpd/isc_dhcpd_test.go @@ -226,32 +226,16 @@ func TestDHCPd_Collect(t *testing.T) { dhcpd := test.prepare() require.NoError(t, dhcpd.Init()) - collected := dhcpd.Collect() + mx := dhcpd.Collect() - assert.Equal(t, test.wantCollected, collected) - if len(collected) > 0 { - ensureCollectedHasAllChartsDimsVarsIDs(t, dhcpd, collected) + assert.Equal(t, test.wantCollected, mx) + if len(mx) > 0 { + module.TestMetricsHasAllChartsDims(t, dhcpd.Charts(), mx) } }) } } -func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, dhcpd *DHCPd, collected map[string]int64) { - for _, chart := range *dhcpd.Charts() { - if chart.Obsolete { - continue - } - for _, dim := range chart.Dims { - _, ok := collected[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := collected[v.ID] - assert.Truef(t, ok, "collected metrics has no data for var '%s' chart 
'%s'", v.ID, chart.ID) - } - } -} - func prepareDHCPdLeasesNotExists() *DHCPd { dhcpd := New() dhcpd.Config = Config{ diff --git a/src/go/plugin/go.d/modules/logstash/logstash_test.go b/src/go/plugin/go.d/modules/logstash/logstash_test.go index e02c403d844b1f..4afc82636f71f7 100644 --- a/src/go/plugin/go.d/modules/logstash/logstash_test.go +++ b/src/go/plugin/go.d/modules/logstash/logstash_test.go @@ -180,25 +180,12 @@ func TestLogstash_Collect(t *testing.T) { require.Equal(t, test.wantMetrics, mx) if len(test.wantMetrics) > 0 { assert.Equal(t, test.wantNumOfCharts, len(*ls.Charts())) - ensureCollectedHasAllChartsDimsVarsIDs(t, ls, mx) + module.TestMetricsHasAllChartsDims(t, ls.Charts(), mx) } }) } } -func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, ls *Logstash, mx map[string]int64) { - for _, chart := range *ls.Charts() { - for _, dim := range chart.Dims { - _, ok := mx[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := mx[v.ID] - assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) - } - } -} - func caseValidResponse(t *testing.T) (*Logstash, func()) { t.Helper() srv := httptest.NewServer(http.HandlerFunc( diff --git a/src/go/plugin/go.d/modules/mysql/mysql_test.go b/src/go/plugin/go.d/modules/mysql/mysql_test.go index 300f8dabee6ac5..33c6239a9bef8f 100644 --- a/src/go/plugin/go.d/modules/mysql/mysql_test.go +++ b/src/go/plugin/go.d/modules/mysql/mysql_test.go @@ -1666,23 +1666,17 @@ func TestMySQL_Collect(t *testing.T) { } } -func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, mySQL *MySQL, collected map[string]int64) { - for _, chart := range *mySQL.Charts() { +func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, mySQL *MySQL, mx map[string]int64) { + module.TestMetricsHasAllChartsDimsSkip(t, mySQL.Charts(), mx, func(chart *module.Chart, _ *module.Dim) bool { if mySQL.isMariaDB { // 
https://mariadb.com/kb/en/server-status-variables/#connection_errors_accept if mySQL.version.LT(semver.Version{Major: 10, Minor: 0, Patch: 4}) && chart.ID == "connection_errors" { - continue + return true } } - for _, dim := range chart.Dims { - _, ok := collected[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := collected[v.ID] - assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) - } - } + return false + + }) } func copyProcessListQueryDuration(dst, src map[string]int64) { diff --git a/src/go/plugin/go.d/modules/nginxplus/nginxplus_test.go b/src/go/plugin/go.d/modules/nginxplus/nginxplus_test.go index bfea978ec1e61e..9b2e5739196d6a 100644 --- a/src/go/plugin/go.d/modules/nginxplus/nginxplus_test.go +++ b/src/go/plugin/go.d/modules/nginxplus/nginxplus_test.go @@ -463,7 +463,9 @@ func TestNginxPlus_Collect(t *testing.T) { require.Equal(t, test.wantMetrics, mx) if len(test.wantMetrics) > 0 { assert.Equalf(t, test.wantNumOfCharts, len(*nginx.Charts()), "number of charts") - ensureCollectedHasAllChartsDimsVarsIDs(t, nginx, mx) + module.TestMetricsHasAllChartsDimsSkip(t, nginx.Charts(), mx, func(chart *module.Chart, _ *module.Dim) bool { + return chart.ID == uptimeChart.ID + }) } }) } @@ -578,19 +580,3 @@ func caseConnectionRefused(t *testing.T) (*NginxPlus, func()) { return nginx, func() {} } - -func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, n *NginxPlus, mx map[string]int64) { - for _, chart := range *n.Charts() { - if chart.ID == uptimeChart.ID { - continue - } - for _, dim := range chart.Dims { - _, ok := mx[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := mx[v.ID] - assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) - } - } -} diff --git 
a/src/go/plugin/go.d/modules/nginxvts/nginxvts_test.go b/src/go/plugin/go.d/modules/nginxvts/nginxvts_test.go index 99e82c3e0d7360..e5cada4f73caec 100644 --- a/src/go/plugin/go.d/modules/nginxvts/nginxvts_test.go +++ b/src/go/plugin/go.d/modules/nginxvts/nginxvts_test.go @@ -170,32 +170,16 @@ func TestNginxVTS_Collect(t *testing.T) { vts, cleanup := test.prepare(t) defer cleanup() - collected := vts.Collect() + mx := vts.Collect() - assert.Equal(t, test.wantCollected, collected) + assert.Equal(t, test.wantCollected, mx) if test.checkCharts { - ensureCollectedHasAllChartsDimsVarsIDs(t, vts, collected) + module.TestMetricsHasAllChartsDims(t, vts.Charts(), mx) } }) } } -func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, vts *NginxVTS, collected map[string]int64) { - for _, chart := range *vts.Charts() { - if chart.Obsolete { - continue - } - for _, dim := range chart.Dims { - _, ok := collected[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := collected[v.ID] - assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) - } - } -} - func prepareNginxVTS(t *testing.T, createNginxVTS func() *NginxVTS) (vts *NginxVTS, cleanup func()) { t.Helper() vts = createNginxVTS() diff --git a/src/go/plugin/go.d/modules/pika/pika_test.go b/src/go/plugin/go.d/modules/pika/pika_test.go index 6020d503c4d128..c2cf30f79c7bf8 100644 --- a/src/go/plugin/go.d/modules/pika/pika_test.go +++ b/src/go/plugin/go.d/modules/pika/pika_test.go @@ -187,11 +187,11 @@ func TestPika_Collect(t *testing.T) { t.Run(name, func(t *testing.T) { pika := test.prepare(t) - ms := pika.Collect() + mx := pika.Collect() - assert.Equal(t, test.wantCollected, ms) + assert.Equal(t, test.wantCollected, mx) if len(test.wantCollected) > 0 { - ensureCollectedHasAllChartsDimsVarsIDs(t, pika, ms) + module.TestMetricsHasAllChartsDims(t, pika.Charts(), mx) 
ensureCollectedCommandsAddedToCharts(t, pika) ensureCollectedDbsAddedToCharts(t, pika) } @@ -226,22 +226,6 @@ func preparePikaWithRedisMetrics(t *testing.T) *Pika { return pika } -func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, pika *Pika, ms map[string]int64) { - for _, chart := range *pika.Charts() { - if chart.Obsolete { - continue - } - for _, dim := range chart.Dims { - _, ok := ms[dim.ID] - assert.Truef(t, ok, "chart '%s' dim '%s': no dim in collected", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := ms[v.ID] - assert.Truef(t, ok, "chart '%s' dim '%s': no dim in collected", v.ID, chart.ID) - } - } -} - func ensureCollectedCommandsAddedToCharts(t *testing.T, pika *Pika) { for _, id := range []string{ chartCommandsCalls.ID, diff --git a/src/go/plugin/go.d/modules/powerdns/authoritativens_test.go b/src/go/plugin/go.d/modules/powerdns/authoritativens_test.go index b6570c522fcb39..a4242a0b6a904a 100644 --- a/src/go/plugin/go.d/modules/powerdns/authoritativens_test.go +++ b/src/go/plugin/go.d/modules/powerdns/authoritativens_test.go @@ -244,32 +244,16 @@ func TestAuthoritativeNS_Collect(t *testing.T) { defer cleanup() require.NoError(t, ns.Init()) - collected := ns.Collect() + mx := ns.Collect() - assert.Equal(t, test.wantCollected, collected) + assert.Equal(t, test.wantCollected, mx) if len(test.wantCollected) > 0 { - ensureCollectedHasAllChartsDimsVarsIDs(t, ns, collected) + module.TestMetricsHasAllChartsDims(t, ns.Charts(), mx) } }) } } -func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, ns *AuthoritativeNS, collected map[string]int64) { - for _, chart := range *ns.Charts() { - if chart.Obsolete { - continue - } - for _, dim := range chart.Dims { - _, ok := collected[dim.ID] - assert.Truef(t, ok, "chart '%s' dim '%s': no dim in collected", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := collected[v.ID] - assert.Truef(t, ok, "chart '%s' dim '%s': no dim in collected", v.ID, chart.ID) - } - } -} - func 
preparePowerDNSAuthoritativeNSV430() (*AuthoritativeNS, func()) { srv := preparePowerDNSAuthoritativeNSEndpoint() ns := New() diff --git a/src/go/plugin/go.d/modules/powerdns_recursor/recursor_test.go b/src/go/plugin/go.d/modules/powerdns_recursor/recursor_test.go index fbe537a600a954..1cf3561ea295fb 100644 --- a/src/go/plugin/go.d/modules/powerdns_recursor/recursor_test.go +++ b/src/go/plugin/go.d/modules/powerdns_recursor/recursor_test.go @@ -279,32 +279,16 @@ func TestRecursor_Collect(t *testing.T) { defer cleanup() require.NoError(t, recursor.Init()) - collected := recursor.Collect() + mx := recursor.Collect() - assert.Equal(t, test.wantCollected, collected) + assert.Equal(t, test.wantCollected, mx) if len(test.wantCollected) > 0 { - ensureCollectedHasAllChartsDimsVarsIDs(t, recursor, collected) + module.TestMetricsHasAllChartsDims(t, recursor.Charts(), mx) } }) } } -func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, rec *Recursor, collected map[string]int64) { - for _, chart := range *rec.Charts() { - if chart.Obsolete { - continue - } - for _, dim := range chart.Dims { - _, ok := collected[dim.ID] - assert.Truef(t, ok, "chart '%s' dim '%s': no dim in collected", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := collected[v.ID] - assert.Truef(t, ok, "chart '%s' dim '%s': no dim in collected", v.ID, chart.ID) - } - } -} - func preparePowerDNSRecursorV431() (*Recursor, func()) { srv := preparePowerDNSRecursorEndpoint() recursor := New() diff --git a/src/go/plugin/go.d/modules/pulsar/pulsar_test.go b/src/go/plugin/go.d/modules/pulsar/pulsar_test.go index 865a41cb3b9a5f..cf542378621d69 100644 --- a/src/go/plugin/go.d/modules/pulsar/pulsar_test.go +++ b/src/go/plugin/go.d/modules/pulsar/pulsar_test.go @@ -171,11 +171,11 @@ func TestPulsar_Collect(t *testing.T) { for i := 0; i < 10; i++ { _ = pulsar.Collect() } - collected := pulsar.Collect() + mx := pulsar.Collect() - require.NotNil(t, collected) - require.Equal(t, test.expected, collected) - 
ensureCollectedHasAllChartsDimsVarsIDs(t, pulsar, collected) + require.NotNil(t, mx) + require.Equal(t, test.expected, mx) + module.TestMetricsHasAllChartsDims(t, pulsar.Charts(), mx) }) } } @@ -212,19 +212,6 @@ func TestPulsar_Collect_RemoveAddNamespacesTopicsInRuntime(t *testing.T) { } } -func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, pulsar *Pulsar, collected map[string]int64) { - for _, chart := range *pulsar.Charts() { - for _, dim := range chart.Dims { - _, ok := collected[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := collected[v.ID] - assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) - } - } -} - func prepareClientServerStdV250Namespaces(t *testing.T) (*Pulsar, *httptest.Server) { t.Helper() srv := httptest.NewServer(http.HandlerFunc( diff --git a/src/go/plugin/go.d/modules/redis/redis_test.go b/src/go/plugin/go.d/modules/redis/redis_test.go index 067bb3ad29151c..597fde1dd383ac 100644 --- a/src/go/plugin/go.d/modules/redis/redis_test.go +++ b/src/go/plugin/go.d/modules/redis/redis_test.go @@ -298,13 +298,13 @@ func TestRedis_Collect(t *testing.T) { t.Run(name, func(t *testing.T) { rdb := test.prepare(t) - ms := rdb.Collect() + mx := rdb.Collect() - copyTimeRelatedMetrics(ms, test.wantCollected) + copyTimeRelatedMetrics(mx, test.wantCollected) - assert.Equal(t, test.wantCollected, ms) + assert.Equal(t, test.wantCollected, mx) if len(test.wantCollected) > 0 { - ensureCollectedHasAllChartsDimsVarsIDs(t, rdb, ms) + module.TestMetricsHasAllChartsDims(t, rdb.Charts(), mx) ensureCollectedCommandsAddedToCharts(t, rdb) ensureCollectedDbsAddedToCharts(t, rdb) } @@ -338,23 +338,6 @@ func prepareRedisWithPikaMetrics(t *testing.T) *Redis { } return rdb } - -func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, rdb *Redis, ms map[string]int64) { - for _, chart := range *rdb.Charts() { - if chart.Obsolete { - 
continue - } - for _, dim := range chart.Dims { - _, ok := ms[dim.ID] - assert.Truef(t, ok, "chart '%s' dim '%s': no dim in collected", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := ms[v.ID] - assert.Truef(t, ok, "chart '%s' dim '%s': no dim in collected", v.ID, chart.ID) - } - } -} - func ensureCollectedCommandsAddedToCharts(t *testing.T, rdb *Redis) { for _, id := range []string{ chartCommandsCalls.ID, diff --git a/src/go/plugin/go.d/modules/rethinkdb/rethinkdb_test.go b/src/go/plugin/go.d/modules/rethinkdb/rethinkdb_test.go index f23c497477d67f..1e5b528946182f 100644 --- a/src/go/plugin/go.d/modules/rethinkdb/rethinkdb_test.go +++ b/src/go/plugin/go.d/modules/rethinkdb/rethinkdb_test.go @@ -154,12 +154,12 @@ func TestRethinkdb_Collect(t *testing.T) { prepare func() *Rethinkdb wantMetrics map[string]int64 wantCharts int - skipChart func(chart *module.Chart) bool + skipChart func(chart *module.Chart, dim *module.Dim) bool }{ "success on valid response": { prepare: prepareCaseOk, wantCharts: len(clusterCharts) + len(serverChartsTmpl)*3, - skipChart: func(chart *module.Chart) bool { + skipChart: func(chart *module.Chart, dim *module.Dim) bool { return strings.HasPrefix(chart.ID, "server_0f74c641-af5f-48d6-a005-35b8983c576a") && !strings.Contains(chart.ID, "stats_request_status") }, diff --git a/src/go/plugin/go.d/modules/supervisord/supervisord_test.go b/src/go/plugin/go.d/modules/supervisord/supervisord_test.go index 7eb5df53a5f11d..eb4ed65c77555d 100644 --- a/src/go/plugin/go.d/modules/supervisord/supervisord_test.go +++ b/src/go/plugin/go.d/modules/supervisord/supervisord_test.go @@ -169,40 +169,15 @@ func TestSupervisord_Collect(t *testing.T) { supvr := test.prepare(t) defer supvr.Cleanup() - ms := supvr.Collect() - assert.Equal(t, test.wantCollected, ms) + mx := supvr.Collect() + assert.Equal(t, test.wantCollected, mx) if len(test.wantCollected) > 0 { - ensureCollectedHasAllChartsDimsVarsIDs(t, supvr, ms) - 
ensureCollectedProcessesAddedToCharts(t, supvr) + module.TestMetricsHasAllChartsDims(t, supvr.Charts(), mx) } }) } } -func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, supvr *Supervisord, ms map[string]int64) { - for _, chart := range *supvr.Charts() { - if chart.Obsolete { - continue - } - for _, dim := range chart.Dims { - _, ok := ms[dim.ID] - assert.Truef(t, ok, "chart '%s' dim '%s': no dim in collected", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := ms[v.ID] - assert.Truef(t, ok, "chart '%s' dim '%s': no dim in collected", v.ID, chart.ID) - } - } -} - -func ensureCollectedProcessesAddedToCharts(t *testing.T, supvr *Supervisord) { - for group := range supvr.cache { - for _, c := range *newProcGroupCharts(group) { - assert.NotNilf(t, supvr.Charts().Get(c.ID), "'%s' chart is not in charts", c.ID) - } - } -} - func prepareSupervisordSuccessOnGetAllProcessInfo(t *testing.T) *Supervisord { supvr := New() require.NoError(t, supvr.Init()) diff --git a/src/go/plugin/go.d/modules/systemdunits/systemdunits_test.go b/src/go/plugin/go.d/modules/systemdunits/systemdunits_test.go index 7074e186e4e177..dd63c6a9eecb7b 100644 --- a/src/go/plugin/go.d/modules/systemdunits/systemdunits_test.go +++ b/src/go/plugin/go.d/modules/systemdunits/systemdunits_test.go @@ -862,7 +862,7 @@ func TestSystemdUnits_Collect(t *testing.T) { assert.Equal(t, test.wantCollected, mx) if len(test.wantCollected) > 0 { - ensureCollectedHasAllChartsDimsVarsIDs(t, systemd, mx) + module.TestMetricsHasAllChartsDims(t, systemd.Charts(), mx) } }) } @@ -884,22 +884,6 @@ func TestSystemdUnits_connectionReuse(t *testing.T) { assert.Equal(t, 1, client.connectCalls) } -func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, sd *SystemdUnits, collected map[string]int64) { - for _, chart := range *sd.Charts() { - if chart.Obsolete { - continue - } - for _, dim := range chart.Dims { - _, ok := collected[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart 
'%s'", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := collected[v.ID] - assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) - } - } -} - func prepareOKClient(ver int) *mockClient { return &mockClient{ conn: &mockConn{ diff --git a/src/go/plugin/go.d/modules/traefik/traefik_test.go b/src/go/plugin/go.d/modules/traefik/traefik_test.go index 6efb27e34fd0f9..b606c1b35f512b 100644 --- a/src/go/plugin/go.d/modules/traefik/traefik_test.go +++ b/src/go/plugin/go.d/modules/traefik/traefik_test.go @@ -241,13 +241,13 @@ func TestTraefik_Collect(t *testing.T) { tk, cleanup := test.prepare(t) defer cleanup() - var ms map[string]int64 + var mx map[string]int64 for _, want := range test.wantCollected { - ms = tk.Collect() - assert.Equal(t, want, ms) + mx = tk.Collect() + assert.Equal(t, want, mx) } if len(test.wantCollected) > 0 { - ensureCollectedHasAllChartsDimsVarsIDs(t, tk, ms) + module.TestMetricsHasAllChartsDims(t, tk.Charts(), mx) } }) } @@ -352,19 +352,3 @@ func prepareCaseConnectionRefused(t *testing.T) (*Traefik, func()) { return h, func() {} } - -func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, tk *Traefik, ms map[string]int64) { - for _, chart := range *tk.Charts() { - if chart.Obsolete { - continue - } - for _, dim := range chart.Dims { - _, ok := ms[dim.ID] - assert.Truef(t, ok, "chart '%s' dim '%s': no dim in collected", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := ms[v.ID] - assert.Truef(t, ok, "chart '%s' dim '%s': no dim in collected", v.ID, chart.ID) - } - } -} diff --git a/src/go/plugin/go.d/modules/unbound/unbound_test.go b/src/go/plugin/go.d/modules/unbound/unbound_test.go index f9ed73afe6c7d9..e23147cd3529cc 100644 --- a/src/go/plugin/go.d/modules/unbound/unbound_test.go +++ b/src/go/plugin/go.d/modules/unbound/unbound_test.go @@ -269,11 +269,13 @@ func (m mockUnboundClient) Command(_ string, process socket.Processor) error { return nil } -func testCharts(t *testing.T, 
unbound *Unbound, collected map[string]int64) { +func testCharts(t *testing.T, unbound *Unbound, mx map[string]int64) { t.Helper() ensureChartsCreatedForEveryThread(t, unbound) ensureExtendedChartsCreated(t, unbound) - ensureCollectedHasAllChartsDimsVarsIDs(t, unbound, collected) + module.TestMetricsHasAllChartsDimsSkip(t, unbound.Charts(), mx, func(_ *module.Chart, dim *module.Dim) bool { + return dim.ID == "mem.mod.ipsecmod" + }) } func ensureChartsCreatedForEveryThread(t *testing.T, u *Unbound) { @@ -318,22 +320,6 @@ func ensureExtendedChartsCreated(t *testing.T, u *Unbound) { } } -func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, u *Unbound, collected map[string]int64) { - for _, chart := range *u.Charts() { - for _, dim := range chart.Dims { - if dim.ID == "mem.mod.ipsecmod" { - continue - } - _, ok := collected[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := collected[v.ID] - assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) - } - } -} - var ( expectedCommon = map[string]int64{ "thread0.num.cachehits": 21, diff --git a/src/go/plugin/go.d/modules/vsphere/vsphere_test.go b/src/go/plugin/go.d/modules/vsphere/vsphere_test.go index 3c1b9ded146dd9..b0c22a1f7c357d 100644 --- a/src/go/plugin/go.d/modules/vsphere/vsphere_test.go +++ b/src/go/plugin/go.d/modules/vsphere/vsphere_test.go @@ -330,9 +330,9 @@ func TestVSphere_Collect(t *testing.T) { "vm-72_sys.uptime.latest": 200, } - collected := vSphere.Collect() + mx := vSphere.Collect() - require.Equal(t, expected, collected) + require.Equal(t, expected, mx) count := model.Count() assert.Len(t, vSphere.discoveredHosts, count.Host) @@ -340,7 +340,7 @@ func TestVSphere_Collect(t *testing.T) { assert.Len(t, vSphere.charted, count.Host+count.Machine) assert.Len(t, *vSphere.Charts(), count.Host*len(hostChartsTmpl)+count.Machine*len(vmChartsTmpl)) - 
ensureCollectedHasAllChartsDimsVarsIDs(t, vSphere, collected) + module.TestMetricsHasAllChartsDims(t, vSphere.Charts(), mx) } func TestVSphere_Collect_RemoveHostsVMsInRuntime(t *testing.T) { @@ -421,19 +421,6 @@ func TestVSphere_Collect_Run(t *testing.T) { assert.Len(t, *vSphere.charts, count.Host*len(hostChartsTmpl)+count.Machine*len(vmChartsTmpl)) } -func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, vSphere *VSphere, collected map[string]int64) { - for _, chart := range *vSphere.Charts() { - for _, dim := range chart.Dims { - _, ok := collected[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := collected[v.ID] - assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) - } - } -} - func prepareVSphereSim(t *testing.T) (vSphere *VSphere, model *simulator.Model, teardown func()) { model, srv := createSim(t) vSphere = New() diff --git a/src/go/plugin/go.d/modules/whoisquery/whoisquery_test.go b/src/go/plugin/go.d/modules/whoisquery/whoisquery_test.go index 4979c7f57a0b7a..57ba6b05186a6b 100644 --- a/src/go/plugin/go.d/modules/whoisquery/whoisquery_test.go +++ b/src/go/plugin/go.d/modules/whoisquery/whoisquery_test.go @@ -101,7 +101,7 @@ func TestWhoisQuery_Collect(t *testing.T) { require.NoError(t, whoisquery.Init()) whoisquery.prov = &mockProvider{remTime: 12345} - collected := whoisquery.Collect() + mx := whoisquery.Collect() expected := map[string]int64{ "expiry": 12345, @@ -109,9 +109,9 @@ func TestWhoisQuery_Collect(t *testing.T) { "days_until_expiration_critical": 15, } - assert.NotZero(t, collected) - assert.Equal(t, expected, collected) - ensureCollectedHasAllChartsDimsVarsIDs(t, whoisquery, collected) + assert.NotZero(t, mx) + assert.Equal(t, expected, mx) + module.TestMetricsHasAllChartsDims(t, whoisquery.Charts(), mx) } func TestWhoisQuery_Collect_ReturnsNilOnProviderError(t *testing.T) { @@ -123,19 +123,6 @@ 
func TestWhoisQuery_Collect_ReturnsNilOnProviderError(t *testing.T) { assert.Nil(t, whoisquery.Collect()) } -func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, whoisquery *WhoisQuery, collected map[string]int64) { - for _, chart := range *whoisquery.Charts() { - for _, dim := range chart.Dims { - _, ok := collected[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := collected[v.ID] - assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) - } - } -} - type mockProvider struct { remTime float64 err bool diff --git a/src/go/plugin/go.d/modules/x509check/x509check_test.go b/src/go/plugin/go.d/modules/x509check/x509check_test.go index e0b287251d0d52..3bdd84ced87c61 100644 --- a/src/go/plugin/go.d/modules/x509check/x509check_test.go +++ b/src/go/plugin/go.d/modules/x509check/x509check_test.go @@ -130,10 +130,10 @@ func TestX509Check_Collect(t *testing.T) { require.NoError(t, x509Check.Init()) x509Check.prov = &mockProvider{certs: []*x509.Certificate{{}}} - collected := x509Check.Collect() + mx := x509Check.Collect() - assert.NotZero(t, collected) - ensureCollectedHasAllChartsDimsVarsIDs(t, x509Check, collected) + assert.NotZero(t, mx) + module.TestMetricsHasAllChartsDims(t, x509Check.Charts(), mx) } func TestX509Check_Collect_ReturnsNilOnProviderError(t *testing.T) { @@ -151,19 +151,6 @@ func TestX509Check_Collect_ReturnsNilOnZeroCertificates(t *testing.T) { assert.Nil(t, mx) } -func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, x509Check *X509Check, collected map[string]int64) { - for _, chart := range *x509Check.Charts() { - for _, dim := range chart.Dims { - _, ok := collected[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := collected[v.ID] - assert.Truef(t, ok, "collected metrics has no data for var '%s' chart 
'%s'", v.ID, chart.ID) - } - } -} - type mockProvider struct { certs []*x509.Certificate err bool diff --git a/src/go/plugin/go.d/modules/zfspool/zfspool_test.go b/src/go/plugin/go.d/modules/zfspool/zfspool_test.go index bf64d1713b356b..90ffd241bf8e6d 100644 --- a/src/go/plugin/go.d/modules/zfspool/zfspool_test.go +++ b/src/go/plugin/go.d/modules/zfspool/zfspool_test.go @@ -388,7 +388,7 @@ func TestZFSPool_Collect(t *testing.T) { assert.Len(t, *zp.Charts(), want, "want charts") - module.TestMetricsHasAllChartsDimsSkip(t, zp.Charts(), mx, func(chart *module.Chart) bool { + module.TestMetricsHasAllChartsDimsSkip(t, zp.Charts(), mx, func(chart *module.Chart, _ *module.Dim) bool { return strings.HasPrefix(chart.ID, "zfspool_zion") && !strings.HasSuffix(chart.ID, "health_state") }) } diff --git a/src/go/plugin/go.d/modules/zookeeper/zookeeper_test.go b/src/go/plugin/go.d/modules/zookeeper/zookeeper_test.go index 3fc8ad5b4b99c8..9b45e2ad543ced 100644 --- a/src/go/plugin/go.d/modules/zookeeper/zookeeper_test.go +++ b/src/go/plugin/go.d/modules/zookeeper/zookeeper_test.go @@ -99,10 +99,10 @@ func TestZookeeper_Collect(t *testing.T) { "znode_count": 5, } - collected := job.Collect() + mx := job.Collect() - assert.Equal(t, expected, collected) - ensureCollectedHasAllChartsDimsVarsIDs(t, job, collected) + assert.Equal(t, expected, mx) + module.TestMetricsHasAllChartsDims(t, job.Charts(), mx) } func TestZookeeper_CollectMntrNotInWhiteList(t *testing.T) { @@ -137,22 +137,6 @@ func TestZookeeper_CollectMntrReceiveError(t *testing.T) { assert.Nil(t, job.Collect()) } -func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, zk *Zookeeper, collected map[string]int64) { - for _, chart := range *zk.Charts() { - if chart.Obsolete { - continue - } - for _, dim := range chart.Dims { - _, ok := collected[dim.ID] - assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) - } - for _, v := range chart.Vars { - _, ok := collected[v.ID] - 
assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) - } - } -} - type mockZookeeperFetcher struct { data []byte err bool From 3c91cb00664d5ab1a2458df465734e9f9a7a57b5 Mon Sep 17 00:00:00 2001 From: "Austin S. Hemmelgarn" Date: Tue, 1 Oct 2024 06:50:43 -0400 Subject: [PATCH 13/23] Use temporary file for commit date check. (#18646) --- packaging/installer/netdata-updater.sh | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/packaging/installer/netdata-updater.sh b/packaging/installer/netdata-updater.sh index fc8b39cdd514a2..5ebb6baa214074 100755 --- a/packaging/installer/netdata-updater.sh +++ b/packaging/installer/netdata-updater.sh @@ -532,6 +532,8 @@ get_netdata_latest_tag() { newer_commit_date() { info "Checking if a newer version of the updater script is available." + ndtmpdir="$(create_tmp_directory)" + commit_check_file="${ndtmpdir}/latest-commit.json" commit_check_url="https://api.github.com/repos/netdata/netdata/commits?path=packaging%2Finstaller%2Fnetdata-updater.sh&page=1&per_page=1" python_version_check=" from __future__ import print_function @@ -545,12 +547,14 @@ else: print(data[0]['commit']['committer']['date'] if isinstance(data, list) and data else '') " + _safe_download "${commit_check_url}" "${commit_check_file}" + if command -v jq > /dev/null 2>&1; then - commit_date="$(_safe_download "${commit_check_url}" /dev/stdout | jq '.[0].commit.committer.date' 2>/dev/null | tr -d '"')" + commit_date="$(jq '.[0].commit.committer.date' 2>/dev/null < "${commit_check_file}" | tr -d '"')" elif command -v python > /dev/null 2>&1;then - commit_date="$(_safe_download "${commit_check_url}" /dev/stdout | python -c "${python_version_check}")" + commit_date="$(python -c "${python_version_check}" < "${commit_check_file}")" elif command -v python3 > /dev/null 2>&1;then - commit_date="$(_safe_download "${commit_check_url}" /dev/stdout | python3 -c "${python_version_check}")" + commit_date="$(python3 -c 
"${python_version_check}" < "${commit_check_file}")" fi if [ -z "${commit_date}" ] ; then From 6c6b8e12921be36ff986af9fa794e0b6eae5e21d Mon Sep 17 00:00:00 2001 From: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com> Date: Tue, 1 Oct 2024 14:13:16 +0300 Subject: [PATCH 14/23] Log agent start / stop timing events (#18632) * log agent start / stop events * Make it simple * Populate average start/shutdown time in stream path * Get the median and not the average of the values * Change the log --- src/daemon/main.c | 12 +++++- src/database/sqlite/sqlite_metadata.c | 59 +++++++++++++++++++++++++++ src/database/sqlite/sqlite_metadata.h | 9 ++++ src/streaming/stream_path.c | 4 ++ src/streaming/stream_path.h | 2 + 5 files changed, 85 insertions(+), 1 deletion(-) diff --git a/src/daemon/main.c b/src/daemon/main.c index 9b3758ca466962..98f32a88d2974b 100644 --- a/src/daemon/main.c +++ b/src/daemon/main.c @@ -316,6 +316,7 @@ void web_client_cache_destroy(void); void netdata_cleanup_and_exit(int ret, const char *action, const char *action_result, const char *action_data) { netdata_exit = 1; + usec_t shutdown_start_time = now_monotonic_usec(); watcher_shutdown_begin(); nd_log_limits_unlimited(); @@ -464,6 +465,9 @@ void netdata_cleanup_and_exit(int ret, const char *action, const char *action_re #endif } + // Don't register a shutdown event if we crashed + if (!ret) + add_agent_event(EVENT_AGENT_SHUTDOWN_TIME, (int64_t)(now_monotonic_usec() - shutdown_start_time)); sqlite_close_databases(); watcher_step_complete(WATCHER_STEP_ID_CLOSE_SQL_DATABASES); sqlite_library_shutdown(); @@ -2307,7 +2311,13 @@ int netdata_main(int argc, char **argv) { delta_startup_time("ready"); usec_t ready_ut = now_monotonic_usec(); - netdata_log_info("NETDATA STARTUP: completed in %llu ms. 
Enjoy real-time performance monitoring!", (ready_ut - started_ut) / USEC_PER_MS); + add_agent_event(EVENT_AGENT_START_TIME, (int64_t ) (ready_ut - started_ut)); + usec_t median_start_time = get_agent_event_time_median(EVENT_AGENT_START_TIME); + netdata_log_info( + "NETDATA STARTUP: completed in %llu ms (median start up time is %llu ms). Enjoy real-time performance monitoring!", + (ready_ut - started_ut) / USEC_PER_MS, median_start_time / USEC_PER_MS); + + cleanup_agent_event_log(); netdata_ready = true; analytics_statistic_t start_statistic = { "START", "-", "-" }; diff --git a/src/database/sqlite/sqlite_metadata.c b/src/database/sqlite/sqlite_metadata.c index 1e2cc341cc1e7f..6c0b5c6533866b 100644 --- a/src/database/sqlite/sqlite_metadata.c +++ b/src/database/sqlite/sqlite_metadata.c @@ -78,6 +78,9 @@ const char *database_config[] = { "CREATE INDEX IF NOT EXISTS health_log_d_ind_7 on health_log_detail (alarm_id)", "CREATE INDEX IF NOT EXISTS health_log_d_ind_8 on health_log_detail (new_status, updated_by_id)", + "CREATE TABLE IF NOT EXISTS agent_event_log (id INTEGER PRIMARY KEY, version TEXT, event_type INT, value, date_created INT)", + "CREATE INDEX IF NOT EXISTS idx_agent_event_log1 on agent_event_log (event_type)", + "CREATE TABLE IF NOT EXISTS alert_queue " " (host_id BLOB, health_log_id INT, unique_id INT, alarm_id INT, status INT, date_scheduled INT, " " UNIQUE(host_id, health_log_id, alarm_id))", @@ -2337,6 +2340,62 @@ uint64_t sqlite_get_meta_space(void) return sqlite_get_db_space(db_meta); } +#define SQL_ADD_AGENT_EVENT_LOG \ + "INSERT INTO agent_event_log (event_type, version, value, date_created) VALUES " \ + " (@event_type, @version, @value, UNIXEPOCH())" + +void add_agent_event(event_log_type_t event_id, int64_t value) +{ + sqlite3_stmt *res = NULL; + + if (!PREPARE_STATEMENT(db_meta, SQL_ADD_AGENT_EVENT_LOG, &res)) + return; + + int param = 0; + SQLITE_BIND_FAIL(done, sqlite3_bind_int(res, ++param, event_id)); + SQLITE_BIND_FAIL(done, 
sqlite3_bind_text(res, ++param, NETDATA_VERSION, -1, SQLITE_STATIC)); + SQLITE_BIND_FAIL(done, sqlite3_bind_int64(res, ++param, value)); + + param = 0; + int rc = execute_insert(res); + if (rc != SQLITE_DONE) + error_report("Failed to store agent event information, rc = %d", rc); +done: + REPORT_BIND_FAIL(res, param); + SQLITE_FINALIZE(res); +} + +void cleanup_agent_event_log(void) +{ + db_execute(db_meta, "DELETE FROM agent_event_log WHERE date_created < UNIXEPOCH() - 30 * 86400"); +} + +#define SQL_GET_AGENT_EVENT_TYPE_MEDIAN \ + "SELECT AVG(value) AS median FROM " \ + "(SELECT value FROM agent_event_log WHERE event_type = @event ORDER BY value " \ + " LIMIT 2 - (SELECT COUNT(*) FROM agent_event_log WHERE event_type = @event) % 2 " \ + "OFFSET(SELECT(COUNT(*) - 1) / 2 FROM agent_event_log WHERE event_type = @event)) " + +usec_t get_agent_event_time_median(event_log_type_t event_id) +{ + sqlite3_stmt *res = NULL; + if (!PREPARE_STATEMENT(db_meta, SQL_GET_AGENT_EVENT_TYPE_MEDIAN, &res)) + return 0; + + usec_t avg_time = 0; + int param = 0; + SQLITE_BIND_FAIL(done, sqlite3_bind_int(res, ++param, event_id)); + + param = 0; + if (sqlite3_step_monitored(res) == SQLITE_ROW) + avg_time = sqlite3_column_int64(res, 0); + +done: + REPORT_BIND_FAIL(res, param); + SQLITE_FINALIZE(res); + return avg_time; +} + // // unitests // diff --git a/src/database/sqlite/sqlite_metadata.h b/src/database/sqlite/sqlite_metadata.h index b6f9176d19fcb1..a5e68eb8cfda08 100644 --- a/src/database/sqlite/sqlite_metadata.h +++ b/src/database/sqlite/sqlite_metadata.h @@ -6,6 +6,11 @@ #include "sqlite3.h" #include "sqlite_functions.h" +typedef enum event_log_type { + EVENT_AGENT_START_TIME = 1, + EVENT_AGENT_SHUTDOWN_TIME, +} event_log_type_t; + // return a node list struct node_instance_list { nd_uuid_t node_id; @@ -54,6 +59,10 @@ bool sql_set_host_label(nd_uuid_t *host_id, const char *label_key, const char *l uint64_t sqlite_get_meta_space(void); int sql_init_meta_database(db_check_action_type_t 
rebuild, int memory); +void cleanup_agent_event_log(void); +void add_agent_event(event_log_type_t event_id, int64_t value); +usec_t get_agent_event_time_median(event_log_type_t event_id); + // UNIT TEST int metadata_unittest(void); #endif //NETDATA_SQLITE_METADATA_H diff --git a/src/streaming/stream_path.c b/src/streaming/stream_path.c index ce7d9f055dc401..40e6526378e756 100644 --- a/src/streaming/stream_path.c +++ b/src/streaming/stream_path.c @@ -54,6 +54,8 @@ static void stream_path_to_json_object(BUFFER *wb, STREAM_PATH *p) { buffer_json_member_add_int64(wb, "hops", p->hops); buffer_json_member_add_uint64(wb, "since", p->since); buffer_json_member_add_uint64(wb, "first_time_t", p->first_time_t); + buffer_json_member_add_uint64(wb, "start_time", p->start_time); + buffer_json_member_add_uint64(wb, "shutdown_time", p->shutdown_time); stream_capabilities_to_json_array(wb, p->capabilities, "capabilities"); STREAM_PATH_FLAGS_2json(wb, "flags", p->flags); buffer_json_object_close(wb); @@ -68,6 +70,8 @@ static STREAM_PATH rrdhost_stream_path_self(RRDHOST *host) { p.host_id = localhost->host_id; p.node_id = localhost->node_id; p.claim_id = claim_id_get_uuid(); + p.start_time = get_agent_event_time_median(EVENT_AGENT_START_TIME) / USEC_PER_MS; + p.shutdown_time = get_agent_event_time_median(EVENT_AGENT_SHUTDOWN_TIME) / USEC_PER_MS; p.flags = STREAM_PATH_FLAG_NONE; if(!UUIDiszero(p.claim_id)) diff --git a/src/streaming/stream_path.h b/src/streaming/stream_path.h index e9d741580a0dcb..96f141d84fd238 100644 --- a/src/streaming/stream_path.h +++ b/src/streaming/stream_path.h @@ -22,6 +22,8 @@ typedef struct stream_path { int16_t hops; // -1 = stale node, 0 = localhost, >0 the hops count STREAM_PATH_FLAGS flags; // ACLK or NONE for the moment STREAM_CAPABILITIES capabilities; // streaming connection capabilities + uint32_t start_time; // median time in ms the agent needs to start + uint32_t shutdown_time; // median time in ms the agent needs to shutdown } STREAM_PATH; 
typedef struct rrdhost_stream_path { From 59d806e23ba8c0f0f30ced872e3f4c31c0697688 Mon Sep 17 00:00:00 2001 From: thiagoftsm Date: Tue, 1 Oct 2024 18:24:01 +0000 Subject: [PATCH 15/23] Windows Installer (Silent mode) (#18613) --- packaging/installer/installer.nsi | 128 ------------------------- packaging/windows/WINDOWS_INSTALLER.md | 50 ++++++++++ packaging/windows/installer.nsi | 124 ++++++++++++++++++++++++ 3 files changed, 174 insertions(+), 128 deletions(-) delete mode 100644 packaging/installer/installer.nsi create mode 100644 packaging/windows/WINDOWS_INSTALLER.md diff --git a/packaging/installer/installer.nsi b/packaging/installer/installer.nsi deleted file mode 100644 index c14ccb599368ef..00000000000000 --- a/packaging/installer/installer.nsi +++ /dev/null @@ -1,128 +0,0 @@ -!include "MUI2.nsh" -!include "nsDialogs.nsh" -!include "FileFunc.nsh" - -Name "Netdata" -Outfile "netdata-installer.exe" -InstallDir "$PROGRAMFILES\Netdata" -RequestExecutionLevel admin - -!define MUI_ICON "NetdataWhite.ico" -!define MUI_UNICON "NetdataWhite.ico" - -!define ND_UININSTALL_REG "Software\Microsoft\Windows\CurrentVersion\Uninstall\Netdata" - -!define MUI_ABORTWARNING -!define MUI_UNABORTWARNING - -!insertmacro MUI_PAGE_WELCOME -!insertmacro MUI_PAGE_LICENSE "C:\msys64\gpl-3.0.txt" -!insertmacro MUI_PAGE_DIRECTORY -!insertmacro MUI_PAGE_INSTFILES -!insertmacro MUI_PAGE_FINISH - -!insertmacro MUI_UNPAGE_CONFIRM -!insertmacro MUI_UNPAGE_INSTFILES -!insertmacro MUI_UNPAGE_FINISH - -!insertmacro MUI_LANGUAGE "English" - -Function .onInit - nsExec::ExecToLog '$SYSDIR\sc.exe stop Netdata' - pop $0 - ${If} $0 == 0 - nsExec::ExecToLog '$SYSDIR\sc.exe delete Netdata' - pop $0 - ${EndIf} -FunctionEnd - -Function NetdataUninstallRegistry - ClearErrors - WriteRegStr HKLM "${ND_UININSTALL_REG}" \ - "DisplayName" "Netdata - Real-time system monitoring." 
- WriteRegStr HKLM "${ND_UININSTALL_REG}" \ - "DisplayIcon" "$INSTDIR\Uninstall.exe,0" - WriteRegStr HKLM "${ND_UININSTALL_REG}" \ - "UninstallString" "$INSTDIR\Uninstall.exe" - WriteRegStr HKLM "${ND_UININSTALL_REG}" \ - "RegOwner" "Netdata Inc." - WriteRegStr HKLM "${ND_UININSTALL_REG}" \ - "RegCompany" "Netdata Inc." - WriteRegStr HKLM "${ND_UININSTALL_REG}" \ - "Publisher" "Netdata Inc." - WriteRegStr HKLM "${ND_UININSTALL_REG}" \ - "HelpLink" "https://learn.netdata.cloud/" - WriteRegStr HKLM "${ND_UININSTALL_REG}" \ - "URLInfoAbout" "https://www.netdata.cloud/" - WriteRegStr HKLM "${ND_UININSTALL_REG}" \ - "DisplayVersion" "${CURRVERSION}" - WriteRegStr HKLM "${ND_UININSTALL_REG}" \ - "VersionMajor" "${MAJORVERSION}" - WriteRegStr HKLM "${ND_UININSTALL_REG}" \ - "VersionMinor" "${MINORVERSION}" - - IfErrors 0 +2 - MessageBox MB_ICONEXCLAMATION|MB_OK "Unable to create an entry in the Control Panel!" IDOK end - - ClearErrors - ${GetSize} "$INSTDIR" "/S=0K" $0 $1 $2 - IntFmt $0 "0x%08X" $0 - WriteRegDWORD HKLM "${ND_UININSTALL_REG}" "EstimatedSize" "$0" - - IfErrors 0 +2 - MessageBox MB_ICONEXCLAMATION|MB_OK "Cannot estimate the installation size." IDOK end - end: -FunctionEnd - -Section "Install Netdata" - SetOutPath $INSTDIR - SetCompress off - - File /r "C:\msys64\opt\netdata\*.*" - - ClearErrors - nsExec::ExecToLog '$SYSDIR\sc.exe create Netdata binPath= "$INSTDIR\usr\bin\netdata.exe" start= delayed-auto' - pop $0 - ${If} $0 != 0 - DetailPrint "Warning: Failed to create Netdata service." - ${EndIf} - - ClearErrors - nsExec::ExecToLog '$SYSDIR\sc.exe description Netdata "Real-time system monitoring service"' - pop $0 - ${If} $0 != 0 - DetailPrint "Warning: Failed to add Netdata service description." - ${EndIf} - - ClearErrors - nsExec::ExecToLog '$SYSDIR\sc.exe start Netdata' - pop $0 - ${If} $0 != 0 - DetailPrint "Warning: Failed to start Netdata service." 
- ${EndIf} - - WriteUninstaller "$INSTDIR\Uninstall.exe" - - Call NetdataUninstallRegistry -SectionEnd - -Section "Uninstall" - ClearErrors - nsExec::ExecToLog '$SYSDIR\sc.exe stop Netdata' - pop $0 - ${If} $0 != 0 - DetailPrint "Warning: Failed to stop Netdata service." - ${EndIf} - - ClearErrors - nsExec::ExecToLog '$SYSDIR\sc.exe delete Netdata' - pop $0 - ${If} $0 != 0 - DetailPrint "Warning: Failed to delete Netdata service." - ${EndIf} - - RMDir /r "$INSTDIR" - - DeleteRegKey HKLM "Software\Microsoft\Windows\CurrentVersion\Uninstall\Netdata" -SectionEnd - diff --git a/packaging/windows/WINDOWS_INSTALLER.md b/packaging/windows/WINDOWS_INSTALLER.md new file mode 100644 index 00000000000000..1b8c6f6be23147 --- /dev/null +++ b/packaging/windows/WINDOWS_INSTALLER.md @@ -0,0 +1,50 @@ +# Netdata Windows Installer + +Netdata offers a convenient Windows installer for easy setup. This executable provides two distinct installation modes, outlined below. + +## Graphical User Interface (GUI) + +Double-clicking the installer initiates the setup process. Since Netdata adds a service to your system, you'll need to provide administrator privileges. +The installer will then guide you through these steps: + +1. **Welcome**: This screen provides a summary of the actions the installer will perform. +2. **License Agreements**: + - [Netdata Cloud UI License](/src/web/gui/v2/LICENSE.md): Review and accept the license terms to proceed. + - [GPLv3 License](/LICENSE): Read the GNU General Public License v3, which governs the Netdata software. +3. **Destination**: Choose the installation directory. By default, Netdata installs in `C:\Program Files\Netdata`. +4. **Installation**: The installer will copy the necessary files to the chosen directory. +5. **Claiming**: [Connecting](/src/claim/README.md) your Netdata Agent to your Netdata Cloud Space. Here's what you can configure: + - **Token**: The claiming token for your Netdata Cloud Space. 
+ - **Rooms**: Specify the Room IDs where you want your node to appear (comma-separated list). + - **Proxy**: Enter the address of a proxy server if required for communication with Netdata Cloud. + - **Insecure connection**: By default, Netdata verifies the server's certificate. Enabling this option bypasses verification (use only if necessary). + - **Open Terminal**: Select this option to launch the `MSYS2` terminal after installation completes. +6. **Finish**: The installation process is complete! + +## Silent Mode + +This section provides instructions for installing Netdata in silent mode, which is ideal for automated deployments. +Silent mode skips displaying license agreements, but requires explicitly accepting them using the `/A` option. + +**Available Options**: + +| Option | Description | +|-----------|--------------------------------------------------------------------------------------------------| +| `/S` | Enables silent mode installation. | +| `/A` | Accepts all Netdata licenses. This option is mandatory for silent installations. | +| `/D` | Specifies the desired installation directory (defaults to `C:\Program Files\Netdata`). | +| `/T` | Opens the `MSYS2` terminal after installation. | +| `/I` | Forces insecure connections, bypassing hostname verification (use only if absolutely necessary). | +| `/TOKEN=` | Sets the claiming token for your Netdata Cloud Space. | +| `/ROOMS=` | Comma-separated list of Room IDs where you want your node to appear. | +| `/PROXY=` | Sets the proxy server address if your network requires one. | + +**Example Usage** + +Connect your Agent to your Netdata Cloud Space with token `<YOUR_TOKEN>` and room `<YOUR_ROOM>`: + +```bash +netdata-installer.exe /S /A /TOKEN=<YOUR_TOKEN> /ROOMS=<YOUR_ROOM> +``` + +Replace `<YOUR_TOKEN>` and `<YOUR_ROOM>` with your actual Netdata Cloud Space claim token and room ID, respectively.
diff --git a/packaging/windows/installer.nsi b/packaging/windows/installer.nsi index 93a0444bdf9c61..3c070ebec8e695 100644 --- a/packaging/windows/installer.nsi +++ b/packaging/windows/installer.nsi @@ -29,6 +29,39 @@ Page Custom NetdataConfigPage NetdataConfigLeave !insertmacro MUI_LANGUAGE "English" +!define INSTALLERLOCKFILEGUID "f787d5ef-5c41-4dc0-a115-a1fb654fad1c" + +# https://nsis.sourceforge.io/Allow_only_one_installer_instance +!macro SingleInstanceFile + !if "${NSIS_PTR_SIZE}" > 4 + !include "Util.nsh" + !else ifndef IntPtrCmp + !define IntPtrCmp IntCmp + !endif + + !ifndef NSIS_PTR_SIZE & SYSTYPE_PTR + !define SYSTYPE_PTR i ; NSIS v2.x + !else + !define /ifndef SYSTYPE_PTR p ; NSIS v3.0+ + !endif + + !if "${NSIS_CHAR_SIZE}" < 2 + Push "$TEMP\${INSTALLERLOCKFILEGUID}.lock" + !else + Push "$APPDATA\${INSTALLERLOCKFILEGUID}.lock" + !endif + + System::Call 'KERNEL32::CreateFile(ts,i0x40000000,i0,${SYSTYPE_PTR}0,i4,i0x04000000,${SYSTYPE_PTR}0)${SYSTYPE_PTR}.r0' + ${IntPtrCmp} $0 -1 "" launch launch + System::Call 'kernel32::AttachConsole(i -1)i.r0' + ${If} $0 != 0 + System::Call 'kernel32::GetStdHandle(i -11)i.r0' + FileWrite $0 "The installer is already running.$\r$\n" + ${EndIf} + Quit + launch: +!macroend + var hStartMsys var startMsys @@ -40,10 +73,13 @@ var hProxy var proxy var hInsecure var insecure +var accepted var avoidClaim Function .onInit + !insertmacro SingleInstanceFile + nsExec::ExecToLog '$SYSDIR\sc.exe stop Netdata' pop $0 ${If} $0 == 0 @@ -54,6 +90,59 @@ Function .onInit StrCpy $startMsys ${BST_UNCHECKED} StrCpy $insecure ${BST_UNCHECKED} StrCpy $avoidClaim ${BST_UNCHECKED} + StrCpy $accepted ${BST_UNCHECKED} + + ${GetParameters} $R0 + ${GetOptions} $R0 "/s" $0 + IfErrors +2 0 + SetSilent silent + ClearErrors + + ${GetOptions} $R0 "/t" $0 + IfErrors +2 0 + StrCpy $startMsys ${BST_CHECKED} + ClearErrors + + ${GetOptions} $R0 "/i" $0 + IfErrors +2 0 + StrCpy $insecure ${BST_CHECKED} + ClearErrors + + ${GetOptions} $R0 "/a" $0 + IfErrors +2 0 + 
StrCpy $accepted ${BST_CHECKED} + ClearErrors + + ${GetOptions} $R0 "/token=" $0 + IfErrors +2 0 + StrCpy $cloudToken $0 + ClearErrors + + ${GetOptions} $R0 "/rooms=" $0 + IfErrors +2 0 + StrCpy $cloudRooms $0 + ClearErrors + + ${GetOptions} $R0 "/proxy=" $0 + IfErrors +2 0 + StrCpy $proxy $0 + ClearErrors + + IfSilent checklicense goahead + checklicense: + ${If} $accepted == ${BST_UNCHECKED} + System::Call 'kernel32::AttachConsole(i -1)i.r0' + ${If} $0 != 0 + System::Call 'kernel32::GetStdHandle(i -11)i.r0' + FileWrite $0 "You must accept the licenses (/A) to continue.$\r$\n" + ${EndIf} + Quit + ${EndIf} + goahead: +FunctionEnd + +Function un.onInit +!insertmacro SingleInstanceFile FunctionEnd Function NetdataConfigPage @@ -199,6 +288,41 @@ Section "Install Netdata" WriteUninstaller "$INSTDIR\Uninstall.exe" Call NetdataUninstallRegistry + + IfSilent runcmds goodbye + runcmds: + nsExec::ExecToLog '$SYSDIR\sc.exe start Netdata' + pop $0 + + System::Call 'kernel32::AttachConsole(i -1)i.r0' + ${If} $0 != 0 + System::Call 'kernel32::GetStdHandle(i -11)i.r0' + FileWrite $0 "Netdata installed with success.$\r$\n" + ${EndIf} + ${If} $startMsys == ${BST_CHECKED} + nsExec::ExecToLog '$INSTDIR\msys2.exe' + pop $0 + ${EndIf} + + StrLen $0 $cloudToken + StrLen $1 $cloudRooms + ${If} $0 == 0 + ${OrIf} $1 == 0 + Goto goodbye + ${EndIf} + + ${If} $0 == 135 + ${AndIf} $1 >= 36 + nsExec::ExecToLog '$INSTDIR\usr\bin\NetdataClaim.exe /T $cloudToken /R $cloudRooms /P $proxy /I $insecure' + pop $0 + ${Else} + System::Call 'kernel32::AttachConsole(i -1)i.r0' + ${If} $0 != 0 + System::Call 'kernel32::GetStdHandle(i -11)i.r0' + FileWrite $0 "Room(s) or Token invalid.$\r$\n" + ${EndIf} + ${EndIf} + goodbye: SectionEnd Section "Uninstall" From 723a0a8c7aaa804b5dd484ea44bd6f79a482e408 Mon Sep 17 00:00:00 2001 From: netdatabot Date: Wed, 2 Oct 2024 00:19:33 +0000 Subject: [PATCH 16/23] [ci skip] Update changelog and version for nightly build: v1.99.0-223-nightly. 
--- CHANGELOG.md | 9 ++++----- packaging/version | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index abf4536c76333c..4a96dfc021a1ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,12 +6,15 @@ **Merged pull requests:** +- go.d remove duplicate chart check in tests [\#18650](https://github.com/netdata/netdata/pull/18650) ([ilyam8](https://github.com/ilyam8)) - fixed freebsd cpu calculation [\#18648](https://github.com/netdata/netdata/pull/18648) ([ktsaou](https://github.com/ktsaou)) - Regenerate integrations.js [\#18647](https://github.com/netdata/netdata/pull/18647) ([netdatabot](https://github.com/netdatabot)) +- Use temporary file for commit date check. [\#18646](https://github.com/netdata/netdata/pull/18646) ([Ferroin](https://github.com/Ferroin)) - Update file names. [\#18638](https://github.com/netdata/netdata/pull/18638) ([vkalintiris](https://github.com/vkalintiris)) - Move plugins.d directory outside of collectors [\#18637](https://github.com/netdata/netdata/pull/18637) ([vkalintiris](https://github.com/vkalintiris)) - go.d/smartctl: fix exit status check in scan [\#18635](https://github.com/netdata/netdata/pull/18635) ([ilyam8](https://github.com/ilyam8)) - go.d pkg/socket: keep only one timeout option [\#18633](https://github.com/netdata/netdata/pull/18633) ([ilyam8](https://github.com/ilyam8)) +- Log agent start / stop timing events [\#18632](https://github.com/netdata/netdata/pull/18632) ([stelfrag](https://github.com/stelfrag)) - Regenerate integrations.js [\#18630](https://github.com/netdata/netdata/pull/18630) ([netdatabot](https://github.com/netdatabot)) - go.d/postgres: fix checkpoints query for postgres 17 [\#18629](https://github.com/netdata/netdata/pull/18629) ([ilyam8](https://github.com/ilyam8)) - go.d/ceph: fix leftovers after \#18582 [\#18628](https://github.com/netdata/netdata/pull/18628) ([ilyam8](https://github.com/ilyam8)) @@ -23,6 +26,7 @@ - Revert "Add ceph commands to ndsudo" 
[\#18620](https://github.com/netdata/netdata/pull/18620) ([ilyam8](https://github.com/ilyam8)) - go.d/hddtemp: connect and read [\#18619](https://github.com/netdata/netdata/pull/18619) ([ilyam8](https://github.com/ilyam8)) - go.d/uwsgi: don't write just connect and read [\#18618](https://github.com/netdata/netdata/pull/18618) ([ilyam8](https://github.com/ilyam8)) +- Windows Installer \(Silent mode\) [\#18613](https://github.com/netdata/netdata/pull/18613) ([thiagoftsm](https://github.com/thiagoftsm)) - POST Functions [\#18611](https://github.com/netdata/netdata/pull/18611) ([ktsaou](https://github.com/ktsaou)) - Correctly include Windows installer in release creation. [\#18609](https://github.com/netdata/netdata/pull/18609) ([Ferroin](https://github.com/Ferroin)) - feat: HW req for onprem installation. [\#18608](https://github.com/netdata/netdata/pull/18608) ([M4itee](https://github.com/M4itee)) @@ -417,11 +421,6 @@ - Bump github.com/gofrs/flock from 0.11.0 to 0.12.0 in /src/go [\#18077](https://github.com/netdata/netdata/pull/18077) ([dependabot[bot]](https://github.com/apps/dependabot)) - proc: collect ksm/swap/cma/zswap only when feature enabled [\#18076](https://github.com/netdata/netdata/pull/18076) ([ilyam8](https://github.com/ilyam8)) - health add alarm docker container down [\#18075](https://github.com/netdata/netdata/pull/18075) ([ilyam8](https://github.com/ilyam8)) -- go.d ipfs fix tests [\#18074](https://github.com/netdata/netdata/pull/18074) ([ilyam8](https://github.com/ilyam8)) -- Regenerate integrations.js [\#18073](https://github.com/netdata/netdata/pull/18073) ([netdatabot](https://github.com/netdatabot)) -- Port ipfs from python to Go [\#18070](https://github.com/netdata/netdata/pull/18070) ([Ancairon](https://github.com/Ancairon)) -- update golang version in netdata.spec [\#18069](https://github.com/netdata/netdata/pull/18069) ([ilyam8](https://github.com/ilyam8)) -- go.d set sensitive props to "password" widget 
[\#18068](https://github.com/netdata/netdata/pull/18068) ([ilyam8](https://github.com/ilyam8)) ## [v1.46.3](https://github.com/netdata/netdata/tree/v1.46.3) (2024-07-23) diff --git a/packaging/version b/packaging/version index 8d140b61ad9805..b41648f786959f 100644 --- a/packaging/version +++ b/packaging/version @@ -1 +1 @@ -v1.99.0-218-nightly +v1.99.0-223-nightly From c75e5140a9d191d1dab4ee0ab930a5e7242edaf1 Mon Sep 17 00:00:00 2001 From: Ilya Mashchenko Date: Wed, 2 Oct 2024 11:30:49 +0300 Subject: [PATCH 17/23] go.d sd fix sprig funcmap (#18658) --- src/go/plugin/go.d/agent/discovery/sd/pipeline/funcmap.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/go/plugin/go.d/agent/discovery/sd/pipeline/funcmap.go b/src/go/plugin/go.d/agent/discovery/sd/pipeline/funcmap.go index f94a5c24a63b8c..378e03c25aa446 100644 --- a/src/go/plugin/go.d/agent/discovery/sd/pipeline/funcmap.go +++ b/src/go/plugin/go.d/agent/discovery/sd/pipeline/funcmap.go @@ -14,7 +14,9 @@ import ( ) func newFuncMap() template.FuncMap { - custom := map[string]any{ + fm := sprig.TxtFuncMap() + + extra := map[string]any{ "match": funcMatchAny, "glob": func(value, pattern string, patterns ...string) bool { return funcMatchAny("glob", value, pattern, patterns...) 
@@ -25,9 +27,7 @@ func newFuncMap() template.FuncMap { }, } - fm := sprig.HermeticTxtFuncMap() - - for name, fn := range custom { + for name, fn := range extra { fm[name] = fn } From f3efa0f8705e6a705f946acdbb8176d997ceaee1 Mon Sep 17 00:00:00 2001 From: Fotis Voutsas Date: Wed, 2 Oct 2024 12:43:39 +0300 Subject: [PATCH 18/23] New wording about edit-config script in docs (#18639) --- docs/deployment-guides/deployment-strategies.md | 10 +++++----- .../raspberry-pi-anomaly-detection.md | 2 +- docs/netdata-agent/configuration/README.md | 12 +++++++----- src/collectors/charts.d.plugin/README.md | 2 +- src/collectors/ebpf.plugin/README.md | 6 +++--- src/collectors/profile.plugin/README.md | 2 +- src/collectors/statsd.plugin/README.md | 2 +- src/health/notifications/README.md | 2 +- src/ml/ml-configuration.md | 2 +- src/registry/README.md | 2 +- 10 files changed, 22 insertions(+), 20 deletions(-) diff --git a/docs/deployment-guides/deployment-strategies.md b/docs/deployment-guides/deployment-strategies.md index 017aaa0c28a6da..e23e73ae99b505 100644 --- a/docs/deployment-guides/deployment-strategies.md +++ b/docs/deployment-guides/deployment-strategies.md @@ -32,7 +32,7 @@ In this example, Machine Learning and Alerting are disabled for the Child, so th ##### netdata.conf -On the child node, edit `netdata.conf` by using the [edit-config](/docs/netdata-agent/configuration/README.md#edit-netdataconf) script and set the following parameters: +On the child node, edit `netdata.conf` by using the [edit-config](/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script and set the following parameters: ```yaml [db] @@ -63,7 +63,7 @@ On the child node, edit `netdata.conf` by using the [edit-config](/docs/netdata- ##### stream.conf -To edit `stream.conf`, use again the [edit-config](/docs/netdata-agent/configuration/README.md#edit-netdataconf) script and set the following parameters: +To edit `stream.conf`, use again the 
[edit-config](/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script and set the following parameters: ```yaml [stream] @@ -90,7 +90,7 @@ Requiring: ##### netdata.conf -On the Parent, edit `netdata.conf` by using the [edit-config](/docs/netdata-agent/configuration/README.md#edit-netdataconf) script and set the following parameters: +On the Parent, edit `netdata.conf` by using the [edit-config](/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script and set the following parameters: ```yaml [db] @@ -120,7 +120,7 @@ On the Parent, edit `netdata.conf` by using the [edit-config](/docs/netdata-agen ##### stream.conf -On the Parent node, edit `stream.conf` by using the [edit-config](/docs/netdata-agent/configuration/README.md#edit-netdataconf) script and set the following parameters: +On the Parent node, edit `stream.conf` by using the [edit-config](/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script and set the following parameters: ```yaml [API_KEY] @@ -132,7 +132,7 @@ On the Parent node, edit `stream.conf` by using the [edit-config](/docs/netdata- In order to setup active–active streaming between Parent 1 and Parent 2, Parent 1 needs to be instructed to stream data to Parent 2 and Parent 2 to stream data to Parent 1. The Child Agents need to be configured with the addresses of both Parent Agents. An Agent will only connect to one Parent at a time, falling back to the next upon failure. These examples use the same API key between Parent Agents and for connections for Child Agents. 
-On both Netdata Parent and all Child Agents, edit `stream.conf` by using the [edit-config](/docs/netdata-agent/configuration/README.md#edit-netdataconf) script: +On both Netdata Parent and all Child Agents, edit `stream.conf` by using the [edit-config](/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script: #### stream.conf on Parent 1 diff --git a/docs/developer-and-contributor-corner/raspberry-pi-anomaly-detection.md b/docs/developer-and-contributor-corner/raspberry-pi-anomaly-detection.md index 41cf007eb444e4..6c53e8f019b2a6 100644 --- a/docs/developer-and-contributor-corner/raspberry-pi-anomaly-detection.md +++ b/docs/developer-and-contributor-corner/raspberry-pi-anomaly-detection.md @@ -23,7 +23,7 @@ Read on to learn all the steps and enable unsupervised anomaly detection on your First make sure Netdata is using Python 3 when it runs Python-based data collectors. -Next, open `netdata.conf` using [`edit-config`](/docs/netdata-agent/configuration/README.md#edit-netdataconf) +Next, open `netdata.conf` using [`edit-config`](/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) from within the [Netdata config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). Scroll down to the `[plugin:python.d]` section to pass in the `-ppython3` command option. diff --git a/docs/netdata-agent/configuration/README.md b/docs/netdata-agent/configuration/README.md index 097fb93105560e..932253cb602cca 100644 --- a/docs/netdata-agent/configuration/README.md +++ b/docs/netdata-agent/configuration/README.md @@ -1,6 +1,6 @@ # Netdata Agent Configuration -The main Netdata agent configuration is `netdata.conf`. +The main Netdata Agent configuration is `netdata.conf`. ## The Netdata config directory @@ -10,12 +10,15 @@ few directories, and a shell script named `edit-config`. > Some operating systems will use `/opt/netdata/etc/netdata/` as the config directory. 
If you're not sure where yours > is, navigate to `http://NODE:19999/netdata.conf` in your browser, replacing `NODE` with the IP address or hostname of -> your node, and find the `# config directory = ` setting. The value listed is the config directory for your system. +> your node, and find the `# config directory =` setting. The value listed is the config directory for your system. All of Netdata's documentation assumes that your config directory is at `/etc/netdata`, and that you're running any scripts from inside that directory. +## Edit a configuration file using `edit-config` -## edit `netdata.conf` +We recommend the use of the `edit-config` script for configuration changes. + +It exists inside your config directory (read above) and helps manage and safely edit configuration files. To edit `netdata.conf`, run this on your terminal: @@ -28,7 +31,7 @@ Your editor will open. ## downloading `netdata.conf` -The running version of `netdata.conf` can be downloaded from a running Netdata agent, at this URL: +The running version of `netdata.conf` can be downloaded from a running Netdata Agent, at this URL: ``` http://agent-ip:19999/netdata.conf @@ -40,4 +43,3 @@ You can save and use this version, using these commands: cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata curl -ksSLo /tmp/netdata.conf.new http://localhost:19999/netdata.conf && sudo mv -i /tmp/netdata.conf.new netdata.conf ``` - diff --git a/src/collectors/charts.d.plugin/README.md b/src/collectors/charts.d.plugin/README.md index d6cd07bcbb0c2d..99a6417cdd40ea 100644 --- a/src/collectors/charts.d.plugin/README.md +++ b/src/collectors/charts.d.plugin/README.md @@ -21,7 +21,7 @@ By default, `charts.d.plugin` is not included as part of the install when using ## Configuration -`charts.d.plugin` itself can be [configured](/docs/netdata-agent/configuration/README.md#edit-netdataconf)using the configuration file `/etc/netdata/charts.d.conf`. This file is also a BASH script. 
+`charts.d.plugin` itself can be [configured](/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config)using the configuration file `/etc/netdata/charts.d.conf`. This file is also a BASH script. In this file, you can place statements like this: diff --git a/src/collectors/ebpf.plugin/README.md b/src/collectors/ebpf.plugin/README.md index e9243966b6a9d7..532d5be3596fe9 100644 --- a/src/collectors/ebpf.plugin/README.md +++ b/src/collectors/ebpf.plugin/README.md @@ -49,7 +49,7 @@ To enable or disable the entire eBPF collector: cd /etc/netdata ``` -2. Use the [`edit-config`](/docs/netdata-agent/configuration/README.md#edit-netdataconf) script to edit `netdata.conf`. +2. Use the [`edit-config`](/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script to edit `netdata.conf`. ```bash ./edit-config netdata.conf @@ -73,7 +73,7 @@ To edit the `ebpf.d.conf`: ```bash cd /etc/netdata ``` -2. Use the [`edit-config`](/docs/netdata-agent/configuration/README.md#edit-netdataconf) script to edit [`ebpf.d.conf`](https://github.com/netdata/netdata/blob/master/src/collectors/ebpf.plugin/ebpf.d.conf). +2. Use the [`edit-config`](/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script to edit [`ebpf.d.conf`](https://github.com/netdata/netdata/blob/master/src/collectors/ebpf.plugin/ebpf.d.conf). ```bash ./edit-config ebpf.d.conf @@ -276,7 +276,7 @@ To configure an eBPF thread: ```bash cd /etc/netdata ``` -2. Use the [`edit-config`](/docs/netdata-agent/configuration/README.md#edit-netdataconf) script to edit a thread configuration file. The following configuration files are available: +2. Use the [`edit-config`](/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script to edit a thread configuration file. The following configuration files are available: - `network.conf`: Configuration for the [`network` thread](#network-configuration). 
This config file overwrites the global options and also lets you specify which network the eBPF collector monitors. diff --git a/src/collectors/profile.plugin/README.md b/src/collectors/profile.plugin/README.md index 7e368120816f4a..8b500f590e0bd9 100644 --- a/src/collectors/profile.plugin/README.md +++ b/src/collectors/profile.plugin/README.md @@ -12,7 +12,7 @@ A user can specify: ## Configuration -Edit the `netdata.conf` configuration file using [`edit-config`](/docs/netdata-agent/configuration/README.md#edit-netdataconf) from the [Netdata config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory), which is typically at `/etc/netdata`. +Edit the `netdata.conf` configuration file using [`edit-config`](/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) from the [Netdata config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory), which is typically at `/etc/netdata`. Scroll down to the `[plugin:profile]` section to find the available options: diff --git a/src/collectors/statsd.plugin/README.md b/src/collectors/statsd.plugin/README.md index 4162a096a84f3a..d1c1d71388aa36 100644 --- a/src/collectors/statsd.plugin/README.md +++ b/src/collectors/statsd.plugin/README.md @@ -785,7 +785,7 @@ visualize all the available operations. Start by creating a new configuration file under the `statsd.d/` folder in the [Netdata config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). -Use [`edit-config`](/docs/netdata-agent/configuration/README.md#edit-netdataconf) +Use [`edit-config`](/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) to create a new file called `k6.conf`. 
```bash= diff --git a/src/health/notifications/README.md b/src/health/notifications/README.md index 5a2b032a3e7749..8c539c2a9def26 100644 --- a/src/health/notifications/README.md +++ b/src/health/notifications/README.md @@ -10,7 +10,7 @@ The default script is `alarm-notify.sh`. > > This file mentions editing configuration files. > -> - To edit configuration files in a safe way, we provide the [`edit config` script](/docs/netdata-agent/configuration/README.md#edit-netdataconf)located in your [Netdata config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory) (typically is `/etc/netdata`) that creates the proper file and opens it in an editor automatically. +> - To edit configuration files in a safe way, we provide the [`edit config` script](/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config)located in your [Netdata config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory) (typically is `/etc/netdata`) that creates the proper file and opens it in an editor automatically. > Note that to run the script you need to be inside your Netdata config directory. > > - Please also note that after most configuration changes you will need to [restart the Agent](/packaging/installer/README.md#maintaining-a-netdata-agent-installation) for the changes to take effect. diff --git a/src/ml/ml-configuration.md b/src/ml/ml-configuration.md index 86a33c5d8f8106..a06a186d8198ec 100644 --- a/src/ml/ml-configuration.md +++ b/src/ml/ml-configuration.md @@ -4,7 +4,7 @@ Netdata's [Machine Learning](/src/ml/README.md) capabilities are enabled by defa To enable or disable Machine Learning capabilities on a node: -1. [Edit `netdata.conf`](/docs/netdata-agent/configuration/README.md#edit-netdataconf) +1. [Edit `netdata.conf`](/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) 2. 
In the `[ml]` section, set `enabled = yes` to enable or `enabled = no` to disable 3. [Restart Netdata](/docs/netdata-agent/start-stop-restart.md) diff --git a/src/registry/README.md b/src/registry/README.md index d976528c7bbe3a..badbbe3e9ebc09 100644 --- a/src/registry/README.md +++ b/src/registry/README.md @@ -183,7 +183,7 @@ Both files are machine readable text files. Beginning with `v1.30.0`, when the Netdata Agent's web server processes a request, it delivers the `SameSite=none` and `Secure` cookies. If you have problems accessing the local Agent dashboard or Netdata Cloud, disable these -cookies by [editing `netdata.conf`](/docs/netdata-agent/configuration/README.md#edit-netdataconf): +cookies by [editing `netdata.conf`](/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config): ```conf [registry] From 2b6b10573f11ad413e8def4130b89ca1b47bf2a6 Mon Sep 17 00:00:00 2001 From: Ilya Mashchenko Date: Wed, 2 Oct 2024 13:19:19 +0300 Subject: [PATCH 19/23] bump go toolchain v1.22.8 (#18659) --- packaging/check-for-go-toolchain.sh | 52 ++++++++++++++--------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/packaging/check-for-go-toolchain.sh b/packaging/check-for-go-toolchain.sh index fe5dabfadfabb1..0c8ceec2eeaa7e 100644 --- a/packaging/check-for-go-toolchain.sh +++ b/packaging/check-for-go-toolchain.sh @@ -15,7 +15,7 @@ GOLANG_MIN_MAJOR_VERSION='1' GOLANG_MIN_MINOR_VERSION='22' -GOLANG_MIN_PATCH_VERSION='0' +GOLANG_MIN_PATCH_VERSION='8' GOLANG_MIN_VERSION="${GOLANG_MIN_MAJOR_VERSION}.${GOLANG_MIN_MINOR_VERSION}.${GOLANG_MIN_PATCH_VERSION}" GOLANG_TEMP_PATH="${TMPDIR}/go-toolchain" @@ -53,32 +53,32 @@ install_go_toolchain() { Linux) case "$(uname -m)" in i?86) - GOLANG_ARCHIVE_URL="https://go.dev/dl/go1.22.0.linux-386.tar.gz" - GOLANG_ARCHIVE_CHECKSUM="1e209c4abde069067ac9afb341c8003db6a210f8173c77777f02d3a524313da3" + GOLANG_ARCHIVE_URL="https://go.dev/dl/go1.22.8.linux-386.tar.gz" + 
GOLANG_ARCHIVE_CHECKSUM="0c8e9f824bf443f51e06ac017b9ae402ea066d761b309d880dbb2ca5793db8a2" ;; x86_64) - GOLANG_ARCHIVE_URL="https://go.dev/dl/go1.22.0.linux-amd64.tar.gz" - GOLANG_ARCHIVE_CHECKSUM="f6c8a87aa03b92c4b0bf3d558e28ea03006eb29db78917daec5cfb6ec1046265" + GOLANG_ARCHIVE_URL="https://go.dev/dl/go1.22.8.linux-amd64.tar.gz" + GOLANG_ARCHIVE_CHECKSUM="5f467d29fc67c7ae6468cb6ad5b047a274bae8180cac5e0b7ddbfeba3e47e18f" ;; aarch64) - GOLANG_ARCHIVE_URL="https://go.dev/dl/go1.22.0.linux-arm64.tar.gz" - GOLANG_ARCHIVE_CHECKSUM="6a63fef0e050146f275bf02a0896badfe77c11b6f05499bb647e7bd613a45a10" + GOLANG_ARCHIVE_URL="https://go.dev/dl/go1.22.8.linux-arm64.tar.gz" + GOLANG_ARCHIVE_CHECKSUM="5c616b32dab04bb8c4c8700478381daea0174dc70083e4026321163879278a4a" ;; armv*) - GOLANG_ARCHIVE_URL="https://go.dev/dl/go1.22.0.linux-armv6l.tar.gz" - GOLANG_ARCHIVE_CHECKSUM="0525f92f79df7ed5877147bce7b955f159f3962711b69faac66bc7121d36dcc4" + GOLANG_ARCHIVE_URL="https://go.dev/dl/go1.22.8.linux-armv6l.tar.gz" + GOLANG_ARCHIVE_CHECKSUM="5191e87a51a85d88edddc028ab30dfbfa2d7c37cf35d536655e7a063bfb2c9d2" ;; ppc64le) - GOLANG_ARCHIVE_URL="https://go.dev/dl/go1.22.0.linux-ppc64le.tar.gz" - GOLANG_ARCHIVE_CHECKSUM="0e57f421df9449066f00155ce98a5be93744b3d81b00ee4c2c9b511be2a31d93" + GOLANG_ARCHIVE_URL="https://go.dev/dl/go1.22.8.linux-ppc64le.tar.gz" + GOLANG_ARCHIVE_CHECKSUM="c546f27866510bf8e54e86fe6f58c705af0e894341e5572c91f197a734152c27" ;; riscv64) - GOLANG_ARCHIVE_URL="https://go.dev/dl/go1.22.0.linux-riscv64.tar.gz" - GOLANG_ARCHIVE_CHECKSUM="afe9cedcdbd6fdff27c57efd30aa5ce0f666f471fed5fa96cd4fb38d6b577086" + GOLANG_ARCHIVE_URL="https://go.dev/dl/go1.22.8.linux-riscv64.tar.gz" + GOLANG_ARCHIVE_CHECKSUM="f53174ee946b206afe66e043646a6f37af9375d5a9ce420c0f974790508f9e39" ;; s390x) - GOLANG_ARCHIVE_URL="https://go.dev/dl/go1.22.0.linux-s390x.tar.gz" - GOLANG_ARCHIVE_CHECKSUM="2e546a3583ba7bd3988f8f476245698f6a93dfa9fe206a8ca8f85c1ceecb2446" + 
GOLANG_ARCHIVE_URL="https://go.dev/dl/go1.22.8.linux-s390x.tar.gz" + GOLANG_ARCHIVE_CHECKSUM="fabb3adc241474e28ae151a00e1421983deb35184d31cc76e90025b1b389f6bf" ;; *) GOLANG_FAILURE_REASON="Linux $(uname -m) platform is not supported out-of-box by Go, you must install a toolchain for it yourself." @@ -89,24 +89,24 @@ install_go_toolchain() { FreeBSD) case "$(uname -m)" in 386) - GOLANG_ARCHIVE_URL="https://go.dev/dl/go1.22.0.freebsd-386.tar.gz" - GOLANG_ARCHIVE_CHECKSUM="b8065da37783e8b9e7086365a54d74537e832c92311b61101a66989ab2458d8e" + GOLANG_ARCHIVE_URL="https://go.dev/dl/go1.22.8.freebsd-386.tar.gz" + GOLANG_ARCHIVE_CHECKSUM="854cffbfb089438397442be4a0c64239da50be4ed037606ea00ed8d86eb89514" ;; amd64) - GOLANG_ARCHIVE_URL="https://go.dev/dl/go1.22.0.freebsd-amd64.tar.gz" - GOLANG_ARCHIVE_CHECKSUM="50f421c7f217083ac94aab1e09400cb9c2fea7d337679ec11f1638a11460da30" + GOLANG_ARCHIVE_URL="https://go.dev/dl/go1.22.8.freebsd-amd64.tar.gz" + GOLANG_ARCHIVE_CHECKSUM="d7dfa0b309d9ef9f63ad07c63300982ce3e658d7cbac20b031bd31e91afcf209" ;; arm) - GOLANG_ARCHIVE_URL="https://go.dev/dl/go1.22.0.freebsd-arm.tar.gz" - GOLANG_ARCHIVE_CHECKSUM="c9c8b305f90903536f4981bad9f029828c2483b3216ca1783777344fbe603f2d" + GOLANG_ARCHIVE_URL="https://go.dev/dl/go1.22.8.freebsd-arm.tar.gz" + GOLANG_ARCHIVE_CHECKSUM="5d532d05082524748f24948f3028c7a21e1804130ffd624bce4a3d0bee60ce39" ;; arm64) - GOLANG_ARCHIVE_URL="https://go.dev/dl/go1.22.0.freebsd-arm64.tar.gz" - GOLANG_ARCHIVE_CHECKSUM="e23385e5c640787fa02cd58f2301ea09e162c4d99f8ca9fa6d52766f428a933d" + GOLANG_ARCHIVE_URL="https://go.dev/dl/go1.22.8.freebsd-arm64.tar.gz" + GOLANG_ARCHIVE_CHECKSUM="f7d2664896ad6c773eafbab0748497bec62ff57beb4e25fe6dea12c443d05639" ;; riscv64) - GOLANG_ARCHIVE_URL="https://go.dev/dl/go1.22.0.freebsd-riscv64.tar.gz" - GOLANG_ARCHIVE_CHECKSUM="c8f94d1de6024546194d58e7b9370dc7ea06176aad94a675b0062c25c40cb645" + GOLANG_ARCHIVE_URL="https://go.dev/dl/go1.22.8.freebsd-riscv64.tar.gz" + 
GOLANG_ARCHIVE_CHECKSUM="ef7d2dbf341d8a8f2a15f2841216ef30329b1f5f301047bd256317480b22a033" ;; *) GOLANG_FAILURE_REASON="FreeBSD $(uname -m) platform is not supported out-of-box by Go, you must install a toolchain for it yourself." @@ -120,7 +120,7 @@ install_go_toolchain() { ;; esac - if [ -d '/usr/local/go' ]; then + if [ -d '/usr/local/go' ]; then if [ -f '/usr/local/go/.installed-by-netdata' ]; then rm -rf /usr/local/go else From 587e8360193c26d0645e7987764d02e13e016d95 Mon Sep 17 00:00:00 2001 From: Costa Tsaousis Date: Wed, 2 Oct 2024 18:12:41 +0300 Subject: [PATCH 20/23] apps.plugin improvements (#18652) * apps.plugin now supports simple patterns when an asterisk is in the middle of a match; expanded kernel threads matching to group them into meaningful entities * removed cli tools * systemd merged * apps.plugin now has the option to print the tree with the target assignment * apps.plugin now extracts the full comm name from the cmdline * optimizations * updated windows comm handling * get the full command line on windows * extract service names for svchost.exe processes * get service names from SCM * Update src/collectors/apps.plugin/README.md Co-authored-by: Fotis Voutsas * Update src/collectors/apps.plugin/README.md Co-authored-by: Fotis Voutsas * Update src/collectors/apps.plugin/README.md Co-authored-by: Fotis Voutsas * Update src/collectors/apps.plugin/README.md Co-authored-by: Fotis Voutsas * Update src/collectors/apps.plugin/README.md Co-authored-by: Fotis Voutsas * Update src/collectors/apps.plugin/README.md Co-authored-by: Fotis Voutsas * Update src/collectors/apps.plugin/README.md Co-authored-by: Fotis Voutsas * Update src/collectors/apps.plugin/README.md Co-authored-by: Fotis Voutsas * Update src/collectors/apps.plugin/README.md Co-authored-by: Fotis Voutsas * Update src/collectors/apps.plugin/README.md Co-authored-by: Fotis Voutsas * fix compilation on freebsd and macos * windows priveleges * add missing opening quote on windows spawn server * 
fix alerts notifications infinite loop when alarm-notify.sh cannot be executed --------- Co-authored-by: Fotis Voutsas --- CMakeLists.txt | 4 +- src/collectors/apps.plugin/README.md | 65 ++-- .../apps.plugin/apps_aggregations.c | 87 ++++-- src/collectors/apps.plugin/apps_groups.conf | 182 +++++++++-- src/collectors/apps.plugin/apps_os_freebsd.c | 2 +- src/collectors/apps.plugin/apps_os_linux.c | 4 +- src/collectors/apps.plugin/apps_os_macos.c | 2 +- src/collectors/apps.plugin/apps_os_windows.c | 289 +++++++++++++----- .../apps.plugin/apps_os_windows_nt.c | 44 +++ src/collectors/apps.plugin/apps_pid.c | 65 +++- src/collectors/apps.plugin/apps_plugin.c | 91 +++++- src/collectors/apps.plugin/apps_plugin.h | 29 +- src/collectors/apps.plugin/apps_targets.c | 150 ++++----- .../windows-events.plugin/windows-events.c | 9 + src/health/health_notifications.c | 35 ++- src/libnetdata/os/os-windows-wrappers.c | 38 +++ src/libnetdata/os/os-windows-wrappers.h | 2 + .../spawn_server/spawn_server_windows.c | 4 +- 18 files changed, 802 insertions(+), 300 deletions(-) create mode 100644 src/collectors/apps.plugin/apps_os_windows_nt.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 9e83c96b9908eb..f3d41c1f9eadfd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1895,12 +1895,14 @@ if(ENABLE_PLUGIN_APPS) src/collectors/apps.plugin/apps_os_macos.c src/collectors/apps.plugin/apps_os_windows.c src/collectors/apps.plugin/apps_incremental_collection.c + src/collectors/apps.plugin/apps_os_windows_nt.c ) add_executable(apps.plugin ${APPS_PLUGIN_FILES}) target_link_libraries(apps.plugin libnetdata ${CAP_LIBRARIES} - "$<$:Version>") + "$<$:Version>" + "$<$:ntdll>") target_include_directories(apps.plugin PRIVATE ${CAP_INCLUDE_DIRS}) target_compile_options(apps.plugin PRIVATE ${CAP_CFLAGS_OTHER}) diff --git a/src/collectors/apps.plugin/README.md b/src/collectors/apps.plugin/README.md index 7960cd81017121..0ed4c68fdf3406 100644 --- a/src/collectors/apps.plugin/README.md +++ 
b/src/collectors/apps.plugin/README.md @@ -132,6 +132,26 @@ its CPU resources will be cut in half, and data collection will be once every 2 The configuration file is `/etc/netdata/apps_groups.conf`. You can edit this file using our [`edit-config`](docs/netdata-agent/configuration/README.md) script. +### Configuring process managers + +`apps.plugin` needs to know the common process managers, meaning the names of the processes +which spawn other processes. Process managers are used so that `apps.plugin` will automatically +consider all their sub-processes important to monitor. + +Process managers are configured in `apps_groups.conf` with the prefix `managers:`, like this: + +``` +managers: process1 process2 process3 +``` + +Multiple lines may exist, all starting with `managers:`. + +The process names given here should be exactly as the operating system sets them. In Linux these +process names are limited to 15 characters. Usually the command `ps -e` or `cat /proc/{PID}/stat` +states the names needed here. + +### Configuring process groups and renaming processes + The configuration file works accepts multiple lines, each having this format: ```txt @@ -140,48 +160,39 @@ group: process1 process2 ... Each group can be given multiple times, to add more processes to it. -For the **Applications** section, only groups configured in this file are reported. -All other processes will be reported as `other`. - -For each process given, its whole process tree will be grouped, not just the process matched. -The plugin will include both parents and children. If including the parents into the group is -undesirable, the line `other: *` should be appended to the `apps_groups.conf`. +For each process given, all of its sub-processes will be grouped, not just the matched process. 
The process names are the ones returned by: -- `ps -e` or `cat /proc/PID/stat` -- in case of substring mode (see below): `/proc/PID/cmdline` +- **comm**: `ps -e` or `cat /proc/{PID}/stat` +- **cmdline**: in case of substring mode (see below): `/proc/{PID}/cmdline` + +On Linux **comm** is limited to just a few characters. `apps.plugin` attempts to find the entire +**comm** name by looking for it at the **cmdline**. When this is successful, the entire process name +is available, otherwise the shortened one is used. To add process names with spaces, enclose them in quotes (single or double) example: `'Plex Media Serv'` or `"my other process"`. -You can add an asterisk `*` at the beginning and/or the end of a process: +You can add asterisks (`*`) to provide a pattern: -- `*name` _suffix_ mode: will search for processes ending with `name` (at `/proc/PID/stat`) -- `name*` _prefix_ mode: will search for processes beginning with `name` (at `/proc/PID/stat`) -- `*name*` _substring_ mode: will search for `name` in the whole command line (at `/proc/PID/cmdline`) +- `*name` _suffix_ mode: will match a **comm** ending with `name`. +- `name*` _prefix_ mode: will match a **comm** beginning with `name`. +- `*name*` _substring_ mode: will search for `name` in **cmdline**. -If you enter even just one _name_ (substring), `apps.plugin` will process -`/proc/PID/cmdline` for all processes (of course only once per process: when they are first seen). +Asterisks may appear in the middle of `name` (like `na*me`), without affecting what is being +matched (**comm** or **cmdline**). To add processes with single quotes, enclose them in double quotes: `"process with this ' single quote"` To add processes with double quotes, enclose them in single quotes: `'process with this " double quote'` -If a group or process name starts with a `-`, the dimension will be hidden from the chart (cpu chart only). 
- -If a process starts with a `+`, debugging will be enabled for it (debugging produces a lot of output - do not enable it in production systems). - -You can add any number of groups. Only the ones found running will affect the charts generated. -However, producing charts with hundreds of dimensions may slow down your web browser. - -The order of the entries in this list is important: the first that matches a process is used, so put important -ones at the top. Processes not matched by any row, will inherit it from their parents or children. - -The order also controls the order of the dimensions on the generated charts (although applications started -after apps.plugin is started, will be appended to the existing list of dimensions the `netdata` daemon maintains). +The order of the entries in this list is important: the first one that matches a process is used, so follow a top-down hierarchy. +Processes not matched by any row, will inherit it from their parents. -There are a few command line options you can pass to `apps.plugin`. The list of available options can be acquired with the `--help` flag. The options can be set in the `netdata.conf` file. For example, to disable user and user group charts you should set +There are a few command line options you can pass to `apps.plugin`. The list of available +options can be acquired with the `--help` flag. The options can be set in the `netdata.conf` using the [`edit-config` script](/docs/netdata-agent/configuration/README.md). 
+For example, to disable user and user group charts you would set: ``` [plugin:apps] diff --git a/src/collectors/apps.plugin/apps_aggregations.c b/src/collectors/apps.plugin/apps_aggregations.c index 8c7ebb68997ad3..289d874b975733 100644 --- a/src/collectors/apps.plugin/apps_aggregations.c +++ b/src/collectors/apps.plugin/apps_aggregations.c @@ -74,7 +74,7 @@ static inline void aggregate_pid_on_target(struct target *w, struct pid_stat *p, if(!w->uptime_min || p->values[PDF_UPTIME] < w->uptime_min) w->uptime_min = p->values[PDF_UPTIME]; if(!w->uptime_max || w->uptime_max < p->values[PDF_UPTIME]) w->uptime_max = p->values[PDF_UPTIME]; - if(unlikely(debug_enabled || w->debug_enabled)) { + if(unlikely(debug_enabled)) { struct pid_on_target *pid_on_target = mallocz(sizeof(struct pid_on_target)); pid_on_target->pid = p->pid; pid_on_target->next = w->root_pid; @@ -110,27 +110,61 @@ static inline void cleanup_exited_pids(void) { } } -static struct target *get_app_group_target_for_pid(struct pid_stat *p) { +static struct target *matched_apps_groups_target(struct pid_stat *p, struct target *w) { + if(is_process_manager(p)) + return NULL; + + p->matched_by_config = true; + return w->target ? w->target : w; +} + +static struct target *get_apps_groups_target_for_pid(struct pid_stat *p) { targets_assignment_counter++; for(struct target *w = apps_groups_root_target; w ; w = w->next) { if(w->type != TARGET_TYPE_APP_GROUP) continue; - // find it - 4 cases: - // 1. the target is not a pattern - // 2. the target has the prefix - // 3. the target has the suffix - // 4. 
the target is something inside cmdline - - if(unlikely(( (!w->starts_with && !w->ends_with && w->compare == p->comm) - || (w->starts_with && !w->ends_with && string_starts_with_string(p->comm, w->compare)) - || (!w->starts_with && w->ends_with && string_ends_with_string(p->comm, w->compare)) - || (proc_pid_cmdline_is_needed && w->starts_with && w->ends_with && strstr(pid_stat_cmdline(p), string2str(w->compare))) - ))) { - - p->matched_by_config = true; - if(w->target) return w->target; - else return w; + if(!w->starts_with && !w->ends_with) { + if(w->ag.pattern) { + if(simple_pattern_matches_string(w->ag.pattern, p->comm)) + return matched_apps_groups_target(p, w); + } + else { + if(w->ag.compare == p->comm || w->ag.compare == p->comm_orig) + return matched_apps_groups_target(p, w); + } + } + else if(w->starts_with && !w->ends_with) { + if(w->ag.pattern) { + if(simple_pattern_matches_string(w->ag.pattern, p->comm)) + return matched_apps_groups_target(p, w); + } + else { + if(string_starts_with_string(p->comm, w->ag.compare) || + (p->comm != p->comm_orig && string_starts_with_string(p->comm, w->ag.compare))) + return matched_apps_groups_target(p, w); + } + } + else if(!w->starts_with && w->ends_with) { + if(w->ag.pattern) { + if(simple_pattern_matches_string(w->ag.pattern, p->comm)) + return matched_apps_groups_target(p, w); + } + else { + if(string_ends_with_string(p->comm, w->ag.compare) || + (p->comm != p->comm_orig && string_ends_with_string(p->comm, w->ag.compare))) + return matched_apps_groups_target(p, w); + } + } + else if(w->starts_with && w->ends_with && p->cmdline) { + if(w->ag.pattern) { + if(simple_pattern_matches_string(w->ag.pattern, p->cmdline)) + return matched_apps_groups_target(p, w); + } + else { + if(strstr(string2str(p->cmdline), string2str(w->ag.compare))) + return matched_apps_groups_target(p, w); + } } } @@ -141,19 +175,23 @@ static void assign_a_target_to_all_processes(void) { // assign targets from app_groups.conf for(struct pid_stat *p = 
root_of_pids(); p ; p = p->next) { if(!p->target) - p->target = get_app_group_target_for_pid(p); + p->target = get_apps_groups_target_for_pid(p); } // assign targets from their parents, if they have for(struct pid_stat *p = root_of_pids(); p ; p = p->next) { if(!p->target) { - for(struct pid_stat *pp = p->parent ; pp ; pp = pp->parent) { - if(pp->target) { - if(pp->matched_by_config) { - // we are only interested about app_groups.conf matches - p->target = pp->target; + if(!p->is_manager) { + for (struct pid_stat *pp = p->parent; pp; pp = pp->parent) { + if(pp->is_manager) break; + + if (pp->target) { + if (pp->matched_by_config) { + // we are only interested about app_groups.conf matches + p->target = pp->target; + } + break; } - break; } } @@ -180,6 +218,7 @@ void aggregate_processes_to_targets(void) { // this has to be done, before the cleanup struct target *w = NULL, *o = NULL; + (void)w; (void)o; // concentrate everything on the targets for(struct pid_stat *p = root_of_pids(); p ; p = p->next) { diff --git a/src/collectors/apps.plugin/apps_groups.conf b/src/collectors/apps.plugin/apps_groups.conf index df01c4b4589e25..6b446361c452e6 100644 --- a/src/collectors/apps.plugin/apps_groups.conf +++ b/src/collectors/apps.plugin/apps_groups.conf @@ -4,19 +4,21 @@ ## Documentation at: ## https://github.com/netdata/netdata/blob/master/src/collectors/apps.plugin/README.md ## -## The list of process managers can be configured here (uncomment and edit): +## Subprocesses of process managers are monitored. 
+## (uncomment to edit - the default is also hardcoded into the plugin) -## Linux -#managers: init systemd containerd-shim dumb-init gnome-shell docker-init +## Linux process managers +#managers: init systemd containerd-shim-runc-v2 dumb-init gnome-shell docker-init +#managers: openrc-run.sh crond plasmashell xfwm4 -## FreeBSD +## FreeBSD process managers #managers: init -## MacOS +## MacOS process managers #managers: launchd -## Windows -#managers: System services wininit +## Windows process managers +#managers: wininit services explorer System ## ----------------------------------------------------------------------------- ## Processes of interest @@ -26,23 +28,23 @@ netdata: netdata ## netdata known plugins ## plugins not defined here will be accumulated into netdata, above apps.plugin: *apps.plugin* -freeipmi.plugin: *freeipmi.plugin* -nfacct.plugin: *nfacct.plugin* +go.d.plugin: *go.d.plugin* +systemd-journal.plugin: *systemd-journal.plugin* +network-viewer.plugin: *network-viewer.plugin* +windows-events.plugin: *windows-events.plugin* cups.plugin: *cups.plugin* -xenstat.plugin: *xenstat.plugin* perf.plugin: *perf.plugin* +nfacct.plugin: *nfacct.plugin* +xenstat.plugin: *xenstat.plugin* +freeipmi.plugin: *freeipmi.plugin* charts.d.plugin: *charts.d.plugin* python.d.plugin: *python.d.plugin* -systemd-journal.plugin: *systemd-journal.plugin* -network-viewer.plugin: *network-viewer.plugin* -windows-events.plugin: *windows-events.plugin* -tc-qos-helper: *tc-qos-helper.sh* -fping: fping -ioping: ioping -go.d.plugin: *go.d.plugin* slabinfo.plugin: *slabinfo.plugin* ebpf.plugin: *ebpf.plugin* debugfs.plugin: *debugfs.plugin* +tc-qos-helper: *tc-qos-helper.sh* +fping: fping +ioping: ioping ## agent-service-discovery agent_sd: agent_sd @@ -65,32 +67,26 @@ azure: mdsd *waagent* *omiserver* *omiagent* hv_kvp_daemon hv_vss_daemon *auoms* datadog: *datadog* newrelic: newrelic* google-agent: *google_guest_agent* *google_osconfig_agent* -ceph: ceph-* ceph_* radosgw* rbd-* 
cephfs-* osdmaptool crushtool -samba: smbd nmbd winbindd ctdbd ctdb-* ctdb_* -nfs: rpcbind rpc.* nfs* -zfs: spl_* z_* txg_* zil_* arc_* l2arc* -iscsi: iscsid iscsi_eh -afp: netatalk afpd cnid_dbd cnid_metad aws-s3: '*aws s3*' s3cmd s5cmd proxmox-ve: pve* spiceproxy libvirt: virtlogd virtqemud virtstoraged virtnetworkd virtlockd virtinterfaced libvirt: virtnodedevd virtproxyd virtsecretd libvirtd guest-agent: qemu-ga spice-vdagent cloud-init* -dhcp: *dhcp* dhclient +dhcp: dhcp* dhclient build: cc1 cc1plus as gcc* cppcheck ld make cmake automake autoconf autoreconf build: cargo rustc bazel buck git gdb valgrind* rpmbuild dpkg-buildpackage -packagemanager: apt* dpkg* dselect dnf yum rpm zypp* yast* pacman xbps* swupd* emerge* -packagemanager: packagekitd pkgin pkg apk snapd slackpkg slapt-get +packagemanager: apt* dpkg* dselect dnf yum rpm zypp* yast* pacman xbps* swupd* +packagemanager: packagekitd pkgin pkg apk snapd slackpkg slapt-get emerge* clam: clam* *clam backup: rsync lsyncd bacula* borg rclone cron: cron* atd anacron *systemd-cron* incrond ups: upsmon upsd */nut/* apcupsd -audio: pulse* pipewire wireplumber jack* rabbitmq: *rabbitmq* sidekiq: *sidekiq* erlang: beam.smp +postfix: *postfix* ## ----------------------------------------------------------------------------- ## java applications @@ -117,12 +113,134 @@ kafka: *kafka.Kafka* ## ----------------------------------------------------------------------------- ## Kernel / System +## The following are interesting kernel threads and related processes to +## monitor individually, mainly for their CPU utilization. + +## These kernel threads switch tasks all the time, so they should never be +## categorized as anything specific. +kernel: kworker/* +## Kernel Samepage Merging (KSM) daemon that looks for identical memory pages +## across processes and merges them to save memory. ksmd: ksmd -khugepaged: khugepaged + +## Handles migration of processes between CPU cores to balance load. 
+kmigration: migration/* + +## Manages memory compaction, moving memory pages around to reduce +## fragmentation. +kcompactd: kcompactd* + +## Responsible for freeing up memory by swapping pages to disk when needed. +kswapd: kswapd* + +## DAMON is a mechanism designed to efficiently monitor the memory access +## patterns of running processes or the system itself. kdamond: kdamond -kswapd: kswapd -zswap: zswap -kcompactd: kcompactd -ipvs: ipvs_* + +## Manages ballooning in virtualized environments. +vballoon: vballoon* + +## virtio - Handles I/O (storage and network) on virtual machines. +kvirtio: virtio-* vhost-* + +## Layer 4 (transport layer) load balancing +ipvs: ipvsd ipvs_* ip_vs_* + +## Hugepages +## Scans memory regions and tries to promote regular-sized pages (4KB) into +## hugepages (2MB) where possible. Merge smaller contiguous 4KB pages into 2MB +## pages. Hugepages also use: kswapd, kcompactd, and migration. +khugepaged: khugepaged + +## Note about zswap: +## zswap does not introduce its own dedicated kernel threads. Instead, it +## operates within the existing memory management and swapping framework of the +## kernel: +## - kswapd: swaps pages in/out of memory, using compression in the process. +## - kcompactd: compacts memory when pages are compressed or moved around. + +## ----------------------------------------------------------------------------- +## Block Devices + +## Handles deferred block I/O operations for block devices.
+kblockd: kblockd + +## Device Mapper (DM) +device-mapper: kcopyd/* kcryptd/* kdmflush/* dm_bufio_cache +device-mapper: raid1/* raid5/* raid10/* multipathd bioset/* + +## Software RAID (MD) +md-raid: md*_raid* md*_resync md*_reshape md*_recovery md_thread +md-raid: flush_md* raid*_sync + +## iSCSI +iscsi: iscsid iscsiadm iscsi_eh/* iscsi_xmit/* iscsi_ttx/* iscsi_rx/* iscsi_trx/* + +## SCSI +scsi: scsi_eh/* scsi_tmf/* scsi_wq/* + +## BCACHE +bcache: bcache* bch_btree_io bch_journal + +## SAS +sas: sas_task/* mpt* + +## Fibre Channel (FC) +fc: fc_transport qla2xxx* + +## loop devices +loop: loop* flush-loop* + +## ----------------------------------------------------------------------------- +## Filesystems + +## Ext4 +ext4: ext4-* jbd2/* + +## XFS +xfs: xfs* + +## BTRFS btrfs: btrfs* + +## NFS +nfs: rpcbind rpc.* nfs* rpciod + +## ZFS +zfs: spl_* z_* txg_* zil_* arc_* l2arc* zfs* zed zdb zpool* + +## CEPH +ceph: ceph-* ceph_* radosgw* rbd-* cephfs-* +ceph: ceph cephadm osdmaptool crushtool rados rbd + +## CIFS & Samba +cifs: smbd nmbd winbindd ctdbd ctdb-* ctdb_* +cifs: cifsd cifscreds cifs.upcall + +## Apple Filing Protocol (AFP) +afp: netatalk afpd cnid_dbd cnid_metad + +## ----------------------------------------------------------------------------- +## Desktops + +systemd-journald: *systemd-journal* +systemd: systemd systemd-* + +## GNOME +desktop: gnome-* gsd-* gjs goa-* gcr-* gvfs-* *xdg-*-gnome* passimd gvfsd* +desktop: at-spi-* at-spi2-* dconf-service gcr-* + +## KDE +desktop: plasmashell kwin-* kde* *-kde-* klauncher kactivitymanagerd krunner +desktop: kdeconnectd ksmserver kglobalaccel5 plasma-* *org.kde.* +desktop: sddm* kwalletd5 knotify5 kmix kscreen kwayland-* + +## XFCE4 +desktop: xfce4-* xfwm4 xfdesktop xfce4-panel xfsettingsd xfconfd +desktop: lightdm lightdm-* + +## Generic tools related to desktop +desktop: gdm gdm-* dbus-* xdg-* ibus-* evolution-* accounts-daemon colord +desktop: geoclue pulse* pipewire* wireplumber jack* touchegg pulseaudio
+desktop: Xwayland Xorg diff --git a/src/collectors/apps.plugin/apps_os_freebsd.c b/src/collectors/apps.plugin/apps_os_freebsd.c index f2480acfa0b694..265cda8b675dc3 100644 --- a/src/collectors/apps.plugin/apps_os_freebsd.c +++ b/src/collectors/apps.plugin/apps_os_freebsd.c @@ -291,7 +291,7 @@ bool apps_os_read_pid_stat_freebsd(struct pid_stat *p, void *ptr) { usec_t started_ut = timeval_usec(&proc_info->ki_start); p->values[PDF_UPTIME] = (system_current_time_ut > started_ut) ? (system_current_time_ut - started_ut) / USEC_PER_SEC : 0; - if(unlikely(debug_enabled || (p->target && p->target->debug_enabled))) + if(unlikely(debug_enabled || p->target)) debug_log_int("READ PROC/PID/STAT: %s/proc/%d/stat, process: '%s' on target '%s' (dt=%llu) VALUES: utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", cutime=" KERNEL_UINT_FORMAT ", cstime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", cminflt=" KERNEL_UINT_FORMAT ", cmajflt=" KERNEL_UINT_FORMAT ", threads=%d", netdata_configured_host_prefix, p->pid, pid_stat_comm(p), (p->target)?string2str(p->target->name):"UNSET", p->stat_collected_usec - p->last_stat_collected_usec, diff --git a/src/collectors/apps.plugin/apps_os_linux.c b/src/collectors/apps.plugin/apps_os_linux.c index bda64c1a6c52da..8ba1771072be7f 100644 --- a/src/collectors/apps.plugin/apps_os_linux.c +++ b/src/collectors/apps.plugin/apps_os_linux.c @@ -93,7 +93,7 @@ bool apps_os_read_pid_fds_linux(struct pid_stat *p, void *ptr __maybe_unused) { if(unlikely(l == -1)) { // cannot read the link - if(debug_enabled || (p->target && p->target->debug_enabled)) + if(debug_enabled) netdata_log_error("Cannot read link %s", p->fds[fdid].filename); if(unlikely(p->fds[fdid].fd < 0)) { @@ -689,7 +689,7 @@ bool apps_os_read_pid_stat_linux(struct pid_stat *p, void *ptr __maybe_unused) { } } - if(unlikely(debug_enabled || (p->target && p->target->debug_enabled))) + if(unlikely(debug_enabled)) debug_log_int("READ PROC/PID/STAT: 
%s/proc/%d/stat, process: '%s' on target '%s' (dt=%llu) VALUES: utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", cutime=" KERNEL_UINT_FORMAT ", cstime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", cminflt=" KERNEL_UINT_FORMAT ", cmajflt=" KERNEL_UINT_FORMAT ", threads=" KERNEL_UINT_FORMAT, netdata_configured_host_prefix, p->pid, pid_stat_comm(p), (p->target)?string2str(p->target->name):"UNSET", p->stat_collected_usec - p->last_stat_collected_usec, p->values[PDF_UTIME], diff --git a/src/collectors/apps.plugin/apps_os_macos.c b/src/collectors/apps.plugin/apps_os_macos.c index 746153ba5a6b4d..9e50246ab853c1 100644 --- a/src/collectors/apps.plugin/apps_os_macos.c +++ b/src/collectors/apps.plugin/apps_os_macos.c @@ -242,7 +242,7 @@ bool apps_os_read_pid_stat_macos(struct pid_stat *p, void *ptr) { // Note: Some values such as guest time, cutime, cstime, etc., are not directly available in MacOS. // You might need to approximate or leave them unset depending on your needs. - if(unlikely(debug_enabled || (p->target && p->target->debug_enabled))) { + if(unlikely(debug_enabled || p->target)) { debug_log_int("READ PROC/PID/STAT for MacOS: process: '%s' on target '%s' VALUES: utime=" KERNEL_UINT_FORMAT ", stime=" KERNEL_UINT_FORMAT ", minflt=" KERNEL_UINT_FORMAT ", majflt=" KERNEL_UINT_FORMAT ", threads=%d", pid_stat_comm(p), (p->target) ? 
string2str(p->target->name) : "UNSET", p->values[PDF_UTIME], diff --git a/src/collectors/apps.plugin/apps_os_windows.c b/src/collectors/apps.plugin/apps_os_windows.c index 65784682ee358e..38a552bb0dff12 100644 --- a/src/collectors/apps.plugin/apps_os_windows.c +++ b/src/collectors/apps.plugin/apps_os_windows.c @@ -451,6 +451,8 @@ #include #include +WCHAR* GetProcessCommandLine(HANDLE hProcess); + struct perflib_data { PERF_DATA_BLOCK *pDataBlock; PERF_OBJECT_TYPE *pObjectType; @@ -458,34 +460,17 @@ struct perflib_data { DWORD pid; }; -BOOL EnableDebugPrivilege() { - HANDLE hToken; - LUID luid; - TOKEN_PRIVILEGES tkp; - - if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hToken)) - return FALSE; - - if (!LookupPrivilegeValue(NULL, SE_DEBUG_NAME, &luid)) - return FALSE; - - tkp.PrivilegeCount = 1; - tkp.Privileges[0].Luid = luid; - tkp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; - - if (!AdjustTokenPrivileges(hToken, FALSE, &tkp, sizeof(tkp), NULL, NULL)) - return FALSE; - - CloseHandle(hToken); - - return TRUE; -} - void apps_os_init_windows(void) { PerflibNamesRegistryInitialize(); - if(!EnableDebugPrivilege()) - nd_log(NDLS_COLLECTORS, NDLP_WARNING, "Failed to enable debug privilege"); + if(!EnableWindowsPrivilege(SE_DEBUG_NAME)) + nd_log(NDLS_COLLECTORS, NDLP_WARNING, "Failed to enable %s privilege", SE_DEBUG_NAME); + + if(!EnableWindowsPrivilege(SE_SYSTEM_PROFILE_NAME)) + nd_log(NDLS_COLLECTORS, NDLP_WARNING, "Failed to enable %s privilege", SE_SYSTEM_PROFILE_NAME); + + if(!EnableWindowsPrivilege(SE_PROF_SINGLE_PROCESS_NAME)) + nd_log(NDLS_COLLECTORS, NDLP_WARNING, "Failed to enable %s privilege", SE_PROF_SINGLE_PROCESS_NAME); } uint64_t apps_os_get_total_memory_windows(void) { @@ -500,10 +485,31 @@ uint64_t apps_os_get_total_memory_windows(void) { return memStat.ullTotalPhys; } -static __thread wchar_t unicode[PATH_MAX]; +// remove the PID suffix and .exe suffix, if any +static void fix_windows_comm(struct pid_stat *p, char 
*comm) { + char pid[UINT64_MAX_LENGTH + 1]; // +1 for the underscore + pid[0] = '_'; + print_uint64(&pid[1], p->pid); + size_t pid_len = strlen(pid); + size_t comm_len = strlen(comm); + if (pid_len < comm_len) { + char *compare = &comm[comm_len - pid_len]; + if (strcmp(pid, compare) == 0) + *compare = '\0'; + } + + // remove the .exe suffix, if any + comm_len = strlen(comm); + size_t exe_len = strlen(".exe"); + if(exe_len < comm_len) { + char *compare = &comm[comm_len - exe_len]; + if (strcmp(".exe", compare) == 0) + *compare = '\0'; + } +} // Convert wide string to UTF-8 -static STRING *wchar_to_string(WCHAR *s) { +static char *wchar_to_utf8(WCHAR *s) { static __thread char utf8[PATH_MAX]; static __thread int utf8_size = sizeof(utf8); @@ -512,33 +518,152 @@ static STRING *wchar_to_string(WCHAR *s) { return NULL; WideCharToMultiByte(CP_UTF8, 0, s, -1, utf8, utf8_size, NULL, NULL); - return string_strdupz(utf8); + return utf8; } -STRING *GetProcessFriendlyName(WCHAR *path) { +// Convert wide string to UTF-8 +static STRING *wchar_to_string(WCHAR *s) { + return string_strdupz(wchar_to_utf8(s)); +} + +// -------------------------------------------------------------------------------------------------------------------- + +// return a sanitized name for the process +STRING *GetProcessFriendlyNameSanitized(WCHAR *path) { static __thread uint8_t void_buf[1024 * 1024]; + static __thread DWORD void_buf_size = sizeof(void_buf); + static __thread wchar_t unicode[PATH_MAX]; + static __thread DWORD unicode_size = sizeof(unicode) / sizeof(*unicode); DWORD handle; DWORD size = GetFileVersionInfoSizeW(path, &handle); - if (size == 0 || size > sizeof(void_buf)) + if (size == 0 || size > void_buf_size) return FALSE; if (GetFileVersionInfoW(path, handle, size, void_buf)) { LPWSTR value = NULL; UINT len = 0; - DWORD unicode_size = sizeof(unicode) / sizeof(*unicode); if (VerQueryValueW(void_buf, L"\\StringFileInfo\\040904B0\\FileDescription", (LPVOID*)&value, &len) && len > 0 && len < 
unicode_size) { wcsncpy(unicode, value, unicode_size - 1); unicode[unicode_size - 1] = L'\0'; - return wchar_to_string(unicode); + char *name = wchar_to_utf8(unicode); + sanitize_chart_meta(name); + return string_strdupz(name); + } + } + + return NULL; +} + +#define SERVICE_PREFIX "Service " +// return a sanitized name for the process +static STRING *GetNameFromCmdlineSanitized(struct pid_stat *p) { + if(!p->cmdline) return NULL; + + char buf[string_strlen(p->cmdline) + 1]; + memcpy(buf, string2str(p->cmdline), sizeof(buf)); + char *words[100]; + size_t num_words = quoted_strings_splitter(buf, words, 100, isspace_map_pluginsd); + + if(string_strcmp(p->comm, "svchost") == 0) { + // find -s SERVICE in the command line + for(size_t i = 0; i < num_words ;i++) { + if(strcmp(words[i], "-s") == 0 && i + 1 < num_words) { + char service[strlen(words[i + 1]) + sizeof(SERVICE_PREFIX)]; // sizeof() includes a null + strcpy(service, SERVICE_PREFIX); + strcpy(&service[sizeof(SERVICE_PREFIX) - 1], words[i + 1]); + sanitize_chart_meta(service); + return string_strdupz(service); + } } } return NULL; } +static void GetServiceNames(void) { + SC_HANDLE hSCManager = OpenSCManager(NULL, NULL, SC_MANAGER_ENUMERATE_SERVICE); + if (hSCManager == NULL) return; + + DWORD dwBytesNeeded = 0, dwServicesReturned = 0, dwResumeHandle = 0; + ENUM_SERVICE_STATUS_PROCESS *pServiceStatus = NULL; + + // First, query the required buffer size + EnumServicesStatusEx( + hSCManager, SC_ENUM_PROCESS_INFO, SERVICE_WIN32, SERVICE_STATE_ALL, + NULL, 0, &dwBytesNeeded, &dwServicesReturned, &dwResumeHandle, NULL); + + if (dwBytesNeeded == 0) { + CloseServiceHandle(hSCManager); + return; + } + + // Allocate memory to hold the services + pServiceStatus = mallocz(dwBytesNeeded); + + // Now, retrieve the list of services + if (!EnumServicesStatusEx( + hSCManager, SC_ENUM_PROCESS_INFO, SERVICE_WIN32, SERVICE_STATE_ALL, + (LPBYTE)pServiceStatus, dwBytesNeeded, &dwBytesNeeded, &dwServicesReturned, + &dwResumeHandle, 
NULL)) { + freez(pServiceStatus); + CloseServiceHandle(hSCManager); + return; + } + + // Loop through the services + for (DWORD i = 0; i < dwServicesReturned; i++) { + if(!pServiceStatus[i].lpDisplayName || !*pServiceStatus[i].lpDisplayName) + continue; + + struct pid_stat *p = find_pid_entry((pid_t)pServiceStatus[i].ServiceStatusProcess.dwProcessId); + if(p && !p->got_service) { + p->got_service = true; + + size_t len = strlen(pServiceStatus[i].lpDisplayName); + char buf[len + 1]; + memcpy(buf, pServiceStatus[i].lpDisplayName, sizeof(buf)); + sanitize_chart_meta(buf); + + string_freez(p->name); + p->name = string_strdupz(buf); + } + } + + free(pServiceStatus); + CloseServiceHandle(hSCManager); +} + +static WCHAR *executable_path_from_cmdline(WCHAR *cmdline) { + if (!cmdline || !*cmdline) return NULL; + + WCHAR *exe_path_start = cmdline; + WCHAR *exe_path_end = NULL; + + if (cmdline[0] == L'"') { + // Command line starts with a double quote + exe_path_start++; // Move past the first double quote + exe_path_end = wcschr(exe_path_start, L'"'); // Find the next quote + } + else { + // Command line does not start with a double quote + exe_path_end = wcschr(exe_path_start, L' '); // Find the first space + } + + if (exe_path_end) { + // Null-terminate the string at the end of the executable path + *exe_path_end = L'\0'; + return exe_path_start; + } + + return NULL; +} + void GetAllProcessesInfo(void) { + static __thread wchar_t unicode[PATH_MAX]; + static __thread DWORD unicode_size = sizeof(unicode) / sizeof(*unicode); + calls_counter++; HANDLE hSnapshot = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0); @@ -552,45 +677,70 @@ void GetAllProcessesInfo(void) { return; } + bool need_service_names = false; + do { + if(!pe32.th32ProcessID) continue; + struct pid_stat *p = get_or_allocate_pid_entry((pid_t)pe32.th32ProcessID); p->ppid = (pid_t)pe32.th32ParentProcessID; if(p->got_info) continue; p->got_info = true; - if(!p->initialized) { - string_freez(p->comm); - p->comm = 
wchar_to_string(pe32.szExeFile); - p->assigned_to_target = false; - } - HANDLE hProcess = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, p->pid); - if (hProcess == NULL) continue; + if (hProcess == NULL) + continue; - STRING *full_path = NULL; - STRING *friendly_name = NULL; + // Get the full command line, if possible + { + WCHAR *cmdline = GetProcessCommandLine(hProcess); // returns malloc'd buffer + if (cmdline) { + string_freez(p->cmdline); + p->cmdline = wchar_to_string(cmdline); + + // extract the process full path from the command line + WCHAR *path = executable_path_from_cmdline(cmdline); + if(path) { + string_freez(p->name); + p->name = GetProcessFriendlyNameSanitized(path); + } - DWORD unicode_size = sizeof(unicode) / sizeof(*unicode); - if(QueryFullProcessImageNameW(hProcess, 0, unicode, &unicode_size)) { - full_path = wchar_to_string(unicode); - friendly_name = GetProcessFriendlyName(unicode); + free(cmdline); // free(), not freez() + } } - CloseHandle(hProcess); + if(!p->cmdline || !p->name) { + if (QueryFullProcessImageNameW(hProcess, 0, unicode, &unicode_size)) { + // put the full path name to the command into cmdline + if(!p->cmdline) + p->cmdline = wchar_to_string(unicode); - if(full_path) { - string_freez(p->cmdline); - p->cmdline = full_path; + if(!p->name) + p->name = GetProcessFriendlyNameSanitized(unicode); + } } - if(friendly_name) { + CloseHandle(hProcess); + + char *comm = wchar_to_utf8(pe32.szExeFile); + fix_windows_comm(p, comm); + update_pid_comm(p, comm); // will sanitize p->comm + + if(!need_service_names && string_strcmp(p->comm, "svchost") == 0) + need_service_names = true; + + STRING *better_name = GetNameFromCmdlineSanitized(p); + if(better_name) { string_freez(p->name); - p->name = friendly_name; - p->assigned_to_target = false; + p->name = better_name; } + } while (Process32NextW(hSnapshot, &pe32)); CloseHandle(hSnapshot); + + if(need_service_names) + GetServiceNames(); } static inline kernel_uint_t 
perflib_cpu_utilization(COUNTER_DATA *d) { @@ -692,40 +842,17 @@ bool apps_os_collect_all_pids_windows(void) { // a new pid p->initialized = true; - static __thread char name[MAX_PATH]; - - if (getInstanceName(d.pDataBlock, d.pObjectType, d.pi, name, sizeof(name))) { - // remove the PID suffix, if any - char pid[UINT64_MAX_LENGTH + 1]; // +1 for the underscore - pid[0] = '_'; - print_uint64(&pid[1], p->pid); - size_t pid_len = strlen(pid); - size_t name_len = strlen(name); - if (pid_len < name_len) { - char *compare = &name[name_len - pid_len]; - if (strcmp(pid, compare) == 0) - *compare = '\0'; - } + static __thread char comm[MAX_PATH]; - // remove the .exe suffix, if any - name_len = strlen(name); - size_t exe_len = strlen(".exe"); - if(exe_len < name_len) { - char *compare = &name[name_len - exe_len]; - if (strcmp(".exe", compare) == 0) - *compare = '\0'; - } - } + if (getInstanceName(d.pDataBlock, d.pObjectType, d.pi, comm, sizeof(comm))) + fix_windows_comm(p, comm); else - strncpyz(name, "unknown", sizeof(name) - 1); + strncpyz(comm, "unknown", sizeof(comm) - 1); - if(strcmp(name, "wininit") == 0) + if(strcmp(comm, "wininit") == 0) INIT_PID = p->pid; - string_freez(p->comm); // it may be detected in a previous run via GetAllProcessesInfo() - p->comm = string_strdupz(name); - p->got_info = false; - p->assigned_to_target = false; + update_pid_comm(p, comm); // will sanitize p->comm added++; COUNTER_DATA ppid = {.key = "Creating Process ID"}; diff --git a/src/collectors/apps.plugin/apps_os_windows_nt.c b/src/collectors/apps.plugin/apps_os_windows_nt.c new file mode 100644 index 00000000000000..6a853d65b2e52c --- /dev/null +++ b/src/collectors/apps.plugin/apps_os_windows_nt.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +// this must not include libnetdata.h because STRING is defined in winternl.h + +#include "config.h" +#if defined(OS_WINDOWS) + +#include +#include +#include +#include + +// 
-------------------------------------------------------------------------------------------------------------------- +// Get the full windows command line + +WCHAR* GetProcessCommandLine(HANDLE hProcess) { + PROCESS_BASIC_INFORMATION pbi; + ULONG len; + NTSTATUS status = NtQueryInformationProcess(hProcess, 0, &pbi, sizeof(pbi), &len); + if (status != 0) + return NULL; + + // The rest of the function remains the same as before + PEB peb; + if (!ReadProcessMemory(hProcess, pbi.PebBaseAddress, &peb, sizeof(peb), NULL)) + return NULL; + + RTL_USER_PROCESS_PARAMETERS procParams; + if (!ReadProcessMemory(hProcess, peb.ProcessParameters, &procParams, sizeof(procParams), NULL)) + return NULL; + + WCHAR* commandLine = (WCHAR*)malloc(procParams.CommandLine.MaximumLength); + if (!commandLine) + return NULL; + + if (!ReadProcessMemory(hProcess, procParams.CommandLine.Buffer, commandLine, procParams.CommandLine.MaximumLength, NULL)) { + free(commandLine); + return NULL; + } + + return commandLine; +} + +#endif diff --git a/src/collectors/apps.plugin/apps_pid.c b/src/collectors/apps.plugin/apps_pid.c index 03841cd8001acb..768087cc16c183 100644 --- a/src/collectors/apps.plugin/apps_pid.c +++ b/src/collectors/apps.plugin/apps_pid.c @@ -134,6 +134,7 @@ void del_pid_entry(pid_t pid) { freez(p->fds); #endif + string_freez(p->comm_orig); string_freez(p->comm); string_freez(p->cmdline); aral_freez(pids.all_pids.aral, p); @@ -316,7 +317,49 @@ static inline void link_all_processes_to_their_parents(void) { // -------------------------------------------------------------------------------------------------------------------- +static inline STRING *comm_from_cmdline_sanitized(char *comm, STRING *cmdline) { + if(!cmdline) { + sanitize_chart_meta(comm); + return string_strdupz(comm); + } + + const char *cl = string2str(cmdline); + size_t len = string_strlen(cmdline); + + char buf_cmd[len + 1]; + // if it is enclosed in (), remove the parenthesis + if(cl[0] == '(' && cl[len - 1] == ')') { + 
memcpy(buf_cmd, &cl[1], len - 2); + buf_cmd[len - 2] = '\0'; + } + else + memcpy(buf_cmd, cl, sizeof(buf_cmd)); + + size_t comm_len = strlen(comm); + char *start = strstr(buf_cmd, comm); + if(start) { + char *end = start + comm_len; + while(*end && !isspace((uint8_t)*end) && *end != '/' && *end != '\\' && *end != '"') end++; + *end = '\0'; + + sanitize_chart_meta(start); + return string_strdupz(start); + } + + sanitize_chart_meta(comm); + return string_strdupz(comm); +} + void update_pid_comm(struct pid_stat *p, const char *comm) { + if(p->comm_orig && string_strcmp(p->comm_orig, comm) == 0) + // no change + return; + +#if (PROCESSES_HAVE_CMDLINE == 1) + if(likely(proc_pid_cmdline_is_needed && !p->cmdline)) + managed_log(p, PID_LOG_CMDLINE, read_proc_pid_cmdline(p)); +#endif + // some process names have ( and ), remove the parenthesis size_t len = strlen(comm); char buf[len + 1]; @@ -327,22 +370,18 @@ void update_pid_comm(struct pid_stat *p, const char *comm) { else memcpy(buf, comm, sizeof(buf)); - // check if the comm is changed - if(!p->comm || strcmp(pid_stat_comm(p), buf) != 0) { - // it is changed + string_freez(p->comm_orig); + p->comm_orig = string_strdupz(comm); - string_freez(p->comm); - p->comm = string_strdupz(buf); + string_freez(p->comm); + p->comm = comm_from_cmdline_sanitized(buf, p->cmdline); -#if (PROCESSES_HAVE_CMDLINE == 1) - if(likely(proc_pid_cmdline_is_needed)) - managed_log(p, PID_LOG_CMDLINE, read_proc_pid_cmdline(p)); -#endif + p->is_manager = is_process_manager(p); + p->is_aggregator = is_process_aggregator(p); - // the process changes comm, we may have to reassign it to - // an apps_groups.conf target. - p->target = NULL; - } + // the process changed comm, we may have to reassign it to + // an apps_groups.conf target. 
+ p->target = NULL; } // -------------------------------------------------------------------------------------------------------------------- diff --git a/src/collectors/apps.plugin/apps_plugin.c b/src/collectors/apps.plugin/apps_plugin.c index 60f80c3c9b17d4..8d4815be1eae83 100644 --- a/src/collectors/apps.plugin/apps_plugin.c +++ b/src/collectors/apps.plugin/apps_plugin.c @@ -121,6 +121,84 @@ size_t pagesize; // ---------------------------------------------------------------------------- // update chart dimensions +// Helper function to count the number of processes in the linked list +int count_processes(struct pid_stat *root) { + int count = 0; + + for(struct pid_stat *p = root; p ; p = p->next) + if(p->updated) count++; + + return count; +} + +// Comparator function to sort by pid +int compare_by_pid(const void *a, const void *b) { + struct pid_stat *pa = *(struct pid_stat **)a; + struct pid_stat *pb = *(struct pid_stat **)b; + return ((int)pa->pid - (int)pb->pid); +} + +// Function to print a process and its children recursively +void print_process_tree(struct pid_stat *root, struct pid_stat *parent, int depth, int total_processes) { + // Allocate an array of pointers for processes with the given parent + struct pid_stat **children = (struct pid_stat **)malloc(total_processes * sizeof(struct pid_stat *)); + int children_count = 0; + + // Populate the array with processes that have the given parent + struct pid_stat *p = root; + while (p != NULL) { + if (p->updated && p->parent == parent) { + children[children_count++] = p; + } + p = p->next; + } + + // Sort the children array by pid + qsort(children, children_count, sizeof(struct pid_stat *), compare_by_pid); + + // Print each child and recurse + for (int i = 0; i < children_count; i++) { + // Print the current process with indentation based on depth + if (depth > 0) { + for (int j = 0; j < (depth - 1) * 4; j++) { + printf(" "); + } + printf(" \\_ "); + } + +#if (PROCESSES_HAVE_COMM_AND_NAME == 1) + 
printf("[%d] %s (name: %s) [%s]: %s\n", children[i]->pid, + string2str(children[i]->comm), + string2str(children[i]->name), + string2str(children[i]->target->name), + string2str(children[i]->cmdline)); +#else + printf("[%d] %s [%s]: %s\n", children[i]->pid, + string2str(children[i]->comm), + string2str(children[i]->target->name), + string2str(children[i]->cmdline)); +#endif + + // Recurse to print this child's children + print_process_tree(root, children[i], depth + 1, total_processes); + } + + // Free the allocated array + free(children); +} + +// Function to print the full hierarchy +void print_hierarchy(struct pid_stat *root) { + // Count the total number of processes + int total_processes = count_processes(root); + + // Start printing from processes with parent = NULL (i.e., root processes) + print_process_tree(root, NULL, 0, total_processes); +} + +// ---------------------------------------------------------------------------- +// update chart dimensions + #if (ALL_PIDS_ARE_READ_INSTANTLY == 0) static void normalize_utilization(struct target *root) { struct target *w; @@ -297,6 +375,7 @@ int check_proc_1_io() { } static bool profile_speed = false; +static bool print_tree_and_exit = false; static void parse_args(int argc, char **argv) { @@ -316,6 +395,11 @@ static void parse_args(int argc, char **argv) exit(0); } + if(strcmp("print", argv[i]) == 0 || strcmp("-print", argv[i]) == 0 || strcmp("--print", argv[i]) == 0) { + print_tree_and_exit = true; + continue; + } + #if defined(OS_LINUX) if(strcmp("test-permissions", argv[i]) == 0 || strcmp("-t", argv[i]) == 0) { if(!check_proc_1_io()) { @@ -618,7 +702,7 @@ int main(int argc, char **argv) { procfile_adaptive_initial_allocation = 1; os_get_system_HZ(); os_get_system_cpus_uncached(); - apps_orchestrators_and_aggregators_init(); // before parsing args! + apps_managers_and_aggregators_init(); // before parsing args! 
parse_args(argc, argv); #if !defined(OS_WINDOWS) @@ -702,6 +786,11 @@ int main(int argc, char **argv) { normalize_utilization(apps_groups_root_target); #endif + if(unlikely(print_tree_and_exit)) { + print_hierarchy(root_of_pids()); + exit(0); + } + if(send_resource_usage) send_resource_usage_to_netdata(dt); diff --git a/src/collectors/apps.plugin/apps_plugin.h b/src/collectors/apps.plugin/apps_plugin.h index cc131c08fa542b..26f166b5a54789 100644 --- a/src/collectors/apps.plugin/apps_plugin.h +++ b/src/collectors/apps.plugin/apps_plugin.h @@ -371,7 +371,10 @@ struct target { TARGET_TYPE type; union { - STRING *compare; + struct { + SIMPLE_PATTERN *pattern; + STRING *compare; + } ag; #if (PROCESSES_HAVE_UID == 1) uid_t uid; #endif @@ -393,11 +396,8 @@ struct target { #endif bool exposed:1; // if set, we have sent this to netdata - bool hidden:1; // if set, we set the hidden flag on the dimension - bool debug_enabled:1; - bool ends_with:1; - bool starts_with:1; // if set, the compare string matches only the - // beginning of the command + bool ends_with:1; // if set, the compare string matches the end of the command + bool starts_with:1; // if set, the compare string matches the start of the command struct pid_on_target *root_pid; // list of aggregated pids for target debugging @@ -476,7 +476,7 @@ struct pid_stat { struct pid_stat *next; struct pid_stat *prev; - struct target *target; // app_groups.conf targets + struct target *target; // app_groups.conf/tree targets #if (PROCESSES_HAVE_UID == 1) struct target *uid_target; // uid based targets @@ -485,9 +485,10 @@ struct pid_stat { struct target *gid_target; // gid based targets #endif - STRING *comm; // the command name (short version) - STRING *name; // a better name, or NULL - STRING *cmdline; // the full command line (or on windows, the full pathname of the program) + STRING *comm_orig; // the command, as-collected + STRING *comm; // the command, sanitized + STRING *name; // the command name if any, sanitized + 
STRING *cmdline; // the full command line of the program #if defined(OS_WINDOWS) COUNTER_DATA perflib[PDF_MAX]; @@ -531,6 +532,8 @@ struct pid_stat { bool updated:1; // true when the process is currently running bool merged:1; // true when it has been merged to its parent bool keep:1; // true when we need to keep this process in memory even after it exited + bool is_manager:1; // true when this pid is a process manager + bool is_aggregator:1; // true when this pid is a process aggregator bool matched_by_config:1; @@ -540,7 +543,7 @@ struct pid_stat { #if defined(OS_WINDOWS) bool got_info:1; - bool assigned_to_target:1; + bool got_service:1; bool initialized:1; #endif @@ -631,7 +634,7 @@ bool managed_log(struct pid_stat *p, PID_LOG log, bool status); #define pid_incremental_cpu(type, idx, value) \ incremental_rate(p->values[idx], p->raw[idx], value, p->type##_collected_usec, p->last_##type##_collected_usec, CPU_TO_NANOSECONDCORES) -void apps_orchestrators_and_aggregators_init(void); +void apps_managers_and_aggregators_init(void); void apps_users_and_groups_init(void); void apps_pids_init(void); @@ -675,6 +678,8 @@ struct pid_stat *find_pid_entry(pid_t pid); void del_pid_entry(pid_t pid); void update_pid_comm(struct pid_stat *p, const char *comm); +bool is_process_manager(struct pid_stat *p); +bool is_process_aggregator(struct pid_stat *p); // -------------------------------------------------------------------------------------------------------------------- // targets management diff --git a/src/collectors/apps.plugin/apps_targets.c b/src/collectors/apps.plugin/apps_targets.c index c35eb1119eed5b..6a680ad1f91549 100644 --- a/src/collectors/apps.plugin/apps_targets.c +++ b/src/collectors/apps.plugin/apps_targets.c @@ -31,35 +31,7 @@ struct target *find_target_by_name(struct target *base, const char *name) { } // -------------------------------------------------------------------------------------------------------------------- -// Tree - -static inline STRING 
*comm_from_cmdline(STRING *comm, STRING *cmdline) { - if(!cmdline) return sanitize_chart_meta_string(comm); - - const char *cl = string2str(cmdline); - size_t len = string_strlen(cmdline); - - char buf_cmd[len + 1]; - // if it is enclosed in (), remove the parenthesis - if(cl[0] == '(' && cl[len - 1] == ')') { - memcpy(buf_cmd, &cl[1], len - 2); - buf_cmd[len - 2] = '\0'; - } - else - memcpy(buf_cmd, cl, sizeof(buf_cmd)); - - char *start = strstr(buf_cmd, string2str(comm)); - if(start) { - char *end = start + string_strlen(comm); - while(*end && !isspace((uint8_t)*end) && *end != '/' && *end != '\\') end++; - *end = '\0'; - - sanitize_chart_meta(start); - return string_strdupz(start); - } - - return sanitize_chart_meta_string(comm); -} +// Process managers and aggregators struct comm_list { STRING *comm; @@ -111,21 +83,26 @@ static void managed_list_add(struct managed_list *list, const char *s) { static STRING *KernelAggregator = NULL; -void apps_orchestrators_and_aggregators_init(void) { +void apps_managers_and_aggregators_init(void) { KernelAggregator = string_strdupz("kernel"); managed_list_clear(&tree.managers); #if defined(OS_LINUX) - managed_list_add(&tree.managers, "init"); // linux systems - managed_list_add(&tree.managers, "systemd"); // lxc containers and host systems (this also catches "systemd --user") - managed_list_add(&tree.managers, "containerd-shim"); // docker containers - managed_list_add(&tree.managers, "docker-init"); // docker containers - managed_list_add(&tree.managers, "dumb-init"); // some docker containers use this - managed_list_add(&tree.managers, "gnome-shell"); // gnome user applications + managed_list_add(&tree.managers, "init"); // linux systems + managed_list_add(&tree.managers, "systemd"); // lxc containers and host systems (this also catches "systemd --user") + managed_list_add(&tree.managers, "containerd-shim-runc-v2"); // docker containers + managed_list_add(&tree.managers, "docker-init"); // docker containers + 
managed_list_add(&tree.managers, "dumb-init"); // some docker containers use this + managed_list_add(&tree.managers, "openrc-run.sh"); // openrc + managed_list_add(&tree.managers, "crond"); // linux crond + managed_list_add(&tree.managers, "gnome-shell"); // gnome user applications + managed_list_add(&tree.managers, "plasmashell"); // kde user applications + managed_list_add(&tree.managers, "xfwm4"); // xfce4 user applications #elif defined(OS_WINDOWS) - managed_list_add(&tree.managers, "System"); - managed_list_add(&tree.managers, "services"); managed_list_add(&tree.managers, "wininit"); + managed_list_add(&tree.managers, "services"); + managed_list_add(&tree.managers, "explorer"); + managed_list_add(&tree.managers, "System"); #elif defined(OS_FREEBSD) managed_list_add(&tree.managers, "init"); #elif defined(OS_MACOS) @@ -142,49 +119,52 @@ void apps_orchestrators_and_aggregators_init(void) { #endif } -static inline bool is_orchestrator(struct pid_stat *p) { +bool is_process_manager(struct pid_stat *p) { for(size_t c = 0; c < tree.managers.used ; c++) { - if(p->comm == tree.managers.array[c].comm) + if(p->comm == tree.managers.array[c].comm || + p->comm_orig == tree.managers.array[c].comm) return true; } return false; } -static inline bool is_aggregator(struct pid_stat *p) { +bool is_process_aggregator(struct pid_stat *p) { for(size_t c = 0; c < tree.aggregators.used ; c++) { - if(p->comm == tree.aggregators.array[c].comm) + if(p->comm == tree.aggregators.array[c].comm || + p->comm_orig == tree.aggregators.array[c].comm) return true; } return false; } +// -------------------------------------------------------------------------------------------------------------------- +// Tree + struct target *get_tree_target(struct pid_stat *p) { // // skip fast all the children that are more than 3 levels down // while(p->parent && p->parent->pid != INIT_PID && p->parent->parent && p->parent->parent->parent) // p = p->parent; // keep the children of INIT_PID, and process 
orchestrators - while(p->parent && p->parent->pid != INIT_PID && p->parent->pid != 0 && !is_orchestrator(p->parent)) + while(p->parent && p->parent->pid != INIT_PID && p->parent->pid != 0 && !p->parent->is_manager) p = p->parent; // merge all processes into process aggregators - STRING *search_for = string_dup(p->comm); - bool aggregator = false; - if((p->ppid == 0 && p->pid != INIT_PID) || (p->parent && is_aggregator(p->parent))) { - aggregator = true; + STRING *search_for = NULL; + if((p->ppid == 0 && p->pid != INIT_PID) || (p->parent && p->parent->is_aggregator)) { search_for = string_dup(KernelAggregator); } - - if(!aggregator) { + else { #if (PROCESSES_HAVE_COMM_AND_NAME == 1) - search_for = sanitize_chart_meta_string(p->name ? p->name : p->comm); + search_for = string_dup(p->name ? p->name : p->comm); #else - search_for = comm_from_cmdline(p->comm, p->cmdline); + search_for = string_dup(p->comm); #endif } + // find an existing target with the required name struct target *w; for(w = apps_groups_root_target; w ; w = w->next) { if (w->name == search_for) { @@ -196,7 +176,7 @@ struct target *get_tree_target(struct pid_stat *p) { w = callocz(sizeof(struct target), 1); w->type = TARGET_TYPE_TREE; w->starts_with = w->ends_with = false; - w->compare = string_dup(p->comm); + w->ag.compare = string_dup(search_for); w->id = search_for; w->name = string_dup(search_for); w->clean_name = get_clean_name(w->name); @@ -302,17 +282,17 @@ struct target *apps_groups_root_target = NULL; // find or create a new target // there are targets that are just aggregated to other target (the second argument) -static struct target *get_apps_groups_target(const char *id, struct target *target, const char *name) { - bool tdebug = false, thidden = target ? 
target->hidden : false, ends_with = false, starts_with = false; +static struct target *get_apps_groups_target(const char *comm, struct target *target, const char *name) { + bool ends_with = false, starts_with = false, has_asterisk_inside = false; - STRING *id_lookup = NULL; + STRING *comm_lookup = NULL; STRING *name_lookup = NULL; // extract the options from the id { - size_t len = strlen(id); + size_t len = strlen(comm); char buf[len + 1]; - memcpy(buf, id, sizeof(buf)); + memcpy(buf, comm, sizeof(buf)); if(buf[len - 1] == '*') { buf[--len] = '\0'; @@ -320,37 +300,25 @@ static struct target *get_apps_groups_target(const char *id, struct target *targ } const char *nid = buf; - while (nid[0] == '-' || nid[0] == '+' || nid[0] == '*') { - if (nid[0] == '-') thidden = true; - if (nid[0] == '+') tdebug = true; - if (nid[0] == '*') ends_with = true; + if (nid[0] == '*') { + ends_with = true; nid++; } - id_lookup = string_strdupz(nid); + if(strchr(nid, '*')) + has_asterisk_inside = true; + + comm_lookup = string_strdupz(nid); } // extract the options from the name - { - size_t len = strlen(name); - char buf[len + 1]; - memcpy(buf, name, sizeof(buf)); - - const char *nn = buf; - while (nn[0] == '-' || nn[0] == '+') { - if (nn[0] == '-') thidden = true; - if (nn[0] == '+') tdebug = true; - nn++; - } - - name_lookup = string_strdupz(nn); - } + name_lookup = string_strdupz(name); // find if it already exists struct target *w, *last = apps_groups_root_target; for(w = apps_groups_root_target ; w ; w = w->next) { - if(w->id == id_lookup) { - string_freez(id_lookup); + if(w->id == comm_lookup) { + string_freez(comm_lookup); string_freez(name_lookup); return w; } @@ -368,19 +336,22 @@ static struct target *get_apps_groups_target(const char *id, struct target *targ if(target && target->target) fatal("Internal Error: request to link process '%s' to target '%s' which is linked to target '%s'", - id, string2str(target->id), string2str(target->target->id)); + comm, 
string2str(target->id), string2str(target->target->id)); w = callocz(sizeof(struct target), 1); w->type = TARGET_TYPE_APP_GROUP; - w->compare = string_dup(id_lookup); + w->ag.compare = string_dup(comm_lookup); w->starts_with = starts_with; w->ends_with = ends_with; - w->id = string_dup(id_lookup); + w->id = string_dup(comm_lookup); + + if(has_asterisk_inside) + w->ag.pattern = simple_pattern_create(comm, " ", SIMPLE_PATTERN_EXACT, true); if(unlikely(!target)) w->name = string_dup(name_lookup); // copy the name else - w->name = string_dup(id_lookup); // copy the id + w->name = string_dup(comm_lookup); // copy the id // dots are used to distinguish chart type and id in streaming, so we should replace them w->clean_name = get_clean_name(w->name); @@ -388,29 +359,20 @@ static struct target *get_apps_groups_target(const char *id, struct target *targ if(w->starts_with && w->ends_with) proc_pid_cmdline_is_needed = true; - w->hidden = thidden; -#ifdef NETDATA_INTERNAL_CHECKS - w->debug_enabled = tdebug; -#else - if(tdebug) - fprintf(stderr, "apps.plugin has been compiled without debugging\n"); -#endif w->target = target; // append it, to maintain the order in apps_groups.conf if(last) last->next = w; else apps_groups_root_target = w; - debug_log("ADDING TARGET ID '%s', process name '%s' (%s), aggregated on target '%s', options: %s %s" + debug_log("ADDING TARGET ID '%s', process name '%s' (%s), aggregated on target '%s'" , string2str(w->id) - , string2str(w->compare) + , string2str(w->ag.compare) , (w->starts_with && w->ends_with)?"substring":((w->starts_with)?"prefix":((w->ends_with)?"suffix":"exact")) , w->target?w->target->name:w->name - , (w->hidden)?"hidden":"-" - , (w->debug_enabled)?"debug":"-" ); - string_freez(id_lookup); + string_freez(comm_lookup); string_freez(name_lookup); return w; diff --git a/src/collectors/windows-events.plugin/windows-events.c b/src/collectors/windows-events.plugin/windows-events.c index 29881c039f7739..24f8f59a597a97 100644 --- 
a/src/collectors/windows-events.plugin/windows-events.c +++ b/src/collectors/windows-events.plugin/windows-events.c @@ -1139,6 +1139,15 @@ int main(int argc __maybe_unused, char **argv __maybe_unused) { sid_cache_init(); field_cache_init(); + if(!EnableWindowsPrivilege(SE_SECURITY_NAME)) + nd_log(NDLS_COLLECTORS, NDLP_WARNING, "Failed to enable %s privilege", SE_SECURITY_NAME); + + if(!EnableWindowsPrivilege(SE_BACKUP_NAME)) + nd_log(NDLS_COLLECTORS, NDLP_WARNING, "Failed to enable %s privilege", SE_BACKUP_NAME); + + if(!EnableWindowsPrivilege(SE_AUDIT_NAME)) + nd_log(NDLS_COLLECTORS, NDLP_WARNING, "Failed to enable %s privilege", SE_AUDIT_NAME); + // ------------------------------------------------------------------------ // debug diff --git a/src/health/health_notifications.c b/src/health/health_notifications.c index 85dd2d0d8b0ceb..443c0246f06fc3 100644 --- a/src/health/health_notifications.c +++ b/src/health/health_notifications.c @@ -20,17 +20,27 @@ struct health_raised_summary { }; void health_alarm_wait_for_execution(ALARM_ENTRY *ae) { - if (!(ae->flags & HEALTH_ENTRY_FLAG_EXEC_IN_PROGRESS)) - return; + // this has to ALWAYS remove the given alarm entry from the queue - if(!ae->popen_instance) { - // nd_log(NDLS_DAEMON, NDLP_ERR, "attempted to wait for the execution of alert that has not spawn a notification"); - return; + int code = 0; + + if (!(ae->flags & HEALTH_ENTRY_FLAG_EXEC_IN_PROGRESS)) { + nd_log(NDLS_DAEMON, NDLP_ERR, "attempted to wait for the execution of alert that has not an execution in progress"); + code = 128; + goto cleanup; } - ae->exec_code = spawn_popen_wait(ae->popen_instance); + if(!ae->popen_instance) { + nd_log(NDLS_DAEMON, NDLP_ERR, "attempted to wait for the execution of alert that has not spawn a notification"); + code = 128; + goto cleanup; + } + code = spawn_popen_wait(ae->popen_instance); netdata_log_debug(D_HEALTH, "done executing command - returned with code %d", ae->exec_code); + +cleanup: + ae->exec_code = code; ae->flags 
&= ~HEALTH_ENTRY_FLAG_EXEC_IN_PROGRESS; if(ae->exec_code != 0) @@ -466,13 +476,18 @@ void health_send_notification(RRDHOST *host, ALARM_ENTRY *ae, struct health_rais ae->exec_run_timestamp = now_realtime_sec(); /* will be updated by real time after spawning */ netdata_log_debug(D_HEALTH, "executing command '%s'", command_to_run); - ae->flags |= HEALTH_ENTRY_FLAG_EXEC_IN_PROGRESS; ae->popen_instance = spawn_popen_run(command_to_run); - enqueue_alarm_notify_in_progress(ae); + if(ae->popen_instance) { + ae->flags |= HEALTH_ENTRY_FLAG_EXEC_IN_PROGRESS; + enqueue_alarm_notify_in_progress(ae); + } + else + netdata_log_error("Failed to execute alarm notification"); + health_alarm_log_save(host, ae); - } else { - netdata_log_error("Failed to format command arguments"); } + else + netdata_log_error("Failed to format command arguments"); buffer_free(warn_alarms); buffer_free(crit_alarms); diff --git a/src/libnetdata/os/os-windows-wrappers.c b/src/libnetdata/os/os-windows-wrappers.c index 64076eae28af09..161f2aefd05139 100644 --- a/src/libnetdata/os/os-windows-wrappers.c +++ b/src/libnetdata/os/os-windows-wrappers.c @@ -58,4 +58,42 @@ bool netdata_registry_get_string(char *out, unsigned int length, void *hKey, cha return status; } +bool EnableWindowsPrivilege(const char *privilegeName) { + HANDLE hToken; + LUID luid; + TOKEN_PRIVILEGES tkp; + + // Open the process token with appropriate access rights + if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hToken)) + return false; + + // Lookup the LUID for the specified privilege + if (!LookupPrivilegeValue(NULL, privilegeName, &luid)) { + CloseHandle(hToken); // Close the token handle before returning + return false; + } + + // Set up the TOKEN_PRIVILEGES structure + tkp.PrivilegeCount = 1; + tkp.Privileges[0].Luid = luid; + tkp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; + + // Adjust the token's privileges + if (!AdjustTokenPrivileges(hToken, FALSE, &tkp, sizeof(tkp), NULL, NULL)) { + 
CloseHandle(hToken); // Close the token handle before returning + return false; + } + + // Check if AdjustTokenPrivileges succeeded + if (GetLastError() == ERROR_NOT_ALL_ASSIGNED) { + CloseHandle(hToken); // Close the token handle before returning + return false; + } + + // Close the handle to the token after success + CloseHandle(hToken); + + return true; +} + #endif diff --git a/src/libnetdata/os/os-windows-wrappers.h b/src/libnetdata/os/os-windows-wrappers.h index 5ae73043a3da53..30e1fc50d21016 100644 --- a/src/libnetdata/os/os-windows-wrappers.h +++ b/src/libnetdata/os/os-windows-wrappers.h @@ -14,5 +14,7 @@ bool netdata_registry_get_dword(unsigned int *out, void *hKey, char *subKey, cha long netdata_registry_get_string_from_open_key(char *out, unsigned int length, void *lKey, char *name); bool netdata_registry_get_string(char *out, unsigned int length, void *hKey, char *subKey, char *name); +bool EnableWindowsPrivilege(const char *privilegeName); + #endif // OS_WINDOWS #endif //NETDATA_OS_WINDOWS_WRAPPERS_H diff --git a/src/libnetdata/spawn_server/spawn_server_windows.c b/src/libnetdata/spawn_server/spawn_server_windows.c index 09218568c791ee..8c7d76cd2416a6 100644 --- a/src/libnetdata/spawn_server/spawn_server_windows.c +++ b/src/libnetdata/spawn_server/spawn_server_windows.c @@ -54,7 +54,7 @@ static BUFFER *argv_to_windows(const char **argv) { BUFFER *wb = buffer_create(0, NULL); // argv[0] is the path - char b[strlen(argv[0]) * 2 + 1024]; + char b[strlen(argv[0]) * 2 + FILENAME_MAX]; cygwin_conv_path(CCP_POSIX_TO_WIN_A | CCP_ABSOLUTE, argv[0], b, sizeof(b)); for(size_t i = 0; argv[i] ;i++) { @@ -84,6 +84,8 @@ static BUFFER *argv_to_windows(const char **argv) { else buffer_putc(wb, ' '); } + else if (needs_quotes) + buffer_putc(wb, '"'); for(const char *c = s; *c ; c++) { switch(*c) { From 934fa2e1090a05211253051aa87f443863cc44b6 Mon Sep 17 00:00:00 2001 From: Stelios Fragkakis <52996999+stelfrag@users.noreply.github.com> Date: Wed, 2 Oct 2024 18:32:06 +0300 
Subject: [PATCH 21/23] Handle mqtt ping timeouts (#18653) * Handle mqtt ping timeouts * Increase ping timeout * Reset ping_timeout when disconnection is requested --- src/aclk/aclk.c | 30 +++++++++++++++------- src/aclk/aclk.h | 9 ++++++- src/aclk/aclk_rx_msgs.c | 2 +- src/aclk/mqtt_websockets/mqtt_ng.c | 1 + src/aclk/mqtt_websockets/mqtt_ng.h | 2 +- src/aclk/mqtt_websockets/mqtt_wss_client.c | 28 +++++++++++++------- src/claim/claim.c | 2 +- 7 files changed, 52 insertions(+), 22 deletions(-) diff --git a/src/aclk/aclk.c b/src/aclk/aclk.c index 41f26ded5904b9..2f8e8b703b6a8f 100644 --- a/src/aclk/aclk.c +++ b/src/aclk/aclk.c @@ -23,7 +23,6 @@ int aclk_pubacks_per_conn = 0; // How many PubAcks we got since MQTT conn est. int aclk_rcvd_cloud_msgs = 0; int aclk_connection_counter = 0; -int disconnect_req = 0; static bool aclk_connected = false; static inline void aclk_set_connected(void) { @@ -51,7 +50,8 @@ bool aclk_online_for_nodes(void) { int aclk_ctx_based = 0; int aclk_disable_runtime = 0; -int aclk_kill_link = 0; + +ACLK_DISCONNECT_ACTION disconnect_req = ACLK_NO_DISCONNECT; usec_t aclk_session_us = 0; time_t aclk_session_sec = 0; @@ -301,14 +301,26 @@ static int handle_connection(mqtt_wss_client client) return 1; } - if (disconnect_req || aclk_kill_link) { - nd_log(NDLS_DAEMON, NDLP_NOTICE, - "Going to restart connection due to disconnect_req=%s (cloud req), aclk_kill_link=%s (reclaim)", - disconnect_req ? "true" : "false", - aclk_kill_link ? 
"true" : "false"); + if (disconnect_req != ACLK_NO_DISCONNECT) { + const char *reason; + switch (disconnect_req) { + case ACLK_CLOUD_DISCONNECT: + reason = "cloud request"; + break; + case ACLK_PING_TIMEOUT: + reason = "ping timeout"; + break; + case ACLK_RELOAD_CONF: + reason = "reclaim"; + break; + default: + reason = "unknown"; + break; + } + + nd_log(NDLS_DAEMON, NDLP_NOTICE, "Going to restart connection due to \"%s\"", reason); - disconnect_req = 0; - aclk_kill_link = 0; + disconnect_req = ACLK_NO_DISCONNECT; aclk_graceful_disconnect(client); aclk_shared_state.mqtt_shutdown_msg_id = -1; aclk_shared_state.mqtt_shutdown_msg_rcvd = 0; diff --git a/src/aclk/aclk.h b/src/aclk/aclk.h index b8e719bc87fcc9..45a2eac85494f7 100644 --- a/src/aclk/aclk.h +++ b/src/aclk/aclk.h @@ -11,6 +11,13 @@ // stable for the purposes of TBEB (truncated binary exponential backoff) #define ACLK_PUBACKS_CONN_STABLE 3 +typedef enum { + ACLK_NO_DISCONNECT = 0, + ACLK_CLOUD_DISCONNECT = 1, + ACLK_RELOAD_CONF = 2, + ACLK_PING_TIMEOUT = 3 +} ACLK_DISCONNECT_ACTION; + typedef enum __attribute__((packed)) { ACLK_STATUS_CONNECTED = 0, ACLK_STATUS_NONE, @@ -62,7 +69,7 @@ extern time_t aclk_session_sec; extern time_t aclk_block_until; extern int aclk_connection_counter; -extern int disconnect_req; +extern ACLK_DISCONNECT_ACTION disconnect_req; void *aclk_main(void *ptr); diff --git a/src/aclk/aclk_rx_msgs.c b/src/aclk/aclk_rx_msgs.c index ce517048c2c93a..36bd3599d622fe 100644 --- a/src/aclk/aclk_rx_msgs.c +++ b/src/aclk/aclk_rx_msgs.c @@ -407,7 +407,7 @@ int handle_disconnect_req(const char *msg, size_t msg_len) "Cloud asks not to reconnect for %u seconds. 
We shall honor that request", (unsigned int)cmd->reconnect_after_s); } - disconnect_req = 1; + disconnect_req = ACLK_CLOUD_DISCONNECT; freez(cmd->error_description); freez(cmd); return 0; diff --git a/src/aclk/mqtt_websockets/mqtt_ng.c b/src/aclk/mqtt_websockets/mqtt_ng.c index daf7931151bdec..6026a1e55ef67c 100644 --- a/src/aclk/mqtt_websockets/mqtt_ng.c +++ b/src/aclk/mqtt_websockets/mqtt_ng.c @@ -1804,6 +1804,7 @@ static int parse_data(struct mqtt_ng_client *client) return MQTT_NG_CLIENT_PROTOCOL_ERROR; } parser->state = MQTT_PARSE_MQTT_PACKET_DONE; + ping_timeout = 0; break; case MQTT_CPT_DISCONNECT: rc = parse_disconnect_varhdr(client); diff --git a/src/aclk/mqtt_websockets/mqtt_ng.h b/src/aclk/mqtt_websockets/mqtt_ng.h index 1661f540e2c795..c5f6d94ccf9dbe 100644 --- a/src/aclk/mqtt_websockets/mqtt_ng.h +++ b/src/aclk/mqtt_websockets/mqtt_ng.h @@ -10,7 +10,7 @@ #define MQTT_NG_MSGGEN_MSG_TOO_BIG 3 struct mqtt_ng_client; - +extern time_t ping_timeout; /* Converts integer to MQTT Variable Byte Integer as per 1.5.5 of MQTT 5 specs * @param input value to be converted * @param output pointer to memory where output will be written to. Must allow up to 4 bytes to be written. 
diff --git a/src/aclk/mqtt_websockets/mqtt_wss_client.c b/src/aclk/mqtt_websockets/mqtt_wss_client.c index 2b2c972bb7fe5d..92c489905a4583 100644 --- a/src/aclk/mqtt_websockets/mqtt_wss_client.c +++ b/src/aclk/mqtt_websockets/mqtt_wss_client.c @@ -9,12 +9,16 @@ #include "mqtt_ng.h" #include "ws_client.h" #include "common_internal.h" +#include "../aclk.h" #define PIPE_READ_END 0 #define PIPE_WRITE_END 1 #define POLLFD_SOCKET 0 #define POLLFD_PIPE 1 +#define PING_TIMEOUT (60) //Expect a ping response within this time (seconds) +time_t ping_timeout = 0; + #if (OPENSSL_VERSION_NUMBER < OPENSSL_VERSION_110) && (SSLEAY_VERSION_NUMBER >= OPENSSL_VERSION_097) #include #endif @@ -744,13 +748,11 @@ static int handle_mqtt_internal(mqtt_wss_client client) return 0; } -#define SEC_TO_MSEC 1000 -static long long int t_till_next_keepalive_ms(mqtt_wss_client client) +static int t_till_next_keepalive_ms(mqtt_wss_client client) { time_t last_send = mqtt_ng_last_send_time(client->mqtt); - long long int next_mqtt_keep_alive = (last_send * SEC_TO_MSEC) - + (client->mqtt_keepalive * (SEC_TO_MSEC * 0.75 /* SEND IN ADVANCE */)); - return(next_mqtt_keep_alive - (time(NULL) * SEC_TO_MSEC)); + time_t next_mqtt_keep_alive = last_send + client->mqtt_keepalive * 0.75; + return ((next_mqtt_keep_alive - now_realtime_sec()) * MSEC_PER_SEC); } #ifdef MQTT_WSS_CPUSTATS @@ -777,10 +779,12 @@ int mqtt_wss_service(mqtt_wss_client client, int timeout_ms) #endif // Check user requested TO doesn't interfere with MQTT keep alives - long long int till_next_keep_alive = t_till_next_keepalive_ms(client); - if (client->mqtt_connected && (timeout_ms < 0 || timeout_ms >= till_next_keep_alive)) { - timeout_ms = till_next_keep_alive; - send_keepalive = 1; + if (!ping_timeout) { + int till_next_keep_alive = t_till_next_keepalive_ms(client); + if (client->mqtt_connected && (timeout_ms < 0 || timeout_ms >= till_next_keep_alive)) { + timeout_ms = till_next_keep_alive; + send_keepalive = 1; + } } #ifdef 
MQTT_WSS_CPUSTATS @@ -802,11 +806,17 @@ int mqtt_wss_service(mqtt_wss_client client, int timeout_ms) #endif if (ret == 0) { + time_t now = now_realtime_sec(); if (send_keepalive) { // otherwise we shortened the timeout ourselves to take care of // MQTT keep alives mqtt_ng_ping(client->mqtt); + ping_timeout = now + PING_TIMEOUT; } else { + if (ping_timeout && ping_timeout < now) { + disconnect_req = ACLK_PING_TIMEOUT; + ping_timeout = 0; + } // if poll timed out and user requested timeout was being used // return here let user do his work and he will call us back soon return 0; diff --git a/src/claim/claim.c b/src/claim/claim.c index 03fb18c388fefa..da64a1367c6531 100644 --- a/src/claim/claim.c +++ b/src/claim/claim.c @@ -148,7 +148,7 @@ bool load_claiming_state(void) { if (aclk_online()) { nd_log(NDLS_DAEMON, NDLP_ERR, "CLAIM: agent was already connected to NC - forcing reconnection under new credentials"); - aclk_kill_link = 1; + disconnect_req = ACLK_RELOAD_CONF; } aclk_disable_runtime = 0; From e61d416c0e1bbe95179134d30022f391e3000aee Mon Sep 17 00:00:00 2001 From: Costa Tsaousis Date: Wed, 2 Oct 2024 20:00:54 +0300 Subject: [PATCH 22/23] Fix win apps uptime (#18662) zero invalid uptime --- src/collectors/apps.plugin/apps_os_windows.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/collectors/apps.plugin/apps_os_windows.c b/src/collectors/apps.plugin/apps_os_windows.c index 38a552bb0dff12..55687280037489 100644 --- a/src/collectors/apps.plugin/apps_os_windows.c +++ b/src/collectors/apps.plugin/apps_os_windows.c @@ -805,6 +805,9 @@ static inline kernel_uint_t perflib_elapsed(COUNTER_DATA *d) { internal_fatal(d->current.CounterType != PERF_ELAPSED_TIME || !freq1, "Wrong gauge type"); + if(!data1 || !time1 || !freq1 || data1 > (ULONGLONG)time1) + return 0; + return (time1 - data1) / freq1; } From dbec34183bd9293e5d41e28bdb0eae2e957ce0fe Mon Sep 17 00:00:00 2001 From: netdatabot Date: Thu, 3 Oct 2024 00:19:45 +0000 Subject: [PATCH 23/23] [ci skip] Update 
changelog and version for nightly build: v1.99.0-230-nightly. --- CHANGELOG.md | 19 +++++++++++-------- packaging/version | 2 +- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a96dfc021a1ae..dba2915b1734cb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,14 +2,20 @@ ## [**Next release**](https://github.com/netdata/netdata/tree/HEAD) -[Full Changelog](https://github.com/netdata/netdata/compare/v1.47.2...HEAD) +[Full Changelog](https://github.com/netdata/netdata/compare/v1.47.3...HEAD) **Merged pull requests:** +- Fix win apps uptime [\#18662](https://github.com/netdata/netdata/pull/18662) ([ktsaou](https://github.com/ktsaou)) +- bump go toolchain v1.22.8 [\#18659](https://github.com/netdata/netdata/pull/18659) ([ilyam8](https://github.com/ilyam8)) +- go.d sd fix sprig funcmap [\#18658](https://github.com/netdata/netdata/pull/18658) ([ilyam8](https://github.com/ilyam8)) +- Handle mqtt ping timeouts [\#18653](https://github.com/netdata/netdata/pull/18653) ([stelfrag](https://github.com/stelfrag)) +- apps.plugin improvements [\#18652](https://github.com/netdata/netdata/pull/18652) ([ktsaou](https://github.com/ktsaou)) - go.d remove duplicate chart check in tests [\#18650](https://github.com/netdata/netdata/pull/18650) ([ilyam8](https://github.com/ilyam8)) - fixed freebsd cpu calculation [\#18648](https://github.com/netdata/netdata/pull/18648) ([ktsaou](https://github.com/ktsaou)) - Regenerate integrations.js [\#18647](https://github.com/netdata/netdata/pull/18647) ([netdatabot](https://github.com/netdatabot)) - Use temporary file for commit date check. [\#18646](https://github.com/netdata/netdata/pull/18646) ([Ferroin](https://github.com/Ferroin)) +- New wording about edit-config script in docs [\#18639](https://github.com/netdata/netdata/pull/18639) ([Ancairon](https://github.com/Ancairon)) - Update file names. 
[\#18638](https://github.com/netdata/netdata/pull/18638) ([vkalintiris](https://github.com/vkalintiris)) - Move plugins.d directory outside of collectors [\#18637](https://github.com/netdata/netdata/pull/18637) ([vkalintiris](https://github.com/vkalintiris)) - go.d/smartctl: fix exit status check in scan [\#18635](https://github.com/netdata/netdata/pull/18635) ([ilyam8](https://github.com/ilyam8)) @@ -187,6 +193,10 @@ - fix warnings in Dockerfile [\#18395](https://github.com/netdata/netdata/pull/18395) ([NicolasCARPi](https://github.com/NicolasCARPi)) - Use existing ACLK event loop for cloud queries [\#18218](https://github.com/netdata/netdata/pull/18218) ([stelfrag](https://github.com/stelfrag)) +## [v1.47.3](https://github.com/netdata/netdata/tree/v1.47.3) (2024-10-02) + +[Full Changelog](https://github.com/netdata/netdata/compare/v1.47.2...v1.47.3) + ## [v1.47.2](https://github.com/netdata/netdata/tree/v1.47.2) (2024-09-24) [Full Changelog](https://github.com/netdata/netdata/compare/v1.47.1...v1.47.2) @@ -414,13 +424,6 @@ - Switch to legacy images for CentOS 7 CI. [\#18085](https://github.com/netdata/netdata/pull/18085) ([Ferroin](https://github.com/Ferroin)) - Track LTS for Debian EOL status. [\#18084](https://github.com/netdata/netdata/pull/18084) ([Ferroin](https://github.com/Ferroin)) - Remove Debian 10 from supported platforms. [\#18083](https://github.com/netdata/netdata/pull/18083) ([Ferroin](https://github.com/Ferroin)) -- Remove Ubuntu 23.10 from supported platforms. 
[\#18082](https://github.com/netdata/netdata/pull/18082) ([Ferroin](https://github.com/Ferroin)) -- go.d fail2ban: add docker support [\#18081](https://github.com/netdata/netdata/pull/18081) ([ilyam8](https://github.com/ilyam8)) -- Improve alerts [\#18080](https://github.com/netdata/netdata/pull/18080) ([stelfrag](https://github.com/stelfrag)) -- Bump golang.org/x/net from 0.26.0 to 0.27.0 in /src/go [\#18078](https://github.com/netdata/netdata/pull/18078) ([dependabot[bot]](https://github.com/apps/dependabot)) -- Bump github.com/gofrs/flock from 0.11.0 to 0.12.0 in /src/go [\#18077](https://github.com/netdata/netdata/pull/18077) ([dependabot[bot]](https://github.com/apps/dependabot)) -- proc: collect ksm/swap/cma/zswap only when feature enabled [\#18076](https://github.com/netdata/netdata/pull/18076) ([ilyam8](https://github.com/ilyam8)) -- health add alarm docker container down [\#18075](https://github.com/netdata/netdata/pull/18075) ([ilyam8](https://github.com/ilyam8)) ## [v1.46.3](https://github.com/netdata/netdata/tree/v1.46.3) (2024-07-23) diff --git a/packaging/version b/packaging/version index b41648f786959f..3ab666e837443c 100644 --- a/packaging/version +++ b/packaging/version @@ -1 +1 @@ -v1.99.0-223-nightly +v1.99.0-230-nightly