From 347c54770ca530a2f30b063cf83411e136db6010 Mon Sep 17 00:00:00 2001 From: ccztux Date: Mon, 27 Feb 2023 16:57:00 +0100 Subject: [PATCH 01/13] Fixed: Naemon stops executing checks and doesnt respawn Core Worker processes (#418) --- src/naemon/workers.c | 74 ++++++++++++++++++++++++++++++-------------- 1 file changed, 51 insertions(+), 23 deletions(-) diff --git a/src/naemon/workers.c b/src/naemon/workers.c index 35ac0d67f..368e186bb 100644 --- a/src/naemon/workers.c +++ b/src/naemon/workers.c @@ -64,6 +64,9 @@ static struct wproc_list *to_remove = NULL; unsigned int wproc_num_workers_online = 0, wproc_num_workers_desired = 0; unsigned int wproc_num_workers_spawned = 0; +static int get_desired_workers(int desired_workers); +static int spawn_core_worker(void); + #define tv2float(tv) ((float)((tv)->tv_sec) + ((float)(tv)->tv_usec) / 1000000.0) static void wproc_logdump_buffer(int debuglevel, int verbosity, const char *prefix, char *buf) @@ -414,6 +417,7 @@ static int handle_worker_result(int sd, int events, void *arg) char *buf, *error_reason = NULL; size_t size; int ret; + unsigned int desired_workers; struct wproc_worker *wp = (struct wproc_worker *)arg; ret = nm_bufferqueue_read(wp->bq, wp->sd); @@ -428,17 +432,32 @@ static int handle_worker_result(int sd, int events, void *arg) nm_log(NSLOG_INFO_MESSAGE, "wproc: Socket to worker %s broken, removing", wp->name); wproc_num_workers_online--; iobroker_unregister(nagios_iobs, sd); - if (workers.len <= 0) { - /* there aren't global workers left, we can't run any more checks - * we should try respawning a few of the standard ones - */ - nm_log(NSLOG_RUNTIME_ERROR, "wproc: All our workers are dead, we can't do anything!"); - } /* remove worker from worker list - this ensures that we don't reassign * its jobs back to itself*/ remove_worker(wp); + desired_workers = get_desired_workers(num_check_workers); + + if (workers.len < desired_workers) { + /* there aren't global workers left, we can't run any more checks + * we 
should try respawning a few of the standard ones + */ + nm_log(NSLOG_RUNTIME_ERROR, "wproc: We have have less Core Workers than we should have, trying to respawn Core Worker"); + + /* Respawn a worker */ + if ((ret = spawn_core_worker()) < 0) { + nm_log(NSLOG_RUNTIME_ERROR, "wproc: Failed to respawn Core Worker"); + } else { + nm_log(NSLOG_INFO_MESSAGE, "wproc: Respawning Core Worker %u was successful", ret); + } + } else if (workers.len == 0) { + /* there aren't global workers left, we can't run any more checks + * we should try respawning a few of the standard ones + */ + nm_log(NSLOG_RUNTIME_ERROR, "wproc: All our workers are dead, we can't do anything!"); + } + /* reassign this dead worker's jobs */ g_hash_table_iter_init(&iter, wp->jobs); while (g_hash_table_iter_next(&iter, NULL, &job_)) { @@ -664,24 +683,8 @@ static int spawn_core_worker(void) } -int init_workers(int desired_workers) +static int get_desired_workers(int desired_workers) { - int i; - - /* - * we register our query handler before launching workers, - * so other workers can join us whenever they're ready - */ - specialized_workers = g_hash_table_new_full(g_str_hash, g_str_equal, - free, NULL - ); - if (!qh_register_handler("wproc", "Worker process management and info", 0, wproc_query_handler)) { - log_debug_info(DEBUGL_IPC, DEBUGV_BASIC, "wproc: Successfully registered manager as @wproc with query handler\n"); - } else { - nm_log(NSLOG_RUNTIME_ERROR, "wproc: Failed to register manager with query handler\n"); - return -1; - } - if (desired_workers <= 0) { int cpus = online_cpus(); @@ -708,6 +711,31 @@ int init_workers(int desired_workers) if (desired_workers < (int)workers.len) return -1; + return desired_workers; +} + + +int init_workers(int desired_workers) +{ + int i; + + /* + * we register our query handler before launching workers, + * so other workers can join us whenever they're ready + */ + specialized_workers = g_hash_table_new_full(g_str_hash, g_str_equal, + free, NULL + ); + if 
(!qh_register_handler("wproc", "Worker process management and info", 0, wproc_query_handler)) { + log_debug_info(DEBUGL_IPC, DEBUGV_BASIC, "wproc: Successfully registered manager as @wproc with query handler\n"); + } else { + nm_log(NSLOG_RUNTIME_ERROR, "wproc: Failed to register manager with query handler\n"); + return -1; + } + + /* Get the number of workers we need */ + desired_workers = get_desired_workers(desired_workers); + for (i = 0; i < desired_workers; i++) spawn_core_worker(); From bf8bf43e0344d11833c446674453edc82e7cce20 Mon Sep 17 00:00:00 2001 From: Christian Zettel Date: Mon, 27 Feb 2023 17:10:00 +0100 Subject: [PATCH 02/13] Update workers.c Fixed the indentation stuff... --- src/naemon/workers.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/naemon/workers.c b/src/naemon/workers.c index 368e186bb..2cee17527 100644 --- a/src/naemon/workers.c +++ b/src/naemon/workers.c @@ -417,7 +417,7 @@ static int handle_worker_result(int sd, int events, void *arg) char *buf, *error_reason = NULL; size_t size; int ret; - unsigned int desired_workers; + unsigned int desired_workers; struct wproc_worker *wp = (struct wproc_worker *)arg; ret = nm_bufferqueue_read(wp->bq, wp->sd); @@ -437,7 +437,7 @@ static int handle_worker_result(int sd, int events, void *arg) * its jobs back to itself*/ remove_worker(wp); - desired_workers = get_desired_workers(num_check_workers); + desired_workers = get_desired_workers(num_check_workers); if (workers.len < desired_workers) { /* there aren't global workers left, we can't run any more checks @@ -445,18 +445,18 @@ static int handle_worker_result(int sd, int events, void *arg) */ nm_log(NSLOG_RUNTIME_ERROR, "wproc: We have have less Core Workers than we should have, trying to respawn Core Worker"); - /* Respawn a worker */ - if ((ret = spawn_core_worker()) < 0) { - nm_log(NSLOG_RUNTIME_ERROR, "wproc: Failed to respawn Core Worker"); - } else { - nm_log(NSLOG_INFO_MESSAGE, "wproc: 
Respawning Core Worker %u was successful", ret); - } + /* Respawn a worker */ + if ((ret = spawn_core_worker()) < 0) { + nm_log(NSLOG_RUNTIME_ERROR, "wproc: Failed to respawn Core Worker"); + } else { + nm_log(NSLOG_INFO_MESSAGE, "wproc: Respawning Core Worker %u was successful", ret); + } } else if (workers.len == 0) { /* there aren't global workers left, we can't run any more checks * we should try respawning a few of the standard ones */ nm_log(NSLOG_RUNTIME_ERROR, "wproc: All our workers are dead, we can't do anything!"); - } + } /* reassign this dead worker's jobs */ g_hash_table_iter_init(&iter, wp->jobs); @@ -711,7 +711,7 @@ static int get_desired_workers(int desired_workers) if (desired_workers < (int)workers.len) return -1; - return desired_workers; + return desired_workers; } @@ -733,8 +733,8 @@ int init_workers(int desired_workers) return -1; } - /* Get the number of workers we need */ - desired_workers = get_desired_workers(desired_workers); + /* Get the number of workers we need */ + desired_workers = get_desired_workers(desired_workers); for (i = 0; i < desired_workers; i++) spawn_core_worker(); From 302af625c8906f498986e1352cb272bc29ebc999 Mon Sep 17 00:00:00 2001 From: Christian Zettel Date: Mon, 27 Feb 2023 17:34:20 +0100 Subject: [PATCH 03/13] Update workers.c Comment changed. 
--- src/naemon/workers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/naemon/workers.c b/src/naemon/workers.c index 2cee17527..9674c5588 100644 --- a/src/naemon/workers.c +++ b/src/naemon/workers.c @@ -453,7 +453,7 @@ static int handle_worker_result(int sd, int events, void *arg) } } else if (workers.len == 0) { /* there aren't global workers left, we can't run any more checks - * we should try respawning a few of the standard ones + * this should never happen, because the respawning will be done in the upper if condition */ nm_log(NSLOG_RUNTIME_ERROR, "wproc: All our workers are dead, we can't do anything!"); } From 3b9b2f0491675e54d7ef8171a19dc6e28954b770 Mon Sep 17 00:00:00 2001 From: nook24 Date: Thu, 2 Mar 2023 21:52:13 +0000 Subject: [PATCH 04/13] Call wproc_destroy with the WPROC_FORCE flag, whenever a Naemon worker process dies. This will cleanup any processes. I'm not sure why the WPROC_FORCE flag exists at all. Maybe this could cause problems, when external workers not spawned by Naemon itself connect to the Query Handler? 
The original commit ec4dc03db0a08f06591d31edad987e6ae389ab37 from 10 years ago did not contain more information about this Signed-off-by: nook24 --- src/naemon/workers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/naemon/workers.c b/src/naemon/workers.c index 9674c5588..d1b68aab1 100644 --- a/src/naemon/workers.c +++ b/src/naemon/workers.c @@ -468,7 +468,7 @@ static int handle_worker_result(int sd, int events, void *arg) ); } - wproc_destroy(wp, 0); + wproc_destroy(wp, WPROC_FORCE); return 0; } while ((buf = worker_ioc2msg(wp->bq, &size, 0))) { From 7940a8187b2ab1a328481da5f0d4a77fe0f83169 Mon Sep 17 00:00:00 2001 From: ccztux Date: Mon, 6 Mar 2023 11:30:06 +0100 Subject: [PATCH 05/13] The function get_desired_workers now always returns the desired workers, as requested in https://github.com/naemon/naemon-core/pull/421#pullrequestreview-1323354780 --- src/naemon/workers.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/naemon/workers.c b/src/naemon/workers.c index d1b68aab1..588148b89 100644 --- a/src/naemon/workers.c +++ b/src/naemon/workers.c @@ -702,14 +702,8 @@ static int get_desired_workers(int desired_workers) } } } - wproc_num_workers_desired = desired_workers; - - if (workers_alive() == desired_workers) - return 0; - /* can't shrink the number of workers (yet) */ - if (desired_workers < (int)workers.len) - return -1; + wproc_num_workers_desired = desired_workers; return desired_workers; } @@ -736,6 +730,13 @@ int init_workers(int desired_workers) /* Get the number of workers we need */ desired_workers = get_desired_workers(desired_workers); + if (workers_alive() == desired_workers) + return 0; + + /* can't shrink the number of workers (yet) */ + if (desired_workers < (int)workers.len) + return -1; + for (i = 0; i < desired_workers; i++) spawn_core_worker(); From d6c4c6eaa86c3031c03457f48e74bcdc68aa9451 Mon Sep 17 00:00:00 2001 From: nook24 Date: Thu, 23 Mar 2023 21:00:16 +0000 Subject: [PATCH 
06/13] Do not crash if wps is NULL. This could happen if a core worker dies and gets respawned Signed-off-by: nook24 --- src/naemon/checks_host.c | 52 +++++++++++++++++++++------------------- src/naemon/workers.c | 7 +++++- 2 files changed, 33 insertions(+), 26 deletions(-) diff --git a/src/naemon/checks_host.c b/src/naemon/checks_host.c index e7f486207..91924f5b2 100644 --- a/src/naemon/checks_host.c +++ b/src/naemon/checks_host.c @@ -636,33 +636,35 @@ static void handle_worker_host_check(wproc_result *wpres, void *arg, int flags) if (currently_running_host_checks > 0) currently_running_host_checks--; - hst = find_host(cr->host_name); - if (hst && wpres) { - hst->is_executing = FALSE; - memcpy(&cr->rusage, &wpres->rusage, sizeof(wpres->rusage)); - cr->start_time.tv_sec = wpres->start.tv_sec; - cr->start_time.tv_usec = wpres->start.tv_usec; - cr->finish_time.tv_sec = wpres->stop.tv_sec; - cr->finish_time.tv_usec = wpres->stop.tv_usec; - if (WIFEXITED(wpres->wait_status)) { - cr->return_code = WEXITSTATUS(wpres->wait_status); - } else { - cr->return_code = STATE_UNKNOWN; - } + if (wpres) { + hst = find_host(cr->host_name); + if (hst) { + hst->is_executing = FALSE; + memcpy(&cr->rusage, &wpres->rusage, sizeof(wpres->rusage)); + cr->start_time.tv_sec = wpres->start.tv_sec; + cr->start_time.tv_usec = wpres->start.tv_usec; + cr->finish_time.tv_sec = wpres->stop.tv_sec; + cr->finish_time.tv_usec = wpres->stop.tv_usec; + if (WIFEXITED(wpres->wait_status)) { + cr->return_code = WEXITSTATUS(wpres->wait_status); + } else { + cr->return_code = STATE_UNKNOWN; + } - if (wpres->outstd && *wpres->outstd) { - cr->output = nm_strdup(wpres->outstd); - } else if (wpres->outerr && *wpres->outerr) { - nm_asprintf(&cr->output, "(No output on stdout) stderr: %s", wpres->outerr); - } else { - cr->output = NULL; - } + if (wpres->outstd && *wpres->outstd) { + cr->output = nm_strdup(wpres->outstd); + } else if (wpres->outerr && *wpres->outerr) { + nm_asprintf(&cr->output, "(No output on 
stdout) stderr: %s", wpres->outerr); + } else { + cr->output = NULL; + } - cr->early_timeout = wpres->early_timeout; - cr->exited_ok = wpres->exited_ok; - cr->engine = NULL; - cr->source = wpres->source; - process_check_result(cr); + cr->early_timeout = wpres->early_timeout; + cr->exited_ok = wpres->exited_ok; + cr->engine = NULL; + cr->source = wpres->source; + process_check_result(cr); + } } free_check_result(cr); nm_free(cr); diff --git a/src/naemon/workers.c b/src/naemon/workers.c index 588148b89..53be1edd6 100644 --- a/src/naemon/workers.c +++ b/src/naemon/workers.c @@ -163,6 +163,11 @@ static void run_job_callback(struct wproc_job *job, struct wproc_result *wpres, { if (!job || !job->callback) return; + + if (!wpres) { + nm_log(NSLOG_RUNTIME_ERROR, "---!!!--- wpres is null or so TODO REMOVE THIS"); + return; + } (*job->callback)(wpres, job->data, val); job->callback = NULL; @@ -446,7 +451,7 @@ static int handle_worker_result(int sd, int events, void *arg) nm_log(NSLOG_RUNTIME_ERROR, "wproc: We have have less Core Workers than we should have, trying to respawn Core Worker"); /* Respawn a worker */ - if ((ret = spawn_core_worker()) < 0) { + if ((ret = spawn_core_worker()) < 0) { nm_log(NSLOG_RUNTIME_ERROR, "wproc: Failed to respawn Core Worker"); } else { nm_log(NSLOG_INFO_MESSAGE, "wproc: Respawning Core Worker %u was successful", ret); From b5c28d2d370fcbedc1e20d0838d53b157aedb6a8 Mon Sep 17 00:00:00 2001 From: Sven Nierlein Date: Wed, 28 Jun 2023 11:29:30 +0200 Subject: [PATCH 07/13] use dynamic sized command line buffer Previously we used a fixed size 8k buffer when parsing command line arguments. This sounds much, but there are command lines bigger than 8k and they are simply cut off without any warning. Instead we use a dynamic sized buffer with the size of the raw command now. 
--- src/naemon/common.h | 2 -- src/naemon/macros.c | 72 ++++++++++++++++++++++++--------------------- 2 files changed, 39 insertions(+), 35 deletions(-) diff --git a/src/naemon/common.h b/src/naemon/common.h index 26e8ed070..98df18c32 100644 --- a/src/naemon/common.h +++ b/src/naemon/common.h @@ -471,8 +471,6 @@ NAGIOS_END_DECL #define MAX_FILENAME_LENGTH 256 /* max length of path/filename that Nagios will process */ #define MAX_INPUT_BUFFER 1024 /* size in bytes of max. input buffer (for reading files, misc stuff) */ -#define MAX_COMMAND_BUFFER 8192 /* max length of raw or processed command line */ - #define MAX_DATETIME_LENGTH 48 diff --git a/src/naemon/macros.c b/src/naemon/macros.c index 664223048..c0f460c84 100644 --- a/src/naemon/macros.c +++ b/src/naemon/macros.c @@ -121,8 +121,9 @@ static int grab_custom_object_macro_r(nagios_macros *mac, char *macro_name, cust /* given a "raw" command, return the "expanded" or "whole" command line */ int get_raw_command_line_r(nagios_macros *mac, command *cmd_ptr, char *cmd, char **full_command, int macro_options) { - char temp_arg[MAX_COMMAND_BUFFER] = ""; + char *temp_arg = NULL; char *arg_buffer = NULL; + size_t cmd_len = 0; register int x = 0; register int y = 0; register int arg_index = 0; @@ -139,51 +140,56 @@ int get_raw_command_line_r(nagios_macros *mac, command *cmd_ptr, char *cmd, char /* get the full command line */ *full_command = nm_strdup((cmd_ptr->command_line == NULL) ? "" : cmd_ptr->command_line); - /* XXX: Crazy indent */ - /* get the command arguments */ - if (cmd != NULL) { + if (cmd == NULL) { + log_debug_info(DEBUGL_COMMANDS | DEBUGL_CHECKS | DEBUGL_MACROS, 2, "Expanded Command Output: %s\n", *full_command); + return OK; + } - /* skip the command name (we're about to get the arguments)... */ - for (arg_index = 0;; arg_index++) { - if (cmd[arg_index] == '!' 
|| cmd[arg_index] == '\x0') - break; - } + cmd_len = strlen(cmd); + temp_arg = nm_malloc(cmd_len); - /* get each command argument */ - for (x = 0; x < MAX_COMMAND_ARGUMENTS; x++) { + /* get the command arguments */ + /* skip the command name (we're about to get the arguments)... */ + for (arg_index = 0;; arg_index++) { + if (cmd[arg_index] == '!' || cmd[arg_index] == '\x0') + break; + } - /* we reached the end of the arguments... */ - if (cmd[arg_index] == '\x0') - break; + /* get each command argument */ + for (x = 0; x < MAX_COMMAND_ARGUMENTS; x++) { - /* get the next argument */ - /* can't use strtok(), as that's used in process_macros... */ - for (arg_index++, y = 0; y < (int)sizeof(temp_arg) - 1; arg_index++) { + /* we reached the end of the arguments... */ + if (cmd[arg_index] == '\x0') + break; - /* handle escaped argument delimiters */ - if (cmd[arg_index] == '\\' && cmd[arg_index + 1] == '!') { - arg_index++; - } else if (cmd[arg_index] == '!' || cmd[arg_index] == '\x0') { - /* end of argument */ - break; - } + /* get the next argument */ + /* can't use strtok(), as that's used in process_macros... */ + for (arg_index++, y = 0; y < (int)cmd_len - 1; arg_index++) { - /* copy the character */ - temp_arg[y] = cmd[arg_index]; - y++; + /* handle escaped argument delimiters */ + if (cmd[arg_index] == '\\' && cmd[arg_index + 1] == '!') { + arg_index++; + } else if (cmd[arg_index] == '!' 
|| cmd[arg_index] == '\x0') { + /* end of argument */ + break; } - temp_arg[y] = '\x0'; - - /* ADDED 01/29/04 EG */ - /* process any macros we find in the argument */ - process_macros_r(mac, temp_arg, &arg_buffer, macro_options); - mac->argv[x] = arg_buffer; + /* copy the character */ + temp_arg[y] = cmd[arg_index]; + y++; } + temp_arg[y] = '\x0'; + + /* ADDED 01/29/04 EG */ + /* process any macros we find in the argument */ + process_macros_r(mac, temp_arg, &arg_buffer, macro_options); + + mac->argv[x] = arg_buffer; } log_debug_info(DEBUGL_COMMANDS | DEBUGL_CHECKS | DEBUGL_MACROS, 2, "Expanded Command Output: %s\n", *full_command); + nm_free(temp_arg); return OK; } From 7dabe866e209ba8ae8e8822ab18b569867c1001f Mon Sep 17 00:00:00 2001 From: nook24 Date: Fri, 7 Jul 2023 09:57:49 +0200 Subject: [PATCH 08/13] Set LD_LIBRARY_PATH when running inside of VS Code to the correct location Signed-off-by: nook24 --- .vscode/c_cpp_properties.json | 5 +++-- .vscode/launch.json | 8 +++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json index e67a52115..1bd872feb 100644 --- a/.vscode/c_cpp_properties.json +++ b/.vscode/c_cpp_properties.json @@ -9,7 +9,8 @@ //"/usr/lib/gcc/x86_64-redhat-linux/12/include/**", // Fedora "/usr/local/include/**", "/usr/include/**", - "/usr/lib64/**" + "/usr/lib64/**", + "/usr/lib/**" ], "defines": [], "compilerPath": "/usr/bin/gcc", @@ -19,4 +20,4 @@ } ], "version": 4 -} +} \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json index de7bb05a9..e2844e484 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -12,13 +12,19 @@ // This will trigger the "make all" task from tasks.json "preLaunchTask": "make all", // We are not using the binary from the build folder because we do not want to run the "make install" task. 
- // The "make install" task is only to generate the default naemon configurtion files + // The "make install" task is only to generate the default naemon configurtion files "program": "${workspaceFolder}/src/naemon/.libs/naemon", "args": [ //"--help", "${workspaceFolder}/build/etc/naemon/naemon.cfg" ], "cwd": "${workspaceFolder}", + "environment": [ + { + "name": "LD_LIBRARY_PATH", + "value": "${workspaceFolder}/.libs" + } + ], // Optional parameter. If true, the debugger should stop at the entrypoint of the target. "stopAtEntry": true, From 59f73c07434e2f707469a8fb2b26bd83d1cd44fa Mon Sep 17 00:00:00 2001 From: Sven Nierlein Date: Tue, 29 Aug 2023 18:03:05 +0200 Subject: [PATCH 09/13] be more specific when unloading a neb module Print the current and expected api version number along with the error. This gives a hint about whether the neb module is too new or too old. --- src/naemon/nebmods.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/naemon/nebmods.c b/src/naemon/nebmods.c index 5bf41b33f..4958ce1f0 100644 --- a/src/naemon/nebmods.c +++ b/src/naemon/nebmods.c @@ -195,9 +195,11 @@ int neb_load_module(nebmodule *mod) /* check the module API version */ if (module_version_ptr == NULL || ((*module_version_ptr) != CURRENT_NEB_API_VERSION)) { - - nm_log(NSLOG_RUNTIME_ERROR, "Error: Module '%s' is using an old or unspecified version of the event broker API. Module will be unloaded.\n", mod->filename); - + if (module_version_ptr == NULL) { + nm_log(NSLOG_RUNTIME_ERROR, "Error: Module '%s' did not specify a version of the event broker API. Module will be unloaded.\n", mod->filename); + } else { + nm_log(NSLOG_RUNTIME_ERROR, "Error: Module '%s' is using an incompatible version (v%d) of the event broker API (current version: v%d). 
Module will be unloaded.\n", mod->filename, *module_version_ptr, CURRENT_NEB_API_VERSION); + } neb_unload_module(mod, NEBMODULE_FORCE_UNLOAD, NEBMODULE_ERROR_API_VERSION); return ERROR; From ffeecc19957b605b4f175a7751fa1e34a9736f1b Mon Sep 17 00:00:00 2001 From: Sven Nierlein Date: Sun, 17 Sep 2023 18:55:38 +0200 Subject: [PATCH 10/13] add systemd to build requires this (should) fix this build error on obs: ``` [ 139s] /.build_patchrpmcheck_scr: line 55: systemd-tmpfiles: command not found [ 139s] postinstall script of naemon-core-1.4.1-lp154.18.1.x86_64.rpm failed ``` --- naemon-core.spec | 1 + 1 file changed, 1 insertion(+) diff --git a/naemon-core.spec b/naemon-core.spec index 3eb5261df..a4a017474 100644 --- a/naemon-core.spec +++ b/naemon-core.spec @@ -41,6 +41,7 @@ Requires(pre): systemd Requires(post): systemd Requires(preun): systemd Requires(postun): systemd +BuildRequires: pkgconfig(systemd) %if 0%{suse_version} < 1230 Requires(pre): pwdutils %else From 660bd2961cb89b12a37ade537570984cfccc6092 Mon Sep 17 00:00:00 2001 From: Sven Nierlein Date: Thu, 12 Oct 2023 13:15:15 +0200 Subject: [PATCH 11/13] write objects.precache in tmp file first, then move it It's a good standard to do so and in fact, we do this already in several places, e.g. the status.dat. This ensures the file is ready and completely written before it will be used. The issue here is, that naemon starts without any issues if the precached file is empty for any reason. Except it has zero hosts/services then and removes all existing states/downtime/comments. 
Signed-off-by: Sven Nierlein --- src/naemon/objects.c | 59 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 54 insertions(+), 5 deletions(-) diff --git a/src/naemon/objects.c b/src/naemon/objects.c index 9bba13e0a..9023cf082 100644 --- a/src/naemon/objects.c +++ b/src/naemon/objects.c @@ -11,6 +11,7 @@ #include "logging.h" #include "globals.h" #include "nm_alloc.h" +#include "utils.h" int __nagios_object_structure_version = CURRENT_OBJECT_STRUCTURE_VERSION; @@ -20,6 +21,9 @@ int fcache_objects(char *cache_file) FILE *fp = NULL; time_t current_time = 0L; unsigned int i; + char *tmp_file = NULL; + int fd = 0; + int result = OK; /* some people won't want to cache their objects */ if (!cache_file || !strcmp(cache_file, "/dev/null")) @@ -27,10 +31,22 @@ int fcache_objects(char *cache_file) time(&current_time); + nm_asprintf(&tmp_file, "%sXXXXXX", cache_file); + if (tmp_file == NULL) + return ERROR; + + if ((fd = mkstemp(tmp_file)) == -1) { + nm_log(NSLOG_RUNTIME_ERROR, "Error: Unable to create temp file '%s' for writing object cache data: %s\n", tmp_file, strerror(errno)); + nm_free(tmp_file); + return ERROR; + } + /* open the cache file for writing */ - fp = fopen(cache_file, "w"); + fp = (FILE *)fopen(tmp_file, "w"); if (fp == NULL) { - nm_log(NSLOG_CONFIG_WARNING, "Warning: Could not open object cache file '%s' for writing!\n", cache_file); + unlink(tmp_file); + nm_log(NSLOG_CONFIG_WARNING, "Warning: Could not open object cache data file '%s' for writing!\n", tmp_file); + nm_free(tmp_file); return ERROR; } @@ -44,7 +60,6 @@ int fcache_objects(char *cache_file) fprintf(fp, "# Created: %s", ctime(&current_time)); fprintf(fp, "########################################\n\n"); - /* cache timeperiods */ for (i = 0; i < num_objects.timeperiods; i++) fcache_timeperiod(fp, timeperiod_ary[i]); @@ -109,7 +124,41 @@ int fcache_objects(char *cache_file) fcache_hostescalation(fp, esclist->object_ptr); } - fclose(fp); + /* reset file permissions */ + fchmod(fd, S_IRUSR | S_IWUSR | 
S_IRGRP | S_IWGRP | S_IROTH); + + /* flush the file to disk */ + fflush(fp); + + /* fsync the file so that it is completely written out before moving it */ + fsync(fd); + + /* close the temp file */ + result = ferror(fp) | fclose(fp); + + /* save/close was successful */ + if (result == 0) { + + result = OK; + + /* move the temp file to the status log (overwrite the old status log) */ + if (my_rename(tmp_file, cache_file)) { + unlink(tmp_file); + nm_log(NSLOG_RUNTIME_ERROR, "Error: Unable to update cache data file '%s': %s", cache_file, strerror(errno)); + result = ERROR; + } + } + + /* a problem occurred saving the file */ + else { + result = ERROR; + + /* remove temp file and log an error */ + unlink(tmp_file); + nm_log(NSLOG_RUNTIME_ERROR, "Error: Unable to save cache data file: %s", strerror(errno)); + } + + nm_free(tmp_file); - return OK; + return result; } From 83b25ec4c0f68c51a7f7291d0bfebbef817fdb0f Mon Sep 17 00:00:00 2001 From: Sven Nierlein Date: Fri, 13 Oct 2023 09:48:30 +0200 Subject: [PATCH 12/13] fix typo in comment left over from copy/pasted code. 
--- src/naemon/objects.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/naemon/objects.c b/src/naemon/objects.c index 9023cf082..18f256055 100644 --- a/src/naemon/objects.c +++ b/src/naemon/objects.c @@ -141,7 +141,7 @@ int fcache_objects(char *cache_file) result = OK; - /* move the temp file to the status log (overwrite the old status log) */ + /* move the temp file to the objects data file (overwrite the old objects.cache) */ if (my_rename(tmp_file, cache_file)) { unlink(tmp_file); nm_log(NSLOG_RUNTIME_ERROR, "Error: Unable to update cache data file '%s': %s", cache_file, strerror(errno)); From b829a682e10415913b4da87ffded09c26b057d87 Mon Sep 17 00:00:00 2001 From: nook24 Date: Fri, 1 Dec 2023 21:15:02 +0100 Subject: [PATCH 13/13] Remove debugging output Signed-off-by: nook24 --- src/naemon/workers.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/naemon/workers.c b/src/naemon/workers.c index 53be1edd6..f15b98bef 100644 --- a/src/naemon/workers.c +++ b/src/naemon/workers.c @@ -165,7 +165,6 @@ static void run_job_callback(struct wproc_job *job, struct wproc_result *wpres, return; if (!wpres) { - nm_log(NSLOG_RUNTIME_ERROR, "---!!!--- wpres is null or so TODO REMOVE THIS"); return; }