Skip to content

Commit

Permalink
node-control: improve ssh only backends
Browse files Browse the repository at this point in the history
  • Loading branch information
sni committed Oct 14, 2024
1 parent b483b3d commit f452ecd
Show file tree
Hide file tree
Showing 9 changed files with 101 additions and 47 deletions.
6 changes: 3 additions & 3 deletions lib/Thruk/Action/AddDefaults.pm
Original file line number Diff line number Diff line change
Expand Up @@ -754,10 +754,10 @@ sub set_configs_stash {
=cut
sub set_possible_backends {
my ($c,$disabled_backends) = @_;
my ($c,$disabled_backends, $peers) = @_;

my @possible_backends;
for my $b (@{$c->db->get_peers($c->stash->{'config_backends_only'} || 0)}) {
for my $b (@{$peers // $c->db->get_peers($c->stash->{'config_backends_only'} || 0)}) {
push @possible_backends, $b->{'key'};
}

Expand Down Expand Up @@ -846,7 +846,7 @@ sub update_site_panel_hashes {
}

# create sections and subsections for the site panel
$c->db->update_sections(); # need to recalculate, using the config tool removes sections without config backends
$c->db->update_sections($backends); # need to recalculate, using the config tool removes sections without config backends
_calculate_section_totals($c, $c->db->{'sections'}, $backend_detail, $initial_backends);

my $show_sitepanel = 'list';
Expand Down
15 changes: 13 additions & 2 deletions lib/Thruk/Backend/Manager.pm
Original file line number Diff line number Diff line change
Expand Up @@ -131,11 +131,21 @@ calculate sections
=cut

sub update_sections {
my($self) = @_;
my($self, $backends) = @_;

$self->{'sections'} = {};
$self->{'sections_depth'} = 0;
for my $peer (@{$self->get_peers(1)}) {

my $peers = [];
if($backends) {
for my $key (@{$backends}) {
push @{$peers}, $self->get_peer_by_key($key);
}
} else {
$peers = $self->get_peers(1);
}

for my $peer (@{$peers}) {
my @sections = split(/\/+/mx, $peer->{'section'});
if(scalar @sections == 0) {
@sections = ();
Expand Down Expand Up @@ -233,6 +243,7 @@ sub get_peers_by_tags {
my($self, $tags) = @_;
my @peers;

$tags = Thruk::Base::list($tags);
for my $b (@{$self->pool->{'objects'}}) {
next if(defined $b->{'active'} && !$b->{'active'});

Expand Down
5 changes: 4 additions & 1 deletion lib/Thruk/Utils/IO.pm
Original file line number Diff line number Diff line change
Expand Up @@ -894,7 +894,7 @@ sub cmd {
if(ref $cmd eq 'ARRAY') {
my $prog = shift @{$cmd};
&timing_breakpoint('IO::cmd: '.$prog.' <args...>');
_debug('running cmd: '.join(' ', @{$cmd})) if $c;
_debug('running cmd: '.$prog.' '.join(' ', @{$cmd})) if $c;
my($pid, $wtr, $rdr, @lines);
$pid = open3($wtr, $rdr, $rdr, $prog, @{$cmd});
my $sel = IO::Select->new;
Expand Down Expand Up @@ -976,6 +976,9 @@ sub cmd {
$c = $c || $Thruk::Globals::c || undef;
$c->stash->{'total_io_cmd'} += $elapsed if $c;

# log full command line of slow commands
$c->stats->profile(comment => join(" ", @{Thruk::Base::list($cmd)})) if($c && $elapsed > 1);

return($rc, $output) if wantarray;
return($output);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,13 @@ sub index {
$c->stash->{'show_all_button'} = $config->{'all_button'} // 1;
$c->stash->{'skip_confirm'} = $config->{'skip_confirms'} ? 'noop_' : '';

my $peers = Thruk::NodeControl::Utils::get_peers($c);
my $servers = [];
for my $peer (@{$peers}) {
push @{$servers}, Thruk::NodeControl::Utils::get_server($c, $peer, $config);
}
Thruk::Action::AddDefaults::set_possible_backends($c, undef, $peers);

my $action = $c->req->parameters->{'action'} || 'list';

if($action && $action ne 'list') {
Expand All @@ -75,11 +82,6 @@ sub index {
return(1);
}

my $servers = [];
for my $peer (@{Thruk::NodeControl::Utils::get_peers($c)}) {
push @{$servers}, Thruk::NodeControl::Utils::get_server($c, $peer, $config);
}

if(!$config->{'omd_default_version'}) {
my(undef, $version) = Thruk::Utils::IO::cmd("omd version -b");
chomp($version);
Expand Down Expand Up @@ -232,8 +234,15 @@ sub _omd_service_cmd {
my($c, $peer, $cmd) = @_;
return unless Thruk::Utils::check_csrf($c);
my $service = $c->req->parameters->{'service'};
Thruk::NodeControl::Utils::omd_service($c, $peer, $service, $cmd);
return($c->render(json => {'success' => 1}));
my $res = Thruk::NodeControl::Utils::omd_service($c, $peer, $service, $cmd);
if($res && $res->{'rc'} == 0) {
return($c->render(json => {'success' => 1}));
}
my $details = "";
if($res && $res->{'stderr'}) {
$details = "\n".$res->{'stdout'}.$res->{'stderr'};
}
return($c->render(json => {'success' => 0, 'error' => "failed to ".$cmd." ".$service.$details }));
}

##########################################################
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ sub get_peers {
my($c) = @_;
my @peers;
my $dups = {};
for my $peer (@{$c->db->get_local_peers()}, @{$c->db->get_http_peers(1)}) {
for my $peer (@{$c->db->get_local_peers()}, @{$c->db->get_http_peers(1)}, @{$c->db->get_peers_by_tags('node-control')}) {
next if (defined $peer->{'disabled'} && $peer->{'disabled'} == HIDDEN_LMD_PARENT);
next if $dups->{$peer->{'key'}}; # backend can be in both lists
$dups->{$peer->{'key'}} = 1;
Expand Down Expand Up @@ -170,7 +170,8 @@ sub get_server {
updating => $facts->{'updating'} || 0, # update job id
os_updating => $facts->{'os_updating'} || 0, # os update id
os_sec_updating => $facts->{'os_sec_updating'} || 0, # sec update job id
host_name => $facts->{'ansible_facts'}->{'ansible_fqdn'},
host_name => undef,
ansible_fqdn => $facts->{'ansible_facts'}->{'ansible_fqdn'},
omd_version => $facts->{'omd_version'} // '',
omd_versions => $facts->{'omd_versions'} // [],
omd_cleanable => $facts->{'omd_cleanable'} // [],
Expand Down Expand Up @@ -293,7 +294,7 @@ sub _ansible_get_facts {
# available subsets are listed here:
# https://docs.ansible.com/ansible/latest/collections/ansible/builtin/setup_module.html#parameter-gather_subset
# however, older ansible releases don't support all of them and bail out
my $f = _ansible_adhoc_cmd($c, $peer, "-m setup -a 'gather_subset=hardware,virtual gather_timeout=30'");
my $f = _ansible_adhoc_cmd($c, $peer, "-m setup -a 'gather_subset=hardware,virtual' -a 'gather_timeout=30'");
my $runtime = _runtime_data($c, $peer);
my $pkgs = _ansible_available_packages($c, $peer, $f);
my $updates = _ansible_available_os_updates($c, $peer, $f);
Expand Down Expand Up @@ -327,7 +328,7 @@ sub _runtime_data {
$runtime->{'omd_disk_free'} = $3;
}

my(undef, $has_tmux) = _remote_cmd($c, $peer, '/bin/sh -c "command -v tmux"');
my(undef, $has_tmux) = _remote_cmd($c, $peer, '/bin/sh -c \'command -v tmux\'');
if($has_tmux =~ m/tmux$/gmx) {
$runtime->{'has_tmux'} = $has_tmux;
}
Expand Down Expand Up @@ -394,6 +395,7 @@ sub _ansible_available_packages {
next if $in_use{$v};
push @cleanable, $v;
}
@inst = reverse sort @inst;

return({ omd_packages_available => \@pkgs, omd_versions => \@inst, omd_cleanable => \@cleanable, omd_sites => \%omd_sites });
}
Expand Down Expand Up @@ -840,7 +842,7 @@ sub _remote_cmd {
my($c, $peer, $cmd, $background_options, $env) = @_;
my($rc, $out, $err);

if($peer->is_local() || $peer->is_peer_machine_reachable_by_http()) {
if(!$peer->{'ssh_ok'} && ($peer->is_local() || $peer->is_peer_machine_reachable_by_http())) {
eval {
($rc, $out) = $peer->cmd($c, $cmd, $background_options, $env);
};
Expand All @@ -852,35 +854,35 @@ sub _remote_cmd {

# fallback to ssh if possible
my $facts = ansible_get_facts($c, $peer, 0);
my $host_name = $facts->{'ansible_facts'}->{'ansible_fqdn'};
my $config = config($c);
if(!$config->{'ssh_fallback'}) {
die("http(s) connection failed\n".$err) if $err;
die("no http(s) control connection available\n");
}

my $server = get_server($c, $peer, $config);
my $host_name = $server->{'host_name'};
if(!$host_name) {
my $server = get_server($c, $peer, $config);
$host_name = $server->{'host_name'};
die("http(s) connection failed\n".$err);
}

if($host_name && !$background_options) {
_warn("remote cmd failed, trying ssh fallback: %s", $err) if $err;
_debug("fallback to ssh");
my $env_vars = "";
for my $key (sort keys %{$env}) {
$env_vars .= " --extra-vars $key=\"".$env->{$key}."\"";
}
($rc, $out) = Thruk::Utils::IO::cmd($c, "ansible all -i $host_name, -m shell -a \"".$cmd."\"".$env_vars);
if($out =~ m/^.*?\s+\|\s+UNREACHABLE.*?=>/mx) {
die("http(s) and ssh connection failed\nhttp(s):\n".$err."\n\nssh:\n".$out) if $err;
die("ssh connection failed\n".$out);
}
$out =~ s/^.*?\s+\|\s+.*?\s+\|\s+rc=\d\s+>>//gmx;
return($rc, $out);
_debug("remote cmd failed, trying ssh fallback: %s", $err) if $err;
my $env_vars = "";
for my $key (sort keys %{$env}) {
$env_vars .= " --extra-vars $key=\"".$env->{$key}."\"";
}

die("http(s) connection failed\n".$err);
my $fullcmd = "ansible all -i ".$server->{'omd_site'}."\@$host_name, -m shell -a \"".$cmd."\"".$env_vars;
($rc, $out) = Thruk::Utils::IO::cmd($fullcmd, { env => { 'ANSIBLE_PYTHON_INTERPRETER' => 'auto_silent' }});
if($out =~ m/^.*?\s+\|\s+UNREACHABLE.*?=>/mx) {
die("http(s) and ssh connection failed\nhttp(s):\n".$err."\n\nssh:\n".$out) if $err;
die("ssh connection failed\n".$out);
}
$out =~ s/^.*?\s+\|\s+.*?\s+\|\s+rc=\d\s+>>\s*//gmx;
if($out =~ m/usage:\ ansible/mx) {
confess("ansible command failed: cmd: ".$fullcmd."\n".$out);
}
$peer->{'ssh_ok'} = 1;
return($rc, $out);
}

##########################################################
Expand Down Expand Up @@ -923,7 +925,8 @@ sub _remote_run_hook {
##########################################################
sub _ansible_adhoc_cmd {
my($c, $peer, $args) = @_;
my($rc, $data) = _remote_cmd($c, $peer, 'ansible all -i localhost, -c local '.$args." 2>/dev/null");
my $cmd = 'ansible all -i localhost, -c local '.$args." 2>/dev/null";
my($rc, $data) = _remote_cmd($c, $peer, $cmd);
if($rc != 0) {
die("ansible failed: rc $rc ".$data);
}
Expand Down Expand Up @@ -959,8 +962,9 @@ sub omd_service {
'background' => 1,
'clean' => 1,
});
Thruk::Utils::External::wait_for_peer_job($c, $peer, $job, 0.2, 90);
return;
my $jobdata = Thruk::Utils::External::wait_for_peer_job($c, $peer, $job, 0.2, 90);
delete $peer->{'ssh_ok'}; # http might work again now
return $jobdata;
}

##########################################################
Expand All @@ -973,9 +977,14 @@ sub _omd_service_cmd {
};
if($@) {
_warn("omd cmd failed: %s", $@);
return;
}
if($rc != 0) {
_warn("omd cmd failed: %s", $out);
return;
}
update_runtime_data($c, $peer, 1);
return;
return 1;
}

##########################################################
Expand Down
16 changes: 16 additions & 0 deletions plugins/plugins-available/node-control/root/node_control-3.18.js
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,14 @@ function nc_omd_service(btn, extraData) {

// update node row
refresh_all_changed_rows(null, 'TD.js-node-row');

if(!success) {
thruk_xhr_error('setting omd service failed: ', '', textStatus, jqXHR, false);
return;
}
if(data && data.error) {
thruk_message(1, data.error);
}
}, extraData);
}

Expand All @@ -129,5 +137,13 @@ function nc_peer_state(btn, extraData) {

// update node row
refresh_all_changed_rows(null, 'TD.js-node-row');

if(!success) {
thruk_xhr_error('setting peer state failed: ', '', textStatus, jqXHR, false);
return;
}
if(data && data.error) {
thruk_message(1, data.error);
}
}, extraData);
}
10 changes: 6 additions & 4 deletions plugins/plugins-available/node-control/templates/node_control.tt
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@
<td colspan="[% IF show_os_updates %]7[% ELSE %]6[% END %]" class="align-top textALERT whitespace-pre truncate overflow-hidden [% IF error_lines.size > 1 %]clickable[% END %]" style="max-width: 60vw;" [% IF error_lines.size > 1 %]onclick="toggleElement('errors_[% s.peer_key | html %]'); reapplyAllStripes(); return false;"[% END %]>[% error_lines.0 | html; error_started = 1 %]</td>
[% ELSE %]
[% IF s.omd_version && omd_default_version != s.omd_version %]
<td class="align-top WARNING" title="node uses an outdated omd release.">[% s.omd_version.replace('-labs-edition', '') | html %]</td>
<td class="align-top WARNING" title="node uses an outdated omd release, available:&#013;[% s.omd_versions.join("&#013;").replace('-labs-edition', '') %]">[% s.omd_version.replace('-labs-edition', '') | html %]</td>
[% ELSE %]
<td class="align-top">[% s.omd_version.replace('-labs-edition', '') | html %]</td>
[% END %]
Expand All @@ -151,7 +151,9 @@
[% IF service == "OVERALL"; NEXT; END %]
[% IF s.omd_status.$service != 0; failed.push(service); END %]
[% END %]
[% IF failed.size == 1 %]
[% IF failed.size == s.omd_status.keys.size - 1 %]
<div class="badge CRITICAL">stopped</div>
[% ELSIF failed.size == 1 %]
<div class="badge WARNING">[% failed.0 %]</div>
[% ELSE %]
<div class="badge WARNING">[% failed.size %] stopped</div>
Expand All @@ -162,7 +164,7 @@
[% IF show_os_updates %]
<td class="align-top">
<div class="flexrow flex-nowrap gap-x-1 justify-between">
<span class="clickable" onclick="openModalWindowUrl('node_control.cgi?action=facts&modal=1&peer=[% s.peer_key | html %]');">[% s.os_name | html %] [% s.os_version | html %]</span>
<span>[% s.os_name | html %] [% s.os_version | html %]</span>
<form action="node_control.cgi" method="POST">
<input type="hidden" name="peer" value="[% s.peer_key | html %]">
<input type="hidden" name="CSRFtoken" value="[% get_user_token(c) %]">
Expand All @@ -186,7 +188,7 @@
</div>
</td>
[% END %]
<td>[% s.machine_type %]</td>
<td class="clickable" onclick="openModalWindowUrl('node_control.cgi?action=facts&modal=1&peer=[% s.peer_key | html %]');" title="show facts">[% s.machine_type %]</td>
<td class='align-top relative overflow-hidden'>
[% IF s.cpu_perc; %][%IF s.cpu_perc > 1; s.cpu_perc = 1; END %]
<div style='width: [% 100 * s.cpu_perc %]%; height: 100%;' class='[% IF s.cpu_perc > 0.9 %]CRITICAL[% ELSIF s.cpu_perc > 0.8 %]WARNING[% ELSE %]OK[% END %] absolute top-0 left-0'></div>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,20 @@
[% END %]
</div>
<div class="body">
[% pd=s.peer_key %]
[% IF !pi_detail.defined(pd) || !pi_detail.$pd.defined("enable_notifications") %]
<span class="textHINT">Backend status is not available.<br>Site is probably offline.</span>
[% ELSE %]
<form action="node_control.cgi" method="POST">
<input type="hidden" name="CSRFtoken" value="[% get_user_token(c) %]">
<table class="w-fit mx-auto innercellborder">
[% pd=s.peer_key %]
[% PROCESS buttongroup name="notifications" label="Notifications" status=pi_detail.$pd.enable_notifications %]
[% PROCESS buttongroup name="hostchecks" label="Host Checks" status=pi_detail.$pd.execute_host_checks %]
[% PROCESS buttongroup name="servicechecks" label="Service Checks" status=pi_detail.$pd.execute_service_checks %]
[% PROCESS buttongroup name="eventhandlers" label="Event Handler" status=pi_detail.$pd.enable_event_handlers %]
</table>
</form>
[% END %]
</div>
[% IF modal %]
<div class="footer justify-center">
Expand Down
2 changes: 1 addition & 1 deletion themes/base.css
Original file line number Diff line number Diff line change
Expand Up @@ -1821,7 +1821,7 @@ BODY.minimal3 {
* feedback messages
*/
#thruk_message {
@apply card shadow-float absolute p-1 z-50 min-w-full lg:min-w-[600px] max-w-[90vw] top-14 left-1/2 transform -translate-x-1/2;
@apply card shadow-float absolute p-1 z-[100] min-w-full lg:min-w-[600px] max-w-[90vw] top-14 left-1/2 transform -translate-x-1/2;
>DIV {
@apply flexrow flex-nowrap gap-2 justify-center;
}
Expand Down

0 comments on commit f452ecd

Please sign in to comment.