Skip to content
This repository has been archived by the owner on Oct 24, 2018. It is now read-only.

Commit

Permalink
NMIS release 8.6.4G
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexander Zangerl committed Mar 25, 2018
1 parent 179fd13 commit 74900d6
Show file tree
Hide file tree
Showing 22 changed files with 255 additions and 147 deletions.
2 changes: 1 addition & 1 deletion admin/migrate_rrd_locations.pl
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
#
# nmis collection is disabled while this operation is performed, and a record
# of operations is kept for rolling back in case of problems.
our $VERSION = "8.6.3G";
our $VERSION = "8.6.4G";

use strict;
use File::Copy;
Expand Down
114 changes: 67 additions & 47 deletions admin/support.pl
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,12 @@
# http://support.opmantek.com/users/
#
# *****************************************************************************
our $VERSION = "1.7.0";
our $VERSION = "1.8.0";
use strict;
use Data::Dumper;
use File::Basename;
use File::Temp;
use File::Path;
use POSIX qw();
use Cwd;
use FindBin;
Expand All @@ -51,7 +52,7 @@
\n\n";

die $usage if (@ARGV == 1 && $ARGV[0] =~ /^-[h\?]/);

my %args = getArguements(@ARGV);

die $usage if ($args{action} ne "collect");
Expand Down Expand Up @@ -139,17 +140,17 @@
# collect evidence
my $timelabel = POSIX::strftime("%Y-%m-%d-%H%M",localtime);
my $targetdir = "$td/nmis-collect.$timelabel";
mkdir($targetdir);
File::Path::make_path($targetdir, { chmod => 0755 });
print "collecting support evidence...\n";
my $status = collect_evidence($targetdir, \%args);
die "failed to collect evidence: $status\n" if ($status);

my $omkzfn;
# if omk and its support tool found, run that as well if allowed to!
if (-d "/usr/local/omk" && -f "/usr/local/omk/bin/support.pl" && !$args{no_other_tools})
{
open(LF, ">$targetdir/omk-support.log");

print "\nFound local OMK installation with OMK support tool.
Please wait while we collect OMK information as well.\n";
open(F, "/usr/local/omk/bin/support.pl action=collect no_system_stats=1 no_other_tools=1 2>&1 |")
Expand All @@ -165,16 +166,16 @@
close F;
close LF;
}

print "\nEvidence collection complete, zipping things up...\n";

# do we have zip? or only tar+gz?
my $canzip=0;
$status = system("zip --version >/dev/null 2>&1");
$canzip=1 if (POSIX::WIFEXITED($status) && !POSIX::WEXITSTATUS($status));

my $zfn = "/tmp/nmis-support-$timelabel.".($canzip?"zip":"tgz");

# zip mustn't become too large, hence we possibly tail/truncate some or all log files
opendir(D,"$targetdir/logs")
or warn "can't read $targetdir/logs dir: $!\n";
Expand All @@ -194,11 +195,11 @@
$status = system("tar","-czf",$zfn,"nmis-collect.$timelabel");
}
chdir($curdir);

die "cannot create support zip file $zfn: $!\n"
if (POSIX::WEXITSTATUS($status));
last if (-s $zfn < $maxzip);

# hmm, too big: shrink the log files one by one until the size works out
unlink($zfn);
print "zipfile too big, trying to shrink some logfiles...\n";
Expand All @@ -213,8 +214,8 @@
die "\nPROBLEM: cannot reduce zip file size any further!\nPlease rerun $0 with maxzipsize=N higher than $maxzip.\n";
}
}


print "\nAll done.\n\nCollected system information is in $zfn\n";
print "OMK information is in $omkzfn\n\n" if ($omkzfn);
print "Please include ".($omkzfn? "these zip files": "this zip file"). " when you contact
Expand Down Expand Up @@ -293,7 +294,7 @@ sub collect_evidence
$dirstocheck .= " $dbdir" if ($dbdir !~ /^$basedir/);
$dirstocheck .= " $logdir" if ($logdir !~ /^$basedir/);

mkdir("$targetdir/system_status");
File::Path::make_path("$targetdir/system_status", { chmod => 0755 });
# dump a recursive file list, ls -haRH does NOT work as it won't follow links except given on the cmdline
# this needs to cover dbdir and vardir if outside
system("find -L $dirstocheck -type d -print0| xargs -0 ls -laH > $targetdir/system_status/filelist.txt") == 0
Expand Down Expand Up @@ -328,7 +329,7 @@ sub collect_evidence
system("mount >> $targetdir/system_status/disk_info");

system("uname -av > $targetdir/system_status/uname");
mkdir("$targetdir/system_status/osrelease");
File::Path::make_path("$targetdir/system_status/osrelease", { chmod => 0755 });
system("cp -a /etc/*release /etc/*version $targetdir/system_status/osrelease/ 2>/dev/null");

if (!$args->{no_system_stats})
Expand All @@ -352,7 +353,7 @@ sub collect_evidence
or warn "can't save routing table: $!\n";

# capture the cron files, root's and nmis's tabs
mkdir("$targetdir/system_status/cron");
File::Path::make_path("$targetdir/system_status/cron", { chmod => 0755 });
system("cp -a /etc/cron* $targetdir/system_status/cron") == 0
or warn "can't save cron files: $!\n";

Expand All @@ -364,7 +365,7 @@ sub collect_evidence
if ($apachehome)
{
my $apachetarget = "$targetdir/system_status/apache";
mkdir ($apachetarget) if (!-d $apachetarget);
File::Path::make_path($apachetarget, { chmod => 0755 });
# on centos/RH there are symlinks pointing to all the apache module binaries, we don't
# want these copied as files (so -a or --no-dereference is essential)
system("cp -a $apachehome/* $apachetarget");
Expand All @@ -380,7 +381,7 @@ sub collect_evidence
}

# collect all defined log files
mkdir("$targetdir/logs");
File::Path::make_path("$targetdir/logs", { chmod => 0755 });
my @logfiles = grep(/^.+$/, (map { $globalconf->{$_} } (grep(/_log$/, keys %$globalconf))));
if (!@logfiles) # if the nmis load failed, fall back to the most essential standard logs
{
Expand Down Expand Up @@ -415,9 +416,7 @@ sub collect_evidence
or warn "ATTENTION: can't copy logfile $lfn to $targetdir!\n";
}
}
mkdir("$targetdir/conf",0755);
mkdir("$targetdir/conf/scripts",0755);
mkdir("$targetdir/conf/nodeconf",0755);
File::Path::make_path("$targetdir/conf/scripts", "$targetdir/conf/nodeconf" , { chmod => 0755 });

# copy all of conf/ and models/ but NOT any stray stuff beneath
system("cp","-r","$basedir/models",$targetdir) == 0
Expand Down Expand Up @@ -458,43 +457,64 @@ sub collect_evidence
}

# copy generic var files (=var/nmis-*)
mkdir("$targetdir/var");
opendir(D,"$vardir") or warn "can't read var dir $vardir: $!\n";
my @generics = grep(/^nmis[-_]/, readdir(D));
closedir(D);
system("cp", "-r", (map { "$vardir/$_" } (@generics)),
"$targetdir/var") == 0 or warn "can't copy var files: $!\n";
File::Path::make_path("$targetdir/var", { chmod => 0755 });
opendir(D,"$vardir") or warn "can't read var dir $vardir: $!\n";
my @generics = grep(/^nmis[-_]/, readdir(D));
closedir(D);
system("cp", "-r", (map { "$vardir/$_" } (@generics)),
"$targetdir/var") == 0 or warn "can't copy var files: $!\n";

# if node info requested copy those files as well
# special case: want ALL nodes
if ($thisnode eq "*")
# if node info requested copy those files as well
# special case: want ALL nodes
if ($thisnode eq "*")
{
# all node-related files...
system("cp $vardir/*.* $targetdir/var/") == 0
or warn "can't copy all nodes' files: $!\n";
#...and their current events
opendir(D, "$vardir/events") or warn "can't read $vardir/events: $!\n";
for my $onenode (grep(!/^\./,readdir(D)))
{
system("cp $vardir/* $targetdir/var/") == 0
or warn "can't copy all nodes' files: $!\n";
# no events for this node -> skip
next if (!(my @list = glob("$vardir/events/$onenode/current/*")));
my $curevdir = "$targetdir/var/events/$onenode/current";
File::Path::make_path($curevdir, { chmod => 0755 });
system("cp $vardir/events/$onenode/current/* $curevdir") == 0
or warn "can't copy $onenode events to $curevdir: $!\n";
}
elsif ($thisnode)
closedir(D);
}
elsif ($thisnode)
{
my $lnt = &loadLocalNodeTable;
for my $nextnode (split(/\s*,\s*/,$thisnode))
{
my $lnt = &loadLocalNodeTable;
for my $nextnode (split(/\s*,\s*/,$thisnode))
if ($lnt->{$nextnode})
{
if ($lnt->{$nextnode})
{
my $fileprefix = "$vardir/".lc($nextnode);
my @files_to_copy = (-r "$fileprefix-node.json")?
("$fileprefix-node.json", "$fileprefix-view.json") :
("$fileprefix-node.nmis", "$fileprefix-view.nmis");
my $fileprefix = "$vardir/".lc($nextnode);
my @files_to_copy = (-r "$fileprefix-node.json")?
("$fileprefix-node.json", "$fileprefix-view.json") :
("$fileprefix-node.nmis", "$fileprefix-view.nmis");

system("cp", @files_to_copy, "$targetdir/var/") == 0
or warn "can't copy node ${nextnode}'s node files: $!\n";
}
else
system("cp", @files_to_copy, "$targetdir/var/") == 0
or warn "can't copy node ${nextnode}'s node files: $!\n";

my $curevdir = "$targetdir/var/events/$nextnode/current";
if (my @list = glob("$vardir/events/$nextnode/current/*")) # no events for this node -> skip
{
warn("ATTENTION: the requested node \"$nextnode\" isn't known to NMIS!\n");
File::Path::make_path($curevdir, { chmod => 0755 });
system("cp $vardir/events/$nextnode/current/* $curevdir") == 0
or warn "can't copy $nextnode events to $curevdir: $!\n";
}
}
else
{
warn("ATTENTION: the requested node \"$nextnode\" isn't known to NMIS!\n");
}
}
}

return undef;
return undef;
}

# print question, return true if y (or in unattended mode). default is yes.
Expand Down
2 changes: 1 addition & 1 deletion admin/update_config_defaults.pl
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
#
# *****************************************************************************
use strict;
our $VERSION = "8.6.3G";
our $VERSION = "8.6.4G";

# Auto configure to the <nmis-base>/lib
use FindBin;
Expand Down
4 changes: 2 additions & 2 deletions admin/upgrade_models.pl
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
# *****************************************************************************
#
# this helper upgrades model files where safe to do so
our $VERSION="8.6.3G";
our $VERSION="8.6.4G";

use strict;
use Digest::MD5; # good enough
Expand Down Expand Up @@ -527,4 +527,4 @@ sub compute_signature
Model-ZyXEL-IES.nmis 25aaccabbd188208 593c69cbe0391cf3 0da840e65c219e00 3ab9322fece09797 4e5954eff8347757 7b098aa3c8afa7de
Model-ZyXEL-MGS.nmis c3ea5aec5b903e8e bff9ef1e5d0a70d8 94ca1a1be8a5eeee 701cf09b9a9dae1e
Model-net-snmp.nmis c9b5fa32bd1cd51f a78ed1067f7f14ab e321e3f8a79b25c0 13f1d8c3e10ebebc 997fc7bd3be516be 70491c897fe8d828 e106c9b396e76944 d24bab000b0a6fbe b4d10d3789afa1a6 5d97f9cf73a61919 b6518274fab46b78 aa24077be26e5897
Model.nmis ba5cdf626ee516c1 bece80b7b44d959b 34592112596682e2 a6443ed36ccd2120 c91082df42a88c17 fc6e00d8485d47c7 85b6e9852b359133 0b8ce0fbc6085bea fc31c4ba46c1f4be b8427208bee2fc4d 11d418a22fc2adfb 3c7c7f1471f80e2c efb216ab07a50fd0 d0c4c790f815e46a af53a22555c57f63
Model.nmis 8bd4ab3e997c6228 bece80b7b44d959b 34592112596682e2 a6443ed36ccd2120 c91082df42a88c17 fc6e00d8485d47c7 85b6e9852b359133 0b8ce0fbc6085bea fc31c4ba46c1f4be b8427208bee2fc4d 11d418a22fc2adfb 3c7c7f1471f80e2c efb216ab07a50fd0 d0c4c790f815e46a af53a22555c57f63 ba5cdf626ee516c1
2 changes: 1 addition & 1 deletion admin/upgrade_tables.pl
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
# *****************************************************************************
#
# this helper upgrades table files where safe to do so
our $VERSION="8.6.3G";
our $VERSION="8.6.4G";

use strict;
use Digest::MD5; # good enough
Expand Down
2 changes: 1 addition & 1 deletion bin/fpingd.pl
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
# http://support.opmantek.com/users/
#
# *****************************************************************************
our $VERSION = "8.6.3G";
our $VERSION = "8.6.4G";

use FindBin qw($Bin);
use lib "$FindBin::Bin/../lib";
Expand Down
49 changes: 36 additions & 13 deletions bin/nmis.pl
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@
{
runThreads(type=>$type, nodeselect=>$nodeselect, groupselect=>$groupselect, mthread=>$mthread, mthreadDebug=>$mthreadDebug);
}
elsif ( $type eq "escalate") { runEscalate(); printRunTime(); } # included in type=collect
elsif ( $type eq "escalate") { runEscalate(independent => 1); printRunTime(); } # included in type=collect
elsif ( $type eq "config" ) { checkConfig(change => "true"); }
elsif ( $type eq "audit" ) { checkConfig(audit => "true", change => "false"); }
elsif ( $type eq "links" ) { runLinks(); } # included in type=update
Expand Down Expand Up @@ -221,7 +221,10 @@
}
elsif ( $type eq "master" ) { nmisMaster(); printRunTime(); } # MIGHT be included in type=collect
elsif ( $type eq "groupsync" ) { sync_groups(); }
elsif ( $type eq "purge" ) { my $error = purge_files(); die "$error\n" if $error; }
elsif ( $type eq "purge" ) {
	# type=purge: first purge old files, then purge outages; either failure is fatal.
	# purge_files() returns an error message on failure (false value means success).
	my $error = purge_files();
	die "$error\n" if $error;

	# the purge_outages result is consumed as a hashref with a 'success' flag;
	# NOTE(review): the failure text is presumed to live under the 'error' key - confirm
	# against NMIS::purge_outages.
	my $res = NMIS::purge_outages();
	# "$res->error" inside a double-quoted string only interpolates $res itself
	# (printing HASH(0x...)->error), so the hash element must be spelled out.
	die "$res->{error}\n" if !$res->{success};
}
else { checkArgs(); }

exit;
Expand Down Expand Up @@ -516,29 +519,37 @@ sub expand_candidate_list
$whichflavours{$maybe}->{wmi} = $whichflavours{$maybe}->{snmp} = 1; # and ignore the last-xyz markers
}
# nodes that have not been pollable since forever: run at most once daily
# ...except if the demote_faulty_nodes config option is set to false
elsif (!$ninfo->{system}->{nodeModel} or $ninfo->{system}->{nodeModel} eq "Model")
{
my $lasttry = $ninfo->{system}->{last_poll} // 0;

# was polling attempted at all and in the last 30 days? then once daily from
# try once every 5 minutes if demote_faulty_nodes is set to false,
# otherwise: was polling attempted at all and in the last 30 days? then once daily from
# that last try - otherwise try one now
my $nexttry = ($lasttry && ($now - $lasttry) <= 30*86400)? ($lasttry + 86400 * 0.95) : $now;
my $nexttry = !getbool($C->{demote_faulty_nodes},"invert")? # === ne false
($lasttry && ($now - $lasttry) <= 30*86400)? ($lasttry + 86400 * 0.95) : $now : $lasttry + 300 ;

if ($nexttry <= $now)
{
push @todo_nodes, $maybe;
$whichflavours{$maybe}->{wmi} = $whichflavours{$maybe}->{snmp} = 1;
}
else
# if demotion is enabled, log this pretty dire situation
# but not too noisily - with a default poll every minute this
# will log the issue once an hour.
elsif (!getbool($C->{demote_faulty_nodes},"invert")) # === ne false
{
# log this pretty dire situation but not too noisy - with a default poll every minute this
# will log the issue once an hour
my $goodtimes = int((($now - $lasttry) % 3600) / 60);
my $msg = "Node $maybe has no valid nodeModel, never polled successfully, "
. "demoted to frequency once daily, last attempt $lasttry, next $nexttry";
logMsg($msg) if ($goodtimes == 0);
dbg($msg);
}
else
{
dbg("Node $maybe has no valid nodeModel, never polled successfully. demote_faulty_nodes is disabled, last attempt $lasttry, next $nexttry.");
}
}
# logic for collect now or later: candidate if no past successful collect whatsoever,
# or if either of the two worked and was done long enough ago.
Expand Down Expand Up @@ -5622,7 +5633,7 @@ sub runServices
# program is disconnected from stdin; stderr goes into a tmpfile
# and is collected separately for diagnostics

my $stderrsink = POSIX::tmpnam(); # good enough, no atomic open required
my $stderrsink = File::Temp::mktemp(File::Spec->tmpdir()."/nmis.XXXXXX"); # good enough, no atomic open required
dbg("running external program '$thisservice->{Program} $finalargs', "
.(getbool($thisservice->{Collect_Output})? "collecting":"ignoring")." output");
$pid = open(PRG,"$thisservice->{Program} $finalargs </dev/null 2>$stderrsink |");
Expand Down Expand Up @@ -7028,17 +7039,29 @@ sub summaryCache
### things, ie if escalate0 = 5 then an interface goes down, no alert sent, next
### poll interface goes up and event cancelled! Downside is a little longer before
### receiving first notification, so it depends on what the support SLA is.

### 11-Nov-11, keiths, update to this, changed the escalation so that through policy you can
### wait for 5 mins or just notify now, so Ecalation0 is 0 seconds, Escalation1 is 300 seconds
### then in Ecalations.xxxx, core devices might notify at Escalation0 while others at Escalation1
# args: independent (optional, default 0; if set we ensure that only one runescalate process is active)
# returns: nothing
sub runEscalate
{
my %args = @_;
my $C = loadConfTable();

if (getbool($args{independent}))
{
# check that there are no other running/stuck/delayed escalate processes
my $others = func::find_nmis_processes(config => $C,
type => 'escalate');
if (keys %$others)
{
logMsg("ERROR other type=escalate processes running (".join(", ", keys %$others)."), aborting operation.");
info("ERROR other type=escalate processes running (".join(", ", keys %$others)."), aborting operation.");
return;
}
}
$0 = "nmis-".$C->{conf}."-escalate";

my $pollTimer = NMIS::Timing->new;

my $C = loadConfTable();
my $NT = loadLocalNodeTable();

my $outage_time;
Expand Down
Loading

0 comments on commit 74900d6

Please sign in to comment.