Skip to content

Commit

Permalink
Update reboot_watch to allow for one retry in case elevate-cpanel fails
Browse files Browse the repository at this point in the history
Case RE-991: Make reboot_watch more tolerant of failures and have it
fail earlier if the script reports that it has failed

Changelog:
  • Loading branch information
Travis Holloway committed Nov 25, 2024
1 parent f2db87d commit 084631f
Showing 1 changed file with 59 additions and 1 deletion.
60 changes: 59 additions & 1 deletion .github/workflows/openstack/reboot_watch
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use constant ELEVATE_LOG_PATH => '/var/log/elevate-cpanel.log';
use constant ELEVATE_PID => '/var/run/elevate-cpanel.pid';

use File::Tail;
use JSON::PP;
use POSIX;

my $RETVAL = 1;
Expand Down Expand Up @@ -35,14 +36,71 @@ while ( $RETVAL != 0 ) {
# Follow the elevate log line by line until the reboot string appears,
# allowing elevate-cpanel to be restarted once if it reports a failure.
#
# Arguments (unpacked from @_):
#   $filepath      - path of the log file handed to File::Tail
#   $REBOOT_STRING - accepted, but the match below reads $ENV{REBOOT_STRING}
#   $RETRIES       - accepted, but not used inside this sub
#
# For every line read:
#   * _pid_check() is called unless $ENV{SKIP_PID_CHECK} is set
#   * if the line contains $ENV{REBOOT_STRING}, _success_message() and
#     _exit_with_haste(0) are called
#   * otherwise, on every 10th line (starting with the first), the elevate
#     status is polled through _script_has_failed(); the first reported
#     failure triggers one restart, a later one calls _exit_with_haste(1)
sub _check_elevate_log_for_REBOOT_STRING {
    my ( $filepath, $REBOOT_STRING, $RETRIES ) = @_;

    # Becomes true once the single permitted restart has been used
    my $has_continued = 0;

    my $i = 0;

    # NOTE(review): $file and $line are not lexicals in this sub. They are
    # left as-is because code outside this view may read them - confirm
    # before converting them to 'my' variables.
    $file = File::Tail->new( name => $filepath, maxinterval => 1, adjustafter => 5, interval => 1 );
    while ( defined( $line = $file->read ) ) {
        _pid_check() unless $ENV{SKIP_PID_CHECK};

        # NOTE(review): this compares against the environment variable, not
        # the $REBOOT_STRING argument - confirm the caller keeps them in sync
        if ( index( $line, $ENV{REBOOT_STRING} ) >= 0 ) {
            _success_message();
            _exit_with_haste(0);
        }
        elsif ( $i % 10 == 0 ) {
            if ( _script_has_failed() ) {
                if ($has_continued) {

                    # The retry has already been spent, so fail right away;
                    # waiting for the mirrors only makes sense before a restart
                    _exit_with_haste(1);
                }
                else {

                    # If it failed due to temporary mirror issues,
                    # then it may take a minute or two for the mirrors to be stable again
                    sleep 60;

                    _restart_script();
                }

                $has_continued = 1;
            }
        }

        $i++;
    }
}

# Report whether the elevate stage file currently records a failed run.
#
# Reads '/var/cpanel/elevate', decodes it as JSON and looks at its top-level
# 'status' key.
#
# Returns 1 when the status is the string 'failed', 0 otherwise. Content
# that cannot be decoded into a hash (for example an empty or half-written
# file) is reported with a warning and treated as "not failed", so that the
# caller can simply poll again later.
#
# Calls _exit_with_haste(1) when the file cannot be opened.
sub _script_has_failed {

    # This is too slow (~.57 seconds to make it as a system call)
    # So we are going to pull this data out of '/var/cpanel/elevate' directly
    # (~.007 seconds to pull the data out of the json file) which is
    # what the script is doing
    # my $status = `/scripts/elevate-cpanel --status`;
    # chomp $status;

    open( my $fh, '<', '/var/cpanel/elevate' ) or _exit_with_haste(1);
    my $raw_content = do { local $/; <$fh>; };
    close $fh;

    # decode() croaks on malformed input; trap that so a file caught
    # mid-write does not take the whole watcher down with it
    my $elevate_data = eval { JSON::PP->new->decode($raw_content) };
    my $decode_error = $@;

    if ( ref $elevate_data ne 'HASH' ) {
        warn "## [WARN]: Unable to read a status from /var/cpanel/elevate" . ( $decode_error ? ": $decode_error" : "\n" );
        return 0;
    }

    my $elevate_status = $elevate_data->{status};
    return ( defined $elevate_status && $elevate_status eq 'failed' ) ? 1 : 0;
}

# Start elevate-cpanel.service again from a forked child while the parent
# prints a warning and waits for that child to finish.
#
# Calls _exit_with_haste(1) when fork() fails. The child always ends with
# exit 0; the parent returns nothing.
sub _restart_script {
    my $child_pid = fork();
    _exit_with_haste(1) if !defined $child_pid;

    if ( !$child_pid ) {

        # Child side: remove the pid file so the service can use it
        unlink ELEVATE_PID;

        # Started from a throwaway process so that this process goes away,
        # since --continue will follow the elevate log afterwards
        system( '/usr/bin/systemctl', 'start', 'elevate-cpanel.service' );
        exit 0;
    }

    # Parent side: announce the retry, then block until the child is done
    my $time = POSIX::strftime( "%Y-%m-%d %H:%M:%S", localtime );
    print "## [$time] [WARN]: elevate-cpanel failed. Attempting to restart the script to see if the failure was due to a temporary issue ##\n";

    waitpid( $child_pid, 0 );
    return;
}

sub _pre_success_message {
Expand Down

0 comments on commit 084631f

Please sign in to comment.