diff --git a/libpod/container_internal.go b/libpod/container_internal.go index d3302a5978..d1eeb7f851 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -272,7 +272,9 @@ func (c *Container) handleRestartPolicy(ctx context.Context) (_ bool, retErr err } if c.config.HealthCheckConfig != nil { - if err := c.removeTransientFiles(ctx, c.config.StartupHealthCheckConfig != nil && !c.state.StartupHCPassed); err != nil { + if err := c.removeTransientFiles(ctx, + c.config.StartupHealthCheckConfig != nil && !c.state.StartupHCPassed, + c.state.HCUnitName); err != nil { return false, err } } @@ -1590,7 +1592,9 @@ func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (retEr } if c.config.HealthCheckConfig != nil { - if err := c.removeTransientFiles(context.Background(), c.config.StartupHealthCheckConfig != nil && !c.state.StartupHCPassed); err != nil { + if err := c.removeTransientFiles(context.Background(), + c.config.StartupHealthCheckConfig != nil && !c.state.StartupHCPassed, + c.state.HCUnitName); err != nil { logrus.Error(err.Error()) } } @@ -2047,7 +2051,9 @@ func (c *Container) cleanup(ctx context.Context) error { // Remove healthcheck unit/timer file if it execs if c.config.HealthCheckConfig != nil { - if err := c.removeTransientFiles(ctx, c.config.StartupHealthCheckConfig != nil && !c.state.StartupHCPassed); err != nil { + if err := c.removeTransientFiles(ctx, + c.config.StartupHealthCheckConfig != nil && !c.state.StartupHCPassed, + c.state.HCUnitName); err != nil { logrus.Errorf("Removing timer for container %s healthcheck: %v", c.ID(), err) } } diff --git a/libpod/healthcheck.go b/libpod/healthcheck.go index 8f0253a17f..3b027e63ec 100644 --- a/libpod/healthcheck.go +++ b/libpod/healthcheck.go @@ -278,6 +278,7 @@ func (c *Container) incrementStartupHCSuccessCounter(ctx context.Context) { if recreateTimer { logrus.Infof("Startup healthcheck for container %s passed, recreating timer", c.ID()) + oldUnit := c.state.HCUnitName // Create the new, standard healthcheck timer first. if err := c.createTimer(c.HealthCheckConfig().Interval.String(), false); err != nil { logrus.Errorf("Error recreating container %s healthcheck: %v", c.ID(), err) @@ -291,7 +292,7 @@ func (c *Container) incrementStartupHCSuccessCounter(ctx context.Context) { // Which happens to be us. // So this has to be last - after this, systemd serves us a // SIGTERM and we exit. - if err := c.removeTransientFiles(ctx, true); err != nil { + if err := c.removeTransientFiles(ctx, true, oldUnit); err != nil { logrus.Errorf("Error removing container %s healthcheck: %v", c.ID(), err) return } diff --git a/libpod/healthcheck_linux.go b/libpod/healthcheck_linux.go index 52a4561868..344c4a2029 100644 --- a/libpod/healthcheck_linux.go +++ b/libpod/healthcheck_linux.go @@ -108,7 +108,7 @@ func (c *Container) startTimer(isStartup bool) error { // removeTransientFiles removes the systemd timer and unit files // for the container -func (c *Container) removeTransientFiles(ctx context.Context, isStartup bool) error { +func (c *Container) removeTransientFiles(ctx context.Context, isStartup bool, unitName string) error { if c.disableHealthCheckSystemd(isStartup) { return nil } @@ -122,7 +122,6 @@ func (c *Container) removeTransientFiles(ctx context.Context, isStartup bool) er // clean up as much as possible. stopErrors := []error{} - unitName := c.state.HCUnitName if unitName == "" { unitName = c.hcUnitName(isStartup, true) } diff --git a/libpod/healthcheck_nosystemd_linux.go b/libpod/healthcheck_nosystemd_linux.go index cd8503f82a..c338caf1cd 100644 --- a/libpod/healthcheck_nosystemd_linux.go +++ b/libpod/healthcheck_nosystemd_linux.go @@ -18,6 +18,6 @@ func (c *Container) startTimer(isStartup bool) error { // removeTransientFiles removes the systemd timer and unit files // for the container -func (c *Container) removeTransientFiles(ctx context.Context, isStartup bool) error { +func (c *Container) removeTransientFiles(ctx context.Context, isStartup bool, unitName string) error { return nil } diff --git a/libpod/healthcheck_unsupported.go b/libpod/healthcheck_unsupported.go index 0517465dbc..8d733698b8 100644 --- a/libpod/healthcheck_unsupported.go +++ b/libpod/healthcheck_unsupported.go @@ -18,6 +18,6 @@ func (c *Container) startTimer(isStartup bool) error { // removeTransientFiles removes the systemd timer and unit files // for the container -func (c *Container) removeTransientFiles(ctx context.Context, isStartup bool) error { +func (c *Container) removeTransientFiles(ctx context.Context, isStartup bool, unitName string) error { return nil } diff --git a/test/system/220-healthcheck.bats b/test/system/220-healthcheck.bats index c5f0f0ac2c..95c4e4646c 100644 --- a/test/system/220-healthcheck.bats +++ b/test/system/220-healthcheck.bats @@ -5,6 +5,7 @@ # load helpers +load helpers.systemd # Helper function: run 'podman inspect' and check various given fields @@ -38,7 +39,10 @@ function _check_health { --health-interval 1s \ --health-retries 3 \ --health-on-failure=kill \ + --health-startup-cmd /home/podman/healthcheck \ + --health-startup-interval 1s \ $IMAGE /home/podman/pause + cid="$output" run_podman inspect healthcheck_c --format "{{.Config.HealthcheckOnFailureAction}}" is "$output" "kill" "on-failure action is set to kill" @@ -70,6 +74,10 @@ Log[-1].ExitCode | 1 Log[-1].Output | \"Uh-oh on stdout!\\\nUh-oh on stderr!\" " "$current_time" "healthy" + # Check that we now we do have valid podman units with this + # name so that the leak check below does not turn into a NOP without noticing. + assert "$(systemctl list-units --type timer | grep $cid)" =~ "podman" "Healthcheck systemd unit exists" + current_time=$(date --iso-8601=seconds) # After three successive failures, container should no longer be healthy sleep 5 @@ -85,6 +93,10 @@ Log[-1].Output | \"Uh-oh on stdout!\\\nUh-oh on stderr!\" # Clean up run_podman rm -t 0 -f healthcheck_c + + # Important check for https://github.com/containers/podman/issues/22884 + # We never should leak the unit files, healthcheck uses the cid in name so just grep that. + assert "$(systemctl list-units --type timer | grep $cid)" == "" "Healthcheck systemd unit cleanup" } @test "podman healthcheck - restart cleans up old state" {