Skip to content

Commit

Permalink
Merge pull request #22895 from Luap99/hc-startup-leak
Browse files Browse the repository at this point in the history
libpod: do not leak systemd hc startup unit timer
  • Loading branch information
openshift-merge-bot[bot] authored Jun 4, 2024
2 parents e9ef727 + e8ea1e7 commit b637678
Show file tree
Hide file tree
Showing 6 changed files with 26 additions and 8 deletions.
12 changes: 9 additions & 3 deletions libpod/container_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,9 @@ func (c *Container) handleRestartPolicy(ctx context.Context) (_ bool, retErr err
}

if c.config.HealthCheckConfig != nil {
if err := c.removeTransientFiles(ctx, c.config.StartupHealthCheckConfig != nil && !c.state.StartupHCPassed); err != nil {
if err := c.removeTransientFiles(ctx,
c.config.StartupHealthCheckConfig != nil && !c.state.StartupHCPassed,
c.state.HCUnitName); err != nil {
return false, err
}
}
Expand Down Expand Up @@ -1590,7 +1592,9 @@ func (c *Container) restartWithTimeout(ctx context.Context, timeout uint) (retEr
}

if c.config.HealthCheckConfig != nil {
if err := c.removeTransientFiles(context.Background(), c.config.StartupHealthCheckConfig != nil && !c.state.StartupHCPassed); err != nil {
if err := c.removeTransientFiles(context.Background(),
c.config.StartupHealthCheckConfig != nil && !c.state.StartupHCPassed,
c.state.HCUnitName); err != nil {
logrus.Error(err.Error())
}
}
Expand Down Expand Up @@ -2047,7 +2051,9 @@ func (c *Container) cleanup(ctx context.Context) error {

// Remove healthcheck unit/timer file if it execs
if c.config.HealthCheckConfig != nil {
if err := c.removeTransientFiles(ctx, c.config.StartupHealthCheckConfig != nil && !c.state.StartupHCPassed); err != nil {
if err := c.removeTransientFiles(ctx,
c.config.StartupHealthCheckConfig != nil && !c.state.StartupHCPassed,
c.state.HCUnitName); err != nil {
logrus.Errorf("Removing timer for container %s healthcheck: %v", c.ID(), err)
}
}
Expand Down
3 changes: 2 additions & 1 deletion libpod/healthcheck.go
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,7 @@ func (c *Container) incrementStartupHCSuccessCounter(ctx context.Context) {
if recreateTimer {
logrus.Infof("Startup healthcheck for container %s passed, recreating timer", c.ID())

oldUnit := c.state.HCUnitName
// Create the new, standard healthcheck timer first.
if err := c.createTimer(c.HealthCheckConfig().Interval.String(), false); err != nil {
logrus.Errorf("Error recreating container %s healthcheck: %v", c.ID(), err)
Expand All @@ -291,7 +292,7 @@ func (c *Container) incrementStartupHCSuccessCounter(ctx context.Context) {
// Which happens to be us.
// So this has to be last - after this, systemd serves us a
// SIGTERM and we exit.
if err := c.removeTransientFiles(ctx, true); err != nil {
if err := c.removeTransientFiles(ctx, true, oldUnit); err != nil {
logrus.Errorf("Error removing container %s healthcheck: %v", c.ID(), err)
return
}
Expand Down
3 changes: 1 addition & 2 deletions libpod/healthcheck_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ func (c *Container) startTimer(isStartup bool) error {

// removeTransientFiles removes the systemd timer and unit files
// for the container
func (c *Container) removeTransientFiles(ctx context.Context, isStartup bool) error {
func (c *Container) removeTransientFiles(ctx context.Context, isStartup bool, unitName string) error {
if c.disableHealthCheckSystemd(isStartup) {
return nil
}
Expand All @@ -122,7 +122,6 @@ func (c *Container) removeTransientFiles(ctx context.Context, isStartup bool) er
// clean up as much as possible.
stopErrors := []error{}

unitName := c.state.HCUnitName
if unitName == "" {
unitName = c.hcUnitName(isStartup, true)
}
Expand Down
2 changes: 1 addition & 1 deletion libpod/healthcheck_nosystemd_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,6 @@ func (c *Container) startTimer(isStartup bool) error {

// removeTransientFiles removes the systemd timer and unit files
// for the container
func (c *Container) removeTransientFiles(ctx context.Context, isStartup bool) error {
func (c *Container) removeTransientFiles(ctx context.Context, isStartup bool, unitName string) error {
return nil
}
2 changes: 1 addition & 1 deletion libpod/healthcheck_unsupported.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,6 @@ func (c *Container) startTimer(isStartup bool) error {

// removeTransientFiles removes the systemd timer and unit files
// for the container
func (c *Container) removeTransientFiles(ctx context.Context, isStartup bool) error {
func (c *Container) removeTransientFiles(ctx context.Context, isStartup bool, unitName string) error {
return nil
}
12 changes: 12 additions & 0 deletions test/system/220-healthcheck.bats
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#

load helpers
load helpers.systemd


# Helper function: run 'podman inspect' and check various given fields
Expand Down Expand Up @@ -38,7 +39,10 @@ function _check_health {
--health-interval 1s \
--health-retries 3 \
--health-on-failure=kill \
--health-startup-cmd /home/podman/healthcheck \
--health-startup-interval 1s \
$IMAGE /home/podman/pause
cid="$output"

run_podman inspect healthcheck_c --format "{{.Config.HealthcheckOnFailureAction}}"
is "$output" "kill" "on-failure action is set to kill"
Expand Down Expand Up @@ -70,6 +74,10 @@ Log[-1].ExitCode | 1
Log[-1].Output | \"Uh-oh on stdout!\\\nUh-oh on stderr!\"
" "$current_time" "healthy"

# Check that we now we do have valid podman units with this
# name so that the leak check below does not turn into a NOP without noticing.
assert "$(systemctl list-units --type timer | grep $cid)" =~ "podman" "Healthcheck systemd unit exists"

current_time=$(date --iso-8601=seconds)
# After three successive failures, container should no longer be healthy
sleep 5
Expand All @@ -85,6 +93,10 @@ Log[-1].Output | \"Uh-oh on stdout!\\\nUh-oh on stderr!\"

# Clean up
run_podman rm -t 0 -f healthcheck_c

# Important check for https://github.com/containers/podman/issues/22884
# We never should leak the unit files, healthcheck uses the cid in name so just grep that.
assert "$(systemctl list-units --type timer | grep $cid)" == "" "Healthcheck systemd unit cleanup"
}

@test "podman healthcheck - restart cleans up old state" {
Expand Down

1 comment on commit b637678

@packit-as-a-service
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

podman-next COPR build failed. @containers/packit-build please check.

Please sign in to comment.