Skip to content

Commit

Permalink
Merge pull request #3362 from apostasie/hardening-lifecycle-state-3350
Browse files Browse the repository at this point in the history
Hardening lifecycle-state-store, name-store, and oci-hooks
  • Loading branch information
AkihiroSuda authored Aug 29, 2024
2 parents 05c0aa2 + b954c7a commit 1a55c72
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 19 deletions.
23 changes: 17 additions & 6 deletions pkg/namestore/namestore.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ import (
"path/filepath"
"strings"

"github.com/containerd/log"

"github.com/containerd/nerdctl/v2/pkg/identifiers"
"github.com/containerd/nerdctl/v2/pkg/lockutil"
)
Expand Down Expand Up @@ -56,12 +58,21 @@ func (x *nameStore) Acquire(name, id string) error {
}
fn := func() error {
fileName := filepath.Join(x.dir, name)
// If containerd was bounced, previously running containers that would get restarted will go again through
// onCreateRuntime (unlike in a "normal" stop/start flow).
// As such, we need to allow reacquiring by the same id
// See: https://github.com/containerd/nerdctl/issues/3354
if b, err := os.ReadFile(fileName); err == nil && string(b) != id {
return fmt.Errorf("name %q is already used by ID %q", name, string(b))
if b, err := os.ReadFile(fileName); err == nil {
if strings.TrimSpace(string(b)) == "" {
// currently acquired for an empty id - this obviously should never happen
// this is recoverable, and we are not hard erroring, but still indicative that something was wrong
// https://github.com/containerd/nerdctl/issues/3351
log.L.Errorf("current name %q is reserved for a an empty id - please report this is as a bug", name)
} else if string(b) != id {
// if acquired by a different container, we error out here
return fmt.Errorf("name %q is already used by ID %q", name, string(b))
}
// Otherwise, this is just re-acquiring after a restart
// For example, if containerd was bounced, previously running containers that would get restarted will go
// again through onCreateRuntime (unlike in a "normal" stop/start flow).
// As such, we are allowing reacquiring by the same id
// See: https://github.com/containerd/nerdctl/issues/3354
}
return os.WriteFile(fileName, []byte(id), 0600)
}
Expand Down
32 changes: 22 additions & 10 deletions pkg/ocihook/ocihook.go
Original file line number Diff line number Diff line change
Expand Up @@ -505,25 +505,37 @@ func onCreateRuntime(opts *handlerOpts) error {
log.L.WithError(err).Error("failed re-acquiring name - see https://github.com/containerd/nerdctl/issues/2992")
}

var netError error
if opts.cni != nil {
if err = applyNetworkSettings(opts); err != nil {
return err
}
netError = applyNetworkSettings(opts)
}

// Set StartedAt
lf := state.NewLifecycleState(opts.state.Annotations[labels.StateDir])
return lf.WithLock(func() error {
err := lf.Load()
if err != nil {
return err
}

return errors.Join(netError, lf.WithLock(func() error {
// Errors are voluntarily ignored here, as they should not be fatal.
// The lifecycle struct is also already warning about the issue.
_ = lf.Load()
lf.StartedAt = time.Now()
lf.CreateError = netError != nil
return lf.Save()
})
}))
}

func onPostStop(opts *handlerOpts) error {
// See https://github.com/containerd/nerdctl/issues/3357
// Check if we actually errored during runtimeCreate
// If that is the case, CreateError is set, and we are in postStop while the container will NOT be deleted (see ticket).
// In that case, do NOT treat this as a deletion, as the container is still there.
// Reset CreateError, and return.
lf := state.NewLifecycleState(opts.state.Annotations[labels.StateDir])
if lf.WithLock(lf.Load) == nil {
if lf.CreateError {
lf.CreateError = false
return lf.WithLock(lf.Save)
}
}

ctx := context.Background()
ns := opts.state.Annotations[labels.Namespace]
if opts.cni != nil {
Expand Down
16 changes: 13 additions & 3 deletions pkg/ocihook/state/state.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ import (
"path/filepath"
"time"

"github.com/containerd/log"

"github.com/containerd/nerdctl/v2/pkg/lockutil"
)

Expand All @@ -49,8 +51,9 @@ func NewLifecycleState(stateDir string) *LifecycleState {
}

type LifecycleState struct {
stateDir string
StartedAt time.Time `json:"started_at"`
stateDir string
StartedAt time.Time `json:"started_at"`
CreateError bool `json:"create_error"`
}

func (lf *LifecycleState) WithLock(fun func() error) error {
Expand All @@ -71,18 +74,25 @@ func (lf *LifecycleState) Load() error {
} else {
err = json.Unmarshal(data, lf)
if err != nil {
// Logging an error, as Load errors are generally ignored downstream
log.L.Error("unable to unmarshall lifecycle data")
return fmt.Errorf("unable to unmarshall lifecycle data: %w", err)
}
}
return nil
}

func (lf *LifecycleState) Save() error {
// Write atomically (write, then move) to avoid incomplete writes from happening
data, err := json.Marshal(lf)
if err != nil {
return fmt.Errorf("unable to marshall lifecycle data: %w", err)
}
err = os.WriteFile(filepath.Join(lf.stateDir, lifecycleFile), data, 0600)
err = os.WriteFile(filepath.Join(lf.stateDir, "."+lifecycleFile), data, 0600)
if err != nil {
return fmt.Errorf("unable to write lifecycle file: %w", err)
}
err = os.Rename(filepath.Join(lf.stateDir, "."+lifecycleFile), filepath.Join(lf.stateDir, lifecycleFile))
if err != nil {
return fmt.Errorf("unable to write lifecycle file: %w", err)
}
Expand Down

0 comments on commit 1a55c72

Please sign in to comment.