From 124c4416c90c1c28a1e66c52162d5626291e0636 Mon Sep 17 00:00:00 2001 From: Antonio Murdaca Date: Sun, 20 Oct 2019 08:59:09 +0200 Subject: [PATCH] pkg/daemon: drain before applying changes Signed-off-by: Antonio Murdaca --- pkg/daemon/daemon.go | 5 ++- pkg/daemon/update.go | 75 +++++++++++++++++++++++++------------------- 2 files changed, 46 insertions(+), 34 deletions(-) diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index 8dfac62bd8..3b232c17d8 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -830,7 +830,10 @@ func (dn *Daemon) checkStateOnFirstRun() error { // take a stab at that and re-run the drain+reboot routine if state.pendingConfig != nil && bootID == dn.bootID { dn.logSystem("drain interrupted, retrying") - return dn.drainAndReboot(state.pendingConfig) + if err := dn.drain(); err != nil { + return err + } + return dn.finalizeAndReboot(state.pendingConfig) } if err := dn.detectEarlySSHAccessesFromBoot(); err != nil { diff --git a/pkg/daemon/update.go b/pkg/daemon/update.go index 7e412d6afd..e16d41a299 100644 --- a/pkg/daemon/update.go +++ b/pkg/daemon/update.go @@ -99,10 +99,10 @@ func (dn *Daemon) updateOSAndReboot(newConfig *mcfgv1.MachineConfig) (retErr err if err := dn.updateOS(newConfig); err != nil { return err } - return dn.drainAndReboot(newConfig) + return dn.finalizeAndReboot(newConfig) } -func (dn *Daemon) drainAndReboot(newConfig *mcfgv1.MachineConfig) (retErr error) { +func (dn *Daemon) finalizeAndReboot(newConfig *mcfgv1.MachineConfig) (retErr error) { if out, err := dn.storePendingState(newConfig, 1); err != nil { return errors.Wrapf(err, "failed to log pending config: %s", string(out)) } @@ -121,43 +121,48 @@ func (dn *Daemon) drainAndReboot(newConfig *mcfgv1.MachineConfig) (retErr error) dn.recorder.Eventf(getNodeRef(dn.node), corev1.EventTypeNormal, "PendingConfig", fmt.Sprintf("Written pending config %s", newConfig.GetName())) } + // reboot. this function shouldn't actually return. + return dn.reboot(fmt.Sprintf("Node will reboot into config %v", newConfig.GetName())) +} + +func (dn *Daemon) drain() error { // Skip draining of the node when we're not cluster driven - if dn.kubeClient != nil { - dn.logSystem("Update prepared; beginning drain") + if dn.kubeClient == nil { + return nil + } + + dn.logSystem("Update prepared; beginning drain") - dn.recorder.Eventf(getNodeRef(dn.node), corev1.EventTypeNormal, "Drain", "Draining node to update config.") + dn.recorder.Eventf(getNodeRef(dn.node), corev1.EventTypeNormal, "Drain", "Draining node to update config.") - backoff := wait.Backoff{ - Steps: 5, - Duration: 10 * time.Second, - Factor: 2, + backoff := wait.Backoff{ + Steps: 5, + Duration: 10 * time.Second, + Factor: 2, + } + var lastErr error + if err := wait.ExponentialBackoff(backoff, func() (bool, error) { + err := drain.Drain(dn.kubeClient, []*corev1.Node{dn.node}, &drain.DrainOptions{ + DeleteLocalData: true, + Force: true, + GracePeriodSeconds: -1, + IgnoreDaemonsets: true, + Logger: &drainLogger{}, + }) + if err == nil { + return true, nil } - var lastErr error - if err := wait.ExponentialBackoff(backoff, func() (bool, error) { - err := drain.Drain(dn.kubeClient, []*corev1.Node{dn.node}, &drain.DrainOptions{ - DeleteLocalData: true, - Force: true, - GracePeriodSeconds: -1, - IgnoreDaemonsets: true, - Logger: &drainLogger{}, - }) - if err == nil { - return true, nil - } - lastErr = err - glog.Infof("Draining failed with: %v, retrying", err) - return false, nil - }); err != nil { - if err == wait.ErrWaitTimeout { - return errors.Wrapf(lastErr, "failed to drain node (%d tries): %v", backoff.Steps, err) - } - return errors.Wrap(err, "failed to drain node") + lastErr = err + glog.Infof("Draining failed with: %v, retrying", err) + return false, nil + }); err != nil { + if err == wait.ErrWaitTimeout { + return errors.Wrapf(lastErr, "failed to drain node (%d tries): %v", backoff.Steps, err) } - dn.logSystem("drain complete") + return errors.Wrap(err, "failed to drain node") } - - // reboot. this function shouldn't actually return. - return dn.reboot(fmt.Sprintf("Node will reboot into config %v", newConfig.GetName())) + dn.logSystem("drain complete") + return nil } var errUnreconcilable = errors.New("unreconcilable") @@ -213,6 +218,10 @@ func (dn *Daemon) update(oldConfig, newConfig *mcfgv1.MachineConfig) (retErr err dn.logSystem("Starting update from %s to %s: %+v", oldConfigName, newConfigName, diff) } + if err := dn.drain(); err != nil { + return err + } + // update files on disk that need updating if err := dn.updateFiles(oldConfig, newConfig); err != nil { return err