Skip to content

Commit

Permalink
fix: clear fast-failover status when node goes not ready
Browse files Browse the repository at this point in the history
Signed-off-by: James Munson <[email protected]>
  • Loading branch information
james-munson authored and derekbit committed Dec 14, 2024
1 parent af1f1e2 commit f37529b
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 2 deletions.
44 changes: 44 additions & 0 deletions controller/node_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,13 @@ func (nc *NodeController) syncNode(key string) (err error) {
return err
}

// Set any RWX leases to non-delinquent if owned by not-ready node.
// Usefulness of delinquent state has passed.
if err = nc.clearDelinquentLeasesIfNodeNotReady(node); err != nil {
log.WithError(err).Warnf("Failed to clear delinquent leases for node %v", node.Name)
return err
}

node.Status.Region, node.Status.Zone = types.GetRegionAndZone(kubeNode.Labels)

if nc.controllerID != node.Name {
Expand Down Expand Up @@ -2218,3 +2225,40 @@ func (nc *NodeController) SetSchedulableCondition(node *longhorn.Node, kubeNode
corev1.EventTypeNormal)
}
}

func (nc *NodeController) clearDelinquentLeasesIfNodeNotReady(node *longhorn.Node) error {
enabled, err := nc.ds.GetSettingAsBool(types.SettingNameRWXVolumeFastFailover)
if err != nil {
return errors.Wrapf(err, "failed to get setting %v", types.SettingNameRWXVolumeFastFailover)
}
if !enabled {
return nil
}

isDownOrDeleted, err := nc.ds.IsNodeDownOrDeleted(node.Name)
if err != nil {
return errors.Wrapf(err, "failed to check IsNodeDownOrDeleted, node=%v", node.Name)
}
if !isDownOrDeleted {
return nil
}

sms, err := nc.ds.ListShareManagersRO()
if err != nil {
return errors.Wrap(err, "failed to list share managers")
}
var storedError error
for _, sm := range sms {
// It's tempting to filter by sm.Status.OwnerID here, but don't. It's already been modified to a new node to handle the fast failover.
// Anyway, we just need its name. Share manager name is volume name is lease name.
err = nc.ds.ClearDelinquentAndStaleStateIfVolumeIsDelinquent(sm.Name, node.Name)
if err != nil {
nc.logger.WithError(err).Warnf("failed to clear delinquent lease for volume %v, node %v", sm.Name, node.Name)
if storedError == nil {
storedError = errors.Wrapf(err, "failed to clear delinquent lease for volume %v, node %v", sm.Name, node.Name)
}
}
}

return storedError
}
2 changes: 1 addition & 1 deletion controller/volume_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -1480,7 +1480,7 @@ func (c *VolumeController) handleDelinquentAndStaleStateForFaultedRWXVolume(v *l
if !isRegularRWXVolume(v) {
return nil
}
return c.ds.ClearDelinquentAndStaleStateIfVolumeIsDelinquent(v.Name)
return c.ds.ClearDelinquentAndStaleStateIfVolumeIsDelinquent(v.Name, "")
}

func (c *VolumeController) requestRemountIfFileSystemReadOnly(v *longhorn.Volume, e *longhorn.Engine) {
Expand Down
6 changes: 5 additions & 1 deletion datastore/kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ func (s *DataStore) UpdateLease(lease *coordinationv1.Lease) (*coordinationv1.Le
return s.kubeClient.CoordinationV1().Leases(s.namespace).Update(context.TODO(), lease, metav1.UpdateOptions{})
}

func (s *DataStore) ClearDelinquentAndStaleStateIfVolumeIsDelinquent(volumeName string) (err error) {
func (s *DataStore) ClearDelinquentAndStaleStateIfVolumeIsDelinquent(volumeName string, nodeName string) (err error) {
defer func() {
err = errors.Wrapf(err, "failed to ClearDelinquentAndStaleStateIfVolumeIsDelinquent")
}()
Expand All @@ -306,6 +306,10 @@ func (s *DataStore) ClearDelinquentAndStaleStateIfVolumeIsDelinquent(volumeName
// Ref: IsRWXVolumeDelinquent() function
return nil
}
if nodeName != "" && nodeName != holder {
// If a node is specified, only clear state for it.
return nil
}
if !(lease.Spec.AcquireTime).IsZero() {
// Non Zero lease.Spec.AcquireTime means not delinquent.
// Ref: IsRWXVolumeDelinquent() function
Expand Down

0 comments on commit f37529b

Please sign in to comment.