dynamic host volumes: move node pool governance to placement filter (CE) (#24867)

Enterprise governance checks happen after dynamic host volumes are placed, so if
node pool governance is active and you don't set a node pool or node ID for a
volume, it's possible to get a placement that fails node pool governance even
though there might be other nodes in the cluster that would be valid placements.

Move node pool governance for host volumes into the placement path, so that we
check the specific node pool when a node pool or node ID is set, and otherwise
filter candidate nodes by node pool.

This changeset is the CE version of ENT/2200.

Ref: https://hashicorp.atlassian.net/browse/NET-11549
Ref: hashicorp/nomad-enterprise#2200
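
As an aside for readers skimming the diff, here is a minimal, self-contained Go sketch of the flow described above. It is illustrative only: node, placeVolume, and poolAllowed are invented names for this example and do not appear in the commit; the real logic lives in placeHostVolume and the enterpriseNodePoolFilter stub changed below.

// Illustrative sketch only (not part of this commit): check a requested node
// pool up front, otherwise filter candidate nodes by pool while iterating.
package main

import "fmt"

type node struct {
    ID       string
    NodePool string
}

func placeVolume(nodes []node, requestedPool string, poolAllowed func(string) bool) (*node, error) {
    // When the volume names a node pool explicitly, governance is checked once, up front.
    if requestedPool != "" && !poolAllowed(requestedPool) {
        return nil, fmt.Errorf("node pool %q is not allowed", requestedPool)
    }

    filteredByGovernance := 0
    for i := range nodes {
        candidate := &nodes[i]
        if requestedPool != "" && candidate.NodePool != requestedPool {
            continue // not in the requested pool
        }
        // Otherwise governance filters each candidate node's pool.
        if !poolAllowed(candidate.NodePool) {
            filteredByGovernance++
            continue
        }
        return candidate, nil
    }
    return nil, fmt.Errorf("no node meets constraints: %d nodes filtered by node pool governance",
        filteredByGovernance)
}

func main() {
    nodes := []node{{ID: "n1", NodePool: "gpu"}, {ID: "n2", NodePool: "default"}}
    allowOnlyDefault := func(pool string) bool { return pool == "default" }

    placed, err := placeVolume(nodes, "", allowOnlyDefault)
    fmt.Println(placed, err) // &{n2 default} <nil>: n1 was filtered by governance
}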
tgross authored Jan 15, 2025
1 parent a292ecc commit 044784b
Showing 3 changed files with 37 additions and 8 deletions.
27 changes: 25 additions & 2 deletions nomad/host_volume_endpoint.go
@@ -508,9 +508,17 @@ func (v *HostVolume) placeHostVolume(snap *state.StateSnapshot, vol *structs.Hos
         return node, nil
     }

+    poolFilterFn, err := v.enterpriseNodePoolFilter(snap, vol)
+    if err != nil {
+        return nil, err
+    }
+
     var iter memdb.ResultIterator
-    var err error
     if vol.NodePool != "" {
+        if !poolFilterFn(vol.NodePool) {
+            return nil, fmt.Errorf("namespace %q does not allow volumes to use node pool %q",
+                vol.Namespace, vol.NodePool)
+        }
         iter, err = snap.NodesByNodePool(nil, vol.NodePool)
     } else {
         iter, err = snap.Nodes(nil)
@@ -532,6 +540,12 @@ func (v *HostVolume) placeHostVolume(snap *state.StateSnapshot, vol *structs.Hos
     constraints = append(constraints, vol.Constraints...)
     checker = scheduler.NewConstraintChecker(ctx, constraints)

+    var (
+        filteredByExisting    int
+        filteredByGovernance  int
+        filteredByFeasibility int
+    )
+
     for {
         raw := iter.Next()
         if raw == nil {
@@ -544,11 +558,18 @@ func (v *HostVolume) placeHostVolume(snap *state.StateSnapshot, vol *structs.Hos
         // haven't yet written to state. The client will reject requests to
         // create/register a volume with the same name with a different ID.
         if _, hasVol := candidate.HostVolumes[vol.Name]; hasVol {
+            filteredByExisting++
             continue
         }

+        if !poolFilterFn(candidate.NodePool) {
+            filteredByGovernance++
+            continue
+        }
+
         if checker != nil {
             if ok := checker.Feasible(candidate); !ok {
+                filteredByFeasibility++
                 continue
             }
         }
@@ -559,7 +580,9 @@ func (v *HostVolume) placeHostVolume(snap *state.StateSnapshot, vol *structs.Hos

     }

-    return nil, fmt.Errorf("no node meets constraints")
+    return nil, fmt.Errorf(
+        "no node meets constraints: %d nodes had existing volume, %d nodes filtered by node pool governance, %d nodes were infeasible",
+        filteredByExisting, filteredByGovernance, filteredByFeasibility)
 }

 // placementContext implements the scheduler.ConstraintContext interface, a
8 changes: 7 additions & 1 deletion nomad/host_volume_endpoint_ce.go
@@ -12,7 +12,7 @@ import (
 )

 // enforceEnterprisePolicy is the CE stub for Enterprise governance via
-// Sentinel policy, quotas, and node pools
+// Sentinel policy and quotas
 func (v *HostVolume) enforceEnterprisePolicy(
     _ *state.StateSnapshot,
     _ *structs.HostVolume,
@@ -21,3 +21,9 @@ func (v *HostVolume) enforceEnterprisePolicy(
 ) (error, error) {
     return nil, nil
 }
+
+// enterpriseNodePoolFilter is the CE stub for filtering nodes during placement
+// via Enterprise node pool governance.
+func (v *HostVolume) enterpriseNodePoolFilter(_ *state.StateSnapshot, _ *structs.HostVolume) (func(string) bool, error) {
+    return func(_ string) bool { return true }, nil
+}
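
By contrast with the always-true CE stub added above, an Enterprise implementation would return a closure that consults governance configuration. The following is a purely hypothetical sketch of that shape: nodePoolFilterSketch and its allow-list input are invented for illustration, and the actual Enterprise code (hashicorp/nomad-enterprise#2200) is not part of this commit.

package main

import "fmt"

// Hypothetical sketch only: a non-stub filter that allows a node pool when it
// appears in an allow-list, treating an empty allow-list as "allow all".
func nodePoolFilterSketch(allowedPools map[string]bool) func(string) bool {
    return func(pool string) bool {
        if len(allowedPools) == 0 {
            return true
        }
        return allowedPools[pool]
    }
}

func main() {
    filter := nodePoolFilterSketch(map[string]bool{"default": true})
    fmt.Println(filter("default"), filter("gpu")) // true false
}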
10 changes: 5 additions & 5 deletions nomad/host_volume_endpoint_test.go
@@ -168,12 +168,12 @@ func TestHostVolumeEndpoint_CreateRegisterGetDelete(t *testing.T) {
         var resp structs.HostVolumeCreateResponse
         req.AuthToken = token
         err := msgpackrpc.CallWithCodec(codec, "HostVolume.Create", req, &resp)
-        must.EqError(t, err, `could not place volume "example1": no node meets constraints`)
+        must.EqError(t, err, `could not place volume "example1": no node meets constraints: 0 nodes had existing volume, 0 nodes filtered by node pool governance, 1 nodes were infeasible`)

         req.Volume = vol2.Copy()
         resp = structs.HostVolumeCreateResponse{}
         err = msgpackrpc.CallWithCodec(codec, "HostVolume.Create", req, &resp)
-        must.EqError(t, err, `could not place volume "example2": no node meets constraints`)
+        must.EqError(t, err, `could not place volume "example2": no node meets constraints: 0 nodes had existing volume, 0 nodes filtered by node pool governance, 1 nodes were infeasible`)
     })

     t.Run("valid create", func(t *testing.T) {
@@ -725,12 +725,12 @@ func TestHostVolumeEndpoint_placeVolume(t *testing.T) {
                     Operand: "=",
                 },
             }},
-            expectErr: "no node meets constraints",
+            expectErr: "no node meets constraints: 0 nodes had existing volume, 0 nodes filtered by node pool governance, 4 nodes were infeasible",
         },
         {
             name: "no matching plugin",
             vol:  &structs.HostVolume{PluginID: "not-mkdir"},
-            expectErr: "no node meets constraints",
+            expectErr: "no node meets constraints: 0 nodes had existing volume, 0 nodes filtered by node pool governance, 4 nodes were infeasible",
         },
         {
             name: "match already has a volume with the same name",
@@ -744,7 +744,7 @@ func TestHostVolumeEndpoint_placeVolume(t *testing.T) {
                     Operand: "=",
                 },
             }},
-            expectErr: "no node meets constraints",
+            expectErr: "no node meets constraints: 1 nodes had existing volume, 0 nodes filtered by node pool governance, 3 nodes were infeasible",
         },
     }

