From 709fba28d4eee870ecacc3805f19a582fca58900 Mon Sep 17 00:00:00 2001 From: iljarotar Date: Tue, 8 Nov 2022 12:05:12 +0100 Subject: [PATCH 01/45] compare number of waiting machines to partition pool size and react accordingly --- cmd/metal-api/internal/datastore/event.go | 141 +++++++++++++++++- cmd/metal-api/internal/datastore/machine.go | 59 ++++++++ cmd/metal-api/internal/fsm/events.go | 3 + cmd/metal-api/internal/fsm/fsm.go | 47 +++--- cmd/metal-api/internal/fsm/fsm_test.go | 29 +++- cmd/metal-api/internal/fsm/states/states.go | 26 ++++ cmd/metal-api/internal/fsm/states/waiting.go | 7 + cmd/metal-api/internal/grpc/event-service.go | 13 +- cmd/metal-api/internal/metal/machine.go | 2 + cmd/metal-api/internal/metal/partition.go | 1 + .../internal/service/machine-service.go | 6 +- go.sum | 4 - 12 files changed, 286 insertions(+), 52 deletions(-) diff --git a/cmd/metal-api/internal/datastore/event.go b/cmd/metal-api/internal/datastore/event.go index 25e603968..eb356a1d4 100644 --- a/cmd/metal-api/internal/datastore/event.go +++ b/cmd/metal-api/internal/datastore/event.go @@ -1,8 +1,18 @@ package datastore import ( + "crypto/rand" + "math" + "math/big" + mathrand "math/rand" + "strconv" + "strings" + "time" + "github.com/metal-stack/metal-api/cmd/metal-api/internal/fsm" + "github.com/metal-stack/metal-api/cmd/metal-api/internal/fsm/states" "github.com/metal-stack/metal-api/cmd/metal-api/internal/metal" + "github.com/metal-stack/metal-lib/bus" "go.uber.org/zap" ) @@ -37,8 +47,8 @@ func (rs *RethinkStore) CreateProvisioningEventContainer(ec *metal.ProvisioningE func (rs *RethinkStore) UpsertProvisioningEventContainer(ec *metal.ProvisioningEventContainer) error { return rs.upsertEntity(rs.eventTable(), ec) } -func (rs *RethinkStore) ProvisioningEventForMachine(log *zap.SugaredLogger, event *metal.ProvisioningEvent, machineID string) (*metal.ProvisioningEventContainer, error) { - ec, err := rs.FindProvisioningEventContainer(machineID) +func (rs *RethinkStore) ProvisioningEventForMachine(log *zap.SugaredLogger, publisher bus.Publisher, event *metal.ProvisioningEvent, machine *metal.Machine) (*metal.ProvisioningEventContainer, error) { + ec, err := rs.FindProvisioningEventContainer(machine.ID) if err != nil && !metal.IsNotFound(err) { return nil, err } @@ -46,13 +56,20 @@ func (rs *RethinkStore) ProvisioningEventForMachine(log *zap.SugaredLogger, even if ec == nil { ec = &metal.ProvisioningEventContainer{ Base: metal.Base{ - ID: machineID, + ID: machine.ID, }, Liveliness: metal.MachineLivelinessAlive, } } - newEC, err := fsm.HandleProvisioningEvent(log, ec, event) + config := states.StateConfig{ + Log: log, + Container: ec, + Event: event, + Message: states.FSMMessageEmpty, + } + + newEC, err := fsm.HandleProvisioningEvent(&config) if err != nil { return nil, err } @@ -62,7 +79,121 @@ func (rs *RethinkStore) ProvisioningEventForMachine(log *zap.SugaredLogger, even } newEC.TrimEvents(100) - err = rs.UpsertProvisioningEventContainer(newEC) + if err != nil { + return nil, err + } + + if config.Message == states.FSMMessageEnterWaitingState || config.Message == states.FSMMessageLeaveWaitingState { + err = rs.AdjustNumberOfWaitingMachines(log, publisher, machine) + } + return newEC, err } + +func (rs *RethinkStore) AdjustNumberOfWaitingMachines(log *zap.SugaredLogger, publisher bus.Publisher, machine *metal.Machine) error { + p, err := rs.FindPartition(machine.PartitionID) + if err != nil { + return err + } + + waitingMachines, err := rs.listWaitingMachines(machine.PartitionID, machine.SizeID) + if err 
!= nil { + return err + } + + poolSizeExceeded := 0 + if strings.Contains(p.WaitingPoolSize, "%") { + poolSizeExceeded = rs.waitingMachinesMatchPoolSize(len(waitingMachines), p.WaitingPoolSize, true, machine) + } else { + poolSizeExceeded = rs.waitingMachinesMatchPoolSize(len(waitingMachines), p.WaitingPoolSize, false, machine) + } + + if poolSizeExceeded == 0 { + return nil + } + + if poolSizeExceeded > 0 { + m := randomMachine(waitingMachines) + err = rs.machineCommand(log, &m, publisher, metal.MachineOffCmd, metal.ShutdownState) + if err != nil { + return err + } + } else { + shutdownMachines, err := rs.listShutdownMachines(machine.PartitionID, machine.SizeID) + if err != nil { + return err + } + + m := randomMachine(shutdownMachines) + err = rs.machineCommand(log, &m, publisher, metal.MachineOnCmd, metal.AvailableState) + if err != nil { + return err + } + } + + return nil +} + +func (rs *RethinkStore) waitingMachinesMatchPoolSize(actual int, required string, inPercent bool, m *metal.Machine) int { + if !inPercent { + r, err := strconv.ParseInt(required, 10, 32) + if err != nil { + return 0 + } + return actual - int(r) + } + + p := strings.Split(required, "%")[0] + r, err := strconv.ParseInt(p, 10, 32) + if err != nil { + return 0 + } + + allMachines, err := rs.listMachinesInPartition(m.PartitionID, m.SizeID) + if err != nil { + return 0 + } + + return actual - int(math.Round(float64(len(allMachines))*float64(r)/100)) +} + +func randomMachine(machines metal.Machines) metal.Machine { + var idx int + b, err := rand.Int(rand.Reader, big.NewInt(int64(len(machines)))) //nolint:gosec + if err != nil { + idx = int(b.Uint64()) + } else { + mathrand.Seed(time.Now().UnixNano()) + // nolint + idx = mathrand.Intn(len(machines)) + } + + return machines[idx] +} + +func (rs *RethinkStore) machineCommand(logger *zap.SugaredLogger, m *metal.Machine, publisher bus.Publisher, cmd metal.MachineCommand, state metal.MState) error { + evt := metal.MachineEvent{ + Type: metal.COMMAND, + Cmd: &metal.MachineExecCommand{ + Command: cmd, + TargetMachineID: m.ID, + IPMI: &m.IPMI, + }, + } + + newMachine := *m + newMachine.State = metal.MachineState{Value: state} + err := rs.UpdateMachine(m, &newMachine) + if err != nil { + return err + } + + logger.Infow("publish event", "event", evt, "command", *evt.Cmd) + err = publisher.Publish(metal.TopicMachine.GetFQN(m.PartitionID), evt) + if err != nil { + return err + } + + return nil +} diff --git a/cmd/metal-api/internal/datastore/machine.go b/cmd/metal-api/internal/datastore/machine.go index 93f28c172..47994a483 100644 --- a/cmd/metal-api/internal/datastore/machine.go +++ b/cmd/metal-api/internal/datastore/machine.go @@ -429,6 +429,22 @@ func (rs *RethinkStore) ListMachines() (metal.Machines, error) { return ms, err } +func (rs *RethinkStore) listMachinesInPartition(partitionid, sizeid string) (metal.Machines, error) { + q := *rs.machineTable() + q = q.Filter(map[string]interface{}{ + "partitionid": partitionid, + "sizeid": sizeid, + }) + + var machines metal.Machines + err := rs.searchEntities(&q, &machines) + if err != nil { + return nil, err + } + + return machines, nil +} + // CreateMachine creates a new machine in the database as "unallocated new machines". // If the given machine has an allocation, the function returns an error because // allocated machines cannot be created. 
If there is already a machine with the @@ -518,3 +534,46 @@ func (rs *RethinkStore) FindWaitingMachine(partitionid, sizeid string) (*metal.M } return &newMachine, nil } + +func (rs *RethinkStore) listWaitingMachines(partitionid, sizeid string) (metal.Machines, error) { + q := *rs.machineTable() + q = q.Filter(map[string]interface{}{ + "allocation": nil, + "partitionid": partitionid, + "sizeid": sizeid, + "state": map[string]string{ + "value": string(metal.AvailableState), + }, + "waiting": true, + "preallocated": false, + }) + + var machines metal.Machines + err := rs.searchEntities(&q, &machines) + if err != nil { + return nil, err + } + + return machines, nil +} + +func (rs *RethinkStore) listShutdownMachines(partitionid, sizeid string) (metal.Machines, error) { + q := *rs.machineTable() + q = q.Filter(map[string]interface{}{ + "allocation": nil, + "partitionid": partitionid, + "sizeid": sizeid, + "state": map[string]string{ + "value": string(metal.ShutdownState), + }, + "preallocated": false, + }) + + var machines metal.Machines + err := rs.searchEntities(&q, &machines) + if err != nil { + return nil, err + } + + return machines, nil +} diff --git a/cmd/metal-api/internal/fsm/events.go b/cmd/metal-api/internal/fsm/events.go index 0c59e8c89..9fd891500 100644 --- a/cmd/metal-api/internal/fsm/events.go +++ b/cmd/metal-api/internal/fsm/events.go @@ -159,5 +159,8 @@ func eventCallbacks(config *states.StateConfig) fsm.Callbacks { callbacks["enter_"+name] = state.OnTransition } + waiting := allStates[states.Waiting.String()].(*states.WaitingState) + callbacks["leave_"+states.Waiting.String()] = waiting.OnLeave + return callbacks } diff --git a/cmd/metal-api/internal/fsm/fsm.go b/cmd/metal-api/internal/fsm/fsm.go index 88ac952f9..ca0a89406 100644 --- a/cmd/metal-api/internal/fsm/fsm.go +++ b/cmd/metal-api/internal/fsm/fsm.go @@ -8,7 +8,6 @@ import ( "github.com/looplab/fsm" "github.com/metal-stack/metal-api/cmd/metal-api/internal/fsm/states" "github.com/metal-stack/metal-api/cmd/metal-api/internal/metal" - "go.uber.org/zap" ) // HandleProvisioningEvent can be called to determine whether the given incoming event follows an expected lifecycle of a machine considering the event history of the given provisioning event container. @@ -16,57 +15,51 @@ import ( // The function returns a new provisioning event container that can then be safely persisted in the database. If an error is returned, the incoming event is not supposed to be persisted in the database. // // Among other things, this function can detect crash loops or other irregularities within a machine lifecycle and enriches the returned provisioning event container with this information. 
-func HandleProvisioningEvent(log *zap.SugaredLogger, ec *metal.ProvisioningEventContainer, event *metal.ProvisioningEvent) (*metal.ProvisioningEventContainer, error) { - if ec == nil { - return nil, fmt.Errorf("provisioning event container must not be nil") - } - - if event == nil { - return nil, fmt.Errorf("provisioning event must not be nil") +func HandleProvisioningEvent(c *states.StateConfig) (*metal.ProvisioningEventContainer, error) { + if err := c.Validate(); err != nil { + return nil, err } var ( - clone = *ec - container = &clone - f = fsm.NewFSM( - initialStateFromEventContainer(container), + f = fsm.NewFSM( + initialStateFromEventContainer(c.Container), Events(), - eventCallbacks(&states.StateConfig{Log: log, Event: event, Container: container}), + eventCallbacks(c), ) ) - err := f.Event(event.Event.String()) + err := f.Event(c.Event.Event.String()) if err == nil { - return container, nil + return c.Container, nil } if errors.As(err, &fsm.InvalidEventError{}) { - if event.Message == "" { - event.Message = fmt.Sprintf("[unexpectedly received in %s]", strings.ToLower(f.Current())) + if c.Event.Message == "" { + c.Event.Message = fmt.Sprintf("[unexpectedly received in %s]", strings.ToLower(f.Current())) } else { - event.Message = fmt.Sprintf("[unexpectedly received in %s]: %s", strings.ToLower(f.Current()), event.Message) + c.Event.Message = fmt.Sprintf("[unexpectedly received in %s]: %s", strings.ToLower(f.Current()), c.Event.Message) } - container.LastEventTime = &event.Time - container.Liveliness = metal.MachineLivelinessAlive - container.LastErrorEvent = event + c.Container.LastEventTime = &c.Event.Time + c.Container.Liveliness = metal.MachineLivelinessAlive + c.Container.LastErrorEvent = c.Event - switch e := event.Event; e { //nolint:exhaustive + switch e := c.Event.Event; e { //nolint:exhaustive case metal.ProvisioningEventPXEBooting, metal.ProvisioningEventPreparing: - container.CrashLoop = true - container.Events = append([]metal.ProvisioningEvent{*event}, container.Events...) + c.Container.CrashLoop = true + c.Container.Events = append([]metal.ProvisioningEvent{*c.Event}, c.Container.Events...) case metal.ProvisioningEventAlive: // under no circumstances we want to persists alive in the events container. // when this happens the FSM gets stuck in invalid transitions // (e.g. all following transitions are invalid and all subsequent alive events will be stored, cramping history). default: - container.Events = append([]metal.ProvisioningEvent{*event}, container.Events...) + c.Container.Events = append([]metal.ProvisioningEvent{*c.Event}, c.Container.Events...) 
} - return container, nil + return c.Container, nil } - return nil, fmt.Errorf("internal error while calculating provisioning event container for machine %s: %w", container.ID, err) + return nil, fmt.Errorf("internal error while calculating provisioning event container for machine %s: %w", c.Container.ID, err) } func initialStateFromEventContainer(container *metal.ProvisioningEventContainer) string { diff --git a/cmd/metal-api/internal/fsm/fsm_test.go b/cmd/metal-api/internal/fsm/fsm_test.go index 4e2d20dab..b067a8ad3 100644 --- a/cmd/metal-api/internal/fsm/fsm_test.go +++ b/cmd/metal-api/internal/fsm/fsm_test.go @@ -5,6 +5,7 @@ import ( "time" "github.com/google/go-cmp/cmp" + "github.com/metal-stack/metal-api/cmd/metal-api/internal/fsm/states" "github.com/metal-stack/metal-api/cmd/metal-api/internal/metal" "go.uber.org/zap/zaptest" ) @@ -629,7 +630,13 @@ func TestHandleProvisioningEvent(t *testing.T) { for i := range tests { tt := tests[i] t.Run(tt.name, func(t *testing.T) { - got, err := HandleProvisioningEvent(zaptest.NewLogger(t).Sugar(), tt.container, tt.event) + params := states.StateConfig{ + Log: zaptest.NewLogger(t).Sugar(), + Container: tt.container, + Event: tt.event, + } + + got, err := HandleProvisioningEvent(¶ms) if diff := cmp.Diff(tt.wantErr, err); diff != "" { t.Errorf("HandleProvisioningEvent() diff = %s", diff) } @@ -649,15 +656,21 @@ func TestReactionToAllIncomingEvents(t *testing.T) { // this test ensures that for every incoming event we have a proper transition for e1 := range metal.AllProvisioningEventTypes { for e2 := range metal.AllProvisioningEventTypes { - _, err := HandleProvisioningEvent(zaptest.NewLogger(t).Sugar(), &metal.ProvisioningEventContainer{ - Events: metal.ProvisioningEvents{ - { - Event: e2, + params := states.StateConfig{ + Log: zaptest.NewLogger(t).Sugar(), + Container: &metal.ProvisioningEventContainer{ + Events: metal.ProvisioningEvents{ + { + Event: e2, + }, }, }, - }, &metal.ProvisioningEvent{ - Event: e1, - }) + Event: &metal.ProvisioningEvent{ + Event: e1, + }, + } + + _, err := HandleProvisioningEvent(¶ms) if err != nil { t.Errorf("transitioning from state %s with event %s: %s", e2, e1, err) } diff --git a/cmd/metal-api/internal/fsm/states/states.go b/cmd/metal-api/internal/fsm/states/states.go index 6f1604b28..b600be84a 100644 --- a/cmd/metal-api/internal/fsm/states/states.go +++ b/cmd/metal-api/internal/fsm/states/states.go @@ -1,6 +1,8 @@ package states import ( + "fmt" + "github.com/looplab/fsm" "github.com/metal-stack/metal-api/cmd/metal-api/internal/metal" "go.uber.org/zap" @@ -21,20 +23,44 @@ const ( MachineReclaim stateType = "State Machine Reclaim" ) +const ( + FSMMessageEmpty FSMMessage = "" + FSMMessageEnterWaitingState FSMMessage = "Enter Waiting State" + FSMMessageLeaveWaitingState FSMMessage = "Leave Waiting State" +) + type FSMState interface { OnTransition(e *fsm.Event) } type stateType string +type FSMMessage string func (t stateType) String() string { return string(t) } +func (m FSMMessage) String() string { + return string(m) +} + type StateConfig struct { Log *zap.SugaredLogger Container *metal.ProvisioningEventContainer Event *metal.ProvisioningEvent + Message FSMMessage +} + +func (c *StateConfig) Validate() error { + if c.Container == nil { + return fmt.Errorf("provisioning event container must not be nil") + } + + if c.Event == nil { + return fmt.Errorf("provisioning event must not be nil") + } + + return nil } func AllStates(c *StateConfig) map[string]FSMState { diff --git 
a/cmd/metal-api/internal/fsm/states/waiting.go b/cmd/metal-api/internal/fsm/states/waiting.go index c02a22b1f..0e3b83c09 100644 --- a/cmd/metal-api/internal/fsm/states/waiting.go +++ b/cmd/metal-api/internal/fsm/states/waiting.go @@ -8,15 +8,22 @@ import ( type WaitingState struct { container *metal.ProvisioningEventContainer event *metal.ProvisioningEvent + config *StateConfig } func newWaiting(c *StateConfig) *WaitingState { return &WaitingState{ container: c.Container, event: c.Event, + config: c, } } func (p *WaitingState) OnTransition(e *fsm.Event) { appendEventToContainer(p.event, p.container) + p.config.Message = FSMMessageEnterWaitingState +} + +func (p *WaitingState) OnLeave(e *fsm.Event) { + p.config.Message = FSMMessageLeaveWaitingState } diff --git a/cmd/metal-api/internal/grpc/event-service.go b/cmd/metal-api/internal/grpc/event-service.go index b25f37b2e..5cea6aed8 100644 --- a/cmd/metal-api/internal/grpc/event-service.go +++ b/cmd/metal-api/internal/grpc/event-service.go @@ -8,19 +8,22 @@ import ( "github.com/metal-stack/metal-api/cmd/metal-api/internal/datastore" "github.com/metal-stack/metal-api/cmd/metal-api/internal/metal" v1 "github.com/metal-stack/metal-api/pkg/api/v1" + "github.com/metal-stack/metal-lib/bus" "go.uber.org/multierr" "go.uber.org/zap" ) type EventService struct { - log *zap.SugaredLogger - ds *datastore.RethinkStore + log *zap.SugaredLogger + ds *datastore.RethinkStore + publisher bus.Publisher } func NewEventService(cfg *ServerConfig) *EventService { return &EventService{ - ds: cfg.Store, - log: cfg.Logger.Named("event-service"), + ds: cfg.Store, + log: cfg.Logger.Named("event-service"), + publisher: cfg.Publisher, } } func (e *EventService) Send(ctx context.Context, req *v1.EventServiceSendRequest) (*v1.EventServiceSendResponse, error) { @@ -70,7 +73,7 @@ func (e *EventService) Send(ctx context.Context, req *v1.EventServiceSendRequest Message: event.Message, } - _, err = e.ds.ProvisioningEventForMachine(e.log, &ev, machineID) + _, err = e.ds.ProvisioningEventForMachine(e.log, e.publisher, &ev, m) if err != nil { processErr = multierr.Append(processErr, err) failed = append(failed, machineID) diff --git a/cmd/metal-api/internal/metal/machine.go b/cmd/metal-api/internal/metal/machine.go index a01add2f8..25eab788c 100644 --- a/cmd/metal-api/internal/metal/machine.go +++ b/cmd/metal-api/internal/metal/machine.go @@ -21,6 +21,8 @@ const ( ReservedState MState = "RESERVED" // LockedState describes a machine state where a machine cannot be deleted or allocated anymore LockedState MState = "LOCKED" + // ShutdownState describes a machine state where a machine was shut down on purpose and should not be resurrected + ShutdownState MState = "SHUTDOWN" ) var ( diff --git a/cmd/metal-api/internal/metal/partition.go b/cmd/metal-api/internal/metal/partition.go index 3688f0acd..95964ff77 100644 --- a/cmd/metal-api/internal/metal/partition.go +++ b/cmd/metal-api/internal/metal/partition.go @@ -6,6 +6,7 @@ type Partition struct { BootConfiguration BootConfiguration `rethinkdb:"bootconfig" json:"bootconfig"` MgmtServiceAddress string `rethinkdb:"mgmtserviceaddr" json:"mgmtserviceaddr"` PrivateNetworkPrefixLength uint8 `rethinkdb:"privatenetworkprefixlength" json:"privatenetworkprefixlength"` + WaitingPoolSize string `rethinkdb:"waitingpoolsize" json:"waitingpoolsize"` } // BootConfiguration defines the metal-hammer initrd, kernel and commandline diff --git a/cmd/metal-api/internal/service/machine-service.go b/cmd/metal-api/internal/service/machine-service.go index 
bb77acd56..d909c2cf1 100644 --- a/cmd/metal-api/internal/service/machine-service.go +++ b/cmd/metal-api/internal/service/machine-service.go @@ -1529,7 +1529,7 @@ func (r machineResource) freeMachine(request *restful.Request, response *restful Event: metal.ProvisioningEventMachineReclaim, Message: "free machine called", } - _, err = r.ds.ProvisioningEventForMachine(logger, &ev, id) + _, err = r.ds.ProvisioningEventForMachine(logger, r, &ev, m) if err != nil { r.log.Errorw("error sending provisioning event after machine free", "error", err) } @@ -1820,7 +1820,7 @@ func ResurrectMachines(ds *datastore.RethinkStore, publisher bus.Publisher, ep * continue } - if provisioningEvents.Liveliness == metal.MachineLivelinessDead && time.Since(*provisioningEvents.LastEventTime) > metal.MachineResurrectAfter { + if provisioningEvents.Liveliness == metal.MachineLivelinessDead && time.Since(*provisioningEvents.LastEventTime) > metal.MachineResurrectAfter && m.State.Value != metal.ShutdownState { logger.Infow("resurrecting dead machine", "machineID", m.ID, "liveliness", provisioningEvents.Liveliness, "since", time.Since(*provisioningEvents.LastEventTime).String()) err = act.freeMachine(publisher, &m, headscaleClient, logger) if err != nil { @@ -1992,7 +1992,7 @@ func (r *machineResource) machineCmd(cmd metal.MachineCommand, request *restful. Event: metal.ProvisioningEventPlannedReboot, Message: string(cmd), } - _, err = r.ds.ProvisioningEventForMachine(logger, &ev, id) + _, err = r.ds.ProvisioningEventForMachine(logger, r, &ev, newMachine) if err != nil { r.sendError(request, response, defaultError(err)) return diff --git a/go.sum b/go.sum index e8e18848c..a6c7b0c0b 100644 --- a/go.sum +++ b/go.sum @@ -430,8 +430,6 @@ github.com/docker/distribution v2.8.1+incompatible/go.mod h1:J2gT2udsDAN96Uj4Kfc github.com/docker/docker v1.4.2-0.20190924003213-a8608b5b67c7/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= github.com/docker/docker v20.10.5+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= github.com/docker/docker v20.10.17+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= -github.com/docker/docker v20.10.18+incompatible h1:SN84VYXTBNGn92T/QwIRPlum9zfemfitN7pbsp26WSc= -github.com/docker/docker v20.10.18+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= github.com/docker/docker v20.10.20+incompatible h1:kH9tx6XO+359d+iAkumyKDc5Q1kOwPuAUaeri48nD6E= github.com/docker/docker v20.10.20+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= github.com/docker/docker-credential-helpers v0.6.3/go.mod h1:WRaJzqw3CTB9bk10avuGsjVBZsD05qeibJ1/TYlvc0Y= @@ -2352,8 +2350,6 @@ google.golang.org/grpc v1.45.0/go.mod h1:lN7owxKUQEqMfSyQikvvk5tf/6zMPsrK+ONuO11 google.golang.org/grpc v1.46.0/go.mod h1:vN9eftEi1UMyUsIF80+uQXhHjbXYbm0uXoFCACuMGWk= google.golang.org/grpc v1.46.2/go.mod h1:vN9eftEi1UMyUsIF80+uQXhHjbXYbm0uXoFCACuMGWk= google.golang.org/grpc v1.47.0/go.mod h1:vN9eftEi1UMyUsIF80+uQXhHjbXYbm0uXoFCACuMGWk= -google.golang.org/grpc v1.50.0 h1:fPVVDxY9w++VjTZsYvXWqEf9Rqar/e+9zYfxKK+W+YU= -google.golang.org/grpc v1.50.0/go.mod h1:ZgQEeidpAuNRZ8iRrlBKXZQP1ghovWIVhdJRyCDK+GI= google.golang.org/grpc v1.50.1 h1:DS/BukOZWp8s6p4Dt/tOaJaTQyPyOoCcrjroHuCeLzY= google.golang.org/grpc v1.50.1/go.mod h1:ZgQEeidpAuNRZ8iRrlBKXZQP1ghovWIVhdJRyCDK+GI= google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.1.0/go.mod h1:6Kw0yEErY5E/yWrBtf03jp27GLLJujG4z/JK95pnjjw= From 72197ff51e4d8a1c9ee7c053656ce041ec75de8f Mon Sep 17 00:00:00 2001 From: iljarotar Date: Tue, 22 Nov 2022 
12:06:06 +0100 Subject: [PATCH 02/45] poolsize validation, better db queries, pass function to fsm --- cmd/metal-api/internal/datastore/event.go | 129 +----------- cmd/metal-api/internal/datastore/machine.go | 60 +----- cmd/metal-api/internal/datastore/poolsize.go | 185 ++++++++++++++++++ cmd/metal-api/internal/fsm/states/states.go | 22 +-- cmd/metal-api/internal/fsm/states/waiting.go | 20 +- .../internal/service/machine-service.go | 6 +- .../internal/service/partition-service.go | 52 +++++ .../internal/service/v1/partition.go | 2 + 8 files changed, 268 insertions(+), 208 deletions(-) create mode 100644 cmd/metal-api/internal/datastore/poolsize.go diff --git a/cmd/metal-api/internal/datastore/event.go b/cmd/metal-api/internal/datastore/event.go index eb356a1d4..8d1ce3b1e 100644 --- a/cmd/metal-api/internal/datastore/event.go +++ b/cmd/metal-api/internal/datastore/event.go @@ -1,14 +1,6 @@ package datastore import ( - "crypto/rand" - "math" - "math/big" - mathrand "math/rand" - "strconv" - "strings" - "time" - "github.com/metal-stack/metal-api/cmd/metal-api/internal/fsm" "github.com/metal-stack/metal-api/cmd/metal-api/internal/fsm/states" "github.com/metal-stack/metal-api/cmd/metal-api/internal/metal" @@ -63,10 +55,12 @@ func (rs *RethinkStore) ProvisioningEventForMachine(log *zap.SugaredLogger, publ } config := states.StateConfig{ - Log: log, - Container: ec, - Event: event, - Message: states.FSMMessageEmpty, + Log: log, + Container: ec, + Event: event, + AdjustWaitingMachines: rs.adjustNumberOfWaitingMachines, + Publisher: publisher, + Machine: machine, } newEC, err := fsm.HandleProvisioningEvent(&config) @@ -84,116 +78,5 @@ func (rs *RethinkStore) ProvisioningEventForMachine(log *zap.SugaredLogger, publ return nil, err } - if config.Message == states.FSMMessageEnterWaitingState || config.Message == states.FSMMessageLeaveWaitingState { - err = rs.AdjustNumberOfWaitingMachines(log, publisher, machine) - } - return newEC, err } - -func (rs *RethinkStore) AdjustNumberOfWaitingMachines(log *zap.SugaredLogger, publisher bus.Publisher, machine *metal.Machine) error { - p, err := rs.FindPartition(machine.PartitionID) - if err != nil { - return err - } - - waitingMachines, err := rs.listWaitingMachines(machine.PartitionID, machine.SizeID) - if err != nil { - return err - } - - poolSizeExceeded := 0 - if strings.Contains(p.WaitingPoolSize, "%") { - poolSizeExceeded = rs.waitingMachinesMatchPoolSize(len(waitingMachines), p.WaitingPoolSize, true, machine) - } else { - poolSizeExceeded = rs.waitingMachinesMatchPoolSize(len(waitingMachines), p.WaitingPoolSize, false, machine) - } - - if poolSizeExceeded == 0 { - return nil - } - - if poolSizeExceeded > 0 { - m := randomMachine(waitingMachines) - err = rs.machineCommand(log, &m, publisher, metal.MachineOffCmd, metal.ShutdownState) - if err != nil { - return err - } - } else { - shutdownMachines, err := rs.listShutdownMachines(machine.PartitionID, machine.SizeID) - if err != nil { - return err - } - - m := randomMachine(shutdownMachines) - err = rs.machineCommand(log, &m, publisher, metal.MachineOnCmd, metal.AvailableState) - if err != nil { - return err - } - } - - return nil -} - -func (rs *RethinkStore) waitingMachinesMatchPoolSize(actual int, required string, inPercent bool, m *metal.Machine) int { - if !inPercent { - r, err := strconv.ParseInt(required, 10, 32) - if err != nil { - return 0 - } - return actual - int(r) - } - - p := strings.Split(required, "%")[0] - r, err := strconv.ParseInt(p, 10, 32) - if err != nil { - return 0 - } - - 
allMachines, err := rs.listMachinesInPartition(m.PartitionID, m.SizeID) - if err != nil { - return 0 - } - - return actual - int(math.Round(float64(len(allMachines))*float64(r)/100)) -} - -func randomMachine(machines metal.Machines) metal.Machine { - var idx int - b, err := rand.Int(rand.Reader, big.NewInt(int64(len(machines)))) //nolint:gosec - if err != nil { - idx = int(b.Uint64()) - } else { - mathrand.Seed(time.Now().UnixNano()) - // nolint - idx = mathrand.Intn(len(machines)) - } - - return machines[idx] -} - -func (rs *RethinkStore) machineCommand(logger *zap.SugaredLogger, m *metal.Machine, publisher bus.Publisher, cmd metal.MachineCommand, state metal.MState) error { - evt := metal.MachineEvent{ - Type: metal.COMMAND, - Cmd: &metal.MachineExecCommand{ - Command: cmd, - TargetMachineID: m.ID, - IPMI: &m.IPMI, - }, - } - - newMachine := *m - newMachine.State = metal.MachineState{Value: state} - err := rs.UpdateMachine(m, &newMachine) - if err != nil { - return err - } - - logger.Infow("publish event", "event", evt, "command", *evt.Cmd) - err = publisher.Publish(metal.TopicMachine.GetFQN(m.PartitionID), evt) - if err != nil { - return err - } - - return nil -} diff --git a/cmd/metal-api/internal/datastore/machine.go b/cmd/metal-api/internal/datastore/machine.go index 47994a483..c57f1ec62 100644 --- a/cmd/metal-api/internal/datastore/machine.go +++ b/cmd/metal-api/internal/datastore/machine.go @@ -58,6 +58,7 @@ type MachineSearchQuery struct { // state StateValue *string `json:"state_value" optional:"true"` + Waiting *bool `json:"waiting" optional:"true"` // ipmi IpmiAddress *string `json:"ipmi_address" optional:"true"` @@ -429,22 +430,6 @@ func (rs *RethinkStore) ListMachines() (metal.Machines, error) { return ms, err } -func (rs *RethinkStore) listMachinesInPartition(partitionid, sizeid string) (metal.Machines, error) { - q := *rs.machineTable() - q = q.Filter(map[string]interface{}{ - "partitionid": partitionid, - "sizeid": sizeid, - }) - - var machines metal.Machines - err := rs.searchEntities(&q, &machines) - if err != nil { - return nil, err - } - - return machines, nil -} - // CreateMachine creates a new machine in the database as "unallocated new machines". // If the given machine has an allocation, the function returns an error because // allocated machines cannot be created. 
If there is already a machine with the @@ -534,46 +519,3 @@ func (rs *RethinkStore) FindWaitingMachine(partitionid, sizeid string) (*metal.M } return &newMachine, nil } - -func (rs *RethinkStore) listWaitingMachines(partitionid, sizeid string) (metal.Machines, error) { - q := *rs.machineTable() - q = q.Filter(map[string]interface{}{ - "allocation": nil, - "partitionid": partitionid, - "sizeid": sizeid, - "state": map[string]string{ - "value": string(metal.AvailableState), - }, - "waiting": true, - "preallocated": false, - }) - - var machines metal.Machines - err := rs.searchEntities(&q, &machines) - if err != nil { - return nil, err - } - - return machines, nil -} - -func (rs *RethinkStore) listShutdownMachines(partitionid, sizeid string) (metal.Machines, error) { - q := *rs.machineTable() - q = q.Filter(map[string]interface{}{ - "allocation": nil, - "partitionid": partitionid, - "sizeid": sizeid, - "state": map[string]string{ - "value": string(metal.ShutdownState), - }, - "preallocated": false, - }) - - var machines metal.Machines - err := rs.searchEntities(&q, &machines) - if err != nil { - return nil, err - } - - return machines, nil -} diff --git a/cmd/metal-api/internal/datastore/poolsize.go b/cmd/metal-api/internal/datastore/poolsize.go new file mode 100644 index 000000000..68ae2e8c2 --- /dev/null +++ b/cmd/metal-api/internal/datastore/poolsize.go @@ -0,0 +1,185 @@ +package datastore + +import ( + "math" + "math/rand" + "strconv" + "strings" + "time" + + "github.com/metal-stack/metal-api/cmd/metal-api/internal/metal" + "github.com/metal-stack/metal-lib/bus" + "go.uber.org/zap" +) + +func (rs *RethinkStore) adjustNumberOfWaitingMachines(log *zap.SugaredLogger, publisher bus.Publisher, machine *metal.Machine) error { + p, err := rs.FindPartition(machine.PartitionID) + if err != nil { + return err + } + + stateValue := string(metal.AvailableState) + waiting := true + q := MachineSearchQuery{ + PartitionID: &p.ID, + SizeID: &machine.SizeID, + StateValue: &stateValue, + Waiting: &waiting, + } + + waitingMachines := metal.Machines{} + err = rs.SearchMachines(&q, &waitingMachines) + if err != nil { + return err + } + + poolSizeExcess := 0 + if strings.Contains(p.WaitingPoolSize, "%") { + poolSizeExcess = rs.waitingMachinesMatchPoolSize(len(waitingMachines), p.WaitingPoolSize, true, machine) + } else { + poolSizeExcess = rs.waitingMachinesMatchPoolSize(len(waitingMachines), p.WaitingPoolSize, false, machine) + } + + if poolSizeExcess == 0 { + return nil + } + + if poolSizeExcess > 0 { + log.Infow("shut down spare machines", "number", poolSizeExcess, "partition", p) + err := rs.shutdownMachines(log, publisher, waitingMachines, poolSizeExcess) + if err != nil { + return err + } + } else { + stateValue = string(metal.ShutdownState) + q = MachineSearchQuery{ + PartitionID: &p.ID, + SizeID: &machine.SizeID, + StateValue: &stateValue, + } + + shutdownMachines := metal.Machines{} + err = rs.SearchMachines(&q, &shutdownMachines) + if err != nil { + return err + } + + poolSizeExcess = int(math.Abs(float64(poolSizeExcess))) + if len(shutdownMachines) < poolSizeExcess { + log.Infow("not enough machines to meet waiting pool size; power on all remaining", "number", len(shutdownMachines), "partition", p) + err = rs.powerOnMachines(log, publisher, shutdownMachines, len(shutdownMachines)) + if err != nil { + return err + } + } + + log.Infow("power on missing machines", "number", poolSizeExcess, "partition", p) + err = rs.powerOnMachines(log, publisher, shutdownMachines, poolSizeExcess) + if err != nil { + 
return err + } + } + + return nil +} + +func (rs *RethinkStore) waitingMachinesMatchPoolSize(actual int, required string, inPercent bool, m *metal.Machine) int { + if !inPercent { + r, err := strconv.ParseInt(required, 10, 64) + if err != nil { + return 0 + } + return actual - int(r) + } + + p := strings.Split(required, "%")[0] + r, err := strconv.ParseInt(p, 10, 64) + if err != nil { + return 0 + } + + q := MachineSearchQuery{ + PartitionID: &m.PartitionID, + SizeID: &m.SizeID, + } + + allMachines := metal.Machines{} + err = rs.SearchMachines(&q, &allMachines) + if err != nil { + return 0 + } + + return actual - int(math.Round(float64(len(allMachines))*float64(r)/100)) +} + +func randomIndices(n, k int) []int { + indices := make([]int, n) + for i := range indices { + indices[i] = i + } + + for i := 0; i < (n - k); i++ { + rand.Seed(time.Now().UnixNano()) + r := rand.Intn(len(indices)) + indices = append(indices[:r], indices[r+1:]...) + } + + return indices +} + +func (rs *RethinkStore) shutdownMachines(logger *zap.SugaredLogger, publisher bus.Publisher, machines metal.Machines, number int) error { + indices := randomIndices(len(machines), number) + for i := range indices { + state := metal.MachineState{ + Value: metal.ShutdownState, + Description: "shut down as exceeding maximum partition poolsize", + } + idx := indices[i] + + err := rs.machineCommand(logger, &machines[idx], publisher, metal.MachineOffCmd, state) + if err != nil { + return err + } + } + return nil +} + +func (rs *RethinkStore) powerOnMachines(logger *zap.SugaredLogger, publisher bus.Publisher, machines metal.Machines, number int) error { + indices := randomIndices(len(machines), number) + for i := range indices { + state := metal.MachineState{Value: metal.AvailableState} + idx := indices[i] + + err := rs.machineCommand(logger, &machines[idx], publisher, metal.MachineOnCmd, state) + if err != nil { + return err + } + } + return nil +} + +func (rs *RethinkStore) machineCommand(logger *zap.SugaredLogger, m *metal.Machine, publisher bus.Publisher, cmd metal.MachineCommand, state metal.MachineState) error { + evt := metal.MachineEvent{ + Type: metal.COMMAND, + Cmd: &metal.MachineExecCommand{ + Command: cmd, + TargetMachineID: m.ID, + IPMI: &m.IPMI, + }, + } + + newMachine := *m + newMachine.State = state + err := rs.UpdateMachine(m, &newMachine) + if err != nil { + return err + } + + logger.Infow("publish event", "event", evt, "command", *evt.Cmd) + err = publisher.Publish(metal.TopicMachine.GetFQN(m.PartitionID), evt) + if err != nil { + return err + } + + return nil +} diff --git a/cmd/metal-api/internal/fsm/states/states.go b/cmd/metal-api/internal/fsm/states/states.go index b600be84a..90c66b066 100644 --- a/cmd/metal-api/internal/fsm/states/states.go +++ b/cmd/metal-api/internal/fsm/states/states.go @@ -5,6 +5,7 @@ import ( "github.com/looplab/fsm" "github.com/metal-stack/metal-api/cmd/metal-api/internal/metal" + "github.com/metal-stack/metal-lib/bus" "go.uber.org/zap" ) @@ -23,32 +24,23 @@ const ( MachineReclaim stateType = "State Machine Reclaim" ) -const ( - FSMMessageEmpty FSMMessage = "" - FSMMessageEnterWaitingState FSMMessage = "Enter Waiting State" - FSMMessageLeaveWaitingState FSMMessage = "Leave Waiting State" -) - type FSMState interface { OnTransition(e *fsm.Event) } type stateType string -type FSMMessage string func (t stateType) String() string { return string(t) } -func (m FSMMessage) String() string { - return string(m) -} - type StateConfig struct { - Log *zap.SugaredLogger - Container 
*metal.ProvisioningEventContainer - Event *metal.ProvisioningEvent - Message FSMMessage + Log *zap.SugaredLogger + Container *metal.ProvisioningEventContainer + Event *metal.ProvisioningEvent + AdjustWaitingMachines func(log *zap.SugaredLogger, publisher bus.Publisher, machine *metal.Machine) error + Publisher bus.Publisher + Machine *metal.Machine } func (c *StateConfig) Validate() error { diff --git a/cmd/metal-api/internal/fsm/states/waiting.go b/cmd/metal-api/internal/fsm/states/waiting.go index 0e3b83c09..f5741478e 100644 --- a/cmd/metal-api/internal/fsm/states/waiting.go +++ b/cmd/metal-api/internal/fsm/states/waiting.go @@ -2,28 +2,28 @@ package states import ( "github.com/looplab/fsm" - "github.com/metal-stack/metal-api/cmd/metal-api/internal/metal" ) type WaitingState struct { - container *metal.ProvisioningEventContainer - event *metal.ProvisioningEvent - config *StateConfig + config *StateConfig } func newWaiting(c *StateConfig) *WaitingState { return &WaitingState{ - container: c.Container, - event: c.Event, - config: c, + config: c, } } func (p *WaitingState) OnTransition(e *fsm.Event) { - appendEventToContainer(p.event, p.container) - p.config.Message = FSMMessageEnterWaitingState + appendEventToContainer(p.config.Event, p.config.Container) + + if p.config.AdjustWaitingMachines != nil { + e.Err = p.config.AdjustWaitingMachines(p.config.Log, p.config.Publisher, p.config.Machine) + } } func (p *WaitingState) OnLeave(e *fsm.Event) { - p.config.Message = FSMMessageLeaveWaitingState + if p.config.AdjustWaitingMachines != nil { + e.Err = p.config.AdjustWaitingMachines(p.config.Log, p.config.Publisher, p.config.Machine) + } } diff --git a/cmd/metal-api/internal/service/machine-service.go b/cmd/metal-api/internal/service/machine-service.go index d909c2cf1..ea584cab5 100644 --- a/cmd/metal-api/internal/service/machine-service.go +++ b/cmd/metal-api/internal/service/machine-service.go @@ -1820,7 +1820,11 @@ func ResurrectMachines(ds *datastore.RethinkStore, publisher bus.Publisher, ep * continue } - if provisioningEvents.Liveliness == metal.MachineLivelinessDead && time.Since(*provisioningEvents.LastEventTime) > metal.MachineResurrectAfter && m.State.Value != metal.ShutdownState { + if m.State.Value == metal.ShutdownState { + continue + } + + if provisioningEvents.Liveliness == metal.MachineLivelinessDead && time.Since(*provisioningEvents.LastEventTime) > metal.MachineResurrectAfter { logger.Infow("resurrecting dead machine", "machineID", m.ID, "liveliness", provisioningEvents.Liveliness, "since", time.Since(*provisioningEvents.LastEventTime).String()) err = act.freeMachine(publisher, &m, headscaleClient, logger) if err != nil { diff --git a/cmd/metal-api/internal/service/partition-service.go b/cmd/metal-api/internal/service/partition-service.go index f9d60e85d..4bc5a797a 100644 --- a/cmd/metal-api/internal/service/partition-service.go +++ b/cmd/metal-api/internal/service/partition-service.go @@ -3,6 +3,8 @@ package service import ( "errors" "net/http" + "strconv" + "strings" "github.com/metal-stack/metal-api/cmd/metal-api/internal/datastore" "github.com/metal-stack/metal-api/cmd/metal-api/internal/metal" @@ -207,6 +209,17 @@ func (r *partitionResource) createPartition(request *restful.Request, response * commandLine = *requestPayload.PartitionBootConfiguration.CommandLine } + var waitingpoolsize string + if requestPayload.PartitionWaitingPoolSize != nil { + waitingpoolsize = *requestPayload.PartitionWaitingPoolSize + } + + err = validateWaitingPoolSize(waitingpoolsize) + if err != nil 
{ + r.sendError(request, response, httperrors.BadRequest(err)) + return + } + p := &metal.Partition{ Base: metal.Base{ ID: requestPayload.ID, @@ -220,6 +233,7 @@ func (r *partitionResource) createPartition(request *restful.Request, response * KernelURL: kernelURL, CommandLine: commandLine, }, + WaitingPoolSize: waitingpoolsize, } fqn := metal.TopicMachine.GetFQN(p.GetID()) @@ -302,6 +316,15 @@ func (r *partitionResource) updatePartition(request *restful.Request, response * newPartition.BootConfiguration.CommandLine = *requestPayload.PartitionBootConfiguration.CommandLine } + if requestPayload.PartitionWaitingPoolSize != nil { + err = validateWaitingPoolSize(*requestPayload.PartitionWaitingPoolSize) + if err != nil { + r.sendError(request, response, httperrors.BadRequest(err)) + return + } + newPartition.WaitingPoolSize = *requestPayload.PartitionWaitingPoolSize + } + err = r.ds.UpdatePartition(oldPartition, &newPartition) if err != nil { r.sendError(request, response, httperrors.BadRequest(err)) @@ -447,3 +470,32 @@ func (r *partitionResource) calcPartitionCapacity(pcr *v1.PartitionCapacityReque return partitionCapacities, err } + +func validateWaitingPoolSize(input string) error { + if strings.Contains(input, "%") { + tokens := strings.Split(input, "%") + if len(tokens) != 1 { + return errors.New("waiting pool size must be a positive integer or a positive integer value in percent, e.g. 15%") + } + + p, err := strconv.ParseInt(tokens[0], 10, 64) + if err != nil { + return errors.New("could not parse waiting pool size") + } + + if p < 0 || p > 100 { + return errors.New("waiting pool size must be between 0% and 100%") + } + } else { + p, err := strconv.ParseInt(input, 10, 64) + if err != nil { + return errors.New("could not parse waiting pool size") + } + + if p < 0 { + return errors.New("waiting pool size must be a positive integer or a positive integer value in percent, e.g. 
15%") + } + } + + return nil +} diff --git a/cmd/metal-api/internal/service/v1/partition.go b/cmd/metal-api/internal/service/v1/partition.go index 49f1bbff5..7ca3535f6 100644 --- a/cmd/metal-api/internal/service/v1/partition.go +++ b/cmd/metal-api/internal/service/v1/partition.go @@ -19,12 +19,14 @@ type PartitionCreateRequest struct { Common PartitionBase PartitionBootConfiguration PartitionBootConfiguration `json:"bootconfig" description:"the boot configuration of this partition"` + PartitionWaitingPoolSize *string `json:"waitingpoolsize" description:"the waiting pool size of this partition` } type PartitionUpdateRequest struct { Common MgmtServiceAddress *string `json:"mgmtserviceaddress" description:"the address to the management service of this partition" optional:"true"` PartitionBootConfiguration *PartitionBootConfiguration `json:"bootconfig" description:"the boot configuration of this partition" optional:"true"` + PartitionWaitingPoolSize *string `json:"waitingpoolsize" description:"the waiting pool size of this partition` } type PartitionResponse struct { From 4e72c2ac97a218a77123bb45340e020559d5b3ce Mon Sep 17 00:00:00 2001 From: iljarotar Date: Thu, 24 Nov 2022 12:13:55 +0100 Subject: [PATCH 03/45] pool scaler package --- cmd/metal-api/internal/datastore/poolsize.go | 197 ++++++------------ cmd/metal-api/internal/eventbus/nsq.go | 19 ++ cmd/metal-api/internal/scaler/poolscaler.go | 149 +++++++++++++ .../internal/service/machine-service.go | 26 +-- 4 files changed, 240 insertions(+), 151 deletions(-) create mode 100644 cmd/metal-api/internal/scaler/poolscaler.go diff --git a/cmd/metal-api/internal/datastore/poolsize.go b/cmd/metal-api/internal/datastore/poolsize.go index 68ae2e8c2..399750a8b 100644 --- a/cmd/metal-api/internal/datastore/poolsize.go +++ b/cmd/metal-api/internal/datastore/poolsize.go @@ -1,173 +1,93 @@ package datastore import ( - "math" - "math/rand" - "strconv" - "strings" - "time" - + e "github.com/metal-stack/metal-api/cmd/metal-api/internal/eventbus" "github.com/metal-stack/metal-api/cmd/metal-api/internal/metal" + s "github.com/metal-stack/metal-api/cmd/metal-api/internal/scaler" "github.com/metal-stack/metal-lib/bus" "go.uber.org/zap" ) -func (rs *RethinkStore) adjustNumberOfWaitingMachines(log *zap.SugaredLogger, publisher bus.Publisher, machine *metal.Machine) error { - p, err := rs.FindPartition(machine.PartitionID) +type Scaler struct { + rs *RethinkStore + publisher bus.Publisher +} + +func (s *Scaler) AllMachines(partitionid, sizeid string) (metal.Machines, error) { + q := MachineSearchQuery{ + PartitionID: &partitionid, + SizeID: &sizeid, + } + + allMachines := metal.Machines{} + err := s.rs.SearchMachines(&q, &allMachines) if err != nil { - return err + return nil, err } + return allMachines, nil +} + +func (s *Scaler) WaitingMachines(partitionid, sizeid string) (machines metal.Machines, err error) { stateValue := string(metal.AvailableState) waiting := true q := MachineSearchQuery{ - PartitionID: &p.ID, - SizeID: &machine.SizeID, + PartitionID: &partitionid, + SizeID: &sizeid, StateValue: &stateValue, Waiting: &waiting, } waitingMachines := metal.Machines{} - err = rs.SearchMachines(&q, &waitingMachines) + err = s.rs.SearchMachines(&q, &waitingMachines) if err != nil { - return err + return nil, err } - poolSizeExcess := 0 - if strings.Contains(p.WaitingPoolSize, "%") { - poolSizeExcess = rs.waitingMachinesMatchPoolSize(len(waitingMachines), p.WaitingPoolSize, true, machine) - } else { - poolSizeExcess = 
rs.waitingMachinesMatchPoolSize(len(waitingMachines), p.WaitingPoolSize, false, machine) - } - - if poolSizeExcess == 0 { - return nil - } - - if poolSizeExcess > 0 { - log.Infow("shut down spare machines", "number", poolSizeExcess, "partition", p) - err := rs.shutdownMachines(log, publisher, waitingMachines, poolSizeExcess) - if err != nil { - return err - } - } else { - stateValue = string(metal.ShutdownState) - q = MachineSearchQuery{ - PartitionID: &p.ID, - SizeID: &machine.SizeID, - StateValue: &stateValue, - } - - shutdownMachines := metal.Machines{} - err = rs.SearchMachines(&q, &shutdownMachines) - if err != nil { - return err - } - - poolSizeExcess = int(math.Abs(float64(poolSizeExcess))) - if len(shutdownMachines) < poolSizeExcess { - log.Infow("not enough machines to meet waiting pool size; power on all remaining", "number", len(shutdownMachines), "partition", p) - err = rs.powerOnMachines(log, publisher, shutdownMachines, len(shutdownMachines)) - if err != nil { - return err - } - } - - log.Infow("power on missing machines", "number", poolSizeExcess, "partition", p) - err = rs.powerOnMachines(log, publisher, shutdownMachines, poolSizeExcess) - if err != nil { - return err - } - } - - return nil + return waitingMachines, nil } -func (rs *RethinkStore) waitingMachinesMatchPoolSize(actual int, required string, inPercent bool, m *metal.Machine) int { - if !inPercent { - r, err := strconv.ParseInt(required, 10, 64) - if err != nil { - return 0 - } - return actual - int(r) - } - - p := strings.Split(required, "%")[0] - r, err := strconv.ParseInt(p, 10, 64) - if err != nil { - return 0 - } - +func (s *Scaler) ShutdownMachines(partitionid, sizeid string) (metal.Machines, error) { + stateValue := string(metal.ShutdownState) q := MachineSearchQuery{ - PartitionID: &m.PartitionID, - SizeID: &m.SizeID, + PartitionID: &partitionid, + SizeID: &sizeid, + StateValue: &stateValue, } - allMachines := metal.Machines{} - err = rs.SearchMachines(&q, &allMachines) + shutdownMachines := metal.Machines{} + err := s.rs.SearchMachines(&q, &shutdownMachines) if err != nil { - return 0 + return nil, err } - return actual - int(math.Round(float64(len(allMachines))*float64(r)/100)) + return shutdownMachines, nil } -func randomIndices(n, k int) []int { - indices := make([]int, n) - for i := range indices { - indices[i] = i - } - - for i := 0; i < (n - k); i++ { - rand.Seed(time.Now().UnixNano()) - r := rand.Intn(len(indices)) - indices = append(indices[:r], indices[r+1:]...) 
+func (s *Scaler) Shutdown(m *metal.Machine) error { + state := metal.MachineState{ + Value: metal.ShutdownState, + Description: "shut down as exceeding maximum partition poolsize", } - return indices -} - -func (rs *RethinkStore) shutdownMachines(logger *zap.SugaredLogger, publisher bus.Publisher, machines metal.Machines, number int) error { - indices := randomIndices(len(machines), number) - for i := range indices { - state := metal.MachineState{ - Value: metal.ShutdownState, - Description: "shut down as exceeding maximum partition poolsize", - } - idx := indices[i] - - err := rs.machineCommand(logger, &machines[idx], publisher, metal.MachineOffCmd, state) - if err != nil { - return err - } + err := s.rs.publishCommandAndUpdate(s.rs.log, m, s.publisher, metal.MachineOnCmd, state) + if err != nil { + return err } return nil } -func (rs *RethinkStore) powerOnMachines(logger *zap.SugaredLogger, publisher bus.Publisher, machines metal.Machines, number int) error { - indices := randomIndices(len(machines), number) - for i := range indices { - state := metal.MachineState{Value: metal.AvailableState} - idx := indices[i] +func (s *Scaler) PowerOn(m *metal.Machine) error { + state := metal.MachineState{Value: metal.AvailableState} - err := rs.machineCommand(logger, &machines[idx], publisher, metal.MachineOnCmd, state) - if err != nil { - return err - } + err := s.rs.publishCommandAndUpdate(s.rs.log, m, s.publisher, metal.MachineOnCmd, state) + if err != nil { + return err } return nil } -func (rs *RethinkStore) machineCommand(logger *zap.SugaredLogger, m *metal.Machine, publisher bus.Publisher, cmd metal.MachineCommand, state metal.MachineState) error { - evt := metal.MachineEvent{ - Type: metal.COMMAND, - Cmd: &metal.MachineExecCommand{ - Command: cmd, - TargetMachineID: m.ID, - IPMI: &m.IPMI, - }, - } - +func (rs *RethinkStore) publishCommandAndUpdate(logger *zap.SugaredLogger, m *metal.Machine, publisher bus.Publisher, cmd metal.MachineCommand, state metal.MachineState) error { newMachine := *m newMachine.State = state err := rs.UpdateMachine(m, &newMachine) @@ -175,8 +95,27 @@ func (rs *RethinkStore) machineCommand(logger *zap.SugaredLogger, m *metal.Machi return err } - logger.Infow("publish event", "event", evt, "command", *evt.Cmd) - err = publisher.Publish(metal.TopicMachine.GetFQN(m.PartitionID), evt) + err = e.PublishMachineCmd(logger, m, publisher, metal.MachineOnCmd) + if err != nil { + return err + } + + return nil +} + +func (rs *RethinkStore) adjustNumberOfWaitingMachines(log *zap.SugaredLogger, publisher bus.Publisher, machine *metal.Machine) error { + p, err := rs.FindPartition(machine.PartitionID) + if err != nil { + return err + } + + manager := &Scaler{ + rs: rs, + publisher: publisher, + } + scaler := s.NewPoolScaler(log, manager) + + err = scaler.AdjustNumberOfWaitingMachines(machine, p) if err != nil { return err } diff --git a/cmd/metal-api/internal/eventbus/nsq.go b/cmd/metal-api/internal/eventbus/nsq.go index a3ebcf5ea..e4c17c0c6 100644 --- a/cmd/metal-api/internal/eventbus/nsq.go +++ b/cmd/metal-api/internal/eventbus/nsq.go @@ -109,3 +109,22 @@ func (n NSQClient) createTopics(partitions metal.Partitions, topics []metal.NSQT func (n NSQClient) delay() { time.Sleep(nsqdRetryDelay) } + +func PublishMachineCmd(logger *zap.SugaredLogger, m *metal.Machine, publisher bus.Publisher, cmd metal.MachineCommand) error { + evt := metal.MachineEvent{ + Type: metal.COMMAND, + Cmd: &metal.MachineExecCommand{ + Command: cmd, + TargetMachineID: m.ID, + IPMI: &m.IPMI, + }, + } + + 
logger.Infow("publish event", "event", evt, "command", *evt.Cmd) + err := publisher.Publish(metal.TopicMachine.GetFQN(m.PartitionID), evt) + if err != nil { + return err + } + + return nil +} diff --git a/cmd/metal-api/internal/scaler/poolscaler.go b/cmd/metal-api/internal/scaler/poolscaler.go new file mode 100644 index 000000000..daa56476a --- /dev/null +++ b/cmd/metal-api/internal/scaler/poolscaler.go @@ -0,0 +1,149 @@ +package scaler + +import ( + "math" + "math/rand" + "strconv" + "strings" + "time" + + "github.com/metal-stack/metal-api/cmd/metal-api/internal/metal" + "go.uber.org/zap" +) + +type ( + PoolScaler struct { + log *zap.SugaredLogger + manager MachineManager + } + PoolScalerConfig struct { + Log *zap.SugaredLogger + Manager MachineManager + } + MachineManager interface { + AllMachines(partitionid, sizeid string) (metal.Machines, error) + WaitingMachines(partitionid, sizeid string) (machines metal.Machines, err error) + ShutdownMachines(partitionid, sizeid string) (metal.Machines, error) + + Shutdown(m *metal.Machine) error + PowerOn(m *metal.Machine) error + } +) + +func NewPoolScaler(log *zap.SugaredLogger, manager MachineManager) *PoolScaler { + return &PoolScaler{ + log: log, + manager: manager, + } +} + +// AdjustNumberOfWaitingMachines compares the number of waiting machines to the required pool size of the partition and shuts down or powers on machines accordingly +func (p *PoolScaler) AdjustNumberOfWaitingMachines(m *metal.Machine, partition *metal.Partition) error { + waitingMachines, err := p.manager.WaitingMachines(partition.ID, m.SizeID) + if err != nil { + return err + } + + poolSizeExcess := 0 + if strings.Contains(partition.WaitingPoolSize, "%") { + poolSizeExcess = p.waitingMachinesMatchPoolSize(len(waitingMachines), partition.WaitingPoolSize, true, partition.ID, m.SizeID) + } else { + poolSizeExcess = p.waitingMachinesMatchPoolSize(len(waitingMachines), partition.WaitingPoolSize, false, partition.ID, m.SizeID) + } + + if poolSizeExcess == 0 { + return nil + } + + if poolSizeExcess > 0 { + p.log.Infow("shut down spare machines", "number", poolSizeExcess, "partition", p) + err := p.shutdownMachines(waitingMachines, poolSizeExcess) + if err != nil { + return err + } + } else { + shutdownMachines, err := p.manager.ShutdownMachines(partition.ID, m.SizeID) + if err != nil { + return err + } + + poolSizeExcess = int(math.Abs(float64(poolSizeExcess))) + if len(shutdownMachines) < poolSizeExcess { + p.log.Infow("not enough machines to meet waiting pool size; power on all remaining", "number", len(shutdownMachines), "partition", p) + err = p.powerOnMachines(shutdownMachines, len(shutdownMachines)) + if err != nil { + return err + } + } + + p.log.Infow("power on missing machines", "number", poolSizeExcess, "partition", p) + err = p.powerOnMachines(shutdownMachines, poolSizeExcess) + if err != nil { + return err + } + } + + return nil +} + +func (p *PoolScaler) waitingMachinesMatchPoolSize(actual int, required string, inPercent bool, parititionid, sizeid string) int { + if !inPercent { + r, err := strconv.ParseInt(required, 10, 64) + if err != nil { + return 0 + } + return actual - int(r) + } + + percent := strings.Split(required, "%")[0] + r, err := strconv.ParseInt(percent, 10, 64) + if err != nil { + return 0 + } + + allMachines, err := p.manager.AllMachines(parititionid, sizeid) + if err != nil { + return 0 + } + + return actual - int(math.Round(float64(len(allMachines))*float64(r)/100)) +} + +func randomIndices(n, k int) []int { + indices := make([]int, n) + for i 
:= range indices { + indices[i] = i + } + + for i := 0; i < (n - k); i++ { + rand.Seed(time.Now().UnixNano()) + r := rand.Intn(len(indices)) + indices = append(indices[:r], indices[r+1:]...) + } + + return indices +} + +func (p *PoolScaler) shutdownMachines(machines metal.Machines, number int) error { + indices := randomIndices(len(machines), number) + for i := range indices { + idx := indices[i] + err := p.manager.Shutdown(&machines[idx]) + if err != nil { + return err + } + } + return nil +} + +func (p *PoolScaler) powerOnMachines(machines metal.Machines, number int) error { + indices := randomIndices(len(machines), number) + for i := range indices { + idx := indices[i] + err := p.manager.PowerOn(&machines[idx]) + if err != nil { + return err + } + } + return nil +} diff --git a/cmd/metal-api/internal/service/machine-service.go b/cmd/metal-api/internal/service/machine-service.go index ea584cab5..223cf0d1f 100644 --- a/cmd/metal-api/internal/service/machine-service.go +++ b/cmd/metal-api/internal/service/machine-service.go @@ -29,6 +29,7 @@ import ( mdm "github.com/metal-stack/masterdata-api/pkg/client" "github.com/metal-stack/metal-api/cmd/metal-api/internal/datastore" + e "github.com/metal-stack/metal-api/cmd/metal-api/internal/eventbus" "github.com/metal-stack/metal-api/cmd/metal-api/internal/ipam" "github.com/metal-stack/metal-api/cmd/metal-api/internal/metal" v1 "github.com/metal-stack/metal-api/cmd/metal-api/internal/service/v1" @@ -1505,7 +1506,7 @@ func (r machineResource) freeMachine(request *restful.Request, response *restful logger := r.logger(request) - err = publishMachineCmd(logger, m, r.Publisher, metal.ChassisIdentifyLEDOffCmd) + err = e.PublishMachineCmd(logger, m, r.Publisher, metal.ChassisIdentifyLEDOffCmd) if err != nil { logger.Error("unable to publish machine command", zap.String("command", string(metal.ChassisIdentifyLEDOffCmd)), zap.String("machineID", m.ID), zap.Error(err)) } @@ -1678,7 +1679,7 @@ func (r *machineResource) reinstallMachine(request *restful.Request, response *r return } - err = publishMachineCmd(logger, m, r.Publisher, metal.MachineReinstallCmd) + err = e.PublishMachineCmd(logger, m, r.Publisher, metal.MachineReinstallCmd) if err != nil { logger.Error("unable to publish machine command", zap.String("command", string(metal.MachineReinstallCmd)), zap.String("machineID", m.ID), zap.Error(err)) } @@ -2023,7 +2024,7 @@ func (r *machineResource) machineCmd(cmd metal.MachineCommand, request *restful. } } - err = publishMachineCmd(logger, newMachine, r.Publisher, cmd) + err = e.PublishMachineCmd(logger, newMachine, r.Publisher, cmd) if err != nil { r.sendError(request, response, defaultError(err)) return @@ -2038,25 +2039,6 @@ func (r *machineResource) machineCmd(cmd metal.MachineCommand, request *restful. 
r.send(request, response, http.StatusOK, resp) } -func publishMachineCmd(logger *zap.SugaredLogger, m *metal.Machine, publisher bus.Publisher, cmd metal.MachineCommand) error { - evt := metal.MachineEvent{ - Type: metal.COMMAND, - Cmd: &metal.MachineExecCommand{ - Command: cmd, - TargetMachineID: m.ID, - IPMI: &m.IPMI, - }, - } - - logger.Infow("publish event", "event", evt, "command", *evt.Cmd) - err := publisher.Publish(metal.TopicMachine.GetFQN(m.PartitionID), evt) - if err != nil { - return err - } - - return nil -} - func machineHasIssues(m *v1.MachineResponse) bool { if m.Partition == nil { return true From b7fceb390b0a6e582d67f2d40369dc58035f48ea Mon Sep 17 00:00:00 2001 From: iljarotar Date: Thu, 24 Nov 2022 17:09:18 +0100 Subject: [PATCH 04/45] test adjust number of waiting machines function --- Makefile | 4 + cmd/metal-api/internal/datastore/event.go | 24 +- cmd/metal-api/internal/datastore/poolsize.go | 61 ++--- cmd/metal-api/internal/fsm/states/states.go | 14 +- cmd/metal-api/internal/fsm/states/waiting.go | 8 +- .../internal/scaler/pool_scaler_mock_test.go | 125 ++++++++++ cmd/metal-api/internal/scaler/poolscaler.go | 15 +- .../internal/scaler/poolscaler_test.go | 215 ++++++++++++++++++ 8 files changed, 402 insertions(+), 64 deletions(-) create mode 100644 cmd/metal-api/internal/scaler/pool_scaler_mock_test.go create mode 100644 cmd/metal-api/internal/scaler/poolscaler_test.go diff --git a/Makefile b/Makefile index 6fefd0526..2af14b7a1 100644 --- a/Makefile +++ b/Makefile @@ -43,3 +43,7 @@ visualize-fsm: cd cmd/metal-api/internal/tools/visualize_fsm go run main.go dot -Tsvg fsm.dot > fsm.svg + +.PHONY: mocks +mocks: + docker run --user $$(id -u):$$(id -g) --rm -w /work -v ${PWD}:/work vektra/mockery:v2.14.0 --name MachineManager --dir /work/cmd/metal-api/internal/scaler --output /work/cmd/metal-api/internal/scaler --filename pool_scaler_mock_test.go --testonly --inpackage \ No newline at end of file diff --git a/cmd/metal-api/internal/datastore/event.go b/cmd/metal-api/internal/datastore/event.go index 8d1ce3b1e..088b8b938 100644 --- a/cmd/metal-api/internal/datastore/event.go +++ b/cmd/metal-api/internal/datastore/event.go @@ -4,6 +4,7 @@ import ( "github.com/metal-stack/metal-api/cmd/metal-api/internal/fsm" "github.com/metal-stack/metal-api/cmd/metal-api/internal/fsm/states" "github.com/metal-stack/metal-api/cmd/metal-api/internal/metal" + s "github.com/metal-stack/metal-api/cmd/metal-api/internal/scaler" "github.com/metal-stack/metal-lib/bus" "go.uber.org/zap" ) @@ -54,13 +55,24 @@ func (rs *RethinkStore) ProvisioningEventForMachine(log *zap.SugaredLogger, publ } } + p, err := rs.FindPartition(machine.PartitionID) + if err != nil { + return nil, err + } + + manager := &manager{ + rs: rs, + publisher: publisher, + } + scaler := s.NewPoolScaler(log, manager) + config := states.StateConfig{ - Log: log, - Container: ec, - Event: event, - AdjustWaitingMachines: rs.adjustNumberOfWaitingMachines, - Publisher: publisher, - Machine: machine, + Log: log, + Container: ec, + Event: event, + Scaler: scaler, + Partition: p, + Machine: machine, } newEC, err := fsm.HandleProvisioningEvent(&config) diff --git a/cmd/metal-api/internal/datastore/poolsize.go b/cmd/metal-api/internal/datastore/poolsize.go index 399750a8b..a6bda2f74 100644 --- a/cmd/metal-api/internal/datastore/poolsize.go +++ b/cmd/metal-api/internal/datastore/poolsize.go @@ -3,24 +3,25 @@ package datastore import ( e "github.com/metal-stack/metal-api/cmd/metal-api/internal/eventbus" 
"github.com/metal-stack/metal-api/cmd/metal-api/internal/metal" - s "github.com/metal-stack/metal-api/cmd/metal-api/internal/scaler" "github.com/metal-stack/metal-lib/bus" "go.uber.org/zap" ) -type Scaler struct { - rs *RethinkStore - publisher bus.Publisher +type manager struct { + rs *RethinkStore + publisher bus.Publisher + partitionid string + sizeid string } -func (s *Scaler) AllMachines(partitionid, sizeid string) (metal.Machines, error) { +func (m *manager) AllMachines() (metal.Machines, error) { q := MachineSearchQuery{ - PartitionID: &partitionid, - SizeID: &sizeid, + PartitionID: &m.partitionid, + SizeID: &m.sizeid, } allMachines := metal.Machines{} - err := s.rs.SearchMachines(&q, &allMachines) + err := m.rs.SearchMachines(&q, &allMachines) if err != nil { return nil, err } @@ -28,18 +29,18 @@ func (s *Scaler) AllMachines(partitionid, sizeid string) (metal.Machines, error) return allMachines, nil } -func (s *Scaler) WaitingMachines(partitionid, sizeid string) (machines metal.Machines, err error) { +func (m *manager) WaitingMachines() (metal.Machines, error) { stateValue := string(metal.AvailableState) waiting := true q := MachineSearchQuery{ - PartitionID: &partitionid, - SizeID: &sizeid, + PartitionID: &m.partitionid, + SizeID: &m.sizeid, StateValue: &stateValue, Waiting: &waiting, } waitingMachines := metal.Machines{} - err = s.rs.SearchMachines(&q, &waitingMachines) + err := m.rs.SearchMachines(&q, &waitingMachines) if err != nil { return nil, err } @@ -47,16 +48,16 @@ func (s *Scaler) WaitingMachines(partitionid, sizeid string) (machines metal.Mac return waitingMachines, nil } -func (s *Scaler) ShutdownMachines(partitionid, sizeid string) (metal.Machines, error) { +func (m *manager) ShutdownMachines() (metal.Machines, error) { stateValue := string(metal.ShutdownState) q := MachineSearchQuery{ - PartitionID: &partitionid, - SizeID: &sizeid, + PartitionID: &m.partitionid, + SizeID: &m.sizeid, StateValue: &stateValue, } shutdownMachines := metal.Machines{} - err := s.rs.SearchMachines(&q, &shutdownMachines) + err := m.rs.SearchMachines(&q, &shutdownMachines) if err != nil { return nil, err } @@ -64,23 +65,23 @@ func (s *Scaler) ShutdownMachines(partitionid, sizeid string) (metal.Machines, e return shutdownMachines, nil } -func (s *Scaler) Shutdown(m *metal.Machine) error { +func (m *manager) Shutdown(machine *metal.Machine) error { state := metal.MachineState{ Value: metal.ShutdownState, Description: "shut down as exceeding maximum partition poolsize", } - err := s.rs.publishCommandAndUpdate(s.rs.log, m, s.publisher, metal.MachineOnCmd, state) + err := m.rs.publishCommandAndUpdate(m.rs.log, machine, m.publisher, metal.MachineOnCmd, state) if err != nil { return err } return nil } -func (s *Scaler) PowerOn(m *metal.Machine) error { +func (m *manager) PowerOn(machine *metal.Machine) error { state := metal.MachineState{Value: metal.AvailableState} - err := s.rs.publishCommandAndUpdate(s.rs.log, m, s.publisher, metal.MachineOnCmd, state) + err := m.rs.publishCommandAndUpdate(m.rs.log, machine, m.publisher, metal.MachineOnCmd, state) if err != nil { return err } @@ -102,23 +103,3 @@ func (rs *RethinkStore) publishCommandAndUpdate(logger *zap.SugaredLogger, m *me return nil } - -func (rs *RethinkStore) adjustNumberOfWaitingMachines(log *zap.SugaredLogger, publisher bus.Publisher, machine *metal.Machine) error { - p, err := rs.FindPartition(machine.PartitionID) - if err != nil { - return err - } - - manager := &Scaler{ - rs: rs, - publisher: publisher, - } - scaler := 
s.NewPoolScaler(log, manager) - - err = scaler.AdjustNumberOfWaitingMachines(machine, p) - if err != nil { - return err - } - - return nil -} diff --git a/cmd/metal-api/internal/fsm/states/states.go b/cmd/metal-api/internal/fsm/states/states.go index 90c66b066..0ff72dd95 100644 --- a/cmd/metal-api/internal/fsm/states/states.go +++ b/cmd/metal-api/internal/fsm/states/states.go @@ -5,7 +5,7 @@ import ( "github.com/looplab/fsm" "github.com/metal-stack/metal-api/cmd/metal-api/internal/metal" - "github.com/metal-stack/metal-lib/bus" + "github.com/metal-stack/metal-api/cmd/metal-api/internal/scaler" "go.uber.org/zap" ) @@ -35,12 +35,12 @@ func (t stateType) String() string { } type StateConfig struct { - Log *zap.SugaredLogger - Container *metal.ProvisioningEventContainer - Event *metal.ProvisioningEvent - AdjustWaitingMachines func(log *zap.SugaredLogger, publisher bus.Publisher, machine *metal.Machine) error - Publisher bus.Publisher - Machine *metal.Machine + Log *zap.SugaredLogger + Container *metal.ProvisioningEventContainer + Event *metal.ProvisioningEvent + Scaler *scaler.PoolScaler + Machine *metal.Machine + Partition *metal.Partition } func (c *StateConfig) Validate() error { diff --git a/cmd/metal-api/internal/fsm/states/waiting.go b/cmd/metal-api/internal/fsm/states/waiting.go index f5741478e..69a462177 100644 --- a/cmd/metal-api/internal/fsm/states/waiting.go +++ b/cmd/metal-api/internal/fsm/states/waiting.go @@ -17,13 +17,13 @@ func newWaiting(c *StateConfig) *WaitingState { func (p *WaitingState) OnTransition(e *fsm.Event) { appendEventToContainer(p.config.Event, p.config.Container) - if p.config.AdjustWaitingMachines != nil { - e.Err = p.config.AdjustWaitingMachines(p.config.Log, p.config.Publisher, p.config.Machine) + if p.config.Scaler != nil { + e.Err = p.config.Scaler.AdjustNumberOfWaitingMachines(p.config.Machine, p.config.Partition) } } func (p *WaitingState) OnLeave(e *fsm.Event) { - if p.config.AdjustWaitingMachines != nil { - e.Err = p.config.AdjustWaitingMachines(p.config.Log, p.config.Publisher, p.config.Machine) + if p.config.Scaler != nil { + e.Err = p.config.Scaler.AdjustNumberOfWaitingMachines(p.config.Machine, p.config.Partition) } } diff --git a/cmd/metal-api/internal/scaler/pool_scaler_mock_test.go b/cmd/metal-api/internal/scaler/pool_scaler_mock_test.go new file mode 100644 index 000000000..9e11a7c0a --- /dev/null +++ b/cmd/metal-api/internal/scaler/pool_scaler_mock_test.go @@ -0,0 +1,125 @@ +// Code generated by mockery v2.14.0. DO NOT EDIT. 
+ +package scaler + +import ( + metal "github.com/metal-stack/metal-api/cmd/metal-api/internal/metal" + mock "github.com/stretchr/testify/mock" +) + +// MockMachineManager is an autogenerated mock type for the MachineManager type +type MockMachineManager struct { + mock.Mock +} + +// AllMachines provides a mock function with given fields: +func (_m *MockMachineManager) AllMachines() (metal.Machines, error) { + ret := _m.Called() + + var r0 metal.Machines + if rf, ok := ret.Get(0).(func() metal.Machines); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(metal.Machines) + } + } + + var r1 error + if rf, ok := ret.Get(1).(func() error); ok { + r1 = rf() + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// PowerOn provides a mock function with given fields: m +func (_m *MockMachineManager) PowerOn(m *metal.Machine) error { + ret := _m.Called(m) + + var r0 error + if rf, ok := ret.Get(0).(func(*metal.Machine) error); ok { + r0 = rf(m) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// Shutdown provides a mock function with given fields: m +func (_m *MockMachineManager) Shutdown(m *metal.Machine) error { + ret := _m.Called(m) + + var r0 error + if rf, ok := ret.Get(0).(func(*metal.Machine) error); ok { + r0 = rf(m) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// ShutdownMachines provides a mock function with given fields: +func (_m *MockMachineManager) ShutdownMachines() (metal.Machines, error) { + ret := _m.Called() + + var r0 metal.Machines + if rf, ok := ret.Get(0).(func() metal.Machines); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(metal.Machines) + } + } + + var r1 error + if rf, ok := ret.Get(1).(func() error); ok { + r1 = rf() + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// WaitingMachines provides a mock function with given fields: +func (_m *MockMachineManager) WaitingMachines() (metal.Machines, error) { + ret := _m.Called() + + var r0 metal.Machines + if rf, ok := ret.Get(0).(func() metal.Machines); ok { + r0 = rf() + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(metal.Machines) + } + } + + var r1 error + if rf, ok := ret.Get(1).(func() error); ok { + r1 = rf() + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +type mockConstructorTestingTNewMockMachineManager interface { + mock.TestingT + Cleanup(func()) +} + +// NewMockMachineManager creates a new instance of MockMachineManager. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. 
+func NewMockMachineManager(t mockConstructorTestingTNewMockMachineManager) *MockMachineManager { + mock := &MockMachineManager{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/cmd/metal-api/internal/scaler/poolscaler.go b/cmd/metal-api/internal/scaler/poolscaler.go index daa56476a..1d8df1c9b 100644 --- a/cmd/metal-api/internal/scaler/poolscaler.go +++ b/cmd/metal-api/internal/scaler/poolscaler.go @@ -21,9 +21,9 @@ type ( Manager MachineManager } MachineManager interface { - AllMachines(partitionid, sizeid string) (metal.Machines, error) - WaitingMachines(partitionid, sizeid string) (machines metal.Machines, err error) - ShutdownMachines(partitionid, sizeid string) (metal.Machines, error) + AllMachines() (metal.Machines, error) + WaitingMachines() (metal.Machines, error) + ShutdownMachines() (metal.Machines, error) Shutdown(m *metal.Machine) error PowerOn(m *metal.Machine) error @@ -39,7 +39,7 @@ func NewPoolScaler(log *zap.SugaredLogger, manager MachineManager) *PoolScaler { // AdjustNumberOfWaitingMachines compares the number of waiting machines to the required pool size of the partition and shuts down or powers on machines accordingly func (p *PoolScaler) AdjustNumberOfWaitingMachines(m *metal.Machine, partition *metal.Partition) error { - waitingMachines, err := p.manager.WaitingMachines(partition.ID, m.SizeID) + waitingMachines, err := p.manager.WaitingMachines() if err != nil { return err } @@ -62,7 +62,7 @@ func (p *PoolScaler) AdjustNumberOfWaitingMachines(m *metal.Machine, partition * return err } } else { - shutdownMachines, err := p.manager.ShutdownMachines(partition.ID, m.SizeID) + shutdownMachines, err := p.manager.ShutdownMachines() if err != nil { return err } @@ -74,6 +74,7 @@ func (p *PoolScaler) AdjustNumberOfWaitingMachines(m *metal.Machine, partition * if err != nil { return err } + return nil } p.log.Infow("power on missing machines", "number", poolSizeExcess, "partition", p) @@ -86,7 +87,7 @@ func (p *PoolScaler) AdjustNumberOfWaitingMachines(m *metal.Machine, partition * return nil } -func (p *PoolScaler) waitingMachinesMatchPoolSize(actual int, required string, inPercent bool, parititionid, sizeid string) int { +func (p *PoolScaler) waitingMachinesMatchPoolSize(actual int, required string, inPercent bool, partitionid, sizeid string) int { if !inPercent { r, err := strconv.ParseInt(required, 10, 64) if err != nil { @@ -101,7 +102,7 @@ func (p *PoolScaler) waitingMachinesMatchPoolSize(actual int, required string, i return 0 } - allMachines, err := p.manager.AllMachines(parititionid, sizeid) + allMachines, err := p.manager.AllMachines() if err != nil { return 0 } diff --git a/cmd/metal-api/internal/scaler/poolscaler_test.go b/cmd/metal-api/internal/scaler/poolscaler_test.go new file mode 100644 index 000000000..71b221722 --- /dev/null +++ b/cmd/metal-api/internal/scaler/poolscaler_test.go @@ -0,0 +1,215 @@ +package scaler + +import ( + "testing" + + "github.com/metal-stack/metal-api/cmd/metal-api/internal/metal" + "go.uber.org/zap/zaptest" +) + +func TestPoolScaler_waitingMachinesMatchPoolSize(t *testing.T) { + tests := []struct { + name string + actual int + required string + inPercent bool + partitionid string + sizeid string + mockFn func(mock *MockMachineManager) + want int + }{ + { + name: "less waiting machines than required", + actual: 10, + required: "15", + inPercent: false, + partitionid: "", + sizeid: "", + want: -5, + }, + { + name: "more waiting machines than required", + actual: 20, + required: 
"15", + inPercent: false, + partitionid: "", + sizeid: "", + want: 5, + }, + { + name: "less waiting machines than required in percent", + actual: 20, + required: "30%", + inPercent: true, + partitionid: "", + sizeid: "", + mockFn: func(mock *MockMachineManager) { + mock.On("AllMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 150)...), nil) + }, + want: -25, + }, + { + name: "more waiting machines than required in percent", + actual: 24, + required: "15%", + inPercent: true, + partitionid: "", + sizeid: "", + mockFn: func(mock *MockMachineManager) { + mock.On("AllMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 150)...), nil) + }, + want: 1, + }, + { + name: "waiting machines match pool size", + actual: 20, + required: "20", + inPercent: false, + partitionid: "", + sizeid: "", + want: 0, + }, + { + name: "waiting machines match pool size in percent", + actual: 30, + required: "20%", + inPercent: true, + partitionid: "", + sizeid: "", + mockFn: func(mock *MockMachineManager) { + mock.On("AllMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 150)...), nil) + }, + want: 0, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + manager := NewMockMachineManager(t) + + if tt.mockFn != nil { + tt.mockFn(manager) + } + + p := &PoolScaler{ + log: zaptest.NewLogger(t).Sugar(), + manager: manager, + } + if got := p.waitingMachinesMatchPoolSize(tt.actual, tt.required, tt.inPercent, tt.partitionid, tt.sizeid); got != tt.want { + t.Errorf("PoolScaler.waitingMachinesMatchPoolSize() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestPoolScaler_AdjustNumberOfWaitingMachines(t *testing.T) { + tests := []struct { + name string + m *metal.Machine + partition *metal.Partition + mockFn func(mock *MockMachineManager) + wantErr bool + }{ + { + name: "waiting machines match pool size; nothing to do", + m: &metal.Machine{SizeID: ""}, + partition: &metal.Partition{WaitingPoolSize: "15"}, + mockFn: func(mock *MockMachineManager) { + mock.On("WaitingMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 15)...), nil) + }, + wantErr: false, + }, + { + name: "waiting machines match pool size in percent; nothing to do", + m: &metal.Machine{SizeID: ""}, + partition: &metal.Partition{WaitingPoolSize: "30%"}, + mockFn: func(mock *MockMachineManager) { + mock.On("AllMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 150)...), nil) + mock.On("WaitingMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 45)...), nil) + }, + wantErr: false, + }, + { + name: "more waiting machines needed; power on 5 machines", + m: &metal.Machine{SizeID: ""}, + partition: &metal.Partition{WaitingPoolSize: "15"}, + mockFn: func(mock *MockMachineManager) { + mock.On("WaitingMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 10)...), nil) + mock.On("ShutdownMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 5)...), nil) + mock.On("PowerOn", &metal.Machine{}).Return(nil).Times(5) + }, + wantErr: false, + }, + { + name: "more waiting machines needed in percent; power on 10 machines", + m: &metal.Machine{SizeID: ""}, + partition: &metal.Partition{WaitingPoolSize: "30%"}, + mockFn: func(mock *MockMachineManager) { + mock.On("AllMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 150)...), nil) + mock.On("WaitingMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 35)...), nil) + 
mock.On("ShutdownMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 10)...), nil) + mock.On("PowerOn", &metal.Machine{}).Return(nil).Times(10) + }, + wantErr: false, + }, + { + name: "pool size exceeded; power off 5 machines", + m: &metal.Machine{SizeID: ""}, + partition: &metal.Partition{WaitingPoolSize: "15"}, + mockFn: func(mock *MockMachineManager) { + mock.On("WaitingMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 20)...), nil) + mock.On("Shutdown", &metal.Machine{}).Return(nil).Times(5) + }, + wantErr: false, + }, + { + name: "pool size exceeded in percent; power off 5 machines", + m: &metal.Machine{SizeID: ""}, + partition: &metal.Partition{WaitingPoolSize: "15%"}, + mockFn: func(mock *MockMachineManager) { + mock.On("AllMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 150)...), nil) + mock.On("WaitingMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 28)...), nil) + mock.On("Shutdown", &metal.Machine{}).Return(nil).Times(5) + }, + wantErr: false, + }, + { + name: "more machines needed than available; power on all remaining", + m: &metal.Machine{SizeID: ""}, + partition: &metal.Partition{WaitingPoolSize: "30"}, + mockFn: func(mock *MockMachineManager) { + mock.On("ShutdownMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 10)...), nil) + mock.On("WaitingMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 15)...), nil) + mock.On("PowerOn", &metal.Machine{}).Return(nil).Times(10) + }, + wantErr: false, + }, + { + name: "no more machines left; do nothing", + m: &metal.Machine{SizeID: ""}, + partition: &metal.Partition{WaitingPoolSize: "15%"}, + mockFn: func(mock *MockMachineManager) { + mock.On("AllMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 150)...), nil) + mock.On("ShutdownMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 0)...), nil) + mock.On("WaitingMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 22)...), nil) + }, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + manager := NewMockMachineManager(t) + + if tt.mockFn != nil { + tt.mockFn(manager) + } + + p := &PoolScaler{ + log: zaptest.NewLogger(t).Sugar(), + manager: manager, + } + if err := p.AdjustNumberOfWaitingMachines(tt.m, tt.partition); (err != nil) != tt.wantErr { + t.Errorf("PoolScaler.AdjustNumberOfWaitingMachines() error = %v, wantErr %v", err, tt.wantErr) + } + }) + } +} From 6105eb749da492d8e3811c342dbc25396ac2f7b3 Mon Sep 17 00:00:00 2001 From: iljarotar Date: Tue, 29 Nov 2022 10:19:28 +0100 Subject: [PATCH 05/45] check if pool scaling is disabled --- cmd/metal-api/internal/scaler/poolscaler.go | 5 +++++ cmd/metal-api/internal/scaler/poolscaler_test.go | 6 ++++++ cmd/metal-api/internal/service/partition-service.go | 4 ++++ 3 files changed, 15 insertions(+) diff --git a/cmd/metal-api/internal/scaler/poolscaler.go b/cmd/metal-api/internal/scaler/poolscaler.go index 1d8df1c9b..6ba7c7b70 100644 --- a/cmd/metal-api/internal/scaler/poolscaler.go +++ b/cmd/metal-api/internal/scaler/poolscaler.go @@ -39,6 +39,11 @@ func NewPoolScaler(log *zap.SugaredLogger, manager MachineManager) *PoolScaler { // AdjustNumberOfWaitingMachines compares the number of waiting machines to the required pool size of the partition and shuts down or powers on machines accordingly func (p *PoolScaler) AdjustNumberOfWaitingMachines(m *metal.Machine, partition *metal.Partition) error { + // 
WaitingPoolSize == 0 means scaling is disabled + if partition.WaitingPoolSize == "0" { + return nil + } + waitingMachines, err := p.manager.WaitingMachines() if err != nil { return err diff --git a/cmd/metal-api/internal/scaler/poolscaler_test.go b/cmd/metal-api/internal/scaler/poolscaler_test.go index 71b221722..1f6b4a00c 100644 --- a/cmd/metal-api/internal/scaler/poolscaler_test.go +++ b/cmd/metal-api/internal/scaler/poolscaler_test.go @@ -194,6 +194,12 @@ func TestPoolScaler_AdjustNumberOfWaitingMachines(t *testing.T) { }, wantErr: false, }, + { + name: "pool scaling disabled; do nothing", + m: &metal.Machine{SizeID: ""}, + partition: &metal.Partition{WaitingPoolSize: "0"}, + wantErr: false, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { diff --git a/cmd/metal-api/internal/service/partition-service.go b/cmd/metal-api/internal/service/partition-service.go index 4bc5a797a..17a5f19d5 100644 --- a/cmd/metal-api/internal/service/partition-service.go +++ b/cmd/metal-api/internal/service/partition-service.go @@ -212,6 +212,8 @@ func (r *partitionResource) createPartition(request *restful.Request, response * var waitingpoolsize string if requestPayload.PartitionWaitingPoolSize != nil { waitingpoolsize = *requestPayload.PartitionWaitingPoolSize + } else { + waitingpoolsize = "0" } err = validateWaitingPoolSize(waitingpoolsize) @@ -323,6 +325,8 @@ func (r *partitionResource) updatePartition(request *restful.Request, response * return } newPartition.WaitingPoolSize = *requestPayload.PartitionWaitingPoolSize + } else { + newPartition.WaitingPoolSize = "0" } err = r.ds.UpdatePartition(oldPartition, &newPartition) From 489f657a7896f42c3d71ff2f1f77cc388fb15f6f Mon Sep 17 00:00:00 2001 From: iljarotar Date: Fri, 2 Dec 2022 10:41:48 +0100 Subject: [PATCH 06/45] min and max waiting pool size; validation; tests --- cmd/metal-api/internal/fsm/states/waiting.go | 4 +- cmd/metal-api/internal/metal/partition.go | 3 +- cmd/metal-api/internal/scaler/poolscaler.go | 61 ++++--- .../internal/scaler/poolscaler_test.go | 149 ++++-------------- .../internal/service/partition-service.go | 98 ++++++++---- .../service/partition-service_test.go | 72 ++++++++- .../internal/service/v1/partition.go | 12 +- 7 files changed, 217 insertions(+), 182 deletions(-) diff --git a/cmd/metal-api/internal/fsm/states/waiting.go b/cmd/metal-api/internal/fsm/states/waiting.go index 69a462177..2179d7fa4 100644 --- a/cmd/metal-api/internal/fsm/states/waiting.go +++ b/cmd/metal-api/internal/fsm/states/waiting.go @@ -18,12 +18,12 @@ func (p *WaitingState) OnTransition(e *fsm.Event) { appendEventToContainer(p.config.Event, p.config.Container) if p.config.Scaler != nil { - e.Err = p.config.Scaler.AdjustNumberOfWaitingMachines(p.config.Machine, p.config.Partition) + e.Err = p.config.Scaler.AdjustNumberOfWaitingMachines(p.config.Partition) } } func (p *WaitingState) OnLeave(e *fsm.Event) { if p.config.Scaler != nil { - e.Err = p.config.Scaler.AdjustNumberOfWaitingMachines(p.config.Machine, p.config.Partition) + e.Err = p.config.Scaler.AdjustNumberOfWaitingMachines(p.config.Partition) } } diff --git a/cmd/metal-api/internal/metal/partition.go b/cmd/metal-api/internal/metal/partition.go index 95964ff77..1acd92a74 100644 --- a/cmd/metal-api/internal/metal/partition.go +++ b/cmd/metal-api/internal/metal/partition.go @@ -6,7 +6,8 @@ type Partition struct { BootConfiguration BootConfiguration `rethinkdb:"bootconfig" json:"bootconfig"` MgmtServiceAddress string `rethinkdb:"mgmtserviceaddr" json:"mgmtserviceaddr"` 
PrivateNetworkPrefixLength uint8 `rethinkdb:"privatenetworkprefixlength" json:"privatenetworkprefixlength"` - WaitingPoolSize string `rethinkdb:"waitingpoolsize" json:"waitingpoolsize"` + WaitingPoolMinSize string `rethinkdb:"waitingpoolminsize" json:"waitingpoolminsize"` + WaitingPoolMaxSize string `rethinkdb:"waitingpoolmaxsize" json:"waitingpoolmaxsize"` } // BootConfiguration defines the metal-hammer initrd, kernel and commandline diff --git a/cmd/metal-api/internal/scaler/poolscaler.go b/cmd/metal-api/internal/scaler/poolscaler.go index 6ba7c7b70..6d9bc5c7a 100644 --- a/cmd/metal-api/internal/scaler/poolscaler.go +++ b/cmd/metal-api/internal/scaler/poolscaler.go @@ -38,23 +38,14 @@ func NewPoolScaler(log *zap.SugaredLogger, manager MachineManager) *PoolScaler { } // AdjustNumberOfWaitingMachines compares the number of waiting machines to the required pool size of the partition and shuts down or powers on machines accordingly -func (p *PoolScaler) AdjustNumberOfWaitingMachines(m *metal.Machine, partition *metal.Partition) error { - // WaitingPoolSize == 0 means scaling is disabled - if partition.WaitingPoolSize == "0" { - return nil - } - +func (p *PoolScaler) AdjustNumberOfWaitingMachines(partition *metal.Partition) error { waitingMachines, err := p.manager.WaitingMachines() if err != nil { return err } poolSizeExcess := 0 - if strings.Contains(partition.WaitingPoolSize, "%") { - poolSizeExcess = p.waitingMachinesMatchPoolSize(len(waitingMachines), partition.WaitingPoolSize, true, partition.ID, m.SizeID) - } else { - poolSizeExcess = p.waitingMachinesMatchPoolSize(len(waitingMachines), partition.WaitingPoolSize, false, partition.ID, m.SizeID) - } + poolSizeExcess = p.calculatePoolSizeExcess(len(waitingMachines), partition.WaitingPoolMinSize, partition.WaitingPoolMaxSize) if poolSizeExcess == 0 { return nil @@ -92,17 +83,41 @@ func (p *PoolScaler) AdjustNumberOfWaitingMachines(m *metal.Machine, partition * return nil } -func (p *PoolScaler) waitingMachinesMatchPoolSize(actual int, required string, inPercent bool, partitionid, sizeid string) int { - if !inPercent { - r, err := strconv.ParseInt(required, 10, 64) +// calculatePoolSizeExcess checks if there are less waiting machines than minRequired or more than maxRequired +// if yes, it returns the difference between the actual amount of waiting machines and the average of minRequired and maxRequired +// if no, it returns 0 +func (p *PoolScaler) calculatePoolSizeExcess(actual int, minRequired, maxRequired string) int { + if strings.Contains(minRequired, "%") != strings.Contains(maxRequired, "%") { + return 0 + } + + if !strings.Contains(minRequired, "%") && !strings.Contains(maxRequired, "%") { + min, err := strconv.ParseInt(minRequired, 10, 64) + if err != nil { + return 0 + } + + max, err := strconv.ParseInt(maxRequired, 10, 64) if err != nil { return 0 } - return actual - int(r) + + if int64(actual) >= min && int64(actual) <= max { + return 0 + } + + average := int(math.Round((float64(max) + float64(min)) / 2)) + return actual - average + } + + minString := strings.Split(minRequired, "%")[0] + minPercent, err := strconv.ParseInt(minString, 10, 64) + if err != nil { + return 0 } - percent := strings.Split(required, "%")[0] - r, err := strconv.ParseInt(percent, 10, 64) + maxString := strings.Split(maxRequired, "%")[0] + maxPercent, err := strconv.ParseInt(maxString, 10, 64) if err != nil { return 0 } @@ -112,7 +127,15 @@ func (p *PoolScaler) waitingMachinesMatchPoolSize(actual int, required string, i return 0 } - return actual - 
int(math.Round(float64(len(allMachines))*float64(r)/100)) + min := int(math.Round(float64(len(allMachines)) * float64(minPercent) / 100)) + max := int(math.Round(float64(len(allMachines)) * float64(maxPercent) / 100)) + + if actual >= min && actual <= max { + return 0 + } + + average := int(math.Round((float64(max) + float64(min)) / 2)) + return actual - average } func randomIndices(n, k int) []int { @@ -122,7 +145,7 @@ func randomIndices(n, k int) []int { } for i := 0; i < (n - k); i++ { - rand.Seed(time.Now().UnixNano()) + rand.Seed(time.Now().UnixNano()) //nolint:gosec r := rand.Intn(len(indices)) indices = append(indices[:r], indices[r+1:]...) } diff --git a/cmd/metal-api/internal/scaler/poolscaler_test.go b/cmd/metal-api/internal/scaler/poolscaler_test.go index 1f6b4a00c..3f44f0b03 100644 --- a/cmd/metal-api/internal/scaler/poolscaler_test.go +++ b/cmd/metal-api/internal/scaler/poolscaler_test.go @@ -7,100 +7,6 @@ import ( "go.uber.org/zap/zaptest" ) -func TestPoolScaler_waitingMachinesMatchPoolSize(t *testing.T) { - tests := []struct { - name string - actual int - required string - inPercent bool - partitionid string - sizeid string - mockFn func(mock *MockMachineManager) - want int - }{ - { - name: "less waiting machines than required", - actual: 10, - required: "15", - inPercent: false, - partitionid: "", - sizeid: "", - want: -5, - }, - { - name: "more waiting machines than required", - actual: 20, - required: "15", - inPercent: false, - partitionid: "", - sizeid: "", - want: 5, - }, - { - name: "less waiting machines than required in percent", - actual: 20, - required: "30%", - inPercent: true, - partitionid: "", - sizeid: "", - mockFn: func(mock *MockMachineManager) { - mock.On("AllMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 150)...), nil) - }, - want: -25, - }, - { - name: "more waiting machines than required in percent", - actual: 24, - required: "15%", - inPercent: true, - partitionid: "", - sizeid: "", - mockFn: func(mock *MockMachineManager) { - mock.On("AllMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 150)...), nil) - }, - want: 1, - }, - { - name: "waiting machines match pool size", - actual: 20, - required: "20", - inPercent: false, - partitionid: "", - sizeid: "", - want: 0, - }, - { - name: "waiting machines match pool size in percent", - actual: 30, - required: "20%", - inPercent: true, - partitionid: "", - sizeid: "", - mockFn: func(mock *MockMachineManager) { - mock.On("AllMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 150)...), nil) - }, - want: 0, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - manager := NewMockMachineManager(t) - - if tt.mockFn != nil { - tt.mockFn(manager) - } - - p := &PoolScaler{ - log: zaptest.NewLogger(t).Sugar(), - manager: manager, - } - if got := p.waitingMachinesMatchPoolSize(tt.actual, tt.required, tt.inPercent, tt.partitionid, tt.sizeid); got != tt.want { - t.Errorf("PoolScaler.waitingMachinesMatchPoolSize() = %v, want %v", got, tt.want) - } - }) - } -} - func TestPoolScaler_AdjustNumberOfWaitingMachines(t *testing.T) { tests := []struct { name string @@ -110,18 +16,18 @@ func TestPoolScaler_AdjustNumberOfWaitingMachines(t *testing.T) { wantErr bool }{ { - name: "waiting machines match pool size; nothing to do", + name: "waiting machines match pool size; do nothing", m: &metal.Machine{SizeID: ""}, - partition: &metal.Partition{WaitingPoolSize: "15"}, + partition: &metal.Partition{WaitingPoolMinSize: "10", WaitingPoolMaxSize: 
"20"}, mockFn: func(mock *MockMachineManager) { - mock.On("WaitingMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 15)...), nil) + mock.On("WaitingMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 10)...), nil) }, wantErr: false, }, { - name: "waiting machines match pool size in percent; nothing to do", + name: "waiting machines match pool size in percent; do nothing", m: &metal.Machine{SizeID: ""}, - partition: &metal.Partition{WaitingPoolSize: "30%"}, + partition: &metal.Partition{WaitingPoolMinSize: "25%", WaitingPoolMaxSize: "30%"}, mockFn: func(mock *MockMachineManager) { mock.On("AllMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 150)...), nil) mock.On("WaitingMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 45)...), nil) @@ -129,53 +35,53 @@ func TestPoolScaler_AdjustNumberOfWaitingMachines(t *testing.T) { wantErr: false, }, { - name: "more waiting machines needed; power on 5 machines", + name: "more waiting machines needed; power on 8 machines", m: &metal.Machine{SizeID: ""}, - partition: &metal.Partition{WaitingPoolSize: "15"}, + partition: &metal.Partition{WaitingPoolMinSize: "15", WaitingPoolMaxSize: "20"}, mockFn: func(mock *MockMachineManager) { mock.On("WaitingMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 10)...), nil) - mock.On("ShutdownMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 5)...), nil) - mock.On("PowerOn", &metal.Machine{}).Return(nil).Times(5) + mock.On("ShutdownMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 10)...), nil) + mock.On("PowerOn", &metal.Machine{}).Return(nil).Times(8) }, wantErr: false, }, { name: "more waiting machines needed in percent; power on 10 machines", m: &metal.Machine{SizeID: ""}, - partition: &metal.Partition{WaitingPoolSize: "30%"}, + partition: &metal.Partition{WaitingPoolMinSize: "30%", WaitingPoolMaxSize: "40%"}, mockFn: func(mock *MockMachineManager) { mock.On("AllMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 150)...), nil) mock.On("WaitingMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 35)...), nil) - mock.On("ShutdownMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 10)...), nil) - mock.On("PowerOn", &metal.Machine{}).Return(nil).Times(10) + mock.On("ShutdownMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 20)...), nil) + mock.On("PowerOn", &metal.Machine{}).Return(nil).Times(18) }, wantErr: false, }, { - name: "pool size exceeded; power off 5 machines", + name: "pool size exceeded; power off 7 machines", m: &metal.Machine{SizeID: ""}, - partition: &metal.Partition{WaitingPoolSize: "15"}, + partition: &metal.Partition{WaitingPoolMinSize: "10", WaitingPoolMaxSize: "15"}, mockFn: func(mock *MockMachineManager) { mock.On("WaitingMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 20)...), nil) - mock.On("Shutdown", &metal.Machine{}).Return(nil).Times(5) + mock.On("Shutdown", &metal.Machine{}).Return(nil).Times(7) }, wantErr: false, }, { - name: "pool size exceeded in percent; power off 5 machines", + name: "pool size exceeded in percent; power off 4 machines", m: &metal.Machine{SizeID: ""}, - partition: &metal.Partition{WaitingPoolSize: "15%"}, + partition: &metal.Partition{WaitingPoolMinSize: "15%", WaitingPoolMaxSize: "20%"}, mockFn: func(mock *MockMachineManager) { mock.On("AllMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 
150)...), nil) - mock.On("WaitingMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 28)...), nil) - mock.On("Shutdown", &metal.Machine{}).Return(nil).Times(5) + mock.On("WaitingMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 31)...), nil) + mock.On("Shutdown", &metal.Machine{}).Return(nil).Times(4) }, wantErr: false, }, { name: "more machines needed than available; power on all remaining", m: &metal.Machine{SizeID: ""}, - partition: &metal.Partition{WaitingPoolSize: "30"}, + partition: &metal.Partition{WaitingPoolMinSize: "30", WaitingPoolMaxSize: "40"}, mockFn: func(mock *MockMachineManager) { mock.On("ShutdownMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 10)...), nil) mock.On("WaitingMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 15)...), nil) @@ -186,7 +92,7 @@ func TestPoolScaler_AdjustNumberOfWaitingMachines(t *testing.T) { { name: "no more machines left; do nothing", m: &metal.Machine{SizeID: ""}, - partition: &metal.Partition{WaitingPoolSize: "15%"}, + partition: &metal.Partition{WaitingPoolMinSize: "15%", WaitingPoolMaxSize: "20%"}, mockFn: func(mock *MockMachineManager) { mock.On("AllMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 150)...), nil) mock.On("ShutdownMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 0)...), nil) @@ -197,11 +103,16 @@ func TestPoolScaler_AdjustNumberOfWaitingMachines(t *testing.T) { { name: "pool scaling disabled; do nothing", m: &metal.Machine{SizeID: ""}, - partition: &metal.Partition{WaitingPoolSize: "0"}, - wantErr: false, + partition: &metal.Partition{WaitingPoolMinSize: "0%", WaitingPoolMaxSize: "100%"}, + mockFn: func(mock *MockMachineManager) { + mock.On("WaitingMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 22)...), nil) + mock.On("AllMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 150)...), nil) + }, + wantErr: false, }, } - for _, tt := range tests { + for i := range tests { + tt := tests[i] t.Run(tt.name, func(t *testing.T) { manager := NewMockMachineManager(t) @@ -213,7 +124,7 @@ func TestPoolScaler_AdjustNumberOfWaitingMachines(t *testing.T) { log: zaptest.NewLogger(t).Sugar(), manager: manager, } - if err := p.AdjustNumberOfWaitingMachines(tt.m, tt.partition); (err != nil) != tt.wantErr { + if err := p.AdjustNumberOfWaitingMachines(tt.partition); (err != nil) != tt.wantErr { t.Errorf("PoolScaler.AdjustNumberOfWaitingMachines() error = %v, wantErr %v", err, tt.wantErr) } }) diff --git a/cmd/metal-api/internal/service/partition-service.go b/cmd/metal-api/internal/service/partition-service.go index 17a5f19d5..493f09ded 100644 --- a/cmd/metal-api/internal/service/partition-service.go +++ b/cmd/metal-api/internal/service/partition-service.go @@ -209,14 +209,21 @@ func (r *partitionResource) createPartition(request *restful.Request, response * commandLine = *requestPayload.PartitionBootConfiguration.CommandLine } - var waitingpoolsize string - if requestPayload.PartitionWaitingPoolSize != nil { - waitingpoolsize = *requestPayload.PartitionWaitingPoolSize + var waitingpoolminsize string + if requestPayload.PartitionWaitingPoolMinSize != nil { + waitingpoolminsize = *requestPayload.PartitionWaitingPoolMinSize } else { - waitingpoolsize = "0" + waitingpoolminsize = "0%" } - err = validateWaitingPoolSize(waitingpoolsize) + var waitingpoolmaxsize string + if requestPayload.PartitionWaitingPoolMaxSize != nil { + waitingpoolmaxsize = 
*requestPayload.PartitionWaitingPoolMaxSize + } else { + waitingpoolmaxsize = "100%" + } + + err = validateWaitingPoolSize(waitingpoolminsize, waitingpoolmaxsize) if err != nil { r.sendError(request, response, httperrors.BadRequest(err)) return @@ -235,7 +242,8 @@ func (r *partitionResource) createPartition(request *restful.Request, response * KernelURL: kernelURL, CommandLine: commandLine, }, - WaitingPoolSize: waitingpoolsize, + WaitingPoolMinSize: waitingpoolminsize, + WaitingPoolMaxSize: waitingpoolmaxsize, } fqn := metal.TopicMachine.GetFQN(p.GetID()) @@ -318,17 +326,29 @@ func (r *partitionResource) updatePartition(request *restful.Request, response * newPartition.BootConfiguration.CommandLine = *requestPayload.PartitionBootConfiguration.CommandLine } - if requestPayload.PartitionWaitingPoolSize != nil { - err = validateWaitingPoolSize(*requestPayload.PartitionWaitingPoolSize) - if err != nil { - r.sendError(request, response, httperrors.BadRequest(err)) - return - } - newPartition.WaitingPoolSize = *requestPayload.PartitionWaitingPoolSize + var waitingpoolminsize string + if requestPayload.PartitionWaitingPoolMinSize != nil { + waitingpoolminsize = *requestPayload.PartitionWaitingPoolMinSize } else { - newPartition.WaitingPoolSize = "0" + waitingpoolminsize = "0%" } + var waitingpoolmaxsize string + if requestPayload.PartitionWaitingPoolMaxSize != nil { + waitingpoolmaxsize = *requestPayload.PartitionWaitingPoolMaxSize + } else { + waitingpoolmaxsize = "100%" + } + + err = validateWaitingPoolSize(waitingpoolminsize, waitingpoolmaxsize) + if err != nil { + r.sendError(request, response, httperrors.BadRequest(err)) + return + } + + newPartition.WaitingPoolMinSize = waitingpoolminsize + newPartition.WaitingPoolMaxSize = waitingpoolmaxsize + err = r.ds.UpdatePartition(oldPartition, &newPartition) if err != nil { r.sendError(request, response, httperrors.BadRequest(err)) @@ -475,30 +495,42 @@ func (r *partitionResource) calcPartitionCapacity(pcr *v1.PartitionCapacityReque return partitionCapacities, err } -func validateWaitingPoolSize(input string) error { - if strings.Contains(input, "%") { - tokens := strings.Split(input, "%") - if len(tokens) != 1 { - return errors.New("waiting pool size must be a positive integer or a positive integer value in percent, e.g. 15%") - } +func validateWaitingPoolSize(minString, maxString string) error { + if strings.Contains(minString, "%") != strings.Contains(maxString, "%") { + return errors.New("minimum and maximum pool sizes must either be both in percent or both an absolute value") + } - p, err := strconv.ParseInt(tokens[0], 10, 64) - if err != nil { - return errors.New("could not parse waiting pool size") + if strings.Contains(minString, "%") && strings.Contains(maxString, "%") { + minTokens := strings.Split(minString, "%") + maxTokens := strings.Split(maxString, "%") + if len(minTokens) != 2 || minTokens[1] != "" { + return errors.New("invalid format for minimum waiting pool sizes") } - if p < 0 || p > 100 { - return errors.New("waiting pool size must be between 0% and 100%") - } - } else { - p, err := strconv.ParseInt(input, 10, 64) - if err != nil { - return errors.New("could not parse waiting pool size") + if len(maxTokens) != 2 || maxTokens[1] != "" { + return errors.New("invalid format for maximum waiting pool sizes") } - if p < 0 { - return errors.New("waiting pool size must be a positive integer or a positive integer value in percent, e.g. 
15%") - } + minString = minTokens[0] + maxString = maxTokens[0] + } + + min, err := strconv.ParseInt(minString, 10, 64) + if err != nil { + return errors.New("could not parse minimum waiting pool size") + } + + max, err := strconv.ParseInt(maxString, 10, 64) + if err != nil { + return errors.New("could not parse maximum waiting pool size") + } + + if min < 0 || max < 0 { + return errors.New("minimum and maximum waiting pool sizes must be greater or equal to 0") + } + + if max < min { + return errors.New("minimum waiting pool size must be less or equal to maximum pool size") } return nil diff --git a/cmd/metal-api/internal/service/partition-service_test.go b/cmd/metal-api/internal/service/partition-service_test.go index 503c5dce7..ea9f99dd2 100644 --- a/cmd/metal-api/internal/service/partition-service_test.go +++ b/cmd/metal-api/internal/service/partition-service_test.go @@ -3,20 +3,20 @@ package service import ( "bytes" "encoding/json" + "errors" "fmt" "net/http" "net/http/httptest" "testing" - "github.com/stretchr/testify/assert" - "go.uber.org/zap/zaptest" - restful "github.com/emicklei/go-restful/v3" "github.com/metal-stack/metal-api/cmd/metal-api/internal/datastore" v1 "github.com/metal-stack/metal-api/cmd/metal-api/internal/service/v1" "github.com/metal-stack/metal-api/cmd/metal-api/internal/testdata" "github.com/metal-stack/metal-lib/httperrors" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "go.uber.org/zap/zaptest" ) type nopTopicCreater struct { @@ -271,3 +271,69 @@ func TestPartitionCapacity(t *testing.T) { require.Equal(t, 5, c.Total) require.Equal(t, 0, c.Free) } + +func Test_validateWaitingPoolSize(t *testing.T) { + tests := []struct { + name string + min string + max string + wantErr error + }{ + { + name: "min max format mismatch", + min: "15%", + max: "30", + wantErr: errors.New("minimum and maximum pool sizes must either be both in percent or both an absolute value"), + }, + { + name: "invalid format for min in percent", + min: "15%#", + max: "30%", + wantErr: errors.New("invalid format for minimum waiting pool sizes"), + }, + { + name: "parse error for min", + min: "15#", + max: "30", + wantErr: errors.New("could not parse minimum waiting pool size"), + }, + { + name: "parse error for max", + min: "15", + max: "30#", + wantErr: errors.New("could not parse maximum waiting pool size"), + }, + { + name: "negative value for min", + min: "-15", + max: "0", + wantErr: errors.New("minimum and maximum waiting pool sizes must be greater or equal to 0"), + }, + { + name: "max less than min", + min: "15", + max: "0", + wantErr: errors.New("minimum waiting pool size must be less or equal to maximum pool size"), + }, + { + name: "everything okay", + min: "15", + max: "30", + wantErr: nil, + }, + { + name: "everything okay in percent", + min: "15%", + max: "30%", + wantErr: nil, + }, + } + for i := range tests { + tt := tests[i] + t.Run(tt.name, func(t *testing.T) { + if err := validateWaitingPoolSize(tt.min, tt.max); (err != nil || tt.wantErr != nil) && (err == nil || tt.wantErr == nil || err.Error() != tt.wantErr.Error()) { + t.Errorf("validateWaitingPoolSize() error = %v, wantErr %v", err, tt.wantErr) + } + }) + } +} diff --git a/cmd/metal-api/internal/service/v1/partition.go b/cmd/metal-api/internal/service/v1/partition.go index 7ca3535f6..cb441c20f 100644 --- a/cmd/metal-api/internal/service/v1/partition.go +++ b/cmd/metal-api/internal/service/v1/partition.go @@ -18,15 +18,17 @@ type PartitionBootConfiguration struct { type PartitionCreateRequest 
struct { Common PartitionBase - PartitionBootConfiguration PartitionBootConfiguration `json:"bootconfig" description:"the boot configuration of this partition"` - PartitionWaitingPoolSize *string `json:"waitingpoolsize" description:"the waiting pool size of this partition` + PartitionBootConfiguration PartitionBootConfiguration `json:"bootconfig" description:"the boot configuration of this partition"` + PartitionWaitingPoolMinSize *string `json:"waitingpoolminsize" description:"the minimum waiting pool size of this partition"` + PartitionWaitingPoolMaxSize *string `json:"waitingpoolmaxsize" description:"the maximum waiting pool size of this partition"` } type PartitionUpdateRequest struct { Common - MgmtServiceAddress *string `json:"mgmtserviceaddress" description:"the address to the management service of this partition" optional:"true"` - PartitionBootConfiguration *PartitionBootConfiguration `json:"bootconfig" description:"the boot configuration of this partition" optional:"true"` - PartitionWaitingPoolSize *string `json:"waitingpoolsize" description:"the waiting pool size of this partition` + MgmtServiceAddress *string `json:"mgmtserviceaddress" description:"the address to the management service of this partition" optional:"true"` + PartitionBootConfiguration *PartitionBootConfiguration `json:"bootconfig" description:"the boot configuration of this partition" optional:"true"` + PartitionWaitingPoolMinSize *string `json:"waitingpoolminsize" description:"the minimum waiting pool size of this partition"` + PartitionWaitingPoolMaxSize *string `json:"waitingpoolmaxsize" description:"the maximum waiting pool size of this partition"` } type PartitionResponse struct { From 50b5771b8590931391a918c36c8f5dc0b44f3053 Mon Sep 17 00:00:00 2001 From: iljarotar Date: Fri, 2 Dec 2022 10:49:11 +0100 Subject: [PATCH 07/45] linter --- cmd/metal-api/internal/datastore/poolsize.go | 2 +- cmd/metal-api/internal/scaler/poolscaler.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd/metal-api/internal/datastore/poolsize.go b/cmd/metal-api/internal/datastore/poolsize.go index a6bda2f74..00b44a336 100644 --- a/cmd/metal-api/internal/datastore/poolsize.go +++ b/cmd/metal-api/internal/datastore/poolsize.go @@ -96,7 +96,7 @@ func (rs *RethinkStore) publishCommandAndUpdate(logger *zap.SugaredLogger, m *me return err } - err = e.PublishMachineCmd(logger, m, publisher, metal.MachineOnCmd) + err = e.PublishMachineCmd(logger, m, publisher, cmd) if err != nil { return err } diff --git a/cmd/metal-api/internal/scaler/poolscaler.go b/cmd/metal-api/internal/scaler/poolscaler.go index 6d9bc5c7a..ff57686d7 100644 --- a/cmd/metal-api/internal/scaler/poolscaler.go +++ b/cmd/metal-api/internal/scaler/poolscaler.go @@ -145,8 +145,8 @@ func randomIndices(n, k int) []int { } for i := 0; i < (n - k); i++ { - rand.Seed(time.Now().UnixNano()) //nolint:gosec - r := rand.Intn(len(indices)) + rand.Seed(time.Now().UnixNano()) + r := rand.Intn(len(indices)) //nolint:gosec indices = append(indices[:r], indices[r+1:]...) 
} From ec55300e085ee3b910d3d2fc4945487e1518a3cb Mon Sep 17 00:00:00 2001 From: iljarotar Date: Fri, 2 Dec 2022 11:05:28 +0100 Subject: [PATCH 08/45] spec --- spec/metal-api.json | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/spec/metal-api.json b/spec/metal-api.json index ec5e9a9fc..e72889249 100644 --- a/spec/metal-api.json +++ b/spec/metal-api.json @@ -228,6 +228,9 @@ "type": "string" }, "type": "array" + }, + "waiting": { + "type": "boolean" } } }, @@ -1125,6 +1128,9 @@ "type": "string" }, "type": "array" + }, + "waiting": { + "type": "boolean" } } }, @@ -2235,6 +2241,9 @@ "type": "string" }, "type": "array" + }, + "waiting": { + "type": "boolean" } } }, @@ -3457,11 +3466,21 @@ "maximum": 30, "minimum": 16, "type": "integer" + }, + "waitingpoolmaxsize": { + "description": "the maximum waiting pool size of this partition", + "type": "string" + }, + "waitingpoolminsize": { + "description": "the minimum waiting pool size of this partition", + "type": "string" } }, "required": [ "bootconfig", - "id" + "id", + "waitingpoolmaxsize", + "waitingpoolminsize" ] }, "v1.PartitionResponse": { @@ -3534,10 +3553,20 @@ "name": { "description": "a readable name for this entity", "type": "string" + }, + "waitingpoolmaxsize": { + "description": "the maximum waiting pool size of this partition", + "type": "string" + }, + "waitingpoolminsize": { + "description": "the minimum waiting pool size of this partition", + "type": "string" } }, "required": [ - "id" + "id", + "waitingpoolmaxsize", + "waitingpoolminsize" ] }, "v1.Project": { From 086aea2ea61a91d90c5bf4d567389da9daf0b3b3 Mon Sep 17 00:00:00 2001 From: iljarotar <77339620+iljarotar@users.noreply.github.com> Date: Tue, 6 Dec 2022 09:32:24 +0100 Subject: [PATCH 09/45] Update cmd/metal-api/internal/scaler/poolscaler.go Co-authored-by: Gerrit --- cmd/metal-api/internal/scaler/poolscaler.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/metal-api/internal/scaler/poolscaler.go b/cmd/metal-api/internal/scaler/poolscaler.go index ff57686d7..9a1d58dc6 100644 --- a/cmd/metal-api/internal/scaler/poolscaler.go +++ b/cmd/metal-api/internal/scaler/poolscaler.go @@ -52,7 +52,7 @@ func (p *PoolScaler) AdjustNumberOfWaitingMachines(partition *metal.Partition) e } if poolSizeExcess > 0 { - p.log.Infow("shut down spare machines", "number", poolSizeExcess, "partition", p) + p.log.Infow("shut down spare machines", "number", poolSizeExcess, "partition", partition.ID) err := p.shutdownMachines(waitingMachines, poolSizeExcess) if err != nil { return err From df0da914d208cb19161ca6d9e3517eef22412ef1 Mon Sep 17 00:00:00 2001 From: iljarotar <77339620+iljarotar@users.noreply.github.com> Date: Tue, 6 Dec 2022 09:32:52 +0100 Subject: [PATCH 10/45] Update cmd/metal-api/internal/scaler/poolscaler.go Co-authored-by: Gerrit --- cmd/metal-api/internal/scaler/poolscaler.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/metal-api/internal/scaler/poolscaler.go b/cmd/metal-api/internal/scaler/poolscaler.go index 9a1d58dc6..e9b241d48 100644 --- a/cmd/metal-api/internal/scaler/poolscaler.go +++ b/cmd/metal-api/internal/scaler/poolscaler.go @@ -65,7 +65,7 @@ func (p *PoolScaler) AdjustNumberOfWaitingMachines(partition *metal.Partition) e poolSizeExcess = int(math.Abs(float64(poolSizeExcess))) if len(shutdownMachines) < poolSizeExcess { - p.log.Infow("not enough machines to meet waiting pool size; power on all remaining", "number", len(shutdownMachines), "partition", p) + p.log.Infow("not enough 
machines to meet waiting pool size; power on all remaining", "number", len(shutdownMachines), "partition", partition.ID) err = p.powerOnMachines(shutdownMachines, len(shutdownMachines)) if err != nil { return err From a8fbaed1c973a579c9d531deb09502cae82641c3 Mon Sep 17 00:00:00 2001 From: iljarotar <77339620+iljarotar@users.noreply.github.com> Date: Tue, 6 Dec 2022 09:33:17 +0100 Subject: [PATCH 11/45] Update cmd/metal-api/internal/scaler/poolscaler.go Co-authored-by: Gerrit --- cmd/metal-api/internal/scaler/poolscaler.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/metal-api/internal/scaler/poolscaler.go b/cmd/metal-api/internal/scaler/poolscaler.go index e9b241d48..8c135213c 100644 --- a/cmd/metal-api/internal/scaler/poolscaler.go +++ b/cmd/metal-api/internal/scaler/poolscaler.go @@ -73,7 +73,7 @@ func (p *PoolScaler) AdjustNumberOfWaitingMachines(partition *metal.Partition) e return nil } - p.log.Infow("power on missing machines", "number", poolSizeExcess, "partition", p) + p.log.Infow("power on missing machines", "number", poolSizeExcess, "partition", partition.ID) err = p.powerOnMachines(shutdownMachines, poolSizeExcess) if err != nil { return err From beefa8da0d5572d8da84058a2553bf0653a56a34 Mon Sep 17 00:00:00 2001 From: iljarotar Date: Tue, 6 Dec 2022 11:27:02 +0100 Subject: [PATCH 12/45] scaler range type for partition and noop state for fsm --- cmd/metal-api/internal/datastore/event.go | 9 +- cmd/metal-api/internal/datastore/machine.go | 1 - cmd/metal-api/internal/datastore/poolsize.go | 2 - cmd/metal-api/internal/fsm/events.go | 1 + cmd/metal-api/internal/fsm/states/alive.go | 1 + .../internal/fsm/states/booting-new-kernel.go | 1 + cmd/metal-api/internal/fsm/states/crashed.go | 1 + cmd/metal-api/internal/fsm/states/initial.go | 1 + .../internal/fsm/states/installing.go | 1 + .../internal/fsm/states/machine-reclaim.go | 1 + .../internal/fsm/states/noop-state.go | 10 ++ .../internal/fsm/states/phoned-home.go | 1 + .../internal/fsm/states/planned-reboot.go | 1 + .../internal/fsm/states/preparing.go | 1 + .../internal/fsm/states/pxe-booting.go | 1 + .../internal/fsm/states/registering.go | 1 + cmd/metal-api/internal/fsm/states/states.go | 2 +- cmd/metal-api/internal/fsm/states/waiting.go | 4 +- cmd/metal-api/internal/metal/partition.go | 64 +++++++++++- .../internal/metal/partition_test.go | 64 ++++++++++++ cmd/metal-api/internal/scaler/poolscaler.go | 99 ++++++++----------- .../internal/scaler/poolscaler_test.go | 98 ++++++++++++------ .../internal/service/partition-service.go | 94 ++++-------------- .../service/partition-service_test.go | 67 ------------- 24 files changed, 284 insertions(+), 242 deletions(-) create mode 100644 cmd/metal-api/internal/fsm/states/noop-state.go diff --git a/cmd/metal-api/internal/datastore/event.go b/cmd/metal-api/internal/datastore/event.go index 088b8b938..b2db85ee3 100644 --- a/cmd/metal-api/internal/datastore/event.go +++ b/cmd/metal-api/internal/datastore/event.go @@ -64,14 +64,19 @@ func (rs *RethinkStore) ProvisioningEventForMachine(log *zap.SugaredLogger, publ rs: rs, publisher: publisher, } - scaler := s.NewPoolScaler(log, manager) + + c := &s.PoolScalerConfig{ + Log: log, + Manager: manager, + Partition: *p, + } + scaler := s.NewPoolScaler(c) config := states.StateConfig{ Log: log, Container: ec, Event: event, Scaler: scaler, - Partition: p, Machine: machine, } diff --git a/cmd/metal-api/internal/datastore/machine.go b/cmd/metal-api/internal/datastore/machine.go index c57f1ec62..93f28c172 100644 --- 
a/cmd/metal-api/internal/datastore/machine.go +++ b/cmd/metal-api/internal/datastore/machine.go @@ -58,7 +58,6 @@ type MachineSearchQuery struct { // state StateValue *string `json:"state_value" optional:"true"` - Waiting *bool `json:"waiting" optional:"true"` // ipmi IpmiAddress *string `json:"ipmi_address" optional:"true"` diff --git a/cmd/metal-api/internal/datastore/poolsize.go b/cmd/metal-api/internal/datastore/poolsize.go index 00b44a336..3e512e267 100644 --- a/cmd/metal-api/internal/datastore/poolsize.go +++ b/cmd/metal-api/internal/datastore/poolsize.go @@ -31,12 +31,10 @@ func (m *manager) AllMachines() (metal.Machines, error) { func (m *manager) WaitingMachines() (metal.Machines, error) { stateValue := string(metal.AvailableState) - waiting := true q := MachineSearchQuery{ PartitionID: &m.partitionid, SizeID: &m.sizeid, StateValue: &stateValue, - Waiting: &waiting, } waitingMachines := metal.Machines{} diff --git a/cmd/metal-api/internal/fsm/events.go b/cmd/metal-api/internal/fsm/events.go index 9fd891500..1d6f6aa0e 100644 --- a/cmd/metal-api/internal/fsm/events.go +++ b/cmd/metal-api/internal/fsm/events.go @@ -157,6 +157,7 @@ func eventCallbacks(config *states.StateConfig) fsm.Callbacks { for name, state := range allStates { callbacks["enter_"+name] = state.OnTransition + callbacks["leave_"+name] = state.OnLeave } waiting := allStates[states.Waiting.String()].(*states.WaitingState) diff --git a/cmd/metal-api/internal/fsm/states/alive.go b/cmd/metal-api/internal/fsm/states/alive.go index 0cdd53695..2ec93301e 100644 --- a/cmd/metal-api/internal/fsm/states/alive.go +++ b/cmd/metal-api/internal/fsm/states/alive.go @@ -7,6 +7,7 @@ import ( ) type AliveState struct { + noopState log *zap.SugaredLogger container *metal.ProvisioningEventContainer event *metal.ProvisioningEvent diff --git a/cmd/metal-api/internal/fsm/states/booting-new-kernel.go b/cmd/metal-api/internal/fsm/states/booting-new-kernel.go index aee66f7c7..626525555 100644 --- a/cmd/metal-api/internal/fsm/states/booting-new-kernel.go +++ b/cmd/metal-api/internal/fsm/states/booting-new-kernel.go @@ -6,6 +6,7 @@ import ( ) type BootingNewKernelState struct { + noopState container *metal.ProvisioningEventContainer event *metal.ProvisioningEvent } diff --git a/cmd/metal-api/internal/fsm/states/crashed.go b/cmd/metal-api/internal/fsm/states/crashed.go index 89556e825..ced108248 100644 --- a/cmd/metal-api/internal/fsm/states/crashed.go +++ b/cmd/metal-api/internal/fsm/states/crashed.go @@ -6,6 +6,7 @@ import ( ) type CrashState struct { + noopState container *metal.ProvisioningEventContainer event *metal.ProvisioningEvent } diff --git a/cmd/metal-api/internal/fsm/states/initial.go b/cmd/metal-api/internal/fsm/states/initial.go index 1fd8c5a22..dd535d883 100644 --- a/cmd/metal-api/internal/fsm/states/initial.go +++ b/cmd/metal-api/internal/fsm/states/initial.go @@ -8,6 +8,7 @@ import ( ) type InitialState struct { + noopState container *metal.ProvisioningEventContainer event *metal.ProvisioningEvent } diff --git a/cmd/metal-api/internal/fsm/states/installing.go b/cmd/metal-api/internal/fsm/states/installing.go index 7aebefa04..6622aae77 100644 --- a/cmd/metal-api/internal/fsm/states/installing.go +++ b/cmd/metal-api/internal/fsm/states/installing.go @@ -6,6 +6,7 @@ import ( ) type InstallingState struct { + noopState container *metal.ProvisioningEventContainer event *metal.ProvisioningEvent } diff --git a/cmd/metal-api/internal/fsm/states/machine-reclaim.go b/cmd/metal-api/internal/fsm/states/machine-reclaim.go index 
fb66c3b29..3c454979d 100644 --- a/cmd/metal-api/internal/fsm/states/machine-reclaim.go +++ b/cmd/metal-api/internal/fsm/states/machine-reclaim.go @@ -6,6 +6,7 @@ import ( ) type MachineReclaimState struct { + noopState container *metal.ProvisioningEventContainer event *metal.ProvisioningEvent } diff --git a/cmd/metal-api/internal/fsm/states/noop-state.go b/cmd/metal-api/internal/fsm/states/noop-state.go new file mode 100644 index 000000000..3d2effe1d --- /dev/null +++ b/cmd/metal-api/internal/fsm/states/noop-state.go @@ -0,0 +1,10 @@ +package states + +import ( + "github.com/looplab/fsm" +) + +type noopState struct{} + +func (_ noopState) OnTransition(e *fsm.Event) {} +func (_ noopState) OnLeave(e *fsm.Event) {} diff --git a/cmd/metal-api/internal/fsm/states/phoned-home.go b/cmd/metal-api/internal/fsm/states/phoned-home.go index a3c5c4ffa..e0d665f27 100644 --- a/cmd/metal-api/internal/fsm/states/phoned-home.go +++ b/cmd/metal-api/internal/fsm/states/phoned-home.go @@ -12,6 +12,7 @@ import ( const failedMachineReclaimThreshold = 5 * time.Minute type PhonedHomeState struct { + noopState log *zap.SugaredLogger container *metal.ProvisioningEventContainer event *metal.ProvisioningEvent diff --git a/cmd/metal-api/internal/fsm/states/planned-reboot.go b/cmd/metal-api/internal/fsm/states/planned-reboot.go index 5b4268e9e..5b30ae211 100644 --- a/cmd/metal-api/internal/fsm/states/planned-reboot.go +++ b/cmd/metal-api/internal/fsm/states/planned-reboot.go @@ -6,6 +6,7 @@ import ( ) type PlannedRebootState struct { + noopState container *metal.ProvisioningEventContainer event *metal.ProvisioningEvent } diff --git a/cmd/metal-api/internal/fsm/states/preparing.go b/cmd/metal-api/internal/fsm/states/preparing.go index c42ec2fb6..fc33682c1 100644 --- a/cmd/metal-api/internal/fsm/states/preparing.go +++ b/cmd/metal-api/internal/fsm/states/preparing.go @@ -6,6 +6,7 @@ import ( ) type PreparingState struct { + noopState container *metal.ProvisioningEventContainer event *metal.ProvisioningEvent } diff --git a/cmd/metal-api/internal/fsm/states/pxe-booting.go b/cmd/metal-api/internal/fsm/states/pxe-booting.go index d2661b5b3..b7a8e5ae3 100644 --- a/cmd/metal-api/internal/fsm/states/pxe-booting.go +++ b/cmd/metal-api/internal/fsm/states/pxe-booting.go @@ -6,6 +6,7 @@ import ( ) type PXEBootingState struct { + noopState container *metal.ProvisioningEventContainer event *metal.ProvisioningEvent } diff --git a/cmd/metal-api/internal/fsm/states/registering.go b/cmd/metal-api/internal/fsm/states/registering.go index 7efcc50af..eebc99b36 100644 --- a/cmd/metal-api/internal/fsm/states/registering.go +++ b/cmd/metal-api/internal/fsm/states/registering.go @@ -6,6 +6,7 @@ import ( ) type RegisteringState struct { + noopState container *metal.ProvisioningEventContainer event *metal.ProvisioningEvent } diff --git a/cmd/metal-api/internal/fsm/states/states.go b/cmd/metal-api/internal/fsm/states/states.go index 0ff72dd95..d11289216 100644 --- a/cmd/metal-api/internal/fsm/states/states.go +++ b/cmd/metal-api/internal/fsm/states/states.go @@ -26,6 +26,7 @@ const ( type FSMState interface { OnTransition(e *fsm.Event) + OnLeave(e *fsm.Event) } type stateType string @@ -40,7 +41,6 @@ type StateConfig struct { Event *metal.ProvisioningEvent Scaler *scaler.PoolScaler Machine *metal.Machine - Partition *metal.Partition } func (c *StateConfig) Validate() error { diff --git a/cmd/metal-api/internal/fsm/states/waiting.go b/cmd/metal-api/internal/fsm/states/waiting.go index 2179d7fa4..0085e3f79 100644 --- 
a/cmd/metal-api/internal/fsm/states/waiting.go +++ b/cmd/metal-api/internal/fsm/states/waiting.go @@ -18,12 +18,12 @@ func (p *WaitingState) OnTransition(e *fsm.Event) { appendEventToContainer(p.config.Event, p.config.Container) if p.config.Scaler != nil { - e.Err = p.config.Scaler.AdjustNumberOfWaitingMachines(p.config.Partition) + e.Err = p.config.Scaler.AdjustNumberOfWaitingMachines() } } func (p *WaitingState) OnLeave(e *fsm.Event) { if p.config.Scaler != nil { - e.Err = p.config.Scaler.AdjustNumberOfWaitingMachines(p.config.Partition) + e.Err = p.config.Scaler.AdjustNumberOfWaitingMachines() } } diff --git a/cmd/metal-api/internal/metal/partition.go b/cmd/metal-api/internal/metal/partition.go index 1acd92a74..41f09cdc1 100644 --- a/cmd/metal-api/internal/metal/partition.go +++ b/cmd/metal-api/internal/metal/partition.go @@ -1,13 +1,18 @@ package metal +import ( + "errors" + "strconv" + "strings" +) + // A Partition represents a location. type Partition struct { Base BootConfiguration BootConfiguration `rethinkdb:"bootconfig" json:"bootconfig"` MgmtServiceAddress string `rethinkdb:"mgmtserviceaddr" json:"mgmtserviceaddr"` PrivateNetworkPrefixLength uint8 `rethinkdb:"privatenetworkprefixlength" json:"privatenetworkprefixlength"` - WaitingPoolMinSize string `rethinkdb:"waitingpoolminsize" json:"waitingpoolminsize"` - WaitingPoolMaxSize string `rethinkdb:"waitingpoolmaxsize" json:"waitingpoolmaxsize"` + WaitingPoolRange ScalerRange `rethinkdb:"waitingpoolrange" json:"waitingpoolrange"` } // BootConfiguration defines the metal-hammer initrd, kernel and commandline @@ -31,3 +36,58 @@ func (sz Partitions) ByID() PartitionMap { } return res } + +type ScalerRange struct { + WaitingPoolMinSize string + WaitingPoolMaxSize string +} + +func (r *ScalerRange) GetMin() (min int64, err error, inPercent bool) { + if strings.HasSuffix(r.WaitingPoolMinSize, "%") { + s := strings.Split(r.WaitingPoolMinSize, "%")[0] + min, err := strconv.ParseInt(s, 10, 64) + return min, err, true + } + min, err = strconv.ParseInt(r.WaitingPoolMinSize, 10, 64) + return min, err, false +} + +func (r *ScalerRange) GetMax() (min int64, err error, inPercent bool) { + if strings.HasSuffix(r.WaitingPoolMaxSize, "%") { + s := strings.Split(r.WaitingPoolMaxSize, "%")[0] + min, err := strconv.ParseInt(s, 10, 64) + return min, err, true + } + min, err = strconv.ParseInt(r.WaitingPoolMaxSize, 10, 64) + return min, err, false +} + +func (r *ScalerRange) IsDisabled() bool { + return r.WaitingPoolMinSize == "" && r.WaitingPoolMaxSize == "" +} + +func (r *ScalerRange) Validate() error { + min, err, minInPercent := r.GetMin() + if err != nil { + return errors.New("could not parse minimum waiting pool size") + } + + max, err, maxInPercent := r.GetMax() + if err != nil { + return errors.New("could not parse maximum waiting pool size") + } + + if minInPercent != maxInPercent { + return errors.New("minimum and maximum pool sizes must either be both in percent or both an absolute value") + } + + if min < 0 || max < 0 { + return errors.New("minimum and maximum waiting pool sizes must be greater or equal to 0") + } + + if max < min { + return errors.New("minimum waiting pool size must be less or equal to maximum pool size") + } + + return nil +} diff --git a/cmd/metal-api/internal/metal/partition_test.go b/cmd/metal-api/internal/metal/partition_test.go index e0a6f8b64..b9e0c973a 100644 --- a/cmd/metal-api/internal/metal/partition_test.go +++ b/cmd/metal-api/internal/metal/partition_test.go @@ -1,6 +1,7 @@ package metal import ( + "errors" 
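// Illustration (assumed usage, not part of this patch): the ScalerRange added to
// partition.go above accepts absolute values ("10") as well as percentages ("25%")
// for both bounds. A minimal sketch of how GetMin, GetMax and Validate can be
// consumed as they are defined in this patch; the example values are made up:
//
//	r := ScalerRange{WaitingPoolMinSize: "25%", WaitingPoolMaxSize: "30%"}
//	if err := r.Validate(); err != nil {
//		// reject the partition create/update request
//	}
//	min, _, inPercent := r.GetMin() // min = 25, inPercent = true
//	max, _, _ := r.GetMax()         // max = 30
//	// when inPercent is true, the pool scaler rescales min and max by the total
//	// number of machines in the partition before comparing them to the number
//	// of waiting machines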
"reflect" "testing" ) @@ -50,3 +51,66 @@ func TestPartitions_ByID(t *testing.T) { }) } } + +func TestScalerRange_Validate(t *testing.T) { + tests := []struct { + name string + min string + max string + wantErr error + }{ + { + name: "min max format mismatch", + min: "15%", + max: "30", + wantErr: errors.New("minimum and maximum pool sizes must either be both in percent or both an absolute value"), + }, + { + name: "parse error for min", + min: "15#%", + max: "30", + wantErr: errors.New("could not parse minimum waiting pool size"), + }, + { + name: "parse error for max", + min: "15", + max: "#30", + wantErr: errors.New("could not parse maximum waiting pool size"), + }, + { + name: "negative value for min", + min: "-15", + max: "0", + wantErr: errors.New("minimum and maximum waiting pool sizes must be greater or equal to 0"), + }, + { + name: "max less than min", + min: "15", + max: "0", + wantErr: errors.New("minimum waiting pool size must be less or equal to maximum pool size"), + }, + { + name: "everything okay", + min: "15", + max: "30", + wantErr: nil, + }, + { + name: "everything okay in percent", + min: "15%", + max: "30%", + wantErr: nil, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + r := &ScalerRange{ + WaitingPoolMinSize: tt.min, + WaitingPoolMaxSize: tt.max, + } + if err := r.Validate(); (err != nil || tt.wantErr != nil) && (err == nil || tt.wantErr == nil || err.Error() != tt.wantErr.Error()) { + t.Errorf("ScalerRange.Validate() error = %v, wantErr %v", err, tt.wantErr) + } + }) + } +} diff --git a/cmd/metal-api/internal/scaler/poolscaler.go b/cmd/metal-api/internal/scaler/poolscaler.go index 8c135213c..87a6dd876 100644 --- a/cmd/metal-api/internal/scaler/poolscaler.go +++ b/cmd/metal-api/internal/scaler/poolscaler.go @@ -3,8 +3,6 @@ package scaler import ( "math" "math/rand" - "strconv" - "strings" "time" "github.com/metal-stack/metal-api/cmd/metal-api/internal/metal" @@ -13,12 +11,14 @@ import ( type ( PoolScaler struct { - log *zap.SugaredLogger - manager MachineManager + log *zap.SugaredLogger + manager MachineManager + partition metal.Partition } PoolScalerConfig struct { - Log *zap.SugaredLogger - Manager MachineManager + Log *zap.SugaredLogger + Manager MachineManager + Partition metal.Partition } MachineManager interface { AllMachines() (metal.Machines, error) @@ -30,29 +30,34 @@ type ( } ) -func NewPoolScaler(log *zap.SugaredLogger, manager MachineManager) *PoolScaler { +func NewPoolScaler(c *PoolScalerConfig) *PoolScaler { return &PoolScaler{ - log: log, - manager: manager, + log: c.Log.With("partition", c.Partition.ID), + manager: c.Manager, + partition: c.Partition, } } // AdjustNumberOfWaitingMachines compares the number of waiting machines to the required pool size of the partition and shuts down or powers on machines accordingly -func (p *PoolScaler) AdjustNumberOfWaitingMachines(partition *metal.Partition) error { +func (p *PoolScaler) AdjustNumberOfWaitingMachines() error { + if p.partition.WaitingPoolRange.IsDisabled() { + return nil + } + waitingMachines, err := p.manager.WaitingMachines() if err != nil { return err } - poolSizeExcess := 0 - poolSizeExcess = p.calculatePoolSizeExcess(len(waitingMachines), partition.WaitingPoolMinSize, partition.WaitingPoolMaxSize) + var poolSizeExcess int + poolSizeExcess = p.calculatePoolSizeExcess(len(waitingMachines), p.partition.WaitingPoolRange) if poolSizeExcess == 0 { return nil } if poolSizeExcess > 0 { - p.log.Infow("shut down spare machines", "number", poolSizeExcess, "partition", 
partition.ID) + p.log.Infow("shut down spare machines", "number", poolSizeExcess) err := p.shutdownMachines(waitingMachines, poolSizeExcess) if err != nil { return err @@ -65,7 +70,7 @@ func (p *PoolScaler) AdjustNumberOfWaitingMachines(partition *metal.Partition) e poolSizeExcess = int(math.Abs(float64(poolSizeExcess))) if len(shutdownMachines) < poolSizeExcess { - p.log.Infow("not enough machines to meet waiting pool size; power on all remaining", "number", len(shutdownMachines), "partition", partition.ID) + p.log.Infow("not enough machines to meet waiting pool size; power on all remaining", "number", len(shutdownMachines)) err = p.powerOnMachines(shutdownMachines, len(shutdownMachines)) if err != nil { return err @@ -73,7 +78,7 @@ func (p *PoolScaler) AdjustNumberOfWaitingMachines(partition *metal.Partition) e return nil } - p.log.Infow("power on missing machines", "number", poolSizeExcess, "partition", partition.ID) + p.log.Infow("power on missing machines", "number", poolSizeExcess) err = p.powerOnMachines(shutdownMachines, poolSizeExcess) if err != nil { return err @@ -86,56 +91,37 @@ func (p *PoolScaler) AdjustNumberOfWaitingMachines(partition *metal.Partition) e // calculatePoolSizeExcess checks if there are less waiting machines than minRequired or more than maxRequired // if yes, it returns the difference between the actual amount of waiting machines and the average of minRequired and maxRequired // if no, it returns 0 -func (p *PoolScaler) calculatePoolSizeExcess(actual int, minRequired, maxRequired string) int { - if strings.Contains(minRequired, "%") != strings.Contains(maxRequired, "%") { - return 0 - } - - if !strings.Contains(minRequired, "%") && !strings.Contains(maxRequired, "%") { - min, err := strconv.ParseInt(minRequired, 10, 64) - if err != nil { - return 0 - } - - max, err := strconv.ParseInt(maxRequired, 10, 64) - if err != nil { - return 0 - } - - if int64(actual) >= min && int64(actual) <= max { - return 0 - } - - average := int(math.Round((float64(max) + float64(min)) / 2)) - return actual - average - } - - minString := strings.Split(minRequired, "%")[0] - minPercent, err := strconv.ParseInt(minString, 10, 64) +func (p *PoolScaler) calculatePoolSizeExcess(actual int, scalerRange metal.ScalerRange) int { + rangeMin, err, inPercent := scalerRange.GetMin() if err != nil { return 0 } - maxString := strings.Split(maxRequired, "%")[0] - maxPercent, err := strconv.ParseInt(maxString, 10, 64) + rangeMax, err, _ := scalerRange.GetMax() if err != nil { return 0 } - allMachines, err := p.manager.AllMachines() - if err != nil { - return 0 - } + min := rangeMin + max := rangeMax + average := float64(rangeMax) + float64(rangeMin)/2 - min := int(math.Round(float64(len(allMachines)) * float64(minPercent) / 100)) - max := int(math.Round(float64(len(allMachines)) * float64(maxPercent) / 100)) + if inPercent { + allMachines, err := p.manager.AllMachines() + if err != nil { + return 0 + } - if actual >= min && actual <= max { + min = int64(math.Round(float64(len(allMachines)) * float64(rangeMin) / 100)) + max = int64(math.Round(float64(len(allMachines)) * float64(rangeMax) / 100)) + average = float64(len(allMachines)) * average / 100 + } + + if int64(actual) >= min && int64(actual) <= max { return 0 } - average := int(math.Round((float64(max) + float64(min)) / 2)) - return actual - average + return actual - int(average) } func randomIndices(n, k int) []int { @@ -144,13 +130,10 @@ func randomIndices(n, k int) []int { indices[i] = i } - for i := 0; i < (n - k); i++ { - 
rand.Seed(time.Now().UnixNano()) - r := rand.Intn(len(indices)) //nolint:gosec - indices = append(indices[:r], indices[r+1:]...) - } + rand.Seed(time.Now().UnixNano()) + rand.Shuffle(len(indices), func(i, j int) { indices[i], indices[j] = indices[j], indices[i] }) - return indices + return indices[:k] } func (p *PoolScaler) shutdownMachines(machines metal.Machines, number int) error { diff --git a/cmd/metal-api/internal/scaler/poolscaler_test.go b/cmd/metal-api/internal/scaler/poolscaler_test.go index 3f44f0b03..a5f533fcd 100644 --- a/cmd/metal-api/internal/scaler/poolscaler_test.go +++ b/cmd/metal-api/internal/scaler/poolscaler_test.go @@ -10,24 +10,31 @@ import ( func TestPoolScaler_AdjustNumberOfWaitingMachines(t *testing.T) { tests := []struct { name string - m *metal.Machine partition *metal.Partition mockFn func(mock *MockMachineManager) wantErr bool }{ { - name: "waiting machines match pool size; do nothing", - m: &metal.Machine{SizeID: ""}, - partition: &metal.Partition{WaitingPoolMinSize: "10", WaitingPoolMaxSize: "20"}, + name: "waiting machines match pool size; do nothing", + partition: &metal.Partition{ + WaitingPoolRange: metal.ScalerRange{ + WaitingPoolMinSize: "10", + WaitingPoolMaxSize: "20", + }, + }, mockFn: func(mock *MockMachineManager) { mock.On("WaitingMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 10)...), nil) }, wantErr: false, }, { - name: "waiting machines match pool size in percent; do nothing", - m: &metal.Machine{SizeID: ""}, - partition: &metal.Partition{WaitingPoolMinSize: "25%", WaitingPoolMaxSize: "30%"}, + name: "waiting machines match pool size in percent; do nothing", + partition: &metal.Partition{ + WaitingPoolRange: metal.ScalerRange{ + WaitingPoolMinSize: "25%", + WaitingPoolMaxSize: "30%", + }, + }, mockFn: func(mock *MockMachineManager) { mock.On("AllMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 150)...), nil) mock.On("WaitingMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 45)...), nil) @@ -35,9 +42,13 @@ func TestPoolScaler_AdjustNumberOfWaitingMachines(t *testing.T) { wantErr: false, }, { - name: "more waiting machines needed; power on 8 machines", - m: &metal.Machine{SizeID: ""}, - partition: &metal.Partition{WaitingPoolMinSize: "15", WaitingPoolMaxSize: "20"}, + name: "more waiting machines needed; power on 8 machines", + partition: &metal.Partition{ + WaitingPoolRange: metal.ScalerRange{ + WaitingPoolMinSize: "15", + WaitingPoolMaxSize: "20", + }, + }, mockFn: func(mock *MockMachineManager) { mock.On("WaitingMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 10)...), nil) mock.On("ShutdownMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 10)...), nil) @@ -46,9 +57,13 @@ func TestPoolScaler_AdjustNumberOfWaitingMachines(t *testing.T) { wantErr: false, }, { - name: "more waiting machines needed in percent; power on 10 machines", - m: &metal.Machine{SizeID: ""}, - partition: &metal.Partition{WaitingPoolMinSize: "30%", WaitingPoolMaxSize: "40%"}, + name: "more waiting machines needed in percent; power on 10 machines", + partition: &metal.Partition{ + WaitingPoolRange: metal.ScalerRange{ + WaitingPoolMinSize: "30%", + WaitingPoolMaxSize: "40%", + }, + }, mockFn: func(mock *MockMachineManager) { mock.On("AllMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 150)...), nil) mock.On("WaitingMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 35)...), nil) @@ -58,9 +73,13 @@ func 
TestPoolScaler_AdjustNumberOfWaitingMachines(t *testing.T) { wantErr: false, }, { - name: "pool size exceeded; power off 7 machines", - m: &metal.Machine{SizeID: ""}, - partition: &metal.Partition{WaitingPoolMinSize: "10", WaitingPoolMaxSize: "15"}, + name: "pool size exceeded; power off 7 machines", + partition: &metal.Partition{ + WaitingPoolRange: metal.ScalerRange{ + WaitingPoolMinSize: "10", + WaitingPoolMaxSize: "15", + }, + }, mockFn: func(mock *MockMachineManager) { mock.On("WaitingMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 20)...), nil) mock.On("Shutdown", &metal.Machine{}).Return(nil).Times(7) @@ -68,9 +87,13 @@ func TestPoolScaler_AdjustNumberOfWaitingMachines(t *testing.T) { wantErr: false, }, { - name: "pool size exceeded in percent; power off 4 machines", - m: &metal.Machine{SizeID: ""}, - partition: &metal.Partition{WaitingPoolMinSize: "15%", WaitingPoolMaxSize: "20%"}, + name: "pool size exceeded in percent; power off 4 machines", + partition: &metal.Partition{ + WaitingPoolRange: metal.ScalerRange{ + WaitingPoolMinSize: "15%", + WaitingPoolMaxSize: "20%", + }, + }, mockFn: func(mock *MockMachineManager) { mock.On("AllMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 150)...), nil) mock.On("WaitingMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 31)...), nil) @@ -79,9 +102,13 @@ func TestPoolScaler_AdjustNumberOfWaitingMachines(t *testing.T) { wantErr: false, }, { - name: "more machines needed than available; power on all remaining", - m: &metal.Machine{SizeID: ""}, - partition: &metal.Partition{WaitingPoolMinSize: "30", WaitingPoolMaxSize: "40"}, + name: "more machines needed than available; power on all remaining", + partition: &metal.Partition{ + WaitingPoolRange: metal.ScalerRange{ + WaitingPoolMinSize: "30", + WaitingPoolMaxSize: "40", + }, + }, mockFn: func(mock *MockMachineManager) { mock.On("ShutdownMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 10)...), nil) mock.On("WaitingMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 15)...), nil) @@ -90,9 +117,13 @@ func TestPoolScaler_AdjustNumberOfWaitingMachines(t *testing.T) { wantErr: false, }, { - name: "no more machines left; do nothing", - m: &metal.Machine{SizeID: ""}, - partition: &metal.Partition{WaitingPoolMinSize: "15%", WaitingPoolMaxSize: "20%"}, + name: "no more machines left; do nothing", + partition: &metal.Partition{ + WaitingPoolRange: metal.ScalerRange{ + WaitingPoolMinSize: "15%", + WaitingPoolMaxSize: "20%", + }, + }, mockFn: func(mock *MockMachineManager) { mock.On("AllMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 150)...), nil) mock.On("ShutdownMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 0)...), nil) @@ -101,9 +132,13 @@ func TestPoolScaler_AdjustNumberOfWaitingMachines(t *testing.T) { wantErr: false, }, { - name: "pool scaling disabled; do nothing", - m: &metal.Machine{SizeID: ""}, - partition: &metal.Partition{WaitingPoolMinSize: "0%", WaitingPoolMaxSize: "100%"}, + name: "pool scaling disabled; do nothing", + partition: &metal.Partition{ + WaitingPoolRange: metal.ScalerRange{ + WaitingPoolMinSize: "", + WaitingPoolMaxSize: "", + }, + }, mockFn: func(mock *MockMachineManager) { mock.On("WaitingMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 22)...), nil) mock.On("AllMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 150)...), nil) @@ -121,10 +156,11 @@ func 
TestPoolScaler_AdjustNumberOfWaitingMachines(t *testing.T) { } p := &PoolScaler{ - log: zaptest.NewLogger(t).Sugar(), - manager: manager, + log: zaptest.NewLogger(t).Sugar(), + manager: manager, + partition: *tt.partition, } - if err := p.AdjustNumberOfWaitingMachines(tt.partition); (err != nil) != tt.wantErr { + if err := p.AdjustNumberOfWaitingMachines(); (err != nil) != tt.wantErr { t.Errorf("PoolScaler.AdjustNumberOfWaitingMachines() error = %v, wantErr %v", err, tt.wantErr) } }) diff --git a/cmd/metal-api/internal/service/partition-service.go b/cmd/metal-api/internal/service/partition-service.go index 493f09ded..210dc5197 100644 --- a/cmd/metal-api/internal/service/partition-service.go +++ b/cmd/metal-api/internal/service/partition-service.go @@ -3,8 +3,6 @@ package service import ( "errors" "net/http" - "strconv" - "strings" "github.com/metal-stack/metal-api/cmd/metal-api/internal/datastore" "github.com/metal-stack/metal-api/cmd/metal-api/internal/metal" @@ -209,22 +207,17 @@ func (r *partitionResource) createPartition(request *restful.Request, response * commandLine = *requestPayload.PartitionBootConfiguration.CommandLine } - var waitingpoolminsize string - if requestPayload.PartitionWaitingPoolMinSize != nil { - waitingpoolminsize = *requestPayload.PartitionWaitingPoolMinSize - } else { - waitingpoolminsize = "0%" + waitingPoolRange := metal.ScalerRange{ + WaitingPoolMinSize: "", + WaitingPoolMaxSize: "", } - var waitingpoolmaxsize string - if requestPayload.PartitionWaitingPoolMaxSize != nil { - waitingpoolmaxsize = *requestPayload.PartitionWaitingPoolMaxSize - } else { - waitingpoolmaxsize = "100%" + if requestPayload.PartitionWaitingPoolMinSize != nil && requestPayload.PartitionWaitingPoolMaxSize != nil { + waitingPoolRange.WaitingPoolMinSize = *requestPayload.PartitionWaitingPoolMinSize + waitingPoolRange.WaitingPoolMaxSize = *requestPayload.PartitionWaitingPoolMaxSize } - err = validateWaitingPoolSize(waitingpoolminsize, waitingpoolmaxsize) - if err != nil { + if err = waitingPoolRange.Validate(); err != nil { r.sendError(request, response, httperrors.BadRequest(err)) return } @@ -242,8 +235,7 @@ func (r *partitionResource) createPartition(request *restful.Request, response * KernelURL: kernelURL, CommandLine: commandLine, }, - WaitingPoolMinSize: waitingpoolminsize, - WaitingPoolMaxSize: waitingpoolmaxsize, + WaitingPoolRange: waitingPoolRange, } fqn := metal.TopicMachine.GetFQN(p.GetID()) @@ -326,29 +318,20 @@ func (r *partitionResource) updatePartition(request *restful.Request, response * newPartition.BootConfiguration.CommandLine = *requestPayload.PartitionBootConfiguration.CommandLine } - var waitingpoolminsize string - if requestPayload.PartitionWaitingPoolMinSize != nil { - waitingpoolminsize = *requestPayload.PartitionWaitingPoolMinSize - } else { - waitingpoolminsize = "0%" - } + if requestPayload.PartitionWaitingPoolMinSize != nil && requestPayload.PartitionWaitingPoolMaxSize != nil { + waitingPoolRange := metal.ScalerRange{ + WaitingPoolMinSize: *requestPayload.PartitionWaitingPoolMinSize, + WaitingPoolMaxSize: *requestPayload.PartitionWaitingPoolMaxSize, + } - var waitingpoolmaxsize string - if requestPayload.PartitionWaitingPoolMaxSize != nil { - waitingpoolmaxsize = *requestPayload.PartitionWaitingPoolMaxSize - } else { - waitingpoolmaxsize = "100%" - } + if err = waitingPoolRange.Validate(); err != nil { + r.sendError(request, response, httperrors.BadRequest(err)) + return + } - err = validateWaitingPoolSize(waitingpoolminsize, waitingpoolmaxsize) - if err != 
nil { - r.sendError(request, response, httperrors.BadRequest(err)) - return + newPartition.WaitingPoolRange = waitingPoolRange } - newPartition.WaitingPoolMinSize = waitingpoolminsize - newPartition.WaitingPoolMaxSize = waitingpoolmaxsize - err = r.ds.UpdatePartition(oldPartition, &newPartition) if err != nil { r.sendError(request, response, httperrors.BadRequest(err)) @@ -494,44 +477,3 @@ func (r *partitionResource) calcPartitionCapacity(pcr *v1.PartitionCapacityReque return partitionCapacities, err } - -func validateWaitingPoolSize(minString, maxString string) error { - if strings.Contains(minString, "%") != strings.Contains(maxString, "%") { - return errors.New("minimum and maximum pool sizes must either be both in percent or both an absolute value") - } - - if strings.Contains(minString, "%") && strings.Contains(maxString, "%") { - minTokens := strings.Split(minString, "%") - maxTokens := strings.Split(maxString, "%") - if len(minTokens) != 2 || minTokens[1] != "" { - return errors.New("invalid format for minimum waiting pool sizes") - } - - if len(maxTokens) != 2 || maxTokens[1] != "" { - return errors.New("invalid format for maximum waiting pool sizes") - } - - minString = minTokens[0] - maxString = maxTokens[0] - } - - min, err := strconv.ParseInt(minString, 10, 64) - if err != nil { - return errors.New("could not parse minimum waiting pool size") - } - - max, err := strconv.ParseInt(maxString, 10, 64) - if err != nil { - return errors.New("could not parse maximum waiting pool size") - } - - if min < 0 || max < 0 { - return errors.New("minimum and maximum waiting pool sizes must be greater or equal to 0") - } - - if max < min { - return errors.New("minimum waiting pool size must be less or equal to maximum pool size") - } - - return nil -} diff --git a/cmd/metal-api/internal/service/partition-service_test.go b/cmd/metal-api/internal/service/partition-service_test.go index ea9f99dd2..89a4b033a 100644 --- a/cmd/metal-api/internal/service/partition-service_test.go +++ b/cmd/metal-api/internal/service/partition-service_test.go @@ -3,7 +3,6 @@ package service import ( "bytes" "encoding/json" - "errors" "fmt" "net/http" "net/http/httptest" @@ -271,69 +270,3 @@ func TestPartitionCapacity(t *testing.T) { require.Equal(t, 5, c.Total) require.Equal(t, 0, c.Free) } - -func Test_validateWaitingPoolSize(t *testing.T) { - tests := []struct { - name string - min string - max string - wantErr error - }{ - { - name: "min max format mismatch", - min: "15%", - max: "30", - wantErr: errors.New("minimum and maximum pool sizes must either be both in percent or both an absolute value"), - }, - { - name: "invalid format for min in percent", - min: "15%#", - max: "30%", - wantErr: errors.New("invalid format for minimum waiting pool sizes"), - }, - { - name: "parse error for min", - min: "15#", - max: "30", - wantErr: errors.New("could not parse minimum waiting pool size"), - }, - { - name: "parse error for max", - min: "15", - max: "30#", - wantErr: errors.New("could not parse maximum waiting pool size"), - }, - { - name: "negative value for min", - min: "-15", - max: "0", - wantErr: errors.New("minimum and maximum waiting pool sizes must be greater or equal to 0"), - }, - { - name: "max less than min", - min: "15", - max: "0", - wantErr: errors.New("minimum waiting pool size must be less or equal to maximum pool size"), - }, - { - name: "everything okay", - min: "15", - max: "30", - wantErr: nil, - }, - { - name: "everything okay in percent", - min: "15%", - max: "30%", - wantErr: nil, - }, - } - for i := 
range tests { - tt := tests[i] - t.Run(tt.name, func(t *testing.T) { - if err := validateWaitingPoolSize(tt.min, tt.max); (err != nil || tt.wantErr != nil) && (err == nil || tt.wantErr == nil || err.Error() != tt.wantErr.Error()) { - t.Errorf("validateWaitingPoolSize() error = %v, wantErr %v", err, tt.wantErr) - } - }) - } -} From 4d400cf50936b41375451d007443f838f040c36f Mon Sep 17 00:00:00 2001 From: iljarotar Date: Tue, 6 Dec 2022 11:31:17 +0100 Subject: [PATCH 13/45] linter --- cmd/metal-api/internal/metal/partition_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmd/metal-api/internal/metal/partition_test.go b/cmd/metal-api/internal/metal/partition_test.go index b9e0c973a..f58612836 100644 --- a/cmd/metal-api/internal/metal/partition_test.go +++ b/cmd/metal-api/internal/metal/partition_test.go @@ -102,7 +102,8 @@ func TestScalerRange_Validate(t *testing.T) { wantErr: nil, }, } - for _, tt := range tests { + for i := range tests { + tt := tests[i] t.Run(tt.name, func(t *testing.T) { r := &ScalerRange{ WaitingPoolMinSize: tt.min, From 28473cd77694c70b9c407657c2232c0ff857098b Mon Sep 17 00:00:00 2001 From: iljarotar Date: Tue, 6 Dec 2022 11:37:55 +0100 Subject: [PATCH 14/45] fix pool size validation --- cmd/metal-api/internal/metal/partition.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cmd/metal-api/internal/metal/partition.go b/cmd/metal-api/internal/metal/partition.go index 41f09cdc1..5b31fe91c 100644 --- a/cmd/metal-api/internal/metal/partition.go +++ b/cmd/metal-api/internal/metal/partition.go @@ -67,6 +67,10 @@ func (r *ScalerRange) IsDisabled() bool { } func (r *ScalerRange) Validate() error { + if r.IsDisabled() { + return nil + } + min, err, minInPercent := r.GetMin() if err != nil { return errors.New("could not parse minimum waiting pool size") From f3f3e1f2289b0822759415f21198cf4065239081 Mon Sep 17 00:00:00 2001 From: iljarotar Date: Tue, 6 Dec 2022 11:52:04 +0100 Subject: [PATCH 15/45] fix calculatePoolSizeExcess function --- cmd/metal-api/internal/scaler/poolscaler.go | 4 ++-- cmd/metal-api/internal/scaler/poolscaler_test.go | 8 ++------ 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/cmd/metal-api/internal/scaler/poolscaler.go b/cmd/metal-api/internal/scaler/poolscaler.go index 87a6dd876..c8c56a3ed 100644 --- a/cmd/metal-api/internal/scaler/poolscaler.go +++ b/cmd/metal-api/internal/scaler/poolscaler.go @@ -104,7 +104,7 @@ func (p *PoolScaler) calculatePoolSizeExcess(actual int, scalerRange metal.Scale min := rangeMin max := rangeMax - average := float64(rangeMax) + float64(rangeMin)/2 + average := (float64(rangeMax) + float64(rangeMin)) / 2 if inPercent { allMachines, err := p.manager.AllMachines() @@ -121,7 +121,7 @@ func (p *PoolScaler) calculatePoolSizeExcess(actual int, scalerRange metal.Scale return 0 } - return actual - int(average) + return actual - int(math.Round(average)) } func randomIndices(n, k int) []int { diff --git a/cmd/metal-api/internal/scaler/poolscaler_test.go b/cmd/metal-api/internal/scaler/poolscaler_test.go index a5f533fcd..4393583fa 100644 --- a/cmd/metal-api/internal/scaler/poolscaler_test.go +++ b/cmd/metal-api/internal/scaler/poolscaler_test.go @@ -87,7 +87,7 @@ func TestPoolScaler_AdjustNumberOfWaitingMachines(t *testing.T) { wantErr: false, }, { - name: "pool size exceeded in percent; power off 4 machines", + name: "pool size exceeded in percent; power off 5 machines", partition: &metal.Partition{ WaitingPoolRange: metal.ScalerRange{ WaitingPoolMinSize: "15%", @@ -97,7 +97,7 @@ func 
TestPoolScaler_AdjustNumberOfWaitingMachines(t *testing.T) { mockFn: func(mock *MockMachineManager) { mock.On("AllMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 150)...), nil) mock.On("WaitingMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 31)...), nil) - mock.On("Shutdown", &metal.Machine{}).Return(nil).Times(4) + mock.On("Shutdown", &metal.Machine{}).Return(nil).Times(5) }, wantErr: false, }, @@ -139,10 +139,6 @@ func TestPoolScaler_AdjustNumberOfWaitingMachines(t *testing.T) { WaitingPoolMaxSize: "", }, }, - mockFn: func(mock *MockMachineManager) { - mock.On("WaitingMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 22)...), nil) - mock.On("AllMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 150)...), nil) - }, wantErr: false, }, } From 79d82ce4340e685fb1a3529159590a3c2d4c589f Mon Sep 17 00:00:00 2001 From: iljarotar Date: Tue, 6 Dec 2022 11:56:43 +0100 Subject: [PATCH 16/45] spec --- spec/metal-api.json | 9 --------- 1 file changed, 9 deletions(-) diff --git a/spec/metal-api.json b/spec/metal-api.json index e72889249..5c78821cb 100644 --- a/spec/metal-api.json +++ b/spec/metal-api.json @@ -228,9 +228,6 @@ "type": "string" }, "type": "array" - }, - "waiting": { - "type": "boolean" } } }, @@ -1128,9 +1125,6 @@ "type": "string" }, "type": "array" - }, - "waiting": { - "type": "boolean" } } }, @@ -2241,9 +2235,6 @@ "type": "string" }, "type": "array" - }, - "waiting": { - "type": "boolean" } } }, From 9faf66c53041aa7efe32682970e0eb87b115beee Mon Sep 17 00:00:00 2001 From: iljarotar Date: Thu, 8 Dec 2022 11:18:28 +0100 Subject: [PATCH 17/45] remove scaler range from partition; constructor for scaler range --- cmd/metal-api/internal/fsm/events.go | 3 - cmd/metal-api/internal/metal/partition.go | 43 ++++++++---- .../internal/metal/partition_test.go | 4 ++ cmd/metal-api/internal/scaler/poolscaler.go | 31 ++++----- .../internal/scaler/poolscaler_test.go | 65 +++++++------------ .../internal/service/partition-service.go | 32 ++++----- .../internal/service/v1/partition.go | 4 +- 7 files changed, 87 insertions(+), 95 deletions(-) diff --git a/cmd/metal-api/internal/fsm/events.go b/cmd/metal-api/internal/fsm/events.go index 1d6f6aa0e..ee6feff56 100644 --- a/cmd/metal-api/internal/fsm/events.go +++ b/cmd/metal-api/internal/fsm/events.go @@ -160,8 +160,5 @@ func eventCallbacks(config *states.StateConfig) fsm.Callbacks { callbacks["leave_"+name] = state.OnLeave } - waiting := allStates[states.Waiting.String()].(*states.WaitingState) - callbacks["leave_"+states.Waiting.String()] = waiting.OnLeave - return callbacks } diff --git a/cmd/metal-api/internal/metal/partition.go b/cmd/metal-api/internal/metal/partition.go index 5b31fe91c..8cf647266 100644 --- a/cmd/metal-api/internal/metal/partition.go +++ b/cmd/metal-api/internal/metal/partition.go @@ -2,6 +2,7 @@ package metal import ( "errors" + "math" "strconv" "strings" ) @@ -12,7 +13,8 @@ type Partition struct { BootConfiguration BootConfiguration `rethinkdb:"bootconfig" json:"bootconfig"` MgmtServiceAddress string `rethinkdb:"mgmtserviceaddr" json:"mgmtserviceaddr"` PrivateNetworkPrefixLength uint8 `rethinkdb:"privatenetworkprefixlength" json:"privatenetworkprefixlength"` - WaitingPoolRange ScalerRange `rethinkdb:"waitingpoolrange" json:"waitingpoolrange"` + WaitingPoolMinSize string `rethinkdb:"waitingpoolminsize" json:"waitingpoolminsize"` + WaitingPoolMaxSize string `rethinkdb:"waitingpoolmaxsize" json:"waitingpoolmaxsize"` } // BootConfiguration defines the 
metal-hammer initrd, kernel and commandline @@ -42,24 +44,41 @@ type ScalerRange struct { WaitingPoolMaxSize string } -func (r *ScalerRange) GetMin() (min int64, err error, inPercent bool) { +func NewScalerRange(min, max string) (*ScalerRange, error) { + r := &ScalerRange{WaitingPoolMinSize: min, WaitingPoolMaxSize: max} + + if err := r.Validate(); err != nil { + return nil, err + } + return r, nil +} + +func (r *ScalerRange) Min(of int) (int64, error) { if strings.HasSuffix(r.WaitingPoolMinSize, "%") { s := strings.Split(r.WaitingPoolMinSize, "%")[0] min, err := strconv.ParseInt(s, 10, 64) - return min, err, true + if err != nil { + return 0, err + } + + return int64(math.Round(float64(of) * float64(min) / 100)), nil } - min, err = strconv.ParseInt(r.WaitingPoolMinSize, 10, 64) - return min, err, false + + return strconv.ParseInt(r.WaitingPoolMinSize, 10, 64) } -func (r *ScalerRange) GetMax() (min int64, err error, inPercent bool) { +func (r *ScalerRange) Max(of int) (int64, error) { if strings.HasSuffix(r.WaitingPoolMaxSize, "%") { s := strings.Split(r.WaitingPoolMaxSize, "%")[0] min, err := strconv.ParseInt(s, 10, 64) - return min, err, true + if err != nil { + return 0, err + } + + return int64(math.Round(float64(of) * float64(min) / 100)), nil } - min, err = strconv.ParseInt(r.WaitingPoolMaxSize, 10, 64) - return min, err, false + + return strconv.ParseInt(r.WaitingPoolMaxSize, 10, 64) } func (r *ScalerRange) IsDisabled() bool { @@ -71,17 +90,17 @@ func (r *ScalerRange) Validate() error { return nil } - min, err, minInPercent := r.GetMin() + min, err := r.Min(100) if err != nil { return errors.New("could not parse minimum waiting pool size") } - max, err, maxInPercent := r.GetMax() + max, err := r.Max(100) if err != nil { return errors.New("could not parse maximum waiting pool size") } - if minInPercent != maxInPercent { + if strings.HasSuffix(r.WaitingPoolMinSize, "%") != strings.HasSuffix(r.WaitingPoolMaxSize, "%") { return errors.New("minimum and maximum pool sizes must either be both in percent or both an absolute value") } diff --git a/cmd/metal-api/internal/metal/partition_test.go b/cmd/metal-api/internal/metal/partition_test.go index f58612836..4db0ef414 100644 --- a/cmd/metal-api/internal/metal/partition_test.go +++ b/cmd/metal-api/internal/metal/partition_test.go @@ -101,6 +101,10 @@ func TestScalerRange_Validate(t *testing.T) { max: "30%", wantErr: nil, }, + { + name: "pool scaling disabled", + wantErr: nil, + }, } for i := range tests { tt := tests[i] diff --git a/cmd/metal-api/internal/scaler/poolscaler.go b/cmd/metal-api/internal/scaler/poolscaler.go index c8c56a3ed..7f5050af4 100644 --- a/cmd/metal-api/internal/scaler/poolscaler.go +++ b/cmd/metal-api/internal/scaler/poolscaler.go @@ -40,7 +40,12 @@ func NewPoolScaler(c *PoolScalerConfig) *PoolScaler { // AdjustNumberOfWaitingMachines compares the number of waiting machines to the required pool size of the partition and shuts down or powers on machines accordingly func (p *PoolScaler) AdjustNumberOfWaitingMachines() error { - if p.partition.WaitingPoolRange.IsDisabled() { + waitingPoolRange, err := metal.NewScalerRange(p.partition.WaitingPoolMinSize, p.partition.WaitingPoolMaxSize) + if err != nil { + return err + } + + if waitingPoolRange.IsDisabled() { return nil } @@ -50,7 +55,7 @@ func (p *PoolScaler) AdjustNumberOfWaitingMachines() error { } var poolSizeExcess int - poolSizeExcess = p.calculatePoolSizeExcess(len(waitingMachines), p.partition.WaitingPoolRange) + poolSizeExcess = 
p.calculatePoolSizeExcess(len(waitingMachines), *waitingPoolRange) if poolSizeExcess == 0 { return nil @@ -92,31 +97,23 @@ func (p *PoolScaler) AdjustNumberOfWaitingMachines() error { // if yes, it returns the difference between the actual amount of waiting machines and the average of minRequired and maxRequired // if no, it returns 0 func (p *PoolScaler) calculatePoolSizeExcess(actual int, scalerRange metal.ScalerRange) int { - rangeMin, err, inPercent := scalerRange.GetMin() + allMachines, err := p.manager.AllMachines() if err != nil { return 0 } - rangeMax, err, _ := scalerRange.GetMax() + min, err := scalerRange.Min(len(allMachines)) if err != nil { return 0 } - min := rangeMin - max := rangeMax - average := (float64(rangeMax) + float64(rangeMin)) / 2 - - if inPercent { - allMachines, err := p.manager.AllMachines() - if err != nil { - return 0 - } - - min = int64(math.Round(float64(len(allMachines)) * float64(rangeMin) / 100)) - max = int64(math.Round(float64(len(allMachines)) * float64(rangeMax) / 100)) - average = float64(len(allMachines)) * average / 100 + max, err := scalerRange.Max(len(allMachines)) + if err != nil { + return 0 } + average := (float64(max) + float64(min)) / 2 + if int64(actual) >= min && int64(actual) <= max { return 0 } diff --git a/cmd/metal-api/internal/scaler/poolscaler_test.go b/cmd/metal-api/internal/scaler/poolscaler_test.go index 4393583fa..5dce1f999 100644 --- a/cmd/metal-api/internal/scaler/poolscaler_test.go +++ b/cmd/metal-api/internal/scaler/poolscaler_test.go @@ -17,23 +17,20 @@ func TestPoolScaler_AdjustNumberOfWaitingMachines(t *testing.T) { { name: "waiting machines match pool size; do nothing", partition: &metal.Partition{ - WaitingPoolRange: metal.ScalerRange{ - WaitingPoolMinSize: "10", - WaitingPoolMaxSize: "20", - }, + WaitingPoolMinSize: "10", + WaitingPoolMaxSize: "20", }, mockFn: func(mock *MockMachineManager) { mock.On("WaitingMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 10)...), nil) + mock.On("AllMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 150)...), nil) }, wantErr: false, }, { name: "waiting machines match pool size in percent; do nothing", partition: &metal.Partition{ - WaitingPoolRange: metal.ScalerRange{ - WaitingPoolMinSize: "25%", - WaitingPoolMaxSize: "30%", - }, + WaitingPoolMinSize: "25%", + WaitingPoolMaxSize: "30%", }, mockFn: func(mock *MockMachineManager) { mock.On("AllMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 150)...), nil) @@ -44,12 +41,11 @@ func TestPoolScaler_AdjustNumberOfWaitingMachines(t *testing.T) { { name: "more waiting machines needed; power on 8 machines", partition: &metal.Partition{ - WaitingPoolRange: metal.ScalerRange{ - WaitingPoolMinSize: "15", - WaitingPoolMaxSize: "20", - }, + WaitingPoolMinSize: "15", + WaitingPoolMaxSize: "20", }, mockFn: func(mock *MockMachineManager) { + mock.On("AllMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 150)...), nil) mock.On("WaitingMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 10)...), nil) mock.On("ShutdownMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 10)...), nil) mock.On("PowerOn", &metal.Machine{}).Return(nil).Times(8) @@ -59,10 +55,8 @@ func TestPoolScaler_AdjustNumberOfWaitingMachines(t *testing.T) { { name: "more waiting machines needed in percent; power on 10 machines", partition: &metal.Partition{ - WaitingPoolRange: metal.ScalerRange{ - WaitingPoolMinSize: "30%", - WaitingPoolMaxSize: "40%", - }, + 
WaitingPoolMinSize: "30%", + WaitingPoolMaxSize: "40%", }, mockFn: func(mock *MockMachineManager) { mock.On("AllMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 150)...), nil) @@ -75,13 +69,12 @@ func TestPoolScaler_AdjustNumberOfWaitingMachines(t *testing.T) { { name: "pool size exceeded; power off 7 machines", partition: &metal.Partition{ - WaitingPoolRange: metal.ScalerRange{ - WaitingPoolMinSize: "10", - WaitingPoolMaxSize: "15", - }, + WaitingPoolMinSize: "10", + WaitingPoolMaxSize: "15", }, mockFn: func(mock *MockMachineManager) { mock.On("WaitingMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 20)...), nil) + mock.On("AllMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 150)...), nil) mock.On("Shutdown", &metal.Machine{}).Return(nil).Times(7) }, wantErr: false, @@ -89,29 +82,26 @@ func TestPoolScaler_AdjustNumberOfWaitingMachines(t *testing.T) { { name: "pool size exceeded in percent; power off 5 machines", partition: &metal.Partition{ - WaitingPoolRange: metal.ScalerRange{ - WaitingPoolMinSize: "15%", - WaitingPoolMaxSize: "20%", - }, + WaitingPoolMinSize: "15%", + WaitingPoolMaxSize: "20%", }, mockFn: func(mock *MockMachineManager) { mock.On("AllMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 150)...), nil) mock.On("WaitingMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 31)...), nil) - mock.On("Shutdown", &metal.Machine{}).Return(nil).Times(5) + mock.On("Shutdown", &metal.Machine{}).Return(nil).Times(4) }, wantErr: false, }, { name: "more machines needed than available; power on all remaining", partition: &metal.Partition{ - WaitingPoolRange: metal.ScalerRange{ - WaitingPoolMinSize: "30", - WaitingPoolMaxSize: "40", - }, + WaitingPoolMinSize: "30", + WaitingPoolMaxSize: "40", }, mockFn: func(mock *MockMachineManager) { mock.On("ShutdownMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 10)...), nil) mock.On("WaitingMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 15)...), nil) + mock.On("AllMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 150)...), nil) mock.On("PowerOn", &metal.Machine{}).Return(nil).Times(10) }, wantErr: false, @@ -119,10 +109,8 @@ func TestPoolScaler_AdjustNumberOfWaitingMachines(t *testing.T) { { name: "no more machines left; do nothing", partition: &metal.Partition{ - WaitingPoolRange: metal.ScalerRange{ - WaitingPoolMinSize: "15%", - WaitingPoolMaxSize: "20%", - }, + WaitingPoolMinSize: "15%", + WaitingPoolMaxSize: "20%", }, mockFn: func(mock *MockMachineManager) { mock.On("AllMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 150)...), nil) @@ -132,14 +120,9 @@ func TestPoolScaler_AdjustNumberOfWaitingMachines(t *testing.T) { wantErr: false, }, { - name: "pool scaling disabled; do nothing", - partition: &metal.Partition{ - WaitingPoolRange: metal.ScalerRange{ - WaitingPoolMinSize: "", - WaitingPoolMaxSize: "", - }, - }, - wantErr: false, + name: "pool scaling disabled; do nothing", + partition: &metal.Partition{}, + wantErr: false, }, } for i := range tests { diff --git a/cmd/metal-api/internal/service/partition-service.go b/cmd/metal-api/internal/service/partition-service.go index 210dc5197..59c79074d 100644 --- a/cmd/metal-api/internal/service/partition-service.go +++ b/cmd/metal-api/internal/service/partition-service.go @@ -207,19 +207,13 @@ func (r *partitionResource) createPartition(request *restful.Request, response * commandLine = 
*requestPayload.PartitionBootConfiguration.CommandLine } - waitingPoolRange := metal.ScalerRange{ - WaitingPoolMinSize: "", - WaitingPoolMaxSize: "", - } - + var waitingPoolRange *metal.ScalerRange if requestPayload.PartitionWaitingPoolMinSize != nil && requestPayload.PartitionWaitingPoolMaxSize != nil { - waitingPoolRange.WaitingPoolMinSize = *requestPayload.PartitionWaitingPoolMinSize - waitingPoolRange.WaitingPoolMaxSize = *requestPayload.PartitionWaitingPoolMaxSize - } - - if err = waitingPoolRange.Validate(); err != nil { - r.sendError(request, response, httperrors.BadRequest(err)) - return + waitingPoolRange, err = metal.NewScalerRange(*requestPayload.PartitionWaitingPoolMinSize, *requestPayload.PartitionWaitingPoolMaxSize) + if err != nil { + r.sendError(request, response, httperrors.BadRequest(err)) + return + } } p := &metal.Partition{ @@ -235,7 +229,8 @@ func (r *partitionResource) createPartition(request *restful.Request, response * KernelURL: kernelURL, CommandLine: commandLine, }, - WaitingPoolRange: waitingPoolRange, + WaitingPoolMinSize: waitingPoolRange.WaitingPoolMinSize, + WaitingPoolMaxSize: waitingPoolRange.WaitingPoolMaxSize, } fqn := metal.TopicMachine.GetFQN(p.GetID()) @@ -319,17 +314,14 @@ func (r *partitionResource) updatePartition(request *restful.Request, response * } if requestPayload.PartitionWaitingPoolMinSize != nil && requestPayload.PartitionWaitingPoolMaxSize != nil { - waitingPoolRange := metal.ScalerRange{ - WaitingPoolMinSize: *requestPayload.PartitionWaitingPoolMinSize, - WaitingPoolMaxSize: *requestPayload.PartitionWaitingPoolMaxSize, - } - - if err = waitingPoolRange.Validate(); err != nil { + waitingPoolRange, err := metal.NewScalerRange(*requestPayload.PartitionWaitingPoolMinSize, *requestPayload.PartitionWaitingPoolMinSize) + if err != nil { r.sendError(request, response, httperrors.BadRequest(err)) return } - newPartition.WaitingPoolRange = waitingPoolRange + newPartition.WaitingPoolMinSize = waitingPoolRange.WaitingPoolMinSize + newPartition.WaitingPoolMaxSize = waitingPoolRange.WaitingPoolMaxSize } err = r.ds.UpdatePartition(oldPartition, &newPartition) diff --git a/cmd/metal-api/internal/service/v1/partition.go b/cmd/metal-api/internal/service/v1/partition.go index cb441c20f..92343d44e 100644 --- a/cmd/metal-api/internal/service/v1/partition.go +++ b/cmd/metal-api/internal/service/v1/partition.go @@ -19,8 +19,8 @@ type PartitionCreateRequest struct { Common PartitionBase PartitionBootConfiguration PartitionBootConfiguration `json:"bootconfig" description:"the boot configuration of this partition"` - PartitionWaitingPoolMinSize *string `json:"waitingpoolminsize" description:"the minimum waiting pool size of this partition"` - PartitionWaitingPoolMaxSize *string `json:"waitingpoolmaxsize" description:"the maximum waiting pool size of this partition"` + PartitionWaitingPoolMinSize *string `json:"waitingpoolminsize" description:"the minimum waiting pool size of this partition" optional:"true"` + PartitionWaitingPoolMaxSize *string `json:"waitingpoolmaxsize" description:"the maximum waiting pool size of this partition" optional:"true"` } type PartitionUpdateRequest struct { From d1145faf8c3762bcc1013ace127eb85fbd9d806a Mon Sep 17 00:00:00 2001 From: iljarotar Date: Thu, 8 Dec 2022 11:36:18 +0100 Subject: [PATCH 18/45] fix nil pointer --- cmd/metal-api/internal/service/partition-service.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/metal-api/internal/service/partition-service.go 
b/cmd/metal-api/internal/service/partition-service.go index 59c79074d..7ca7d5928 100644 --- a/cmd/metal-api/internal/service/partition-service.go +++ b/cmd/metal-api/internal/service/partition-service.go @@ -207,7 +207,7 @@ func (r *partitionResource) createPartition(request *restful.Request, response * commandLine = *requestPayload.PartitionBootConfiguration.CommandLine } - var waitingPoolRange *metal.ScalerRange + waitingPoolRange := &metal.ScalerRange{} if requestPayload.PartitionWaitingPoolMinSize != nil && requestPayload.PartitionWaitingPoolMaxSize != nil { waitingPoolRange, err = metal.NewScalerRange(*requestPayload.PartitionWaitingPoolMinSize, *requestPayload.PartitionWaitingPoolMaxSize) if err != nil { From 845a81449c6c9e911763e3d801d302e0ed782d73 Mon Sep 17 00:00:00 2001 From: iljarotar Date: Thu, 8 Dec 2022 11:42:04 +0100 Subject: [PATCH 19/45] spec --- spec/metal-api.json | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/spec/metal-api.json b/spec/metal-api.json index 5c78821cb..a946722b5 100644 --- a/spec/metal-api.json +++ b/spec/metal-api.json @@ -3469,9 +3469,7 @@ }, "required": [ "bootconfig", - "id", - "waitingpoolmaxsize", - "waitingpoolminsize" + "id" ] }, "v1.PartitionResponse": { From ad55fa0aa43f7470c9315ebacc18d6fbe18530a5 Mon Sep 17 00:00:00 2001 From: iljarotar Date: Thu, 8 Dec 2022 15:37:50 +0100 Subject: [PATCH 20/45] OnTransition -> OnEnter --- cmd/metal-api/internal/fsm/events.go | 4 ++-- cmd/metal-api/internal/fsm/states/alive.go | 2 +- cmd/metal-api/internal/fsm/states/booting-new-kernel.go | 2 +- cmd/metal-api/internal/fsm/states/crashed.go | 2 +- cmd/metal-api/internal/fsm/states/initial.go | 2 +- cmd/metal-api/internal/fsm/states/installing.go | 2 +- cmd/metal-api/internal/fsm/states/machine-reclaim.go | 2 +- cmd/metal-api/internal/fsm/states/noop-state.go | 4 ++-- cmd/metal-api/internal/fsm/states/phoned-home.go | 2 +- cmd/metal-api/internal/fsm/states/planned-reboot.go | 2 +- cmd/metal-api/internal/fsm/states/preparing.go | 2 +- cmd/metal-api/internal/fsm/states/pxe-booting.go | 2 +- cmd/metal-api/internal/fsm/states/registering.go | 2 +- cmd/metal-api/internal/fsm/states/states.go | 2 +- cmd/metal-api/internal/fsm/states/waiting.go | 2 +- 15 files changed, 17 insertions(+), 17 deletions(-) diff --git a/cmd/metal-api/internal/fsm/events.go b/cmd/metal-api/internal/fsm/events.go index ee6feff56..c89274847 100644 --- a/cmd/metal-api/internal/fsm/events.go +++ b/cmd/metal-api/internal/fsm/events.go @@ -150,13 +150,13 @@ func eventCallbacks(config *states.StateConfig) fsm.Callbacks { // therefore we have an artificial state self_transition from which we can trigger the state-specific callback "enter_" + SelfTransitionState: func(e *fsm.Event) { if state, ok := allStates[e.Src]; ok { - state.OnTransition(e) + state.OnEnter(e) } }, } for name, state := range allStates { - callbacks["enter_"+name] = state.OnTransition + callbacks["enter_"+name] = state.OnEnter callbacks["leave_"+name] = state.OnLeave } diff --git a/cmd/metal-api/internal/fsm/states/alive.go b/cmd/metal-api/internal/fsm/states/alive.go index 2ec93301e..77cba097f 100644 --- a/cmd/metal-api/internal/fsm/states/alive.go +++ b/cmd/metal-api/internal/fsm/states/alive.go @@ -21,7 +21,7 @@ func newAlive(c *StateConfig) *AliveState { } } -func (p *AliveState) OnTransition(e *fsm.Event) { +func (p *AliveState) OnEnter(e *fsm.Event) { updateTimeAndLiveliness(p.event, p.container) p.log.Debugw("received provisioning alive event", "id", p.container.ID) } diff --git 
a/cmd/metal-api/internal/fsm/states/booting-new-kernel.go b/cmd/metal-api/internal/fsm/states/booting-new-kernel.go index 626525555..929401379 100644 --- a/cmd/metal-api/internal/fsm/states/booting-new-kernel.go +++ b/cmd/metal-api/internal/fsm/states/booting-new-kernel.go @@ -18,6 +18,6 @@ func newBootingNewKernel(c *StateConfig) *BootingNewKernelState { } } -func (p *BootingNewKernelState) OnTransition(e *fsm.Event) { +func (p *BootingNewKernelState) OnEnter(e *fsm.Event) { appendEventToContainer(p.event, p.container) } diff --git a/cmd/metal-api/internal/fsm/states/crashed.go b/cmd/metal-api/internal/fsm/states/crashed.go index ced108248..bf6f6b321 100644 --- a/cmd/metal-api/internal/fsm/states/crashed.go +++ b/cmd/metal-api/internal/fsm/states/crashed.go @@ -18,7 +18,7 @@ func newCrash(c *StateConfig) *CrashState { } } -func (p *CrashState) OnTransition(e *fsm.Event) { +func (p *CrashState) OnEnter(e *fsm.Event) { p.container.CrashLoop = true p.container.LastErrorEvent = p.event appendEventToContainer(p.event, p.container) diff --git a/cmd/metal-api/internal/fsm/states/initial.go b/cmd/metal-api/internal/fsm/states/initial.go index dd535d883..b2eb4897e 100644 --- a/cmd/metal-api/internal/fsm/states/initial.go +++ b/cmd/metal-api/internal/fsm/states/initial.go @@ -20,6 +20,6 @@ func newInitial(c *StateConfig) *InitialState { } } -func (p *InitialState) OnTransition(e *fsm.Event) { +func (p *InitialState) OnEnter(e *fsm.Event) { e.Err = fmt.Errorf("unexpected transition back to initial state") } diff --git a/cmd/metal-api/internal/fsm/states/installing.go b/cmd/metal-api/internal/fsm/states/installing.go index 6622aae77..6fd06e9a9 100644 --- a/cmd/metal-api/internal/fsm/states/installing.go +++ b/cmd/metal-api/internal/fsm/states/installing.go @@ -18,6 +18,6 @@ func newInstalling(c *StateConfig) *InstallingState { } } -func (p *InstallingState) OnTransition(e *fsm.Event) { +func (p *InstallingState) OnEnter(e *fsm.Event) { appendEventToContainer(p.event, p.container) } diff --git a/cmd/metal-api/internal/fsm/states/machine-reclaim.go b/cmd/metal-api/internal/fsm/states/machine-reclaim.go index 3c454979d..413c1b384 100644 --- a/cmd/metal-api/internal/fsm/states/machine-reclaim.go +++ b/cmd/metal-api/internal/fsm/states/machine-reclaim.go @@ -18,7 +18,7 @@ func newMachineReclaim(c *StateConfig) *MachineReclaimState { } } -func (p *MachineReclaimState) OnTransition(e *fsm.Event) { +func (p *MachineReclaimState) OnEnter(e *fsm.Event) { p.container.CrashLoop = false appendEventToContainer(p.event, p.container) } diff --git a/cmd/metal-api/internal/fsm/states/noop-state.go b/cmd/metal-api/internal/fsm/states/noop-state.go index 3d2effe1d..367e70628 100644 --- a/cmd/metal-api/internal/fsm/states/noop-state.go +++ b/cmd/metal-api/internal/fsm/states/noop-state.go @@ -6,5 +6,5 @@ import ( type noopState struct{} -func (_ noopState) OnTransition(e *fsm.Event) {} -func (_ noopState) OnLeave(e *fsm.Event) {} +func (_ noopState) OnEnter(e *fsm.Event) {} +func (_ noopState) OnLeave(e *fsm.Event) {} diff --git a/cmd/metal-api/internal/fsm/states/phoned-home.go b/cmd/metal-api/internal/fsm/states/phoned-home.go index e0d665f27..892b91672 100644 --- a/cmd/metal-api/internal/fsm/states/phoned-home.go +++ b/cmd/metal-api/internal/fsm/states/phoned-home.go @@ -26,7 +26,7 @@ func newPhonedHome(c *StateConfig) *PhonedHomeState { } } -func (p *PhonedHomeState) OnTransition(e *fsm.Event) { +func (p *PhonedHomeState) OnEnter(e *fsm.Event) { switch e.Src { case PhonedHome.String(): 
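// Illustration (sketch, not part of this patch): after the rename, eventCallbacks
// registers "enter_"+name with OnEnter and "leave_"+name with OnLeave for every
// state, and the embedded noopState provides empty defaults, so a state only
// overrides the hook it actually needs. The state name below is made up:
//
//	type exampleState struct {
//		noopState // supplies empty OnEnter and OnLeave
//	}
//
//	// override only OnEnter; OnLeave keeps the no-op behaviour
//	func (s *exampleState) OnEnter(e *fsm.Event) {
//		// state-specific work goes here
//	}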
updateTimeAndLiveliness(p.event, p.container) diff --git a/cmd/metal-api/internal/fsm/states/planned-reboot.go b/cmd/metal-api/internal/fsm/states/planned-reboot.go index 5b30ae211..8a9d33865 100644 --- a/cmd/metal-api/internal/fsm/states/planned-reboot.go +++ b/cmd/metal-api/internal/fsm/states/planned-reboot.go @@ -18,7 +18,7 @@ func newPlannedReboot(c *StateConfig) *PlannedRebootState { } } -func (p *PlannedRebootState) OnTransition(e *fsm.Event) { +func (p *PlannedRebootState) OnEnter(e *fsm.Event) { p.container.CrashLoop = false appendEventToContainer(p.event, p.container) } diff --git a/cmd/metal-api/internal/fsm/states/preparing.go b/cmd/metal-api/internal/fsm/states/preparing.go index fc33682c1..ddfd28dcd 100644 --- a/cmd/metal-api/internal/fsm/states/preparing.go +++ b/cmd/metal-api/internal/fsm/states/preparing.go @@ -18,7 +18,7 @@ func newPreparing(c *StateConfig) *PreparingState { } } -func (p *PreparingState) OnTransition(e *fsm.Event) { +func (p *PreparingState) OnEnter(e *fsm.Event) { p.container.FailedMachineReclaim = false appendEventToContainer(p.event, p.container) diff --git a/cmd/metal-api/internal/fsm/states/pxe-booting.go b/cmd/metal-api/internal/fsm/states/pxe-booting.go index b7a8e5ae3..e26e4f35c 100644 --- a/cmd/metal-api/internal/fsm/states/pxe-booting.go +++ b/cmd/metal-api/internal/fsm/states/pxe-booting.go @@ -18,7 +18,7 @@ func newPXEBooting(c *StateConfig) *PXEBootingState { } } -func (p *PXEBootingState) OnTransition(e *fsm.Event) { +func (p *PXEBootingState) OnEnter(e *fsm.Event) { p.container.FailedMachineReclaim = false if e.Src == PXEBooting.String() { diff --git a/cmd/metal-api/internal/fsm/states/registering.go b/cmd/metal-api/internal/fsm/states/registering.go index eebc99b36..29971bf7b 100644 --- a/cmd/metal-api/internal/fsm/states/registering.go +++ b/cmd/metal-api/internal/fsm/states/registering.go @@ -18,6 +18,6 @@ func newRegistering(c *StateConfig) *RegisteringState { } } -func (p *RegisteringState) OnTransition(e *fsm.Event) { +func (p *RegisteringState) OnEnter(e *fsm.Event) { appendEventToContainer(p.event, p.container) } diff --git a/cmd/metal-api/internal/fsm/states/states.go b/cmd/metal-api/internal/fsm/states/states.go index d11289216..493e1e812 100644 --- a/cmd/metal-api/internal/fsm/states/states.go +++ b/cmd/metal-api/internal/fsm/states/states.go @@ -25,7 +25,7 @@ const ( ) type FSMState interface { - OnTransition(e *fsm.Event) + OnEnter(e *fsm.Event) OnLeave(e *fsm.Event) } diff --git a/cmd/metal-api/internal/fsm/states/waiting.go b/cmd/metal-api/internal/fsm/states/waiting.go index 0085e3f79..6fb251d6e 100644 --- a/cmd/metal-api/internal/fsm/states/waiting.go +++ b/cmd/metal-api/internal/fsm/states/waiting.go @@ -14,7 +14,7 @@ func newWaiting(c *StateConfig) *WaitingState { } } -func (p *WaitingState) OnTransition(e *fsm.Event) { +func (p *WaitingState) OnEnter(e *fsm.Event) { appendEventToContainer(p.config.Event, p.config.Container) if p.config.Scaler != nil { From 7951390c1d76f427b5d082ef2d74c7a7365c6739 Mon Sep 17 00:00:00 2001 From: iljarotar Date: Thu, 8 Dec 2022 16:12:37 +0100 Subject: [PATCH 21/45] scaler range in partitionbase --- .../internal/service/v1/partition.go | 14 ++++++------- spec/metal-api.json | 20 ++++++++++++++++--- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/cmd/metal-api/internal/service/v1/partition.go b/cmd/metal-api/internal/service/v1/partition.go index 92343d44e..495785768 100644 --- a/cmd/metal-api/internal/service/v1/partition.go +++ 
b/cmd/metal-api/internal/service/v1/partition.go @@ -5,8 +5,10 @@ import ( ) type PartitionBase struct { - MgmtServiceAddress *string `json:"mgmtserviceaddress" description:"the address to the management service of this partition" optional:"true"` - PrivateNetworkPrefixLength *int `json:"privatenetworkprefixlength" description:"the length of private networks for the machine's child networks in this partition, default 22" optional:"true" minimum:"16" maximum:"30"` + MgmtServiceAddress *string `json:"mgmtserviceaddress" description:"the address to the management service of this partition" optional:"true"` + PrivateNetworkPrefixLength *int `json:"privatenetworkprefixlength" description:"the length of private networks for the machine's child networks in this partition, default 22" optional:"true" minimum:"16" maximum:"30"` + PartitionWaitingPoolMinSize *string `json:"waitingpoolminsize" description:"the minimum waiting pool size of this partition" optional:"true"` + PartitionWaitingPoolMaxSize *string `json:"waitingpoolmaxsize" description:"the maximum waiting pool size of this partition" optional:"true"` } type PartitionBootConfiguration struct { @@ -18,17 +20,15 @@ type PartitionBootConfiguration struct { type PartitionCreateRequest struct { Common PartitionBase - PartitionBootConfiguration PartitionBootConfiguration `json:"bootconfig" description:"the boot configuration of this partition"` - PartitionWaitingPoolMinSize *string `json:"waitingpoolminsize" description:"the minimum waiting pool size of this partition" optional:"true"` - PartitionWaitingPoolMaxSize *string `json:"waitingpoolmaxsize" description:"the maximum waiting pool size of this partition" optional:"true"` + PartitionBootConfiguration PartitionBootConfiguration `json:"bootconfig" description:"the boot configuration of this partition"` } type PartitionUpdateRequest struct { Common MgmtServiceAddress *string `json:"mgmtserviceaddress" description:"the address to the management service of this partition" optional:"true"` PartitionBootConfiguration *PartitionBootConfiguration `json:"bootconfig" description:"the boot configuration of this partition" optional:"true"` - PartitionWaitingPoolMinSize *string `json:"waitingpoolminsize" description:"the minimum waiting pool size of this partition"` - PartitionWaitingPoolMaxSize *string `json:"waitingpoolmaxsize" description:"the maximum waiting pool size of this partition"` + PartitionWaitingPoolMinSize *string `json:"waitingpoolminsize" description:"the minimum waiting pool size of this partition" optional:"true"` + PartitionWaitingPoolMaxSize *string `json:"waitingpoolmaxsize" description:"the maximum waiting pool size of this partition" optional:"true"` } type PartitionResponse struct { diff --git a/spec/metal-api.json b/spec/metal-api.json index a946722b5..64d33ca85 100644 --- a/spec/metal-api.json +++ b/spec/metal-api.json @@ -3368,6 +3368,14 @@ "maximum": 30, "minimum": 16, "type": "integer" + }, + "waitingpoolmaxsize": { + "description": "the maximum waiting pool size of this partition", + "type": "string" + }, + "waitingpoolminsize": { + "description": "the minimum waiting pool size of this partition", + "type": "string" } } }, @@ -3513,6 +3521,14 @@ "maximum": 30, "minimum": 16, "type": "integer" + }, + "waitingpoolmaxsize": { + "description": "the maximum waiting pool size of this partition", + "type": "string" + }, + "waitingpoolminsize": { + "description": "the minimum waiting pool size of this partition", + "type": "string" } }, "required": [ @@ -3553,9 +3569,7 @@ } }, 
"required": [ - "id", - "waitingpoolmaxsize", - "waitingpoolminsize" + "id" ] }, "v1.Project": { From 4efb66e9ec95ec2a95b65e0d99dd0a9126394a61 Mon Sep 17 00:00:00 2001 From: iljarotar Date: Thu, 8 Dec 2022 16:20:49 +0100 Subject: [PATCH 22/45] scaler range in NewPartitionResponse --- cmd/metal-api/internal/service/v1/partition.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cmd/metal-api/internal/service/v1/partition.go b/cmd/metal-api/internal/service/v1/partition.go index 495785768..04923fab5 100644 --- a/cmd/metal-api/internal/service/v1/partition.go +++ b/cmd/metal-api/internal/service/v1/partition.go @@ -75,8 +75,10 @@ func NewPartitionResponse(p *metal.Partition) *PartitionResponse { }, }, PartitionBase: PartitionBase{ - MgmtServiceAddress: &p.MgmtServiceAddress, - PrivateNetworkPrefixLength: &prefixLength, + MgmtServiceAddress: &p.MgmtServiceAddress, + PrivateNetworkPrefixLength: &prefixLength, + PartitionWaitingPoolMinSize: &p.WaitingPoolMinSize, + PartitionWaitingPoolMaxSize: &p.WaitingPoolMaxSize, }, PartitionBootConfiguration: PartitionBootConfiguration{ ImageURL: &p.BootConfiguration.ImageURL, From 3b8cd78bcb3eee9bde3ad4f6859cd7e6e6d50732 Mon Sep 17 00:00:00 2001 From: iljarotar Date: Thu, 8 Dec 2022 17:09:52 +0100 Subject: [PATCH 23/45] partitionid and sizeid added to manager --- cmd/metal-api/internal/datastore/event.go | 6 ++++-- cmd/metal-api/internal/datastore/machine.go | 5 +++++ cmd/metal-api/internal/datastore/poolsize.go | 8 +++++--- cmd/metal-api/internal/scaler/poolscaler_test.go | 13 +++++++++++++ 4 files changed, 27 insertions(+), 5 deletions(-) diff --git a/cmd/metal-api/internal/datastore/event.go b/cmd/metal-api/internal/datastore/event.go index b2db85ee3..3ef514658 100644 --- a/cmd/metal-api/internal/datastore/event.go +++ b/cmd/metal-api/internal/datastore/event.go @@ -61,8 +61,10 @@ func (rs *RethinkStore) ProvisioningEventForMachine(log *zap.SugaredLogger, publ } manager := &manager{ - rs: rs, - publisher: publisher, + rs: rs, + publisher: publisher, + partitionid: p.ID, + sizeid: machine.SizeID, } c := &s.PoolScalerConfig{ diff --git a/cmd/metal-api/internal/datastore/machine.go b/cmd/metal-api/internal/datastore/machine.go index 93f28c172..75795e44e 100644 --- a/cmd/metal-api/internal/datastore/machine.go +++ b/cmd/metal-api/internal/datastore/machine.go @@ -22,6 +22,7 @@ type MachineSearchQuery struct { Tags []string `json:"tags" optional:"true"` // allocation + NotAllocated *bool `json:"not_allocated" optional:"true"` AllocationName *string `json:"allocation_name" optional:"true"` AllocationProject *string `json:"allocation_project" optional:"true"` AllocationImageID *string `json:"allocation_image_id" optional:"true"` @@ -117,6 +118,10 @@ func (p *MachineSearchQuery) generateTerm(rs *RethinkStore) *r.Term { }) } + if p.NotAllocated != nil && *p.NotAllocated { + q = q.Filter(map[string]any{"allocation": nil}) + } + if p.AllocationName != nil { q = q.Filter(func(row r.Term) r.Term { return row.Field("allocation").Field("name").Eq(*p.AllocationName) diff --git a/cmd/metal-api/internal/datastore/poolsize.go b/cmd/metal-api/internal/datastore/poolsize.go index 3e512e267..8d2e17b2d 100644 --- a/cmd/metal-api/internal/datastore/poolsize.go +++ b/cmd/metal-api/internal/datastore/poolsize.go @@ -31,10 +31,12 @@ func (m *manager) AllMachines() (metal.Machines, error) { func (m *manager) WaitingMachines() (metal.Machines, error) { stateValue := string(metal.AvailableState) + notAllocated := true q := MachineSearchQuery{ - PartitionID: 
&m.partitionid, - SizeID: &m.sizeid, - StateValue: &stateValue, + PartitionID: &m.partitionid, + SizeID: &m.sizeid, + StateValue: &stateValue, + NotAllocated: ¬Allocated, } waitingMachines := metal.Machines{} diff --git a/cmd/metal-api/internal/scaler/poolscaler_test.go b/cmd/metal-api/internal/scaler/poolscaler_test.go index 5dce1f999..5aa1ece9c 100644 --- a/cmd/metal-api/internal/scaler/poolscaler_test.go +++ b/cmd/metal-api/internal/scaler/poolscaler_test.go @@ -119,6 +119,19 @@ func TestPoolScaler_AdjustNumberOfWaitingMachines(t *testing.T) { }, wantErr: false, }, + { + name: "edge case 2 waiting machines; min = max = 1", + partition: &metal.Partition{ + WaitingPoolMinSize: "1", + WaitingPoolMaxSize: "1", + }, + mockFn: func(mock *MockMachineManager) { + mock.On("AllMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 2)...), nil) + mock.On("WaitingMachines").Once().Return(append(metal.Machines{}, make([]metal.Machine, 2)...), nil) + mock.On("Shutdown", &metal.Machine{}).Return(nil).Times(1) + }, + wantErr: false, + }, { name: "pool scaling disabled; do nothing", partition: &metal.Partition{}, From 06f269e7016507a65107dd4d570eabac8071ae26 Mon Sep 17 00:00:00 2001 From: iljarotar Date: Thu, 8 Dec 2022 17:21:57 +0100 Subject: [PATCH 24/45] fix state value query filter --- cmd/metal-api/internal/datastore/machine.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/metal-api/internal/datastore/machine.go b/cmd/metal-api/internal/datastore/machine.go index 75795e44e..b09d7c4cc 100644 --- a/cmd/metal-api/internal/datastore/machine.go +++ b/cmd/metal-api/internal/datastore/machine.go @@ -328,7 +328,7 @@ func (p *MachineSearchQuery) generateTerm(rs *RethinkStore) *r.Term { if p.StateValue != nil { q = q.Filter(func(row r.Term) r.Term { - return row.Field("state_value").Eq(*p.StateValue) + return row.Field("state").Field("value").Eq(*p.StateValue) }) } From 7f6a8c259bec59bd21ba40459c1f78cb1525220d Mon Sep 17 00:00:00 2001 From: iljarotar Date: Fri, 9 Dec 2022 14:52:58 +0100 Subject: [PATCH 25/45] simplify scaler range --- cmd/metal-api/internal/metal/partition.go | 109 ++++++++++++------ .../internal/metal/partition_test.go | 22 +++- cmd/metal-api/internal/scaler/poolscaler.go | 14 +-- .../internal/service/partition-service.go | 16 +-- spec/metal-api.json | 9 ++ 5 files changed, 108 insertions(+), 62 deletions(-) diff --git a/cmd/metal-api/internal/metal/partition.go b/cmd/metal-api/internal/metal/partition.go index 8cf647266..40e6d62e7 100644 --- a/cmd/metal-api/internal/metal/partition.go +++ b/cmd/metal-api/internal/metal/partition.go @@ -40,75 +40,108 @@ func (sz Partitions) ByID() PartitionMap { } type ScalerRange struct { - WaitingPoolMinSize string - WaitingPoolMaxSize string + isDisabled bool + inPercent bool + minSize int + maxSize int } func NewScalerRange(min, max string) (*ScalerRange, error) { - r := &ScalerRange{WaitingPoolMinSize: min, WaitingPoolMaxSize: max} + r := &ScalerRange{} + if r.setDisabled(min, max); r.IsDisabled() { + return r, nil + } - if err := r.Validate(); err != nil { + if err := r.setRange(min, max); err != nil { return nil, err } - return r, nil -} - -func (r *ScalerRange) Min(of int) (int64, error) { - if strings.HasSuffix(r.WaitingPoolMinSize, "%") { - s := strings.Split(r.WaitingPoolMinSize, "%")[0] - min, err := strconv.ParseInt(s, 10, 64) - if err != nil { - return 0, err - } - return int64(math.Round(float64(of) * float64(min) / 100)), nil + if err := r.Validate(); err != nil { + return nil, err } - return 
strconv.ParseInt(r.WaitingPoolMinSize, 10, 64) + return r, nil } -func (r *ScalerRange) Max(of int) (int64, error) { - if strings.HasSuffix(r.WaitingPoolMaxSize, "%") { - s := strings.Split(r.WaitingPoolMaxSize, "%")[0] - min, err := strconv.ParseInt(s, 10, 64) - if err != nil { - return 0, err - } - - return int64(math.Round(float64(of) * float64(min) / 100)), nil +func (r *ScalerRange) setDisabled(min, max string) { + if min == "" && max == "" { + r.isDisabled = true + return } - - return strconv.ParseInt(r.WaitingPoolMaxSize, 10, 64) + r.isDisabled = false } func (r *ScalerRange) IsDisabled() bool { - return r.WaitingPoolMinSize == "" && r.WaitingPoolMaxSize == "" + return r.isDisabled } -func (r *ScalerRange) Validate() error { - if r.IsDisabled() { - return nil - } - - min, err := r.Min(100) +func (r *ScalerRange) setRange(min, max string) error { + minSize, err, minInPercent := stringToSize(min) if err != nil { return errors.New("could not parse minimum waiting pool size") } - max, err := r.Max(100) + maxSize, err, maxInPercent := stringToSize(max) if err != nil { return errors.New("could not parse maximum waiting pool size") } - if strings.HasSuffix(r.WaitingPoolMinSize, "%") != strings.HasSuffix(r.WaitingPoolMaxSize, "%") { + if minInPercent != maxInPercent { return errors.New("minimum and maximum pool sizes must either be both in percent or both an absolute value") } - if min < 0 || max < 0 { + if minInPercent { + r.inPercent = true + } else { + r.inPercent = false + } + + r.minSize = int(minSize) + r.maxSize = int(maxSize) + return nil +} + +func stringToSize(input string) (size int64, err error, inPercent bool) { + s := input + inPercent = false + + if strings.HasSuffix(input, "%") { + s = strings.Split(input, "%")[0] + inPercent = true + } + + size, err = strconv.ParseInt(s, 10, 64) + return size, err, inPercent +} + +func (r *ScalerRange) Min(of int) int { + if r.inPercent { + return percentOf(r.minSize, of) + } + return r.minSize +} + +func (r *ScalerRange) Max(of int) int { + if r.inPercent { + return percentOf(r.maxSize, of) + } + return r.maxSize +} + +func percentOf(percent, of int) int { + return int(math.Round(float64(of) * float64(percent) / 100)) +} + +func (r *ScalerRange) Validate() error { + if r.IsDisabled() { + return nil + } + + if r.minSize < 0 || r.maxSize < 0 { return errors.New("minimum and maximum waiting pool sizes must be greater or equal to 0") } - if max < min { + if r.maxSize < r.minSize { return errors.New("minimum waiting pool size must be less or equal to maximum pool size") } diff --git a/cmd/metal-api/internal/metal/partition_test.go b/cmd/metal-api/internal/metal/partition_test.go index 4db0ef414..9c42b6bd4 100644 --- a/cmd/metal-api/internal/metal/partition_test.go +++ b/cmd/metal-api/internal/metal/partition_test.go @@ -52,69 +52,79 @@ func TestPartitions_ByID(t *testing.T) { } } -func TestScalerRange_Validate(t *testing.T) { +func TestNewScalerRange(t *testing.T) { tests := []struct { name string min string max string + want *ScalerRange wantErr error }{ { name: "min max format mismatch", min: "15%", max: "30", + want: nil, wantErr: errors.New("minimum and maximum pool sizes must either be both in percent or both an absolute value"), }, { name: "parse error for min", min: "15#%", max: "30", + want: nil, wantErr: errors.New("could not parse minimum waiting pool size"), }, { name: "parse error for max", min: "15", max: "#30", + want: nil, wantErr: errors.New("could not parse maximum waiting pool size"), }, { name: "negative value for min", min: 
"-15", max: "0", + want: nil, wantErr: errors.New("minimum and maximum waiting pool sizes must be greater or equal to 0"), }, { name: "max less than min", min: "15", max: "0", + want: nil, wantErr: errors.New("minimum waiting pool size must be less or equal to maximum pool size"), }, { name: "everything okay", min: "15", max: "30", + want: &ScalerRange{minSize: 15, maxSize: 30}, wantErr: nil, }, { name: "everything okay in percent", min: "15%", max: "30%", + want: &ScalerRange{minSize: 15, maxSize: 30, inPercent: true}, wantErr: nil, }, { name: "pool scaling disabled", + want: &ScalerRange{isDisabled: true}, wantErr: nil, }, } for i := range tests { tt := tests[i] t.Run(tt.name, func(t *testing.T) { - r := &ScalerRange{ - WaitingPoolMinSize: tt.min, - WaitingPoolMaxSize: tt.max, + got, err := NewScalerRange(tt.min, tt.max) + if (err != nil || tt.wantErr != nil) && (err == nil || tt.wantErr == nil || err.Error() != tt.wantErr.Error()) { + t.Errorf("NewScalerRange() error = %v, wantErr %v", err, tt.wantErr) + return } - if err := r.Validate(); (err != nil || tt.wantErr != nil) && (err == nil || tt.wantErr == nil || err.Error() != tt.wantErr.Error()) { - t.Errorf("ScalerRange.Validate() error = %v, wantErr %v", err, tt.wantErr) + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("NewScalerRange() = %v, want %v", got, tt.want) } }) } diff --git a/cmd/metal-api/internal/scaler/poolscaler.go b/cmd/metal-api/internal/scaler/poolscaler.go index 7f5050af4..fead987db 100644 --- a/cmd/metal-api/internal/scaler/poolscaler.go +++ b/cmd/metal-api/internal/scaler/poolscaler.go @@ -102,19 +102,11 @@ func (p *PoolScaler) calculatePoolSizeExcess(actual int, scalerRange metal.Scale return 0 } - min, err := scalerRange.Min(len(allMachines)) - if err != nil { - return 0 - } - - max, err := scalerRange.Max(len(allMachines)) - if err != nil { - return 0 - } - + min := scalerRange.Min(len(allMachines)) + max := scalerRange.Max(len(allMachines)) average := (float64(max) + float64(min)) / 2 - if int64(actual) >= min && int64(actual) <= max { + if actual >= min && actual <= max { return 0 } diff --git a/cmd/metal-api/internal/service/partition-service.go b/cmd/metal-api/internal/service/partition-service.go index 7ca7d5928..077a04492 100644 --- a/cmd/metal-api/internal/service/partition-service.go +++ b/cmd/metal-api/internal/service/partition-service.go @@ -207,13 +207,15 @@ func (r *partitionResource) createPartition(request *restful.Request, response * commandLine = *requestPayload.PartitionBootConfiguration.CommandLine } - waitingPoolRange := &metal.ScalerRange{} + var minSize, maxSize string if requestPayload.PartitionWaitingPoolMinSize != nil && requestPayload.PartitionWaitingPoolMaxSize != nil { - waitingPoolRange, err = metal.NewScalerRange(*requestPayload.PartitionWaitingPoolMinSize, *requestPayload.PartitionWaitingPoolMaxSize) + _, err = metal.NewScalerRange(*requestPayload.PartitionWaitingPoolMinSize, *requestPayload.PartitionWaitingPoolMaxSize) if err != nil { r.sendError(request, response, httperrors.BadRequest(err)) return } + minSize = *requestPayload.PartitionWaitingPoolMinSize + maxSize = *requestPayload.PartitionWaitingPoolMaxSize } p := &metal.Partition{ @@ -229,8 +231,8 @@ func (r *partitionResource) createPartition(request *restful.Request, response * KernelURL: kernelURL, CommandLine: commandLine, }, - WaitingPoolMinSize: waitingPoolRange.WaitingPoolMinSize, - WaitingPoolMaxSize: waitingPoolRange.WaitingPoolMaxSize, + WaitingPoolMinSize: minSize, + WaitingPoolMaxSize: maxSize, } fqn := 
metal.TopicMachine.GetFQN(p.GetID())
@@ -314,14 +316,14 @@ func (r *partitionResource) updatePartition(request *restful.Request, response *
 	}
 
 	if requestPayload.PartitionWaitingPoolMinSize != nil && requestPayload.PartitionWaitingPoolMaxSize != nil {
-		waitingPoolRange, err := metal.NewScalerRange(*requestPayload.PartitionWaitingPoolMinSize, *requestPayload.PartitionWaitingPoolMinSize)
+		_, err := metal.NewScalerRange(*requestPayload.PartitionWaitingPoolMinSize, *requestPayload.PartitionWaitingPoolMaxSize)
 		if err != nil {
 			r.sendError(request, response, httperrors.BadRequest(err))
 			return
 		}
 
-		newPartition.WaitingPoolMinSize = waitingPoolRange.WaitingPoolMinSize
-		newPartition.WaitingPoolMaxSize = waitingPoolRange.WaitingPoolMaxSize
+		newPartition.WaitingPoolMinSize = *requestPayload.PartitionWaitingPoolMinSize
+		newPartition.WaitingPoolMaxSize = *requestPayload.PartitionWaitingPoolMaxSize
 	}
 
 	err = r.ds.UpdatePartition(oldPartition, &newPartition)
diff --git a/spec/metal-api.json b/spec/metal-api.json
index 64d33ca85..2fa6b1dec 100644
--- a/spec/metal-api.json
+++ b/spec/metal-api.json
@@ -211,6 +211,9 @@
        },
         "type": "array"
       },
+      "not_allocated": {
+        "type": "boolean"
+      },
       "partition_id": {
         "type": "string"
       },
@@ -1108,6 +1111,9 @@
        },
         "type": "array"
       },
+      "not_allocated": {
+        "type": "boolean"
+      },
       "partition_id": {
         "type": "string"
       },
@@ -2218,6 +2224,9 @@
        },
         "type": "array"
       },
+      "not_allocated": {
+        "type": "boolean"
+      },
       "partition_id": {
         "type": "string"
       },
From e0dea0c808d2f4682e88a86a26cbf1db9fed5517 Mon Sep 17 00:00:00 2001
From: iljarotar
Date: Tue, 13 Dec 2022 08:30:43 +0100
Subject: [PATCH 26/45] log error from pool scaler instead of returning it

---
 cmd/metal-api/internal/fsm/states/waiting.go | 10 ++++++++--
 cmd/metal-api/internal/scaler/poolscaler_test.go | 4 ++--
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/cmd/metal-api/internal/fsm/states/waiting.go b/cmd/metal-api/internal/fsm/states/waiting.go
index 6fb251d6e..a7ef9cadf 100644
--- a/cmd/metal-api/internal/fsm/states/waiting.go
+++ b/cmd/metal-api/internal/fsm/states/waiting.go
@@ -18,12 +18,18 @@ func (p *WaitingState) OnEnter(e *fsm.Event) {
 	appendEventToContainer(p.config.Event, p.config.Container)
 
 	if p.config.Scaler != nil {
-		e.Err = p.config.Scaler.AdjustNumberOfWaitingMachines()
+		err := p.config.Scaler.AdjustNumberOfWaitingMachines()
+		if err != nil {
+			p.config.Log.Errorw("received error from pool scaler", "error", err)
+		}
 	}
 }
 
 func (p *WaitingState) OnLeave(e *fsm.Event) {
 	if p.config.Scaler != nil {
-		e.Err = p.config.Scaler.AdjustNumberOfWaitingMachines()
+		err := p.config.Scaler.AdjustNumberOfWaitingMachines()
+		if err != nil {
+			p.config.Log.Errorw("received error from pool scaler", "error", err)
+		}
 	}
 }
diff --git a/cmd/metal-api/internal/scaler/poolscaler_test.go b/cmd/metal-api/internal/scaler/poolscaler_test.go
index 5aa1ece9c..8edd54e13 100644
--- a/cmd/metal-api/internal/scaler/poolscaler_test.go
+++ b/cmd/metal-api/internal/scaler/poolscaler_test.go
@@ -53,7 +53,7 @@ func TestPoolScaler_AdjustNumberOfWaitingMachines(t *testing.T) {
 			wantErr: false,
 		},
 		{
-			name: "more waiting machines needed in percent; power on 10 machines",
+			name: "more waiting machines needed in percent; power on 18 machines",
 			partition: &metal.Partition{
 				WaitingPoolMinSize: "30%",
 				WaitingPoolMaxSize: "40%",
@@ -80,7 +80,7 @@ func TestPoolScaler_AdjustNumberOfWaitingMachines(t *testing.T) {
 			wantErr: false,
 		},
 		{
-			name: "pool size exceeded in percent; power off 
4 machines", partition: &metal.Partition{ WaitingPoolMinSize: "15%", WaitingPoolMaxSize: "20%", From 031b2be52f0c8af56fc84ecfd8c69a9851a01177 Mon Sep 17 00:00:00 2001 From: iljarotar Date: Tue, 13 Dec 2022 08:35:45 +0100 Subject: [PATCH 27/45] assert expectations in poolscaler test --- cmd/metal-api/internal/scaler/poolscaler_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmd/metal-api/internal/scaler/poolscaler_test.go b/cmd/metal-api/internal/scaler/poolscaler_test.go index 8edd54e13..ba6f20b99 100644 --- a/cmd/metal-api/internal/scaler/poolscaler_test.go +++ b/cmd/metal-api/internal/scaler/poolscaler_test.go @@ -155,6 +155,8 @@ func TestPoolScaler_AdjustNumberOfWaitingMachines(t *testing.T) { if err := p.AdjustNumberOfWaitingMachines(); (err != nil) != tt.wantErr { t.Errorf("PoolScaler.AdjustNumberOfWaitingMachines() error = %v, wantErr %v", err, tt.wantErr) } + + manager.AssertExpectations(t) }) } } From e24bf715a0bc98e69b899c86b8f01246afd6ac00 Mon Sep 17 00:00:00 2001 From: iljarotar Date: Fri, 13 Jan 2023 12:48:20 +0100 Subject: [PATCH 28/45] log if no scaling required --- cmd/metal-api/internal/scaler/poolscaler.go | 1 + 1 file changed, 1 insertion(+) diff --git a/cmd/metal-api/internal/scaler/poolscaler.go b/cmd/metal-api/internal/scaler/poolscaler.go index fead987db..3d5e43086 100644 --- a/cmd/metal-api/internal/scaler/poolscaler.go +++ b/cmd/metal-api/internal/scaler/poolscaler.go @@ -58,6 +58,7 @@ func (p *PoolScaler) AdjustNumberOfWaitingMachines() error { poolSizeExcess = p.calculatePoolSizeExcess(len(waitingMachines), *waitingPoolRange) if poolSizeExcess == 0 { + p.log.Info("pool size condition met; doing nothing") return nil } From ada1bbc91f69074ebb84c2ab967c1e852a0034d8 Mon Sep 17 00:00:00 2001 From: iljarotar Date: Fri, 13 Jan 2023 13:01:21 +0100 Subject: [PATCH 29/45] named log --- cmd/metal-api/internal/datastore/event.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/metal-api/internal/datastore/event.go b/cmd/metal-api/internal/datastore/event.go index 3ef514658..80ba52f30 100644 --- a/cmd/metal-api/internal/datastore/event.go +++ b/cmd/metal-api/internal/datastore/event.go @@ -68,14 +68,14 @@ func (rs *RethinkStore) ProvisioningEventForMachine(log *zap.SugaredLogger, publ } c := &s.PoolScalerConfig{ - Log: log, + Log: log.Named("pool-scaler"), Manager: manager, Partition: *p, } scaler := s.NewPoolScaler(c) config := states.StateConfig{ - Log: log, + Log: log.Named("fsm"), Container: ec, Event: event, Scaler: scaler, From e8e6513aeea222ff8f69be89a1cc131976cfbd11 Mon Sep 17 00:00:00 2001 From: iljarotar Date: Thu, 19 Jan 2023 10:04:22 +0100 Subject: [PATCH 30/45] no scaling on alive events --- cmd/metal-api/internal/datastore/poolsize.go | 2 +- cmd/metal-api/internal/fsm/fsm_test.go | 2 +- cmd/metal-api/internal/fsm/states/waiting.go | 2 +- cmd/metal-api/internal/scaler/poolscaler.go | 8 +++++--- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/cmd/metal-api/internal/datastore/poolsize.go b/cmd/metal-api/internal/datastore/poolsize.go index 8d2e17b2d..2d0e60488 100644 --- a/cmd/metal-api/internal/datastore/poolsize.go +++ b/cmd/metal-api/internal/datastore/poolsize.go @@ -71,7 +71,7 @@ func (m *manager) Shutdown(machine *metal.Machine) error { Description: "shut down as exceeding maximum partition poolsize", } - err := m.rs.publishCommandAndUpdate(m.rs.log, machine, m.publisher, metal.MachineOnCmd, state) + err := m.rs.publishCommandAndUpdate(m.rs.log, machine, m.publisher, metal.MachineOffCmd, state) if err != nil { 
return err } diff --git a/cmd/metal-api/internal/fsm/fsm_test.go b/cmd/metal-api/internal/fsm/fsm_test.go index b067a8ad3..78b96ad7d 100644 --- a/cmd/metal-api/internal/fsm/fsm_test.go +++ b/cmd/metal-api/internal/fsm/fsm_test.go @@ -127,7 +127,7 @@ func TestHandleProvisioningEvent(t *testing.T) { }, }, { - name: "valid transition from registering to preparing (metal-hammer wait skip)", + name: "valid transition from registering to installing (metal-hammer wait skip)", container: &metal.ProvisioningEventContainer{ Events: metal.ProvisioningEvents{ { diff --git a/cmd/metal-api/internal/fsm/states/waiting.go b/cmd/metal-api/internal/fsm/states/waiting.go index a7ef9cadf..341d6c3ce 100644 --- a/cmd/metal-api/internal/fsm/states/waiting.go +++ b/cmd/metal-api/internal/fsm/states/waiting.go @@ -26,7 +26,7 @@ func (p *WaitingState) OnEnter(e *fsm.Event) { } func (p *WaitingState) OnLeave(e *fsm.Event) { - if p.config.Scaler != nil { + if p.config.Scaler != nil && e.Dst != Alive.String() { err := p.config.Scaler.AdjustNumberOfWaitingMachines() if err != nil { p.config.Log.Errorw("received error from pool scaler", "error", err) diff --git a/cmd/metal-api/internal/scaler/poolscaler.go b/cmd/metal-api/internal/scaler/poolscaler.go index 3d5e43086..d4b525a16 100644 --- a/cmd/metal-api/internal/scaler/poolscaler.go +++ b/cmd/metal-api/internal/scaler/poolscaler.go @@ -97,7 +97,7 @@ func (p *PoolScaler) AdjustNumberOfWaitingMachines() error { // calculatePoolSizeExcess checks if there are less waiting machines than minRequired or more than maxRequired // if yes, it returns the difference between the actual amount of waiting machines and the average of minRequired and maxRequired // if no, it returns 0 -func (p *PoolScaler) calculatePoolSizeExcess(actual int, scalerRange metal.ScalerRange) int { +func (p *PoolScaler) calculatePoolSizeExcess(current int, scalerRange metal.ScalerRange) int { allMachines, err := p.manager.AllMachines() if err != nil { return 0 @@ -107,11 +107,13 @@ func (p *PoolScaler) calculatePoolSizeExcess(actual int, scalerRange metal.Scale max := scalerRange.Max(len(allMachines)) average := (float64(max) + float64(min)) / 2 - if actual >= min && actual <= max { + p.log.Infow("checking pool size condition", "minimum pool size", min, "maximum pool size", max, "current pool size", current) + + if current >= min && current <= max { return 0 } - return actual - int(math.Round(average)) + return current - int(math.Round(average)) } func randomIndices(n, k int) []int { From f0841a23952b39e44484fe146439296db290cd81 Mon Sep 17 00:00:00 2001 From: iljarotar Date: Thu, 19 Jan 2023 10:19:06 +0100 Subject: [PATCH 31/45] set machine state to available if shut down machine is powered on again --- cmd/metal-api/internal/service/machine-service.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmd/metal-api/internal/service/machine-service.go b/cmd/metal-api/internal/service/machine-service.go index 223cf0d1f..4f7e61845 100644 --- a/cmd/metal-api/internal/service/machine-service.go +++ b/cmd/metal-api/internal/service/machine-service.go @@ -2014,6 +2014,8 @@ func (r *machineResource) machineCmd(cmd metal.MachineCommand, request *restful. 
Description: description, } needsUpdate = true + case metal.MachineOnCmd: + newMachine.State = metal.MachineState{Value: metal.AvailableState} } if needsUpdate { From fea48f0488e8a443f801d1d4c577de988ed5898f Mon Sep 17 00:00:00 2001 From: iljarotar Date: Fri, 20 Jan 2023 16:20:20 +0100 Subject: [PATCH 32/45] do not check machine liveliness, if machine is in shutdown state --- cmd/metal-api/internal/service/machine-service.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cmd/metal-api/internal/service/machine-service.go b/cmd/metal-api/internal/service/machine-service.go index 13b36e02a..513d66af1 100644 --- a/cmd/metal-api/internal/service/machine-service.go +++ b/cmd/metal-api/internal/service/machine-service.go @@ -1769,6 +1769,10 @@ func MachineLiveliness(ds *datastore.RethinkStore, logger *zap.SugaredLogger) er } func evaluateMachineLiveliness(ds *datastore.RethinkStore, m metal.Machine) (metal.MachineLiveliness, error) { + if m.State.Value == metal.ShutdownState { + return metal.MachineLivelinessAlive, nil + } + provisioningEvents, err := ds.FindProvisioningEventContainer(m.ID) if err != nil { // we have no provisioning events... we cannot tell From 65e2cf356a34eeb7919072ab57ec719cf48078f3 Mon Sep 17 00:00:00 2001 From: iljarotar Date: Tue, 31 Jan 2023 11:23:19 +0100 Subject: [PATCH 33/45] add sleeping flag to MachineState and remove SHUTDOWN MState --- cmd/metal-api/internal/datastore/machine.go | 1 + cmd/metal-api/internal/datastore/poolsize.go | 8 ++++---- cmd/metal-api/internal/metal/machine.go | 3 +-- cmd/metal-api/internal/service/machine-service.go | 6 +++--- spec/metal-api.json | 9 +++++++++ 5 files changed, 18 insertions(+), 9 deletions(-) diff --git a/cmd/metal-api/internal/datastore/machine.go b/cmd/metal-api/internal/datastore/machine.go index b09d7c4cc..9488b7c60 100644 --- a/cmd/metal-api/internal/datastore/machine.go +++ b/cmd/metal-api/internal/datastore/machine.go @@ -59,6 +59,7 @@ type MachineSearchQuery struct { // state StateValue *string `json:"state_value" optional:"true"` + Sleeping *bool `json:"sleeping" optional:"true"` // ipmi IpmiAddress *string `json:"ipmi_address" optional:"true"` diff --git a/cmd/metal-api/internal/datastore/poolsize.go b/cmd/metal-api/internal/datastore/poolsize.go index 2d0e60488..3b40a4a63 100644 --- a/cmd/metal-api/internal/datastore/poolsize.go +++ b/cmd/metal-api/internal/datastore/poolsize.go @@ -49,11 +49,11 @@ func (m *manager) WaitingMachines() (metal.Machines, error) { } func (m *manager) ShutdownMachines() (metal.Machines, error) { - stateValue := string(metal.ShutdownState) + sleeping := true q := MachineSearchQuery{ PartitionID: &m.partitionid, SizeID: &m.sizeid, - StateValue: &stateValue, + Sleeping: &sleeping, } shutdownMachines := metal.Machines{} @@ -67,7 +67,7 @@ func (m *manager) ShutdownMachines() (metal.Machines, error) { func (m *manager) Shutdown(machine *metal.Machine) error { state := metal.MachineState{ - Value: metal.ShutdownState, + Sleeping: true, Description: "shut down as exceeding maximum partition poolsize", } @@ -79,7 +79,7 @@ func (m *manager) Shutdown(machine *metal.Machine) error { } func (m *manager) PowerOn(machine *metal.Machine) error { - state := metal.MachineState{Value: metal.AvailableState} + state := metal.MachineState{Sleeping: false} err := m.rs.publishCommandAndUpdate(m.rs.log, machine, m.publisher, metal.MachineOnCmd, state) if err != nil { diff --git a/cmd/metal-api/internal/metal/machine.go b/cmd/metal-api/internal/metal/machine.go index cf7284812..e8f55f77b 100644 --- 
a/cmd/metal-api/internal/metal/machine.go +++ b/cmd/metal-api/internal/metal/machine.go @@ -21,8 +21,6 @@ const ( ReservedState MState = "RESERVED" // LockedState describes a machine state where a machine cannot be deleted or allocated anymore LockedState MState = "LOCKED" - // ShutdownState describes a machine state where a machine was shut down on purpose and should not be resurrected - ShutdownState MState = "SHUTDOWN" ) var ( @@ -50,6 +48,7 @@ type MachineState struct { Description string `rethinkdb:"description" json:"description"` Issuer string `rethinkdb:"issuer" json:"issuer,omitempty"` MetalHammerVersion string `rethinkdb:"metal_hammer_version" json:"metal_hammer_version"` + Sleeping bool `rethinkdb:"sleeping" json:"sleeping"` } // MachineStateFrom converts a machineState string to the type diff --git a/cmd/metal-api/internal/service/machine-service.go b/cmd/metal-api/internal/service/machine-service.go index 513d66af1..f18980480 100644 --- a/cmd/metal-api/internal/service/machine-service.go +++ b/cmd/metal-api/internal/service/machine-service.go @@ -1769,7 +1769,7 @@ func MachineLiveliness(ds *datastore.RethinkStore, logger *zap.SugaredLogger) er } func evaluateMachineLiveliness(ds *datastore.RethinkStore, m metal.Machine) (metal.MachineLiveliness, error) { - if m.State.Value == metal.ShutdownState { + if m.State.Sleeping { return metal.MachineLivelinessAlive, nil } @@ -1833,7 +1833,7 @@ func ResurrectMachines(ds *datastore.RethinkStore, publisher bus.Publisher, ep * continue } - if m.State.Value == metal.ShutdownState { + if m.State.Sleeping { continue } @@ -2027,7 +2027,7 @@ func (r *machineResource) machineCmd(cmd metal.MachineCommand, request *restful. } needsUpdate = true case metal.MachineOnCmd: - newMachine.State = metal.MachineState{Value: metal.AvailableState} + newMachine.State = metal.MachineState{Sleeping: false} } if needsUpdate { diff --git a/spec/metal-api.json b/spec/metal-api.json index a92a2c27d..f5e98f53f 100644 --- a/spec/metal-api.json +++ b/spec/metal-api.json @@ -223,6 +223,9 @@ "sizeid": { "type": "string" }, + "sleeping": { + "type": "boolean" + }, "state_value": { "type": "string" }, @@ -1123,6 +1126,9 @@ "sizeid": { "type": "string" }, + "sleeping": { + "type": "boolean" + }, "state_value": { "type": "string" }, @@ -2236,6 +2242,9 @@ "sizeid": { "type": "string" }, + "sleeping": { + "type": "boolean" + }, "state_value": { "type": "string" }, From 2b607d84b0b3b35e5e2e39ab1ccb078b65a5103d Mon Sep 17 00:00:00 2001 From: iljarotar Date: Tue, 21 Feb 2023 08:42:41 +0100 Subject: [PATCH 34/45] remove rand.Seed --- cmd/metal-api/internal/scaler/poolscaler.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/cmd/metal-api/internal/scaler/poolscaler.go b/cmd/metal-api/internal/scaler/poolscaler.go index d4b525a16..2c53c721d 100644 --- a/cmd/metal-api/internal/scaler/poolscaler.go +++ b/cmd/metal-api/internal/scaler/poolscaler.go @@ -3,7 +3,6 @@ package scaler import ( "math" "math/rand" - "time" "github.com/metal-stack/metal-api/cmd/metal-api/internal/metal" "go.uber.org/zap" @@ -122,7 +121,6 @@ func randomIndices(n, k int) []int { indices[i] = i } - rand.Seed(time.Now().UnixNano()) rand.Shuffle(len(indices), func(i, j int) { indices[i], indices[j] = indices[j], indices[i] }) return indices[:k] From aa035a3b9e55e2c6e7941a89ca6e124abed403c4 Mon Sep 17 00:00:00 2001 From: iljarotar Date: Fri, 3 Mar 2023 11:57:28 +0100 Subject: [PATCH 35/45] hibernation struct --- cmd/metal-api/internal/datastore/machine.go | 20 +++- .../datastore/machine_integration_test.go | 101 
++++++++++++++++++ cmd/metal-api/internal/datastore/poolsize.go | 31 ++++-- cmd/metal-api/internal/metal/machine.go | 28 +++-- .../internal/service/machine-service.go | 20 +++- cmd/metal-api/internal/service/v1/machine.go | 22 +++- spec/metal-api.json | 68 ++++++++++-- 7 files changed, 246 insertions(+), 44 deletions(-) diff --git a/cmd/metal-api/internal/datastore/machine.go b/cmd/metal-api/internal/datastore/machine.go index fd45afc96..711001df9 100644 --- a/cmd/metal-api/internal/datastore/machine.go +++ b/cmd/metal-api/internal/datastore/machine.go @@ -54,8 +54,8 @@ type MachineSearchQuery struct { DiskSizes []int64 `json:"disk_sizes" optional:"true"` // state - Sleeping *bool `json:"sleeping" optional:"true"` - StateValue *string `json:"state_value" optional:"true" enum:"|RESERVED|LOCKED"` + HibernationEnabled *bool `json:"hibernation_enabled" optional:"true"` + StateValue *string `json:"state_value" optional:"true" enum:"|RESERVED|LOCKED"` // ipmi IpmiAddress *string `json:"ipmi_address" optional:"true"` @@ -115,8 +115,14 @@ func (p *MachineSearchQuery) generateTerm(rs *RethinkStore) *r.Term { }) } - if p.NotAllocated != nil && *p.NotAllocated { - q = q.Filter(map[string]any{"allocation": nil}) + if p.NotAllocated != nil { + q = q.Filter(func(row r.Term) r.Term { + if *p.NotAllocated { + return row.Field("allocation").Eq(nil) + } else { + return row.Field("allocation").Ne(nil) + } + }) } if p.AllocationName != nil { @@ -299,6 +305,12 @@ func (p *MachineSearchQuery) generateTerm(rs *RethinkStore) *r.Term { }) } + if p.HibernationEnabled != nil { + q = q.Filter(func(row r.Term) r.Term { + return row.Field("state").Field("hibernation").Field("enabled").Eq(*p.HibernationEnabled) + }) + } + if p.StateValue != nil { q = q.Filter(func(row r.Term) r.Term { return row.Field("state").Field("value").Eq(*p.StateValue) diff --git a/cmd/metal-api/internal/datastore/machine_integration_test.go b/cmd/metal-api/internal/datastore/machine_integration_test.go index 9a82b86c7..a1a897322 100644 --- a/cmd/metal-api/internal/datastore/machine_integration_test.go +++ b/cmd/metal-api/internal/datastore/machine_integration_test.go @@ -343,6 +343,54 @@ func TestRethinkStore_SearchMachines(t *testing.T) { }, wantErr: nil, }, + { + name: "search by not_allocated true", + q: &MachineSearchQuery{ + NotAllocated: pointer.Pointer(true), + }, + mock: []*metal.Machine{ + {Base: metal.Base{ID: "1"}, Allocation: &metal.MachineAllocation{}}, + {Base: metal.Base{ID: "2"}, Allocation: &metal.MachineAllocation{}}, + {Base: metal.Base{ID: "3"}, Allocation: nil}, + }, + want: []*metal.Machine{ + tt.defaultBody(&metal.Machine{Base: metal.Base{ID: "3"}, Allocation: nil}), + }, + wantErr: nil, + }, + { + name: "search by not_allocated false", + q: &MachineSearchQuery{ + NotAllocated: pointer.Pointer(false), + }, + mock: []*metal.Machine{ + {Base: metal.Base{ID: "1"}, Allocation: &metal.MachineAllocation{}}, + {Base: metal.Base{ID: "2"}, Allocation: &metal.MachineAllocation{}}, + {Base: metal.Base{ID: "3"}, Allocation: nil}, + }, + want: []*metal.Machine{ + tt.defaultBody(&metal.Machine{Base: metal.Base{ID: "1"}, Allocation: &metal.MachineAllocation{}}), + tt.defaultBody(&metal.Machine{Base: metal.Base{ID: "2"}, Allocation: &metal.MachineAllocation{}}), + }, + wantErr: nil, + }, + { + name: "search by not_allocated nil", + q: &MachineSearchQuery{ + NotAllocated: nil, + }, + mock: []*metal.Machine{ + {Base: metal.Base{ID: "1"}, Allocation: &metal.MachineAllocation{}}, + {Base: metal.Base{ID: "2"}, Allocation: 
&metal.MachineAllocation{}}, + {Base: metal.Base{ID: "3"}, Allocation: nil}, + }, + want: []*metal.Machine{ + tt.defaultBody(&metal.Machine{Base: metal.Base{ID: "1"}, Allocation: &metal.MachineAllocation{}}), + tt.defaultBody(&metal.Machine{Base: metal.Base{ID: "2"}, Allocation: &metal.MachineAllocation{}}), + tt.defaultBody(&metal.Machine{Base: metal.Base{ID: "3"}, Allocation: nil}), + }, + wantErr: nil, + }, { name: "search by network ids", q: &MachineSearchQuery{ @@ -635,6 +683,59 @@ func TestRethinkStore_SearchMachines(t *testing.T) { }, wantErr: nil, }, + { + name: "search by hibernation_enabled true", + q: &MachineSearchQuery{ + HibernationEnabled: pointer.Pointer(true), + }, + mock: []*metal.Machine{ + {Base: metal.Base{ID: "1"}, State: metal.MachineState{Hibernation: metal.MachineHibernation{Enabled: true}}}, + {Base: metal.Base{ID: "2"}, State: metal.MachineState{Hibernation: metal.MachineHibernation{Enabled: true}}}, + {Base: metal.Base{ID: "3"}, State: metal.MachineState{Hibernation: metal.MachineHibernation{Enabled: false}}}, + {Base: metal.Base{ID: "4"}, State: metal.MachineState{Hibernation: metal.MachineHibernation{Enabled: false}}}, + }, + want: []*metal.Machine{ + tt.defaultBody(&metal.Machine{Base: metal.Base{ID: "1"}, State: metal.MachineState{Hibernation: metal.MachineHibernation{Enabled: true}}}), + tt.defaultBody(&metal.Machine{Base: metal.Base{ID: "2"}, State: metal.MachineState{Hibernation: metal.MachineHibernation{Enabled: true}}}), + }, + wantErr: nil, + }, + { + name: "search by hibernation_enabled false", + q: &MachineSearchQuery{ + HibernationEnabled: pointer.Pointer(false), + }, + mock: []*metal.Machine{ + {Base: metal.Base{ID: "1"}, State: metal.MachineState{Hibernation: metal.MachineHibernation{Enabled: true}}}, + {Base: metal.Base{ID: "2"}, State: metal.MachineState{Hibernation: metal.MachineHibernation{Enabled: true}}}, + {Base: metal.Base{ID: "3"}, State: metal.MachineState{Hibernation: metal.MachineHibernation{Enabled: false}}}, + {Base: metal.Base{ID: "4"}, State: metal.MachineState{Hibernation: metal.MachineHibernation{Enabled: false}}}, + }, + want: []*metal.Machine{ + tt.defaultBody(&metal.Machine{Base: metal.Base{ID: "3"}, State: metal.MachineState{Hibernation: metal.MachineHibernation{Enabled: false}}}), + tt.defaultBody(&metal.Machine{Base: metal.Base{ID: "4"}, State: metal.MachineState{Hibernation: metal.MachineHibernation{Enabled: false}}}), + }, + wantErr: nil, + }, + { + name: "search by hibernation_enabled nil", + q: &MachineSearchQuery{ + HibernationEnabled: nil, + }, + mock: []*metal.Machine{ + {Base: metal.Base{ID: "1"}, State: metal.MachineState{Hibernation: metal.MachineHibernation{Enabled: true}}}, + {Base: metal.Base{ID: "2"}, State: metal.MachineState{Hibernation: metal.MachineHibernation{Enabled: true}}}, + {Base: metal.Base{ID: "3"}, State: metal.MachineState{Hibernation: metal.MachineHibernation{Enabled: false}}}, + {Base: metal.Base{ID: "4"}, State: metal.MachineState{Hibernation: metal.MachineHibernation{Enabled: false}}}, + }, + want: []*metal.Machine{ + tt.defaultBody(&metal.Machine{Base: metal.Base{ID: "1"}, State: metal.MachineState{Hibernation: metal.MachineHibernation{Enabled: true}}}), + tt.defaultBody(&metal.Machine{Base: metal.Base{ID: "2"}, State: metal.MachineState{Hibernation: metal.MachineHibernation{Enabled: true}}}), + tt.defaultBody(&metal.Machine{Base: metal.Base{ID: "3"}, State: metal.MachineState{Hibernation: metal.MachineHibernation{Enabled: false}}}), + tt.defaultBody(&metal.Machine{Base: metal.Base{ID: "4"}, 
State: metal.MachineState{Hibernation: metal.MachineHibernation{Enabled: false}}}), + }, + wantErr: nil, + }, { name: "search by ipmi address", q: &MachineSearchQuery{ diff --git a/cmd/metal-api/internal/datastore/poolsize.go b/cmd/metal-api/internal/datastore/poolsize.go index 3b40a4a63..113059a3a 100644 --- a/cmd/metal-api/internal/datastore/poolsize.go +++ b/cmd/metal-api/internal/datastore/poolsize.go @@ -1,9 +1,12 @@ package datastore import ( + "time" + e "github.com/metal-stack/metal-api/cmd/metal-api/internal/eventbus" "github.com/metal-stack/metal-api/cmd/metal-api/internal/metal" "github.com/metal-stack/metal-lib/bus" + "github.com/metal-stack/metal-lib/pkg/pointer" "go.uber.org/zap" ) @@ -30,13 +33,11 @@ func (m *manager) AllMachines() (metal.Machines, error) { } func (m *manager) WaitingMachines() (metal.Machines, error) { - stateValue := string(metal.AvailableState) - notAllocated := true q := MachineSearchQuery{ PartitionID: &m.partitionid, SizeID: &m.sizeid, - StateValue: &stateValue, - NotAllocated: ¬Allocated, + StateValue: pointer.Pointer(string(metal.AvailableState)), + NotAllocated: pointer.Pointer(true), } waitingMachines := metal.Machines{} @@ -49,11 +50,10 @@ func (m *manager) WaitingMachines() (metal.Machines, error) { } func (m *manager) ShutdownMachines() (metal.Machines, error) { - sleeping := true q := MachineSearchQuery{ - PartitionID: &m.partitionid, - SizeID: &m.sizeid, - Sleeping: &sleeping, + PartitionID: &m.partitionid, + SizeID: &m.sizeid, + HibernationEnabled: pointer.Pointer(true), } shutdownMachines := metal.Machines{} @@ -67,8 +67,11 @@ func (m *manager) ShutdownMachines() (metal.Machines, error) { func (m *manager) Shutdown(machine *metal.Machine) error { state := metal.MachineState{ - Sleeping: true, - Description: "shut down as exceeding maximum partition poolsize", + Hibernation: metal.MachineHibernation{ + Enabled: true, + Description: "shutdown by pool scaler due to exceeding pool size", + Changed: pointer.Pointer(time.Now()), + }, } err := m.rs.publishCommandAndUpdate(m.rs.log, machine, m.publisher, metal.MachineOffCmd, state) @@ -79,7 +82,13 @@ func (m *manager) Shutdown(machine *metal.Machine) error { } func (m *manager) PowerOn(machine *metal.Machine) error { - state := metal.MachineState{Sleeping: false} + state := metal.MachineState{ + Hibernation: metal.MachineHibernation{ + Enabled: false, + Description: "powered on by pool scaler to increase pool size", + Changed: pointer.Pointer(time.Now()), + }, + } err := m.rs.publishCommandAndUpdate(m.rs.log, machine, m.publisher, metal.MachineOnCmd, state) if err != nil { diff --git a/cmd/metal-api/internal/metal/machine.go b/cmd/metal-api/internal/metal/machine.go index e8f55f77b..249611544 100644 --- a/cmd/metal-api/internal/metal/machine.go +++ b/cmd/metal-api/internal/metal/machine.go @@ -44,11 +44,18 @@ var ( // the machine will be available for allocation. In all other cases the allocation // must explicitly point to this machine. 
type MachineState struct { - Value MState `rethinkdb:"value" json:"value"` - Description string `rethinkdb:"description" json:"description"` - Issuer string `rethinkdb:"issuer" json:"issuer,omitempty"` - MetalHammerVersion string `rethinkdb:"metal_hammer_version" json:"metal_hammer_version"` - Sleeping bool `rethinkdb:"sleeping" json:"sleeping"` + Value MState `rethinkdb:"value" json:"value"` + Description string `rethinkdb:"description" json:"description"` + Issuer string `rethinkdb:"issuer" json:"issuer,omitempty"` + MetalHammerVersion string `rethinkdb:"metal_hammer_version" json:"metal_hammer_version"` + Hibernation MachineHibernation `rethinkdb:"hibernation" json:"hibernation"` +} + +// A MachineHibernation indicates that a machine was sent to sleep or woken up by the pool scaler +type MachineHibernation struct { + Enabled bool `rethinkdb:"enabled" json:"enabled"` + Description string `rethinkdb:"description" json:"description"` + Changed *time.Time `rethinkdb:"changed" json:"changed"` } // MachineStateFrom converts a machineState string to the type @@ -295,11 +302,12 @@ type MachineLiveliness string // The enums for the machine liveliness states. const ( - MachineLivelinessAlive MachineLiveliness = "Alive" - MachineLivelinessDead MachineLiveliness = "Dead" - MachineLivelinessUnknown MachineLiveliness = "Unknown" - MachineDeadAfter time.Duration = 5 * time.Minute - MachineResurrectAfter time.Duration = time.Hour + MachineLivelinessAlive MachineLiveliness = "Alive" + MachineLivelinessDead MachineLiveliness = "Dead" + MachineLivelinessUnknown MachineLiveliness = "Unknown" + MachineLivelinesHibernated MachineLiveliness = "Hibernated" + MachineDeadAfter time.Duration = 5 * time.Minute + MachineResurrectAfter time.Duration = time.Hour ) // Is return true if given liveliness is equal to specific Liveliness diff --git a/cmd/metal-api/internal/service/machine-service.go b/cmd/metal-api/internal/service/machine-service.go index efddd99b1..6856a51d2 100644 --- a/cmd/metal-api/internal/service/machine-service.go +++ b/cmd/metal-api/internal/service/machine-service.go @@ -24,6 +24,7 @@ import ( "go.uber.org/zap" "github.com/metal-stack/metal-lib/httperrors" + "github.com/metal-stack/metal-lib/pkg/pointer" "github.com/metal-stack/metal-lib/pkg/tag" mdmv1 "github.com/metal-stack/masterdata-api/api/v1" @@ -761,6 +762,17 @@ func (r *machineResource) ipmiReport(request *restful.Request, response *restful if report.PowerState != "" { newMachine.IPMI.PowerState = report.PowerState } + + h := newMachine.State.Hibernation + // TODO: an enum for PowerState would be nice + if newMachine.IPMI.PowerState == "ON" && h.Enabled && h.Changed != nil && time.Since(*h.Changed) > 10*time.Minute { + newMachine.State.Hibernation = metal.MachineHibernation{ + Enabled: false, + Description: "machine was hibernated by poolscaler but is still powered on or was powered on by user", + Changed: pointer.Pointer(time.Now()), + } + } + if report.PowerMetric != nil { newMachine.IPMI.PowerMetric = &metal.PowerMetric{ AverageConsumedWatts: report.PowerMetric.AverageConsumedWatts, @@ -1772,8 +1784,8 @@ func MachineLiveliness(ds *datastore.RethinkStore, logger *zap.SugaredLogger) er } func evaluateMachineLiveliness(ds *datastore.RethinkStore, m metal.Machine) (metal.MachineLiveliness, error) { - if m.State.Sleeping { - return metal.MachineLivelinessAlive, nil + if m.State.Hibernation.Enabled { + return metal.MachineLivelinesHibernated, nil } provisioningEvents, err := ds.FindProvisioningEventContainer(m.ID) @@ -1836,7 +1848,7 @@ func 
ResurrectMachines(ds *datastore.RethinkStore, publisher bus.Publisher, ep * continue } - if m.State.Sleeping { + if provisioningEvents.Liveliness.Is(string(metal.MachineLivelinesHibernated)) { continue } @@ -2029,8 +2041,6 @@ func (r *machineResource) machineCmd(cmd metal.MachineCommand, request *restful. Description: description, } needsUpdate = true - case metal.MachineOnCmd: - newMachine.State = metal.MachineState{Sleeping: false} } if needsUpdate { diff --git a/cmd/metal-api/internal/service/v1/machine.go b/cmd/metal-api/internal/service/v1/machine.go index c3913ae72..f1e26447b 100644 --- a/cmd/metal-api/internal/service/v1/machine.go +++ b/cmd/metal-api/internal/service/v1/machine.go @@ -19,7 +19,7 @@ type MachineBase struct { Allocation *MachineAllocation `json:"allocation" description:"the allocation data of an allocated machine" optional:"true"` State MachineState `json:"state" rethinkdb:"state" description:"the state of this machine"` LEDState ChassisIdentifyLEDState `json:"ledstate" rethinkdb:"ledstate" description:"the state of this chassis identify LED"` - Liveliness string `json:"liveliness" description:"the liveliness of this machine"` + Liveliness string `json:"liveliness" description:"the liveliness of this machine" enum:"Alive|Dead|Unknown|Hibernated"` RecentProvisioningEvents MachineRecentProvisioningEvents `json:"events" description:"recent events of this machine during provisioning"` Tags []string `json:"tags" description:"tags for this machine"` } @@ -85,10 +85,17 @@ type MachineHardware struct { } type MachineState struct { - Value string `json:"value" enum:"RESERVED|LOCKED|" description:"the state of this machine. empty means available for all"` - Description string `json:"description" description:"a description why this machine is in the given state"` - Issuer string `json:"issuer,omitempty" optional:"true" description:"the user that changed the state"` - MetalHammerVersion string `json:"metal_hammer_version" description:"the version of metal hammer which put the machine in waiting state"` + Value string `json:"value" enum:"RESERVED|LOCKED|" description:"the state of this machine. 
empty means available for all"` + Description string `json:"description" description:"a description why this machine is in the given state"` + Issuer string `json:"issuer,omitempty" optional:"true" description:"the user that changed the state"` + MetalHammerVersion string `json:"metal_hammer_version" description:"the version of metal hammer which put the machine in waiting state"` + Hibernation MachineHibernation `json:"hibernation" description:"indicates that a machine was sent to sleep or woken up by the pool scaler"` +} + +type MachineHibernation struct { + Enabled bool `json:"enabled" description:"true if hibernation is enabled"` + Description string `json:"description" description:"describes last state change of hibernation"` + Changed *time.Time `json:"changed" optional:"true" description:"last changed timestamp"` } type ChassisIdentifyLEDState struct { @@ -547,6 +554,11 @@ func NewMachineResponse(m *metal.Machine, s *metal.Size, p *metal.Partition, i * Description: m.State.Description, Issuer: m.State.Issuer, MetalHammerVersion: m.State.MetalHammerVersion, + Hibernation: MachineHibernation{ + Enabled: m.State.Hibernation.Enabled, + Description: m.State.Hibernation.Description, + Changed: m.State.Hibernation.Changed, + }, }, LEDState: ChassisIdentifyLEDState{ Value: string(m.LEDState.Value), diff --git a/spec/metal-api.json b/spec/metal-api.json index 4e2bcd695..2cd2fad12 100644 --- a/spec/metal-api.json +++ b/spec/metal-api.json @@ -140,6 +140,9 @@ "format": "int64", "type": "integer" }, + "hibernation_enabled": { + "type": "boolean" + }, "id": { "type": "string" }, @@ -244,9 +247,6 @@ "sizeid": { "type": "string" }, - "sleeping": { - "type": "boolean" - }, "state_value": { "enum": [ "", @@ -1061,6 +1061,9 @@ "format": "int64", "type": "integer" }, + "hibernation_enabled": { + "type": "boolean" + }, "id": { "type": "string" }, @@ -1165,9 +1168,6 @@ "sizeid": { "type": "string" }, - "sleeping": { - "type": "boolean" - }, "state_value": { "enum": [ "", @@ -1229,6 +1229,12 @@ }, "liveliness": { "description": "the liveliness of this machine", + "enum": [ + "Alive", + "Dead", + "Hibernated", + "Unknown" + ], "type": "string" }, "name": { @@ -2041,6 +2047,12 @@ }, "liveliness": { "description": "the liveliness of this machine", + "enum": [ + "Alive", + "Dead", + "Hibernated", + "Unknown" + ], "type": "string" }, "partition": { @@ -2203,6 +2215,9 @@ "format": "int64", "type": "integer" }, + "hibernation_enabled": { + "type": "boolean" + }, "id": { "type": "string" }, @@ -2307,9 +2322,6 @@ "sizeid": { "type": "string" }, - "sleeping": { - "type": "boolean" - }, "state_value": { "enum": [ "", @@ -2423,6 +2435,27 @@ "memory" ] }, + "v1.MachineHibernation": { + "properties": { + "changed": { + "description": "last changed timestamp", + "format": "date-time", + "type": "string" + }, + "description": { + "description": "describes last state change of hibernation", + "type": "string" + }, + "enabled": { + "description": "true if hibernation is enabled", + "type": "boolean" + } + }, + "required": [ + "description", + "enabled" + ] + }, "v1.MachineIPMI": { "description": "The IPMI connection data", "properties": { @@ -2515,6 +2548,12 @@ }, "liveliness": { "description": "the liveliness of this machine", + "enum": [ + "Alive", + "Dead", + "Hibernated", + "Unknown" + ], "type": "string" }, "name": { @@ -2852,6 +2891,12 @@ }, "liveliness": { "description": "the liveliness of this machine", + "enum": [ + "Alive", + "Dead", + "Hibernated", + "Unknown" + ], "type": "string" }, "name": { @@ -2902,6 
+2947,10 @@ "description": "a description why this machine is in the given state", "type": "string" }, + "hibernation": { + "$ref": "#/definitions/v1.MachineHibernation", + "description": "indicates that a machine was sent to sleep or woken up by the pool scaler" + }, "issuer": { "description": "the user that changed the state", "type": "string" @@ -2922,6 +2971,7 @@ }, "required": [ "description", + "hibernation", "metal_hammer_version", "value" ] From a4d4507689e8067190cf879bc947c35a7bfd2abf Mon Sep 17 00:00:00 2001 From: Gerrit Date: Tue, 7 Mar 2023 08:58:41 +0100 Subject: [PATCH 36/45] Save liveliness properly and fix linting issue. (#427) --- .../internal/service/machine-service.go | 61 ++++----- .../machine-service_integration_test.go | 119 ++++++++++++++++++ 2 files changed, 150 insertions(+), 30 deletions(-) diff --git a/cmd/metal-api/internal/service/machine-service.go b/cmd/metal-api/internal/service/machine-service.go index 6856a51d2..c876b6ad6 100644 --- a/cmd/metal-api/internal/service/machine-service.go +++ b/cmd/metal-api/internal/service/machine-service.go @@ -1757,64 +1757,65 @@ func MachineLiveliness(ds *datastore.RethinkStore, logger *zap.SugaredLogger) er return err } - unknown := 0 - alive := 0 - dead := 0 - errs := 0 + var ( + unknown = 0 + alive = 0 + dead = 0 + hibernated = 0 + errs = 0 + ) for _, m := range machines { lvlness, err := evaluateMachineLiveliness(ds, m) if err != nil { logger.Errorw("cannot update liveliness", "error", err, "machine", m) errs++ - // fall through, so the rest of the machines is getting evaluated + // fall through, so the machine counted anyway and rest of the machines is getting evaluated } switch lvlness { case metal.MachineLivelinessAlive: alive++ case metal.MachineLivelinessDead: dead++ + case metal.MachineLivelinesHibernated: + hibernated++ case metal.MachineLivelinessUnknown: unknown++ } } - logger.Infow("machine liveliness evaluated", "alive", alive, "dead", dead, "unknown", unknown, "errors", errs) + logger.Infow("machine liveliness evaluated", "alive", alive, "dead", dead, "hibernated", hibernated, "unknown", unknown, "errors", errs) return nil } func evaluateMachineLiveliness(ds *datastore.RethinkStore, m metal.Machine) (metal.MachineLiveliness, error) { - if m.State.Hibernation.Enabled { - return metal.MachineLivelinesHibernated, nil - } - - provisioningEvents, err := ds.FindProvisioningEventContainer(m.ID) + ec, err := ds.FindProvisioningEventContainer(m.ID) if err != nil { // we have no provisioning events... 
we cannot tell - return metal.MachineLivelinessUnknown, fmt.Errorf("no provisioningEvents found for ID: %s", m.ID) + return metal.MachineLivelinessUnknown, fmt.Errorf("no provisioning events found for machine: %s", m.ID) } - old := *provisioningEvents + updateLiveliness := func(liveliness metal.MachineLiveliness) (metal.MachineLiveliness, error) { + old := *ec + ec.Liveliness = liveliness - if provisioningEvents.LastEventTime != nil { - if time.Since(*provisioningEvents.LastEventTime) > metal.MachineDeadAfter { - if m.Allocation != nil { - // the machine is either dead or the customer did turn off the phone home service - provisioningEvents.Liveliness = metal.MachineLivelinessUnknown - } else { - // the machine is just dead - provisioningEvents.Liveliness = metal.MachineLivelinessDead - } - } else { - provisioningEvents.Liveliness = metal.MachineLivelinessAlive - } - err = ds.UpdateProvisioningEventContainer(&old, provisioningEvents) - if err != nil { - return provisioningEvents.Liveliness, err - } + return ec.Liveliness, ds.UpdateProvisioningEventContainer(&old, ec) + } + + if m.State.Hibernation.Enabled { + return updateLiveliness(metal.MachineLivelinesHibernated) + } + + if time.Since(pointer.SafeDeref(ec.LastEventTime)) < metal.MachineDeadAfter { + return updateLiveliness(metal.MachineLivelinessAlive) + } + + if m.Allocation != nil { + // the machine is either dead or the customer did turn off the phone home service + return updateLiveliness(metal.MachineLivelinessUnknown) } - return provisioningEvents.Liveliness, nil + return updateLiveliness(metal.MachineLivelinessDead) } // ResurrectMachines attempts to resurrect machines that are obviously dead diff --git a/cmd/metal-api/internal/service/machine-service_integration_test.go b/cmd/metal-api/internal/service/machine-service_integration_test.go index 52a332d0e..b8f8f014a 100644 --- a/cmd/metal-api/internal/service/machine-service_integration_test.go +++ b/cmd/metal-api/internal/service/machine-service_integration_test.go @@ -19,6 +19,7 @@ import ( grpcv1 "github.com/metal-stack/metal-api/pkg/api/v1" "github.com/metal-stack/metal-api/test" "github.com/metal-stack/metal-lib/bus" + "github.com/metal-stack/metal-lib/pkg/pointer" "go.uber.org/zap/zaptest" "google.golang.org/grpc" "google.golang.org/grpc/credentials/insecure" @@ -294,3 +295,121 @@ func BenchmarkMachineList(b *testing.B) { b.StopTimer() } + +func TestMachineLivelinessEvaluation(t *testing.T) { + te := createTestEnvironment(t) + defer te.teardown() + + now := time.Now() + + machinesWithECs := []struct { + m metal.Machine + ec *metal.ProvisioningEventContainer + want metal.MachineLiveliness + }{ + { + m: metal.Machine{ + Base: metal.Base{ + ID: "1", + Description: "waiting machine", + }, + Waiting: true, + }, + ec: &metal.ProvisioningEventContainer{ + Base: metal.Base{ID: "1"}, + LastEventTime: pointer.Pointer(now.Add(-1 * time.Minute)), + }, + want: metal.MachineLivelinessAlive, + }, + { + m: metal.Machine{ + Base: metal.Base{ + ID: "2", + Description: "dead machine", + }, + }, + ec: &metal.ProvisioningEventContainer{ + Base: metal.Base{ID: "2"}, + LastEventTime: pointer.Pointer(now.Add(-1*time.Minute - metal.MachineDeadAfter)), + }, + want: metal.MachineLivelinessDead, + }, + { + m: metal.Machine{ + Base: metal.Base{ + ID: "3", + Description: "machine where user disabled lldp daemon", + }, + Allocation: &metal.MachineAllocation{ + Creator: "someone", + }, + }, + ec: &metal.ProvisioningEventContainer{ + Base: metal.Base{ID: "3"}, + LastEventTime: 
pointer.Pointer(now.Add(-1*time.Minute - metal.MachineDeadAfter)), + }, + want: metal.MachineLivelinessUnknown, + }, + { + m: metal.Machine{ + Base: metal.Base{ + ID: "4", + Description: "hibernated machine", + }, + State: metal.MachineState{ + Hibernation: metal.MachineHibernation{ + Enabled: true, + }, + }, + }, + ec: &metal.ProvisioningEventContainer{ + Base: metal.Base{ID: "4"}, + LastEventTime: pointer.Pointer(now.Add(-1*time.Minute - metal.MachineDeadAfter)), + }, + want: metal.MachineLivelinesHibernated, + }, + { + m: metal.Machine{ + Base: metal.Base{ + ID: "5", + Description: "machine without event container", + }, + }, + ec: nil, + want: metal.MachineLivelinessUnknown, + }, + } + + for i := range machinesWithECs { + skeleton := &metal.Machine{Base: machinesWithECs[i].m.Base} + err := te.ds.CreateMachine(skeleton) + require.NoError(t, err) + + updatedMachine := *skeleton + updatedMachine.Waiting = machinesWithECs[i].m.Waiting + updatedMachine.Allocation = machinesWithECs[i].m.Allocation + updatedMachine.State = machinesWithECs[i].m.State + + err = te.ds.UpdateMachine(skeleton, &updatedMachine) + require.NoError(t, err) + + if machinesWithECs[i].ec != nil { + err = te.ds.CreateProvisioningEventContainer(machinesWithECs[i].ec) + require.NoError(t, err) + } + } + + err := MachineLiveliness(te.ds, zaptest.NewLogger(t).Sugar()) + require.NoError(t, err) + + for i := range machinesWithECs { + if machinesWithECs[i].ec == nil { + continue + } + + ec, err := te.ds.FindProvisioningEventContainer(machinesWithECs[i].m.ID) + require.NoError(t, err) + + assert.Equal(t, machinesWithECs[i].want, ec.Liveliness, "machine %q", machinesWithECs[i].m.ID) + } +} From b0ba295c85d03bd6c012844679019d69b2e47908 Mon Sep 17 00:00:00 2001 From: Gerrit Date: Tue, 7 Mar 2023 09:47:00 +0100 Subject: [PATCH 37/45] Fix integration test (#429) --- .../internal/service/integration_test.go | 14 +++++--- .../internal/service/switch-service_test.go | 36 +++++++++---------- 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/cmd/metal-api/internal/service/integration_test.go b/cmd/metal-api/internal/service/integration_test.go index a1ee19aa2..12f238359 100644 --- a/cmd/metal-api/internal/service/integration_test.go +++ b/cmd/metal-api/internal/service/integration_test.go @@ -53,10 +53,14 @@ type testEnv struct { privateNetwork *v1.NetworkResponse rethinkContainer testcontainers.Container ctx context.Context + cancel context.CancelFunc } func (te *testEnv) teardown() { - _ = te.rethinkContainer.Terminate(te.ctx) + ctx, cancel := context.WithTimeout(te.ctx, 3*time.Second) + defer cancel() + _ = te.rethinkContainer.Terminate(ctx) + te.cancel() } //nolint:deadcode @@ -83,10 +87,11 @@ func createTestEnvironment(t *testing.T) testEnv { mdc := mdm.NewMock(psc, nil) log := zaptest.NewLogger(t).Sugar() + ctx, cancel := context.WithCancel(context.Background()) go func() { - err := metalgrpc.Run(&metalgrpc.ServerConfig{ - Context: context.Background(), + err := metalgrpc.Run(&metalgrpc.ServerConfig{ //nolint + Context: ctx, Store: ds, Publisher: NopPublisher{}, Logger: log, @@ -122,7 +127,8 @@ func createTestEnvironment(t *testing.T) testEnv { ipService: ipService, ds: ds, rethinkContainer: rethinkContainer, - ctx: context.TODO(), + ctx: ctx, + cancel: cancel, } imageID := "test-image-1.0.0" diff --git a/cmd/metal-api/internal/service/switch-service_test.go b/cmd/metal-api/internal/service/switch-service_test.go index c164919ea..9302d342e 100644 --- a/cmd/metal-api/internal/service/switch-service_test.go +++ 
b/cmd/metal-api/internal/service/switch-service_test.go @@ -1020,10 +1020,10 @@ func Test_updateSwitchNics(t *testing.T) { name: "idempotence", args: args{ oldNics: map[string]*metal.Nic{ - "11:11:11:11:11:11": &metal.Nic{Name: "swp1", MacAddress: "11:11:11:11:11:11"}, + "11:11:11:11:11:11": {Name: "swp1", MacAddress: "11:11:11:11:11:11"}, }, newNics: map[string]*metal.Nic{ - "11:11:11:11:11:11": &metal.Nic{Name: "swp1", MacAddress: "11:11:11:11:11:11"}, + "11:11:11:11:11:11": {Name: "swp1", MacAddress: "11:11:11:11:11:11"}, }, currentConnections: metal.ConnectionMap{}, }, @@ -1036,11 +1036,11 @@ func Test_updateSwitchNics(t *testing.T) { name: "adding a nic", args: args{ oldNics: map[string]*metal.Nic{ - "11:11:11:11:11:11": &metal.Nic{Name: "swp1", MacAddress: "11:11:11:11:11:11"}, + "11:11:11:11:11:11": {Name: "swp1", MacAddress: "11:11:11:11:11:11"}, }, newNics: map[string]*metal.Nic{ - "11:11:11:11:11:11": &metal.Nic{Name: "swp1", MacAddress: "11:11:11:11:11:11"}, - "11:11:11:11:11:12": &metal.Nic{Name: "swp2", MacAddress: "11:11:11:11:11:12"}, + "11:11:11:11:11:11": {Name: "swp1", MacAddress: "11:11:11:11:11:11"}, + "11:11:11:11:11:12": {Name: "swp2", MacAddress: "11:11:11:11:11:12"}, }, currentConnections: metal.ConnectionMap{}, }, @@ -1054,7 +1054,7 @@ func Test_updateSwitchNics(t *testing.T) { name: "removing a nic", args: args{ oldNics: map[string]*metal.Nic{ - "11:11:11:11:11:11": &metal.Nic{Name: "swp1", MacAddress: "11:11:11:11:11:11"}, + "11:11:11:11:11:11": {Name: "swp1", MacAddress: "11:11:11:11:11:11"}, }, newNics: map[string]*metal.Nic{}, currentConnections: metal.ConnectionMap{}, @@ -1066,11 +1066,11 @@ func Test_updateSwitchNics(t *testing.T) { name: "removing a nic 2", args: args{ oldNics: map[string]*metal.Nic{ - "11:11:11:11:11:11": &metal.Nic{Name: "swp1", MacAddress: "11:11:11:11:11:11"}, - "11:11:11:11:11:12": &metal.Nic{Name: "swp2", MacAddress: "11:11:11:11:11:12"}, + "11:11:11:11:11:11": {Name: "swp1", MacAddress: "11:11:11:11:11:11"}, + "11:11:11:11:11:12": {Name: "swp2", MacAddress: "11:11:11:11:11:12"}, }, newNics: map[string]*metal.Nic{ - "11:11:11:11:11:11": &metal.Nic{Name: "swp1", MacAddress: "11:11:11:11:11:11"}, + "11:11:11:11:11:11": {Name: "swp1", MacAddress: "11:11:11:11:11:11"}, }, currentConnections: metal.ConnectionMap{}, }, @@ -1083,11 +1083,11 @@ func Test_updateSwitchNics(t *testing.T) { name: "removing a used nic", args: args{ oldNics: map[string]*metal.Nic{ - "11:11:11:11:11:11": &metal.Nic{Name: "swp1", MacAddress: "11:11:11:11:11:11"}, - "11:11:11:11:11:12": &metal.Nic{Name: "swp2", MacAddress: "11:11:11:11:11:12"}, + "11:11:11:11:11:11": {Name: "swp1", MacAddress: "11:11:11:11:11:11"}, + "11:11:11:11:11:12": {Name: "swp2", MacAddress: "11:11:11:11:11:12"}, }, newNics: map[string]*metal.Nic{ - "11:11:11:11:11:11": &metal.Nic{Name: "swp1", MacAddress: "11:11:11:11:11:11"}, + "11:11:11:11:11:11": {Name: "swp1", MacAddress: "11:11:11:11:11:11"}, }, currentConnections: metal.ConnectionMap{ "machine-uuid-1": metal.Connections{metal.Connection{MachineID: "machine-uuid-1", Nic: metal.Nic{Name: "swp2", MacAddress: "11:11:11:11:11:12"}}}, @@ -1100,10 +1100,10 @@ func Test_updateSwitchNics(t *testing.T) { name: "updating a nic", args: args{ oldNics: map[string]*metal.Nic{ - "11:11:11:11:11:11": &metal.Nic{Name: "swp1", MacAddress: "11:11:11:11:11:11"}, + "11:11:11:11:11:11": {Name: "swp1", MacAddress: "11:11:11:11:11:11"}, }, newNics: map[string]*metal.Nic{ - "11:11:11:11:11:11": &metal.Nic{Name: "swp2", MacAddress: "11:11:11:11:11:11"}, + 
"11:11:11:11:11:11": {Name: "swp2", MacAddress: "11:11:11:11:11:11"}, }, currentConnections: metal.ConnectionMap{}, }, @@ -1116,10 +1116,10 @@ func Test_updateSwitchNics(t *testing.T) { name: "updating a nic, vrf should not be altered", args: args{ oldNics: map[string]*metal.Nic{ - "11:11:11:11:11:11": &metal.Nic{Name: "swp1", Vrf: "vrf1", MacAddress: "11:11:11:11:11:11"}, + "11:11:11:11:11:11": {Name: "swp1", Vrf: "vrf1", MacAddress: "11:11:11:11:11:11"}, }, newNics: map[string]*metal.Nic{ - "11:11:11:11:11:11": &metal.Nic{Name: "swp2", Vrf: "vrf2", MacAddress: "11:11:11:11:11:11"}, + "11:11:11:11:11:11": {Name: "swp2", Vrf: "vrf2", MacAddress: "11:11:11:11:11:11"}, }, currentConnections: metal.ConnectionMap{}, }, @@ -1132,10 +1132,10 @@ func Test_updateSwitchNics(t *testing.T) { name: "updating a nic name, which already has a connection", args: args{ oldNics: map[string]*metal.Nic{ - "11:11:11:11:11:11": &metal.Nic{Name: "swp1", MacAddress: "11:11:11:11:11:11"}, + "11:11:11:11:11:11": {Name: "swp1", MacAddress: "11:11:11:11:11:11"}, }, newNics: map[string]*metal.Nic{ - "11:11:11:11:11:11": &metal.Nic{Name: "swp2", MacAddress: "11:11:11:11:11:11"}, + "11:11:11:11:11:11": {Name: "swp2", MacAddress: "11:11:11:11:11:11"}, }, currentConnections: metal.ConnectionMap{ "machine-uuid-1": metal.Connections{metal.Connection{MachineID: "machine-uuid-1", Nic: metal.Nic{Name: "swp1", MacAddress: "11:11:11:11:11:11"}}}, From 049abbf4dfa581d59dcc610d15895d1d67ca23f7 Mon Sep 17 00:00:00 2001 From: Gerrit Date: Tue, 7 Mar 2023 10:55:43 +0100 Subject: [PATCH 38/45] Do not use pool scaler in early machine lifecycle. (#430) --- Makefile | 2 +- cmd/metal-api/internal/datastore/event.go | 34 +++++++++++-------- .../internal/scaler/pool_scaler_mock_test.go | 17 +++++++--- 3 files changed, 33 insertions(+), 20 deletions(-) diff --git a/Makefile b/Makefile index 3476e13f7..ed6db068b 100644 --- a/Makefile +++ b/Makefile @@ -46,4 +46,4 @@ visualize-fsm: .PHONY: mocks mocks: - docker run --user $$(id -u):$$(id -g) --rm -w /work -v ${PWD}:/work vektra/mockery:v2.14.0 --name MachineManager --dir /work/cmd/metal-api/internal/scaler --output /work/cmd/metal-api/internal/scaler --filename pool_scaler_mock_test.go --testonly --inpackage \ No newline at end of file + docker run --user $$(id -u):$$(id -g) --rm -w /work -v ${PWD}:/work vektra/mockery:v2.21.1 --name MachineManager --dir /work/cmd/metal-api/internal/scaler --output /work/cmd/metal-api/internal/scaler --filename pool_scaler_mock_test.go --testonly --inpackage diff --git a/cmd/metal-api/internal/datastore/event.go b/cmd/metal-api/internal/datastore/event.go index 80ba52f30..3f0bcecc5 100644 --- a/cmd/metal-api/internal/datastore/event.go +++ b/cmd/metal-api/internal/datastore/event.go @@ -55,24 +55,28 @@ func (rs *RethinkStore) ProvisioningEventForMachine(log *zap.SugaredLogger, publ } } - p, err := rs.FindPartition(machine.PartitionID) - if err != nil { - return nil, err - } + var scaler *s.PoolScaler + if machine.PartitionID != "" && machine.SizeID != "" { + // in the early lifecycle, when the pxe booting event is submitted + // a machine does not have a partition or size , so the pool scaler + // can not work at this stage - manager := &manager{ - rs: rs, - publisher: publisher, - partitionid: p.ID, - sizeid: machine.SizeID, - } + p, err := rs.FindPartition(machine.PartitionID) + if err != nil { + return nil, err + } - c := &s.PoolScalerConfig{ - Log: log.Named("pool-scaler"), - Manager: manager, - Partition: *p, + scaler = 
s.NewPoolScaler(&s.PoolScalerConfig{ + Log: log.Named("pool-scaler"), + Manager: &manager{ + rs: rs, + publisher: publisher, + partitionid: p.ID, + sizeid: machine.SizeID, + }, + Partition: *p, + }) } - scaler := s.NewPoolScaler(c) config := states.StateConfig{ Log: log.Named("fsm"), diff --git a/cmd/metal-api/internal/scaler/pool_scaler_mock_test.go b/cmd/metal-api/internal/scaler/pool_scaler_mock_test.go index 9e11a7c0a..b95d6e899 100644 --- a/cmd/metal-api/internal/scaler/pool_scaler_mock_test.go +++ b/cmd/metal-api/internal/scaler/pool_scaler_mock_test.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.14.0. DO NOT EDIT. +// Code generated by mockery v2.21.1. DO NOT EDIT. package scaler @@ -17,6 +17,10 @@ func (_m *MockMachineManager) AllMachines() (metal.Machines, error) { ret := _m.Called() var r0 metal.Machines + var r1 error + if rf, ok := ret.Get(0).(func() (metal.Machines, error)); ok { + return rf() + } if rf, ok := ret.Get(0).(func() metal.Machines); ok { r0 = rf() } else { @@ -25,7 +29,6 @@ func (_m *MockMachineManager) AllMachines() (metal.Machines, error) { } } - var r1 error if rf, ok := ret.Get(1).(func() error); ok { r1 = rf() } else { @@ -68,6 +71,10 @@ func (_m *MockMachineManager) ShutdownMachines() (metal.Machines, error) { ret := _m.Called() var r0 metal.Machines + var r1 error + if rf, ok := ret.Get(0).(func() (metal.Machines, error)); ok { + return rf() + } if rf, ok := ret.Get(0).(func() metal.Machines); ok { r0 = rf() } else { @@ -76,7 +83,6 @@ func (_m *MockMachineManager) ShutdownMachines() (metal.Machines, error) { } } - var r1 error if rf, ok := ret.Get(1).(func() error); ok { r1 = rf() } else { @@ -91,6 +97,10 @@ func (_m *MockMachineManager) WaitingMachines() (metal.Machines, error) { ret := _m.Called() var r0 metal.Machines + var r1 error + if rf, ok := ret.Get(0).(func() (metal.Machines, error)); ok { + return rf() + } if rf, ok := ret.Get(0).(func() metal.Machines); ok { r0 = rf() } else { @@ -99,7 +109,6 @@ func (_m *MockMachineManager) WaitingMachines() (metal.Machines, error) { } } - var r1 error if rf, ok := ret.Get(1).(func() error); ok { r1 = rf() } else { From 0c633ef758783d8223583e92e6fa3e8133515ca5 Mon Sep 17 00:00:00 2001 From: iljarotar Date: Tue, 7 Mar 2023 11:08:06 +0100 Subject: [PATCH 39/45] liveliness typo --- cmd/metal-api/internal/metal/machine.go | 12 ++++++------ cmd/metal-api/internal/service/machine-service.go | 6 +++--- .../service/machine-service_integration_test.go | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/cmd/metal-api/internal/metal/machine.go b/cmd/metal-api/internal/metal/machine.go index 249611544..bc733ff5e 100644 --- a/cmd/metal-api/internal/metal/machine.go +++ b/cmd/metal-api/internal/metal/machine.go @@ -302,12 +302,12 @@ type MachineLiveliness string // The enums for the machine liveliness states. 
const ( - MachineLivelinessAlive MachineLiveliness = "Alive" - MachineLivelinessDead MachineLiveliness = "Dead" - MachineLivelinessUnknown MachineLiveliness = "Unknown" - MachineLivelinesHibernated MachineLiveliness = "Hibernated" - MachineDeadAfter time.Duration = 5 * time.Minute - MachineResurrectAfter time.Duration = time.Hour + MachineLivelinessAlive MachineLiveliness = "Alive" + MachineLivelinessDead MachineLiveliness = "Dead" + MachineLivelinessUnknown MachineLiveliness = "Unknown" + MachineLivelinessHibernated MachineLiveliness = "Hibernated" + MachineDeadAfter time.Duration = 5 * time.Minute + MachineResurrectAfter time.Duration = time.Hour ) // Is return true if given liveliness is equal to specific Liveliness diff --git a/cmd/metal-api/internal/service/machine-service.go b/cmd/metal-api/internal/service/machine-service.go index c876b6ad6..c1b555f1a 100644 --- a/cmd/metal-api/internal/service/machine-service.go +++ b/cmd/metal-api/internal/service/machine-service.go @@ -1776,7 +1776,7 @@ func MachineLiveliness(ds *datastore.RethinkStore, logger *zap.SugaredLogger) er alive++ case metal.MachineLivelinessDead: dead++ - case metal.MachineLivelinesHibernated: + case metal.MachineLivelinessHibernated: hibernated++ case metal.MachineLivelinessUnknown: unknown++ @@ -1803,7 +1803,7 @@ func evaluateMachineLiveliness(ds *datastore.RethinkStore, m metal.Machine) (met } if m.State.Hibernation.Enabled { - return updateLiveliness(metal.MachineLivelinesHibernated) + return updateLiveliness(metal.MachineLivelinessHibernated) } if time.Since(pointer.SafeDeref(ec.LastEventTime)) < metal.MachineDeadAfter { @@ -1849,7 +1849,7 @@ func ResurrectMachines(ds *datastore.RethinkStore, publisher bus.Publisher, ep * continue } - if provisioningEvents.Liveliness.Is(string(metal.MachineLivelinesHibernated)) { + if provisioningEvents.Liveliness.Is(string(metal.MachineLivelinessHibernated)) { continue } diff --git a/cmd/metal-api/internal/service/machine-service_integration_test.go b/cmd/metal-api/internal/service/machine-service_integration_test.go index b8f8f014a..3d05b6784 100644 --- a/cmd/metal-api/internal/service/machine-service_integration_test.go +++ b/cmd/metal-api/internal/service/machine-service_integration_test.go @@ -366,7 +366,7 @@ func TestMachineLivelinessEvaluation(t *testing.T) { Base: metal.Base{ID: "4"}, LastEventTime: pointer.Pointer(now.Add(-1*time.Minute - metal.MachineDeadAfter)), }, - want: metal.MachineLivelinesHibernated, + want: metal.MachineLivelinessHibernated, }, { m: metal.Machine{ From 1d729771a8cab3b30ff973bb0434e821f8e9b3fa Mon Sep 17 00:00:00 2001 From: iljarotar Date: Thu, 9 Mar 2023 15:34:24 +0100 Subject: [PATCH 40/45] add waiting and preallocated fields to waiting machine query --- cmd/metal-api/internal/datastore/machine.go | 14 ++++++++ .../datastore/machine_integration_test.go | 34 +++++++++++++++++++ cmd/metal-api/internal/datastore/poolsize.go | 2 ++ spec/metal-api.json | 18 ++++++++++ 4 files changed, 68 insertions(+) diff --git a/cmd/metal-api/internal/datastore/machine.go b/cmd/metal-api/internal/datastore/machine.go index 711001df9..27de1ca16 100644 --- a/cmd/metal-api/internal/datastore/machine.go +++ b/cmd/metal-api/internal/datastore/machine.go @@ -56,6 +56,8 @@ type MachineSearchQuery struct { // state HibernationEnabled *bool `json:"hibernation_enabled" optional:"true"` StateValue *string `json:"state_value" optional:"true" enum:"|RESERVED|LOCKED"` + PreAllocated *bool `json:"preallocated" optional:"true"` + Waiting *bool `json:"waiting" optional:"true"` // 
ipmi IpmiAddress *string `json:"ipmi_address" optional:"true"` @@ -317,6 +319,18 @@ func (p *MachineSearchQuery) generateTerm(rs *RethinkStore) *r.Term { }) } + if p.PreAllocated != nil { + q = q.Filter(func(row r.Term) r.Term { + return row.Field("preallocated").Eq(*p.PreAllocated) + }) + } + + if p.Waiting != nil { + q = q.Filter(func(row r.Term) r.Term { + return row.Field("waiting").Eq(*p.Waiting) + }) + } + if p.IpmiAddress != nil { q = q.Filter(func(row r.Term) r.Term { return row.Field("ipmi").Field("address").Eq(*p.IpmiAddress) diff --git a/cmd/metal-api/internal/datastore/machine_integration_test.go b/cmd/metal-api/internal/datastore/machine_integration_test.go index a1a897322..a8fbb7371 100644 --- a/cmd/metal-api/internal/datastore/machine_integration_test.go +++ b/cmd/metal-api/internal/datastore/machine_integration_test.go @@ -683,6 +683,40 @@ func TestRethinkStore_SearchMachines(t *testing.T) { }, wantErr: nil, }, + { + name: "search by waiting true", + q: &MachineSearchQuery{ + Waiting: pointer.Pointer(true), + }, + mock: []*metal.Machine{ + {Base: metal.Base{ID: "1"}, Waiting: true}, + {Base: metal.Base{ID: "2"}, Waiting: true}, + {Base: metal.Base{ID: "3"}, Waiting: false}, + {Base: metal.Base{ID: "4"}, Waiting: false}, + }, + want: []*metal.Machine{ + tt.defaultBody(&metal.Machine{Base: metal.Base{ID: "1"}, Waiting: true}), + tt.defaultBody(&metal.Machine{Base: metal.Base{ID: "2"}, Waiting: true}), + }, + wantErr: nil, + }, + { + name: "search by preallocated true", + q: &MachineSearchQuery{ + PreAllocated: pointer.Pointer(true), + }, + mock: []*metal.Machine{ + {Base: metal.Base{ID: "1"}, PreAllocated: true}, + {Base: metal.Base{ID: "2"}, PreAllocated: true}, + {Base: metal.Base{ID: "3"}, PreAllocated: false}, + {Base: metal.Base{ID: "4"}, PreAllocated: false}, + }, + want: []*metal.Machine{ + tt.defaultBody(&metal.Machine{Base: metal.Base{ID: "1"}, PreAllocated: true}), + tt.defaultBody(&metal.Machine{Base: metal.Base{ID: "2"}, PreAllocated: true}), + }, + wantErr: nil, + }, { name: "search by hibernation_enabled true", q: &MachineSearchQuery{ diff --git a/cmd/metal-api/internal/datastore/poolsize.go b/cmd/metal-api/internal/datastore/poolsize.go index 113059a3a..8c7b0600a 100644 --- a/cmd/metal-api/internal/datastore/poolsize.go +++ b/cmd/metal-api/internal/datastore/poolsize.go @@ -38,6 +38,8 @@ func (m *manager) WaitingMachines() (metal.Machines, error) { SizeID: &m.sizeid, StateValue: pointer.Pointer(string(metal.AvailableState)), NotAllocated: pointer.Pointer(true), + Waiting: pointer.Pointer(true), + PreAllocated: pointer.Pointer(false), } waitingMachines := metal.Machines{} diff --git a/spec/metal-api.json b/spec/metal-api.json index 2cd2fad12..503f5fd40 100644 --- a/spec/metal-api.json +++ b/spec/metal-api.json @@ -241,6 +241,9 @@ "partition_id": { "type": "string" }, + "preallocated": { + "type": "boolean" + }, "rackid": { "type": "string" }, @@ -260,6 +263,9 @@ "type": "string" }, "type": "array" + }, + "waiting": { + "type": "boolean" } } }, @@ -1162,6 +1168,9 @@ "partition_id": { "type": "string" }, + "preallocated": { + "type": "boolean" + }, "rackid": { "type": "string" }, @@ -1181,6 +1190,9 @@ "type": "string" }, "type": "array" + }, + "waiting": { + "type": "boolean" } } }, @@ -2316,6 +2328,9 @@ "partition_id": { "type": "string" }, + "preallocated": { + "type": "boolean" + }, "rackid": { "type": "string" }, @@ -2335,6 +2350,9 @@ "type": "string" }, "type": "array" + }, + "waiting": { + "type": "boolean" } } }, From 
cfd3e94fc05259102b19562b82677700f23c3f7d Mon Sep 17 00:00:00 2001 From: iljarotar Date: Thu, 9 Mar 2023 15:50:07 +0100 Subject: [PATCH 41/45] do not update machine liveliness while machine is shutting down --- cmd/metal-api/internal/fsm/states/alive.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/cmd/metal-api/internal/fsm/states/alive.go b/cmd/metal-api/internal/fsm/states/alive.go index 77cba097f..c12852541 100644 --- a/cmd/metal-api/internal/fsm/states/alive.go +++ b/cmd/metal-api/internal/fsm/states/alive.go @@ -11,6 +11,7 @@ type AliveState struct { log *zap.SugaredLogger container *metal.ProvisioningEventContainer event *metal.ProvisioningEvent + machine *metal.Machine } func newAlive(c *StateConfig) *AliveState { @@ -18,10 +19,17 @@ func newAlive(c *StateConfig) *AliveState { log: c.Log, container: c.Container, event: c.Event, + machine: c.Machine, } } func (p *AliveState) OnEnter(e *fsm.Event) { - updateTimeAndLiveliness(p.event, p.container) p.log.Debugw("received provisioning alive event", "id", p.container.ID) + + if p.machine.State.Hibernation.Enabled { + p.container.LastEventTime = &p.event.Time // machine is about to shutdown and is still sending alive events + return + } + + updateTimeAndLiveliness(p.event, p.container) } From f8e4515409cf73f022f620d79d0e0bcb04b994f5 Mon Sep 17 00:00:00 2001 From: iljarotar Date: Thu, 9 Mar 2023 16:24:54 +0100 Subject: [PATCH 42/45] fix nil pointer --- cmd/metal-api/internal/fsm/states/alive.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/metal-api/internal/fsm/states/alive.go b/cmd/metal-api/internal/fsm/states/alive.go index c12852541..78220a6ab 100644 --- a/cmd/metal-api/internal/fsm/states/alive.go +++ b/cmd/metal-api/internal/fsm/states/alive.go @@ -26,7 +26,7 @@ func newAlive(c *StateConfig) *AliveState { func (p *AliveState) OnEnter(e *fsm.Event) { p.log.Debugw("received provisioning alive event", "id", p.container.ID) - if p.machine.State.Hibernation.Enabled { + if p.machine != nil && p.machine.State.Hibernation.Enabled { p.container.LastEventTime = &p.event.Time // machine is about to shutdown and is still sending alive events return } From 9e42317e999c655c4644c013046514f31bf226f8 Mon Sep 17 00:00:00 2001 From: Gerrit Date: Thu, 9 Mar 2023 18:06:18 +0100 Subject: [PATCH 43/45] Add Dockerfile.dev to fasten up dev cycles. (#431) --- Dockerfile.dev | 3 +++ Makefile | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 Dockerfile.dev diff --git a/Dockerfile.dev b/Dockerfile.dev new file mode 100644 index 000000000..b62e69d8a --- /dev/null +++ b/Dockerfile.dev @@ -0,0 +1,3 @@ +FROM alpine:3.17 +COPY bin/metal-api /metal-api +CMD ["/metal-api"] diff --git a/Makefile b/Makefile index ed6db068b..336bbed49 100644 --- a/Makefile +++ b/Makefile @@ -32,8 +32,8 @@ protoc-docker: docker run --rm --user $$(id -u):$$(id -g) -v $(PWD):/work --tmpfs /.cache -w /work/proto bufbuild/buf:1.14.0 generate -v .PHONY: mini-lab-push -mini-lab-push: - docker build -t metalstack/metal-api:latest . +mini-lab-push: all + docker build -f Dockerfile.dev -t metalstack/metal-api:latest . 
kind --name metal-control-plane load docker-image metalstack/metal-api:latest kubectl --kubeconfig=$(MINI_LAB_KUBECONFIG) patch deployments.apps -n metal-control-plane metal-api --patch='{"spec":{"template":{"spec":{"containers":[{"name": "metal-api","imagePullPolicy":"IfNotPresent","image":"metalstack/metal-api:latest"}]}}}}' kubectl --kubeconfig=$(MINI_LAB_KUBECONFIG) delete pod -n metal-control-plane -l app=metal-api From 4809b4b8927785413895100556c00fee17021632 Mon Sep 17 00:00:00 2001 From: Gerrit Date: Tue, 1 Aug 2023 10:03:38 +0200 Subject: [PATCH 44/45] Merge master. --- cmd/metal-api/internal/grpc/event-service.go | 1 - cmd/metal-api/internal/service/image-service_test.go | 2 +- cmd/metal-api/internal/service/machine-service.go | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/cmd/metal-api/internal/grpc/event-service.go b/cmd/metal-api/internal/grpc/event-service.go index bba4e206f..bf10b0642 100644 --- a/cmd/metal-api/internal/grpc/event-service.go +++ b/cmd/metal-api/internal/grpc/event-service.go @@ -10,7 +10,6 @@ import ( "github.com/metal-stack/metal-api/cmd/metal-api/internal/metal" v1 "github.com/metal-stack/metal-api/pkg/api/v1" "github.com/metal-stack/metal-lib/bus" - "go.uber.org/multierr" "go.uber.org/zap" ) diff --git a/cmd/metal-api/internal/service/image-service_test.go b/cmd/metal-api/internal/service/image-service_test.go index 478fc9a13..48093927a 100644 --- a/cmd/metal-api/internal/service/image-service_test.go +++ b/cmd/metal-api/internal/service/image-service_test.go @@ -183,7 +183,7 @@ func TestCreateImageWithBrokenURL(t *testing.T) { var result httperrors.HTTPErrorResponse err = json.NewDecoder(resp.Body).Decode(&result) require.NoError(t, err) - require.True(t, strings.Contains(result.Message, "no such host")) + require.True(t, strings.Contains(result.Message, "is not accessible under")) createRequest.URL = "http://images.metal-stack.io/this-file-does-not-exist" diff --git a/cmd/metal-api/internal/service/machine-service.go b/cmd/metal-api/internal/service/machine-service.go index 70f967165..9143c5f36 100644 --- a/cmd/metal-api/internal/service/machine-service.go +++ b/cmd/metal-api/internal/service/machine-service.go @@ -2071,7 +2071,7 @@ func (r *machineResource) machineCmd(cmd metal.MachineCommand, request *restful. 
newMachine.IPMI.Password = r.ipmiSuperUser.Password() } - err = publishMachineCmd(logger, newMachine, r.Publisher, cmd) + err = e.PublishMachineCmd(logger, newMachine, r.Publisher, cmd) if err != nil { r.sendError(request, response, defaultError(err)) return From fe959405ff5c9164ece6c7011f54f5702744b051 Mon Sep 17 00:00:00 2001 From: iljarotar Date: Thu, 25 Jan 2024 16:25:42 +0100 Subject: [PATCH 45/45] remove Is functions --- cmd/metal-api/internal/service/machine-service.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cmd/metal-api/internal/service/machine-service.go b/cmd/metal-api/internal/service/machine-service.go index 4e78a3d10..4d069102c 100644 --- a/cmd/metal-api/internal/service/machine-service.go +++ b/cmd/metal-api/internal/service/machine-service.go @@ -2008,7 +2008,7 @@ func ResurrectMachines(ctx context.Context, ds *datastore.RethinkStore, publishe continue } - if provisioningEvents.Liveliness.Is(string(metal.MachineLivelinessHibernated)) { + if provisioningEvents.Liveliness == metal.MachineLivelinessHibernated { continue } @@ -2238,16 +2238,16 @@ func machineHasIssues(m *v1.MachineResponse) bool { if m.Partition == nil { return true } - if !metal.MachineLivelinessAlive.Is(m.Liveliness) { + if metal.MachineLivelinessAlive != metal.MachineLiveliness(m.Liveliness) { return true } - if m.Allocation == nil && len(m.RecentProvisioningEvents.Events) > 0 && metal.ProvisioningEventPhonedHome.Is(m.RecentProvisioningEvents.Events[0].Event) { + if m.Allocation == nil && len(m.RecentProvisioningEvents.Events) > 0 && metal.ProvisioningEventPhonedHome == metal.ProvisioningEventType(m.RecentProvisioningEvents.Events[0].Event) { // not allocated, but phones home return true } if m.RecentProvisioningEvents.CrashLoop || m.RecentProvisioningEvents.FailedMachineReclaim { // Machines in crash loop but in "Waiting" state are considered available - if len(m.RecentProvisioningEvents.Events) > 0 && !metal.ProvisioningEventWaiting.Is(m.RecentProvisioningEvents.Events[0].Event) { + if len(m.RecentProvisioningEvents.Events) > 0 && metal.ProvisioningEventWaiting != metal.ProvisioningEventType(m.RecentProvisioningEvents.Events[0].Event) { return true } }
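
Note on the liveliness logic these patches converge on: hibernation is checked first, then the age of the last provisioning event, then whether the machine is allocated. The following standalone Go sketch is an editorial illustration, not code from the patches; the names MachineLiveliness, machineDeadAfter and the machine struct are simplified stand-ins for metal.MachineLiveliness, metal.MachineDeadAfter and the fields read by evaluateMachineLiveliness, and the direct enum comparisons follow the style introduced in PATCH 45/45.

package main

import (
	"fmt"
	"time"
)

// Simplified stand-ins for the metal-api types touched by these patches.
type MachineLiveliness string

const (
	LivelinessAlive      MachineLiveliness = "Alive"
	LivelinessDead       MachineLiveliness = "Dead"
	LivelinessUnknown    MachineLiveliness = "Unknown"
	LivelinessHibernated MachineLiveliness = "Hibernated"

	// assumed threshold, mirroring metal.MachineDeadAfter
	machineDeadAfter = 5 * time.Minute
)

type machine struct {
	hibernationEnabled bool
	allocated          bool
	lastEventTime      *time.Time
}

// evaluate mirrors the branch order of the patched evaluateMachineLiveliness:
// hibernation wins, then a recent event means alive, then an allocated but
// silent machine is unknown (the phone-home service may have been disabled),
// otherwise the machine is considered dead.
func evaluate(m machine, now time.Time) MachineLiveliness {
	if m.hibernationEnabled {
		return LivelinessHibernated
	}
	var last time.Time // zero value when no event time is known, like pointer.SafeDeref
	if m.lastEventTime != nil {
		last = *m.lastEventTime
	}
	if now.Sub(last) < machineDeadAfter {
		return LivelinessAlive
	}
	if m.allocated {
		return LivelinessUnknown
	}
	return LivelinessDead
}

func main() {
	now := time.Now()
	recent := now.Add(-1 * time.Minute)
	stale := now.Add(-10 * time.Minute)

	fmt.Println(evaluate(machine{hibernationEnabled: true, lastEventTime: &stale}, now)) // Hibernated
	fmt.Println(evaluate(machine{lastEventTime: &recent}, now))                          // Alive
	fmt.Println(evaluate(machine{allocated: true, lastEventTime: &stale}, now))          // Unknown
	fmt.Println(evaluate(machine{lastEventTime: &stale}, now))                           // Dead
}

Keeping hibernation as the first branch reflects the patched behaviour that a hibernated machine is never reported dead, even when its last event is older than the dead-after threshold.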