Skip to content

Commit

Permalink
Ensure that ServerBootConfiguration is removed
Browse files Browse the repository at this point in the history
- Remove `ServerBootConfiguration` when `Server` turns ready
- Switch default registry port to 10000
- Fix not found errors and implement proper re-queueing
  • Loading branch information
afritzler committed Apr 26, 2024
1 parent 6fba843 commit 80abc0d
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 33 deletions.
2 changes: 1 addition & 1 deletion cmd/manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ func main() {

flag.StringVar(&registryURL, "registry-url", "", "The URL of the registry.")
flag.StringVar(&registryProtocol, "registry-protocol", "http", "The protocol to use for the registry.")
flag.IntVar(&registryPort, "registry-port", 8000, "The port to use for the registry.")
flag.IntVar(&registryPort, "registry-port", 10000, "The port to use for the registry.")
flag.StringVar(&probeImage, "probe-image", "", "Image for the first boot probing of a Server.")
flag.StringVar(&probeOSImage, "probe-os-image", "", "OS image for the first boot probing of a Server.")
flag.StringVar(&managerNamespace, "manager-namespace", "default", "Namespace the manager is running in.")
Expand Down
5 changes: 5 additions & 0 deletions internal/controller/bmc_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ import (
"context"
"fmt"

"k8s.io/apimachinery/pkg/api/errors"

metalv1alpha1 "github.com/afritzler/metal-operator/api/v1alpha1"
"github.com/go-logr/logr"
"github.com/ironcore-dev/controller-utils/clientutils"
Expand Down Expand Up @@ -99,6 +101,9 @@ func (r *BMCReconciler) reconcile(ctx context.Context, log logr.Logger, bmcObj *
func (r *BMCReconciler) updateBMCStatusDetails(ctx context.Context, log logr.Logger, bmcObj *metalv1alpha1.BMC) error {
endpoint := &metalv1alpha1.Endpoint{}
if err := r.Get(ctx, client.ObjectKey{Name: bmcObj.Spec.EndpointRef.Name}, endpoint); err != nil {
if errors.IsNotFound(err) {
return nil
}
return fmt.Errorf("failed to get Endpoints for BMC: %w", err)
}
log.V(1).Info("Got Endpoints for BMC", "Endpoints", endpoint.Name)
Expand Down
87 changes: 56 additions & 31 deletions internal/controller/server_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"encoding/json"
"fmt"
"net/http"
"time"

apierrors "k8s.io/apimachinery/pkg/api/errors"

Expand Down Expand Up @@ -146,7 +147,11 @@ func (r *ServerReconciler) reconcile(ctx context.Context, log logr.Logger, serve
}
}

if err := r.ensureServerStateTransition(ctx, log, server); err != nil {
requeue, err := r.ensureServerStateTransition(ctx, log, server)
if requeue && err == nil {
return ctrl.Result{Requeue: requeue, RequeueAfter: 10 * time.Second}, nil
}
if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to ensure server state transition: %w", err)
}

Expand Down Expand Up @@ -178,77 +183,89 @@ func (r *ServerReconciler) reconcile(ctx context.Context, log logr.Logger, serve
//
// Maintenance:
// A Maintenance state represents a special case where certain operations like BIOS updates should be performed.
func (r *ServerReconciler) ensureServerStateTransition(ctx context.Context, log logr.Logger, server *metalv1alpha1.Server) error {
func (r *ServerReconciler) ensureServerStateTransition(ctx context.Context, log logr.Logger, server *metalv1alpha1.Server) (bool, error) {
switch server.Status.State {
case metalv1alpha1.ServerStateInitial:
if err := r.updateServerStatus(ctx, log, server); err != nil {
return err
return false, err
}
log.V(1).Info("Updated Server status")

// apply boot configuration
if err := r.applyBootConfigurationAndIgnitionForDiscovery(ctx, server); err != nil {
return fmt.Errorf("failed to apply server boot configuration: %w", err)
config, err := r.applyBootConfigurationAndIgnitionForDiscovery(ctx, server)
if err != nil {
return false, fmt.Errorf("failed to apply server boot configuration: %w", err)
}
log.V(1).Info("Applied Server boot configuration")

if ready, err := r.serverBootConfigurationIsReady(ctx, server); err != nil || !ready {
log.V(1).Info("Server boot configuration is not ready")
return err
log.V(1).Info("Server boot configuration is not ready. Retrying ...")
return false, err
}
log.V(1).Info("Server boot configuration is ready")

if err := r.pxeBootServer(ctx, log, server); err != nil {
return fmt.Errorf("failed to boot server: %w", err)
return false, fmt.Errorf("failed to boot server: %w", err)
}
log.V(1).Info("Booted Server in PXE")

if err := r.extractServerDetailsFromRegistry(ctx, server); err != nil {
ready, err := r.extractServerDetailsFromRegistry(ctx, log, server)
if !ready && err == nil {
log.V(1).Info("Server agent did not post info to registry")
return true, nil
}
if err != nil {
log.V(1).Info("Could not get server details from registry.")
// TODO: instead of requeueing, subscribe to registry events and requeue Server objects in SetupWithManager
return err
return false, err
}
log.V(1).Info("Extracted Server details")

if err := r.ensureInitialBootConfigurationIsDeleted(ctx, config); err != nil {
return false, fmt.Errorf("failed to ensure server initial boot configuration is deleted: %w", err)
}
log.V(1).Info("Ensured initial boot configuration is deleted")

// TODO: fix that by providing the power state to the ensure method
server.Spec.Power = metalv1alpha1.PowerOff
if err := r.ensureServerPowerState(ctx, log, server); err != nil {
return fmt.Errorf("failed to shutdown server: %w", err)
return false, fmt.Errorf("failed to shutdown server: %w", err)
}
log.V(1).Info("Server state set to power off")

log.V(1).Info("Setting Server state set to available")
if modified, err := r.patchServerState(ctx, server, metalv1alpha1.ServerStateAvailable); err != nil || modified {
return err
return false, err
}
case metalv1alpha1.ServerStateAvailable:
if err := r.updateServerStatus(ctx, log, server); err != nil {
return err
return false, err
}
log.V(1).Info("Updated Server status")

if err := r.ensureServerPowerState(ctx, log, server); err != nil {
return fmt.Errorf("failed to ensure server power state: %w", err)
return false, fmt.Errorf("failed to ensure server power state: %w", err)
}
if err := r.ensureIndicatorLED(ctx, log, server); err != nil {
return fmt.Errorf("failed to ensure server indicator led: %w", err)
return false, fmt.Errorf("failed to ensure server indicator led: %w", err)
}
log.V(1).Info("Reconciled available state")
case metalv1alpha1.ServerStateReserved:

if err := r.updateServerStatus(ctx, log, server); err != nil {
return err
return false, err
}
log.V(1).Info("Updated Server status")

if err := r.ensureServerPowerState(ctx, log, server); err != nil {
return fmt.Errorf("failed to ensure server power state: %w", err)
return false, fmt.Errorf("failed to ensure server power state: %w", err)
}
if err := r.ensureIndicatorLED(ctx, log, server); err != nil {
return fmt.Errorf("failed to ensure server indicator led: %w", err)
return false, fmt.Errorf("failed to ensure server indicator led: %w", err)
}
log.V(1).Info("Reconciled reserved state")
}
return nil
return false, nil
}

func (r *ServerReconciler) updateServerStatus(ctx context.Context, log logr.Logger, server *metalv1alpha1.Server) error {
Expand Down Expand Up @@ -281,7 +298,7 @@ func (r *ServerReconciler) updateServerStatus(ctx context.Context, log logr.Logg
return nil
}

func (r *ServerReconciler) applyBootConfigurationAndIgnitionForDiscovery(ctx context.Context, server *metalv1alpha1.Server) error {
func (r *ServerReconciler) applyBootConfigurationAndIgnitionForDiscovery(ctx context.Context, server *metalv1alpha1.Server) (*metalv1alpha1.ServerBootConfiguration, error) {
// apply server boot configuration
bootConfig := &metalv1alpha1.ServerBootConfiguration{
TypeMeta: metav1.TypeMeta{
Expand All @@ -304,14 +321,14 @@ func (r *ServerReconciler) applyBootConfigurationAndIgnitionForDiscovery(ctx con
}

if err := r.Patch(ctx, bootConfig, client.Apply, fieldOwner, client.ForceOwnership); err != nil {
return fmt.Errorf("failed to apply server boot configuration: %w", err)
return nil, fmt.Errorf("failed to apply server boot configuration: %w", err)
}

if err := r.applyDefaultIgnitionForServer(ctx, server, bootConfig, r.RegistryURL); err != nil {
return fmt.Errorf("failed to apply default server ignitionSecret: %w", err)
return nil, fmt.Errorf("failed to apply default server ignitionSecret: %w", err)
}

return nil
return bootConfig, nil
}

func (r *ServerReconciler) applyDefaultIgnitionForServer(
Expand Down Expand Up @@ -398,19 +415,20 @@ func (r *ServerReconciler) pxeBootServer(ctx context.Context, log logr.Logger, s
return nil
}

func (r *ServerReconciler) extractServerDetailsFromRegistry(ctx context.Context, server *metalv1alpha1.Server) error {
func (r *ServerReconciler) extractServerDetailsFromRegistry(ctx context.Context, log logr.Logger, server *metalv1alpha1.Server) (bool, error) {
resp, err := http.Get(fmt.Sprintf("%s/systems/%s", r.RegistryURL, server.Spec.UUID))
if err != nil {
return fmt.Errorf("failed to fetch server details: %w", err)
if resp != nil && resp.StatusCode == http.StatusNotFound {
log.V(1).Info("Did not find server information in registry")
return false, nil
}

if resp != nil && resp.StatusCode == http.StatusNotFound {
return fmt.Errorf("could not find server details: %s", resp.Status)
if err != nil {
return false, fmt.Errorf("failed to fetch server details: %w", err)
}

serverDetails := &registry.Server{}
if err := json.NewDecoder(resp.Body).Decode(serverDetails); err != nil {
return fmt.Errorf("failed to decode server details: %w", err)
return false, fmt.Errorf("failed to decode server details: %w", err)
}

serverBase := server.DeepCopy()
Expand All @@ -426,10 +444,10 @@ func (r *ServerReconciler) extractServerDetailsFromRegistry(ctx context.Context,
server.Status.NetworkInterfaces = nics

if err := r.Status().Patch(ctx, server, client.MergeFrom(serverBase)); err != nil {
return fmt.Errorf("failed to patch server status: %w", err)
return false, fmt.Errorf("failed to patch server status: %w", err)
}

return nil
return true, nil
}

func (r *ServerReconciler) patchServerState(ctx context.Context, server *metalv1alpha1.Server, state metalv1alpha1.ServerState) (bool, error) {
Expand Down Expand Up @@ -494,6 +512,13 @@ func (r *ServerReconciler) ensureIndicatorLED(ctx context.Context, log logr.Logg
return nil
}

// ensureInitialBootConfigurationIsDeleted deletes the given ServerBootConfiguration
// by calling r.Delete. A NotFound error from the delete call is treated as success,
// since the object is then already gone; any other error is returned unchanged.
func (r *ServerReconciler) ensureInitialBootConfigurationIsDeleted(ctx context.Context, config *metalv1alpha1.ServerBootConfiguration) error {
	err := r.Delete(ctx, config)
	if err == nil || apierrors.IsNotFound(err) {
		// Either the delete succeeded or there was nothing left to delete.
		return nil
	}
	return err
}

// SetupWithManager sets up the controller with the Manager.
func (r *ServerReconciler) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr).
Expand Down
12 changes: 12 additions & 0 deletions internal/controller/server_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ package controller
import (
"fmt"

apierrors "k8s.io/apimachinery/pkg/api/errors"

metalv1alpha1 "github.com/afritzler/metal-operator/api/v1alpha1"
"github.com/afritzler/metal-operator/internal/controller/testdata"
"github.com/afritzler/metal-operator/internal/probe"
Expand Down Expand Up @@ -141,10 +143,20 @@ var _ = Describe("Server Controller", func() {

By("Ensuring that the server is set to available and powered off")
Eventually(Object(server)).Should(SatisfyAll(
HaveField("Spec.BootConfigurationRef", BeNil()),
HaveField("Status.State", metalv1alpha1.ServerStateAvailable),
HaveField("Status.PowerState", metalv1alpha1.ServerOffPowerState),
HaveField("Status.NetworkInterfaces", Not(BeEmpty())),
))

By("Ensuring that the boot configuration has been removed")
config := &metalv1alpha1.ServerBootConfiguration{
ObjectMeta: metav1.ObjectMeta{
Namespace: ns.Name,
Name: server.Name,
},
}
Eventually(Get(config)).Should(Satisfy(apierrors.IsNotFound))
})

// TODO: test server with manual BMC registration
Expand Down
2 changes: 1 addition & 1 deletion internal/controller/serverbootconfiguration_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ func (r *ServerBootConfigurationReconciler) removeServerBootConfigRef(ctx contex

serverBase := server.DeepCopy()
server.Spec.BootConfigurationRef = nil
if err := r.Patch(ctx, server, client.MergeFrom(serverBase)); err != nil {
if err := r.Patch(ctx, server, client.MergeFrom(serverBase)); !apierrors.IsNotFound(err) {
return err
}

Expand Down

0 comments on commit 80abc0d

Please sign in to comment.