Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ensure ServerBootConfiguration removal on Server available state #29

Merged
merged 1 commit into from
Apr 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmd/manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ func main() {

flag.StringVar(&registryURL, "registry-url", "", "The URL of the registry.")
flag.StringVar(&registryProtocol, "registry-protocol", "http", "The protocol to use for the registry.")
flag.IntVar(&registryPort, "registry-port", 8000, "The port to use for the registry.")
flag.IntVar(&registryPort, "registry-port", 10000, "The port to use for the registry.")
flag.StringVar(&probeImage, "probe-image", "", "Image for the first boot probing of a Server.")
flag.StringVar(&probeOSImage, "probe-os-image", "", "OS image for the first boot probing of a Server.")
flag.StringVar(&managerNamespace, "manager-namespace", "default", "Namespace the manager is running in.")
Expand Down
5 changes: 5 additions & 0 deletions internal/controller/bmc_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ import (
"context"
"fmt"

"k8s.io/apimachinery/pkg/api/errors"

metalv1alpha1 "github.com/afritzler/metal-operator/api/v1alpha1"
"github.com/go-logr/logr"
"github.com/ironcore-dev/controller-utils/clientutils"
Expand Down Expand Up @@ -99,6 +101,9 @@ func (r *BMCReconciler) reconcile(ctx context.Context, log logr.Logger, bmcObj *
func (r *BMCReconciler) updateBMCStatusDetails(ctx context.Context, log logr.Logger, bmcObj *metalv1alpha1.BMC) error {
endpoint := &metalv1alpha1.Endpoint{}
if err := r.Get(ctx, client.ObjectKey{Name: bmcObj.Spec.EndpointRef.Name}, endpoint); err != nil {
if errors.IsNotFound(err) {
return nil
}
return fmt.Errorf("failed to get Endpoints for BMC: %w", err)
}
log.V(1).Info("Got Endpoints for BMC", "Endpoints", endpoint.Name)
Expand Down
87 changes: 56 additions & 31 deletions internal/controller/server_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"encoding/json"
"fmt"
"net/http"
"time"

apierrors "k8s.io/apimachinery/pkg/api/errors"

Expand Down Expand Up @@ -146,7 +147,11 @@ func (r *ServerReconciler) reconcile(ctx context.Context, log logr.Logger, serve
}
}

if err := r.ensureServerStateTransition(ctx, log, server); err != nil {
requeue, err := r.ensureServerStateTransition(ctx, log, server)
if requeue && err == nil {
return ctrl.Result{Requeue: requeue, RequeueAfter: 10 * time.Second}, nil
}
if err != nil {
return ctrl.Result{}, fmt.Errorf("failed to ensure server state transition: %w", err)
}

Expand Down Expand Up @@ -178,77 +183,89 @@ func (r *ServerReconciler) reconcile(ctx context.Context, log logr.Logger, serve
//
// Maintenance:
// A Maintenance state represents a special case where certain operations like BIOS updates should be performed.
func (r *ServerReconciler) ensureServerStateTransition(ctx context.Context, log logr.Logger, server *metalv1alpha1.Server) error {
func (r *ServerReconciler) ensureServerStateTransition(ctx context.Context, log logr.Logger, server *metalv1alpha1.Server) (bool, error) {
switch server.Status.State {
case metalv1alpha1.ServerStateInitial:
if err := r.updateServerStatus(ctx, log, server); err != nil {
return err
return false, err
}
log.V(1).Info("Updated Server status")

// apply boot configuration
if err := r.applyBootConfigurationAndIgnitionForDiscovery(ctx, server); err != nil {
return fmt.Errorf("failed to apply server boot configuration: %w", err)
config, err := r.applyBootConfigurationAndIgnitionForDiscovery(ctx, server)
if err != nil {
return false, fmt.Errorf("failed to apply server boot configuration: %w", err)
}
log.V(1).Info("Applied Server boot configuration")

if ready, err := r.serverBootConfigurationIsReady(ctx, server); err != nil || !ready {
log.V(1).Info("Server boot configuration is not ready")
return err
log.V(1).Info("Server boot configuration is not ready. Retrying ...")
return false, err
}
log.V(1).Info("Server boot configuration is ready")

if err := r.pxeBootServer(ctx, log, server); err != nil {
return fmt.Errorf("failed to boot server: %w", err)
return false, fmt.Errorf("failed to boot server: %w", err)
}
log.V(1).Info("Booted Server in PXE")

if err := r.extractServerDetailsFromRegistry(ctx, server); err != nil {
ready, err := r.extractServerDetailsFromRegistry(ctx, log, server)
if !ready && err == nil {
log.V(1).Info("Server agent did not post info to registry")
return true, nil
}
if err != nil {
log.V(1).Info("Could not get server details from registry.")
// TODO: instead of requeue subscribe to registry events and requeue Server objects in SetupWithManager
return err
return false, err
}
log.V(1).Info("Extracted Server details")

if err := r.ensureInitialBootConfigurationIsDeleted(ctx, config); err != nil {
return false, fmt.Errorf("failed to ensure server initial boot configuration is deleted: %w", err)
}
log.V(1).Info("Ensured initial boot configuration is deleted")

// TODO: fix that by providing the power state to the ensure method
server.Spec.Power = metalv1alpha1.PowerOff
if err := r.ensureServerPowerState(ctx, log, server); err != nil {
return fmt.Errorf("failed to shutdown server: %w", err)
return false, fmt.Errorf("failed to shutdown server: %w", err)
}
log.V(1).Info("Server state set to power off")

log.V(1).Info("Setting Server state set to available")
if modified, err := r.patchServerState(ctx, server, metalv1alpha1.ServerStateAvailable); err != nil || modified {
return err
return false, err
}
case metalv1alpha1.ServerStateAvailable:
if err := r.updateServerStatus(ctx, log, server); err != nil {
return err
return false, err
}
log.V(1).Info("Updated Server status")

if err := r.ensureServerPowerState(ctx, log, server); err != nil {
return fmt.Errorf("failed to ensure server power state: %w", err)
return false, fmt.Errorf("failed to ensure server power state: %w", err)
}
if err := r.ensureIndicatorLED(ctx, log, server); err != nil {
return fmt.Errorf("failed to ensure server indicator led: %w", err)
return false, fmt.Errorf("failed to ensure server indicator led: %w", err)
}
log.V(1).Info("Reconciled available state")
case metalv1alpha1.ServerStateReserved:

if err := r.updateServerStatus(ctx, log, server); err != nil {
return err
return false, err
}
log.V(1).Info("Updated Server status")

if err := r.ensureServerPowerState(ctx, log, server); err != nil {
return fmt.Errorf("failed to ensure server power state: %w", err)
return false, fmt.Errorf("failed to ensure server power state: %w", err)
}
if err := r.ensureIndicatorLED(ctx, log, server); err != nil {
return fmt.Errorf("failed to ensure server indicator led: %w", err)
return false, fmt.Errorf("failed to ensure server indicator led: %w", err)
}
log.V(1).Info("Reconciled reserved state")
}
return nil
return false, nil
}

func (r *ServerReconciler) updateServerStatus(ctx context.Context, log logr.Logger, server *metalv1alpha1.Server) error {
Expand Down Expand Up @@ -281,7 +298,7 @@ func (r *ServerReconciler) updateServerStatus(ctx context.Context, log logr.Logg
return nil
}

func (r *ServerReconciler) applyBootConfigurationAndIgnitionForDiscovery(ctx context.Context, server *metalv1alpha1.Server) error {
func (r *ServerReconciler) applyBootConfigurationAndIgnitionForDiscovery(ctx context.Context, server *metalv1alpha1.Server) (*metalv1alpha1.ServerBootConfiguration, error) {
// apply server boot configuration
bootConfig := &metalv1alpha1.ServerBootConfiguration{
TypeMeta: metav1.TypeMeta{
Expand All @@ -304,14 +321,14 @@ func (r *ServerReconciler) applyBootConfigurationAndIgnitionForDiscovery(ctx con
}

if err := r.Patch(ctx, bootConfig, client.Apply, fieldOwner, client.ForceOwnership); err != nil {
return fmt.Errorf("failed to apply server boot configuration: %w", err)
return nil, fmt.Errorf("failed to apply server boot configuration: %w", err)
}

if err := r.applyDefaultIgnitionForServer(ctx, server, bootConfig, r.RegistryURL); err != nil {
return fmt.Errorf("failed to apply default server ignitionSecret: %w", err)
return nil, fmt.Errorf("failed to apply default server ignitionSecret: %w", err)
}

return nil
return bootConfig, nil
}

func (r *ServerReconciler) applyDefaultIgnitionForServer(
Expand Down Expand Up @@ -398,19 +415,20 @@ func (r *ServerReconciler) pxeBootServer(ctx context.Context, log logr.Logger, s
return nil
}

func (r *ServerReconciler) extractServerDetailsFromRegistry(ctx context.Context, server *metalv1alpha1.Server) error {
func (r *ServerReconciler) extractServerDetailsFromRegistry(ctx context.Context, log logr.Logger, server *metalv1alpha1.Server) (bool, error) {
resp, err := http.Get(fmt.Sprintf("%s/systems/%s", r.RegistryURL, server.Spec.UUID))
if err != nil {
return fmt.Errorf("failed to fetch server details: %w", err)
if resp != nil && resp.StatusCode == http.StatusNotFound {
log.V(1).Info("Did not find server information in registry")
return false, nil
}

if resp != nil && resp.StatusCode == http.StatusNotFound {
return fmt.Errorf("could not find server details: %s", resp.Status)
if err != nil {
return false, fmt.Errorf("failed to fetch server details: %w", err)
}

serverDetails := &registry.Server{}
if err := json.NewDecoder(resp.Body).Decode(serverDetails); err != nil {
return fmt.Errorf("failed to decode server details: %w", err)
return false, fmt.Errorf("failed to decode server details: %w", err)
}

serverBase := server.DeepCopy()
Expand All @@ -426,10 +444,10 @@ func (r *ServerReconciler) extractServerDetailsFromRegistry(ctx context.Context,
server.Status.NetworkInterfaces = nics

if err := r.Status().Patch(ctx, server, client.MergeFrom(serverBase)); err != nil {
return fmt.Errorf("failed to patch server status: %w", err)
return false, fmt.Errorf("failed to patch server status: %w", err)
}

return nil
return true, nil
}

func (r *ServerReconciler) patchServerState(ctx context.Context, server *metalv1alpha1.Server, state metalv1alpha1.ServerState) (bool, error) {
Expand Down Expand Up @@ -494,6 +512,13 @@ func (r *ServerReconciler) ensureIndicatorLED(ctx context.Context, log logr.Logg
return nil
}

// ensureInitialBootConfigurationIsDeleted issues a Delete for the given
// ServerBootConfiguration through the reconciler's client.
//
// It returns nil when the delete succeeds or when the delete reports NotFound
// (the object is already gone, which is the desired end state). Any other
// error is returned unchanged to the caller.
func (r *ServerReconciler) ensureInitialBootConfigurationIsDeleted(ctx context.Context, config *metalv1alpha1.ServerBootConfiguration) error {
	deleteErr := r.Delete(ctx, config)
	switch {
	case deleteErr == nil:
		// Delete went through.
		return nil
	case apierrors.IsNotFound(deleteErr):
		// Nothing left to delete; treat as success.
		return nil
	default:
		return deleteErr
	}
}

// SetupWithManager sets up the controller with the Manager.
func (r *ServerReconciler) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr).
Expand Down
12 changes: 12 additions & 0 deletions internal/controller/server_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ package controller
import (
"fmt"

apierrors "k8s.io/apimachinery/pkg/api/errors"

metalv1alpha1 "github.com/afritzler/metal-operator/api/v1alpha1"
"github.com/afritzler/metal-operator/internal/controller/testdata"
"github.com/afritzler/metal-operator/internal/probe"
Expand Down Expand Up @@ -141,10 +143,20 @@ var _ = Describe("Server Controller", func() {

By("Ensuring that the server is set to available and powered off")
Eventually(Object(server)).Should(SatisfyAll(
HaveField("Spec.BootConfigurationRef", BeNil()),
HaveField("Status.State", metalv1alpha1.ServerStateAvailable),
HaveField("Status.PowerState", metalv1alpha1.ServerOffPowerState),
HaveField("Status.NetworkInterfaces", Not(BeEmpty())),
))

By("Ensuring that the boot configuration has been removed")
config := &metalv1alpha1.ServerBootConfiguration{
ObjectMeta: metav1.ObjectMeta{
Namespace: ns.Name,
Name: server.Name,
},
}
Eventually(Get(config)).Should(Satisfy(apierrors.IsNotFound))
})

// TODO: test server with manual BMC registration
Expand Down
2 changes: 1 addition & 1 deletion internal/controller/serverbootconfiguration_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ func (r *ServerBootConfigurationReconciler) removeServerBootConfigRef(ctx contex

serverBase := server.DeepCopy()
server.Spec.BootConfigurationRef = nil
if err := r.Patch(ctx, server, client.MergeFrom(serverBase)); err != nil {
if err := r.Patch(ctx, server, client.MergeFrom(serverBase)); !apierrors.IsNotFound(err) {
return err
}

Expand Down
Loading