From eeea94a6109939451f539f2d877cd3396f157274 Mon Sep 17 00:00:00 2001 From: Ivan Mikheykin Date: Tue, 20 Feb 2024 13:32:29 +0300 Subject: [PATCH] feat(vm): always replace Pod on VM restart (#3) ## Description Make VM's Pod a one-shot Pod. Use runPolicy Manual and restart Pod via KVVMI deletion. ## Why do we need it, and what problem does it solve? Executing `sudo reboot` inside the VM should be equal to executing virtctl restart/deleting KVVMI in controller. ## What is the expected result? VM after `sudo restart` should be created with new settings. 1. Use disruption mode Manual 2. patch VM 3. enter VM console, run reboot 4. check that rebootId is absent, and the recreated VM has new settings. ## Checklist - [ ] The code is covered by unit tests. - [ ] e2e tests passed. - [x] Documentation updated according to the changes. - [x] Changes were tested in the Kubernetes cluster manually. Signed-off-by: Ivan Mikheykin --- docs/internal/vm_power_state.md | 43 ++++++ .../virt-launcher/scripts/domain-monitor.sh | 22 +++ .../scripts/virt-launcher-monitor-wrapper.sh | 16 +++ images/virt-launcher/werf.inc.yaml | 14 ++ .../pkg/common/kvvm/util.go | 86 ------------ .../pkg/controller/powerstate/kvvm_request.go | 72 ++++++++++ .../pkg/controller/powerstate/operations.go | 86 ++++++++++++ .../controller/powerstate/shutdown_reason.go | 66 +++++++++ .../pkg/controller/vm_reconciler.go | 126 ++++++++++++++++-- .../pkg/controller/vmop/vmop_reconciler.go | 8 +- werf.yaml | 1 + 11 files changed, 437 insertions(+), 103 deletions(-) create mode 100644 docs/internal/vm_power_state.md create mode 100755 images/virt-launcher/scripts/domain-monitor.sh create mode 100755 images/virt-launcher/scripts/virt-launcher-monitor-wrapper.sh create mode 100644 images/virtualization-controller/pkg/controller/powerstate/kvvm_request.go create mode 100644 images/virtualization-controller/pkg/controller/powerstate/operations.go create mode 100644 
images/virtualization-controller/pkg/controller/powerstate/shutdown_reason.go diff --git a/docs/internal/vm_power_state.md b/docs/internal/vm_power_state.md new file mode 100644 index 000000000..ab0cd1e85 --- /dev/null +++ b/docs/internal/vm_power_state.md @@ -0,0 +1,43 @@ +# VM power state + +## Reboot differences with kubevirt + +Kubevirt has 2 types of reboot: +1. In-Pod reboot: restart VM without exiting from Qemu process. +2. External reboot: delete Kubevirt VirtualMachineInstance and create a new one. + +Deckhouse Virtualization promotes the idea that reboot issued from inside the VM +is equal to reboot issued externally, e.g. with VirtualMachineOperation. + +The only possible restart in Deckhouse Virtualization is to delete VirtualMachineInstance +and create a new one with all possible changes made to VirtualMachine spec. + +In-Pod reboot is disabled with some additions to virt-launcher image: +1. Qemu event handler on_restart is set to shutdown to exit from qemu process when reboot is issued. +2. Monitor qemu SHUTDOWN events and write them to /dev/termination-log to catch them later and + distinguish between guest-reset and guest-shutdown. +These changes are made in images/virt-launcher/scripts/domain-monitor.sh. + +## A relationship between runPolicy and runStrategy + +Deckhouse Virtualization has 4 run policies: + +- AlwaysOff - The system is asked to ensure that no VM is running. This is achieved by stopping + any VirtualMachineInstance that is associated with the VM. If a guest is already running, + it will be stopped. +- AlwaysOn - VM will start immediately after the stop. A stopped VM is scheduled to start when runPolicy is changed to AlwaysOn. +- Manual - The system will not automatically turn the VM on or off, instead the user manually controls the VM status by creating VirtualMachineOperation or by issuing reboot or poweroff commands inside the VM. 
+- AlwaysOnUntilStoppedManually - Similar to AlwaysOn, except that the VM is only restarted if it terminated + in an uncontrolled way (e.g. crash) and due to an infrastructure reason (i.e. the node crashed, + the KVM related process OOMed). This allows a user to determine when the VM should be shut down by + initiating the shut down inside the guest or creating a VirtualMachineOperation. + Note: Guest-side crashes (i.e. BSOD) are not covered by this. In such cases liveness checks or the use of a watchdog can help. + +AlwaysOff policy is implemented with kubevirt's `runStrategy: Halted`. + +AlwaysOn policy is implemented with kubevirt's `runStrategy: Always`. + +Manual policy is implemented with kubevirt's `runStrategy: Manual` with addition of VM start on guest-reset event. + +AlwaysOnUntilStoppedManually policy is implemented with kubevirt's `runStrategy: Manual` with addition of VM start on guest-reset event and restarting VM on failures. + diff --git a/images/virt-launcher/scripts/domain-monitor.sh b/images/virt-launcher/scripts/domain-monitor.sh new file mode 100755 index 000000000..d908913dc --- /dev/null +++ b/images/virt-launcher/scripts/domain-monitor.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +set -eo pipefail + +# Wait for qemu-kvm process +vmName= +while true ; do + vmName=$(virsh list --name || true) + if [[ -n $vmName ]]; then + break + fi + sleep 1 +done + +# Set action as libvirt will do for destroy. 
+echo "Set reboot action to shutdown for domain $vmName" +virsh qemu-monitor-command $vmName '{"execute": "set-action", "arguments":{"reboot":"shutdown"}}' + + +# Redirect events to termination logs +echo "Monitor domain $vmName events" +virsh qemu-monitor-event --domain $vmName --loop --event SHUTDOWN > /dev/termination-log diff --git a/images/virt-launcher/scripts/virt-launcher-monitor-wrapper.sh b/images/virt-launcher/scripts/virt-launcher-monitor-wrapper.sh new file mode 100755 index 000000000..e29f135b2 --- /dev/null +++ b/images/virt-launcher/scripts/virt-launcher-monitor-wrapper.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +# virt-launcher-monitor execution interceptor: +# - Run domain monitor (qemu customizer) in background, its output goes to /var/log/domain-monitor-daemon.log. +# - Exec virt-launcher-monitor in-place to start usual virt-launcher. + +echo '{"msg":"Start domain monitor daemon", "level":"info","component":"virt-launcher-monitor-wrapper"}' +nohup bash /scripts/domain-monitor.sh > /var/log/domain-monitor-daemon.log 2>&1 & + +# Pass all arguments to the original virt-launcher-monitor. +if [[ ! -f /usr/bin/virt-launcher-monitor-orig ]]; then + echo '{"msg":"Target /usr/bin/virt-launcher-monitor-orig is absent", "level":"info","component":"virt-launcher-monitor-wrapper"}' + exit 1 +fi +echo '{"msg":"Exec original virt-launcher-monitor", "level":"info","component":"virt-launcher-monitor-wrapper"}' +exec /usr/bin/virt-launcher-monitor-orig "$@" diff --git a/images/virt-launcher/werf.inc.yaml b/images/virt-launcher/werf.inc.yaml index 1a5c553aa..c809a3778 100644 --- a/images/virt-launcher/werf.inc.yaml +++ b/images/virt-launcher/werf.inc.yaml @@ -8,6 +8,20 @@ import: - 'sys' to: / before: setup +git: + - add: /images/{{ $.ImageName }} + to: / + stageDependencies: + setup: + - '**/*' + includePaths: + - scripts +shell: + setup: + # Replace virt-launcher-monitor with script. 
+ - mv /usr/bin/virt-launcher-monitor /usr/bin/virt-launcher-monitor-orig + - cp /scripts/virt-launcher-monitor-wrapper.sh /usr/bin/virt-launcher-monitor + - chmod +x /usr/bin/virt-launcher-monitor # Source https://github.com/kubevirt/containerized-data-importer/blob/main/cmd/cdi-apiserver/BUILD.bazel docker: ENTRYPOINT: ["/usr/bin/virt-launcher"] diff --git a/images/virtualization-controller/pkg/common/kvvm/util.go b/images/virtualization-controller/pkg/common/kvvm/util.go index 7b083639e..1796bbf21 100644 --- a/images/virtualization-controller/pkg/common/kvvm/util.go +++ b/images/virtualization-controller/pkg/common/kvvm/util.go @@ -5,15 +5,12 @@ import ( "fmt" corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" virtv1 "kubevirt.io/api/core/v1" "sigs.k8s.io/controller-runtime/pkg/client" - "github.com/deckhouse/virtualization-controller/pkg/common/patch" "github.com/deckhouse/virtualization-controller/pkg/sdk/framework/helper" - "github.com/deckhouse/virtualization-controller/pkg/util" ) // PatchRunStrategy returns JSON merge patch to set 'runStrategy' field to the desired value @@ -77,86 +74,3 @@ func DeletePodByKVVMI(ctx context.Context, cli client.Client, kvvmi *virtv1.Virt } return helper.DeleteObject(ctx, cli, pod, opts) } - -// GetChangeRequest returns the stop/start patch. -func GetChangeRequest(vm *virtv1.VirtualMachine, changes ...virtv1.VirtualMachineStateChangeRequest) ([]byte, error) { - jp := patch.NewJsonPatch() - verb := patch.PatchAddOp - // Special case: if there's no status field at all, add one. 
- newStatus := virtv1.VirtualMachineStatus{} - if equality.Semantic.DeepEqual(vm.Status, newStatus) { - newStatus.StateChangeRequests = changes - jp.Append(patch.NewJsonPatchOperation(verb, "/status", newStatus)) - } else { - failOnConflict := true - if len(changes) == 1 && changes[0].Action == virtv1.StopRequest { - // If this is a stopRequest, replace all existing StateChangeRequests. - failOnConflict = false - } - if len(vm.Status.StateChangeRequests) != 0 { - if failOnConflict { - return nil, fmt.Errorf("unable to complete request: stop/start already underway") - } else { - verb = patch.PatchReplaceOp - } - } - jp.Append(patch.NewJsonPatchOperation(verb, "/status/stateChangeRequests", changes)) - } - if vm.Status.StartFailure != nil { - jp.Append(patch.NewJsonPatchOperation(patch.PatchRemoveOp, "/status/startFailure", nil)) - } - return jp.Bytes() -} - -// StartKVVM starts kvvm. -func StartKVVM(ctx context.Context, cli client.Client, kvvm *virtv1.VirtualMachine) error { - if kvvm == nil { - return fmt.Errorf("kvvm must not be empty") - } - jp, err := GetChangeRequest(kvvm, - virtv1.VirtualMachineStateChangeRequest{Action: virtv1.StartRequest}) - if err != nil { - return err - } - return cli.Status().Patch(ctx, kvvm, client.RawPatch(types.JSONPatchType, jp), &client.SubResourcePatchOptions{}) -} - -// StopKVVM stops kvvm. -func StopKVVM(ctx context.Context, cli client.Client, kvvmi *virtv1.VirtualMachineInstance, force bool) error { - if kvvmi == nil { - return fmt.Errorf("kvvmi must not be empty") - } - if err := cli.Delete(ctx, kvvmi, &client.DeleteOptions{}); err != nil { - return err - } - if force { - return DeletePodByKVVMI(ctx, cli, kvvmi, &client.DeleteOptions{GracePeriodSeconds: util.GetPointer(int64(0))}) - } - return nil -} - -// RestartKVVM restarts kvvm. 
-func RestartKVVM(ctx context.Context, cli client.Client, kvvm *virtv1.VirtualMachine, kvvmi *virtv1.VirtualMachineInstance, force bool) error { - if kvvm == nil { - return fmt.Errorf("kvvm must not be empty") - } - if kvvmi == nil { - return fmt.Errorf("kvvmi must not be empty") - } - - jp, err := GetChangeRequest(kvvm, - virtv1.VirtualMachineStateChangeRequest{Action: virtv1.StopRequest, UID: &kvvmi.UID}, - virtv1.VirtualMachineStateChangeRequest{Action: virtv1.StartRequest}) - if err != nil { - return err - } - - err = cli.Status().Patch(ctx, kvvm, client.RawPatch(types.JSONPatchType, jp), &client.SubResourcePatchOptions{}) - if err != nil { - return err - } - if force { - return DeletePodByKVVMI(ctx, cli, kvvmi, &client.DeleteOptions{GracePeriodSeconds: util.GetPointer(int64(0))}) - } - return nil -} diff --git a/images/virtualization-controller/pkg/controller/powerstate/kvvm_request.go b/images/virtualization-controller/pkg/controller/powerstate/kvvm_request.go new file mode 100644 index 000000000..5fbf617d5 --- /dev/null +++ b/images/virtualization-controller/pkg/controller/powerstate/kvvm_request.go @@ -0,0 +1,72 @@ +package powerstate + +import ( + "fmt" + + "github.com/deckhouse/virtualization-controller/pkg/common/patch" + "k8s.io/apimachinery/pkg/api/equality" + kvv1 "kubevirt.io/api/core/v1" +) + +// BuildPatch creates a patch to request VM state changing via updating KVVM status. +// +// Some combinations lead to an error to not interfere with kvvm controller: +// +// current / desired stop start restart(stop+start) +// stop replace error error +// start replace error error +// restart(stop+start) replace error error +// empty add add add +func BuildPatch(vm *kvv1.VirtualMachine, changes ...kvv1.VirtualMachineStateChangeRequest) ([]byte, error) { + jp := patch.NewJsonPatch() + // Special case: if there's no status field at all, add one. 
+ newStatus := kvv1.VirtualMachineStatus{} + if equality.Semantic.DeepEqual(vm.Status, newStatus) { + newStatus.StateChangeRequests = changes + jp.Append(patch.NewJsonPatchOperation(patch.PatchAddOp, "/status", newStatus)) + } else { + verb := patch.PatchAddOp + failOnConflict := true + if len(changes) == 1 && changes[0].Action == kvv1.StopRequest { + // If this is a stopRequest, replace all existing StateChangeRequests. + failOnConflict = false + } + if len(vm.Status.StateChangeRequests) != 0 { + if failOnConflict { + return nil, fmt.Errorf("unable to complete request: stop/start already underway") + } else { + verb = patch.PatchReplaceOp + } + } + jp.Append(patch.NewJsonPatchOperation(verb, "/status/stateChangeRequests", changes)) + } + if vm.Status.StartFailure != nil { + jp.Append(patch.NewJsonPatchOperation(patch.PatchRemoveOp, "/status/startFailure", nil)) + } + return jp.Bytes() +} + +// BuildPatchSafeRestart creates a patch to restart a VM in case no other operations are present. +// This method respects other operations that were issued during VM reboot. +func BuildPatchSafeRestart(kvvm *kvv1.VirtualMachine, kvvmi *kvv1.VirtualMachineInstance) ([]byte, error) { + // Restart only if current request is empty. + if len(kvvm.Status.StateChangeRequests) > 0 { + return nil, nil + } + restartRequest := []kvv1.VirtualMachineStateChangeRequest{ + {Action: kvv1.StopRequest, UID: &kvvmi.UID}, + {Action: kvv1.StartRequest}, + } + jp := patch.NewJsonPatch() + + newStatus := kvv1.VirtualMachineStatus{} + if equality.Semantic.DeepEqual(kvvm.Status, newStatus) { + // Add /status if it does not exist. + newStatus.StateChangeRequests = restartRequest + jp.Append(patch.NewJsonPatchOperation(patch.PatchAddOp, "/status", newStatus)) + } else { + // Set stateChangeRequests. 
+ jp.Append(patch.NewJsonPatchOperation(patch.PatchAddOp, "/status/stateChangeRequests", restartRequest)) + } + return jp.Bytes() +} diff --git a/images/virtualization-controller/pkg/controller/powerstate/operations.go b/images/virtualization-controller/pkg/controller/powerstate/operations.go new file mode 100644 index 000000000..051a6af49 --- /dev/null +++ b/images/virtualization-controller/pkg/controller/powerstate/operations.go @@ -0,0 +1,86 @@ +package powerstate + +import ( + "context" + "fmt" + + kvvmutil "github.com/deckhouse/virtualization-controller/pkg/common/kvvm" + "github.com/deckhouse/virtualization-controller/pkg/util" + "k8s.io/apimachinery/pkg/types" + kvv1 "kubevirt.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// StartVM starts VM via adding change request to the KVVM status. +func StartVM(ctx context.Context, cl client.Client, kvvm *kvv1.VirtualMachine) error { + if kvvm == nil { + return fmt.Errorf("kvvm must not be empty") + } + jp, err := BuildPatch(kvvm, + kvv1.VirtualMachineStateChangeRequest{Action: kvv1.StartRequest}) + if err != nil { + return err + } + return cl.Status().Patch(ctx, kvvm, client.RawPatch(types.JSONPatchType, jp), &client.SubResourcePatchOptions{}) +} + +// StopVM stops VM via deleting kvvmi. +// It implements force stop by immediately deleting VM's Pod. +func StopVM(ctx context.Context, cl client.Client, kvvmi *kvv1.VirtualMachineInstance, force bool) error { + if kvvmi == nil { + return fmt.Errorf("kvvmi must not be empty") + } + if err := cl.Delete(ctx, kvvmi, &client.DeleteOptions{}); err != nil { + return err + } + if force { + return kvvmutil.DeletePodByKVVMI(ctx, cl, kvvmi, &client.DeleteOptions{GracePeriodSeconds: util.GetPointer(int64(0))}) + } + return nil +} + +// RestartVM restarts VM via adding stop and start change requests to the KVVM status. +// It implements force stop by immediately deleting VM's Pod. 
+func RestartVM(ctx context.Context, cl client.Client, kvvm *kvv1.VirtualMachine, kvvmi *kvv1.VirtualMachineInstance, force bool) error { + if kvvm == nil { + return fmt.Errorf("kvvm must not be empty") + } + if kvvmi == nil { + return fmt.Errorf("kvvmi must not be empty") + } + + jp, err := BuildPatch(kvvm, + kvv1.VirtualMachineStateChangeRequest{Action: kvv1.StopRequest, UID: &kvvmi.UID}, + kvv1.VirtualMachineStateChangeRequest{Action: kvv1.StartRequest}) + if err != nil { + return err + } + + err = cl.Status().Patch(ctx, kvvm, client.RawPatch(types.JSONPatchType, jp), &client.SubResourcePatchOptions{}) + if err != nil { + return err + } + if force { + return kvvmutil.DeletePodByKVVMI(ctx, cl, kvvmi, &client.DeleteOptions{GracePeriodSeconds: util.GetPointer(int64(0))}) + } + return nil +} + +// SafeRestartVM restarts VM via adding stop and start change requests to the KVVM status if no other requests are in progress. +func SafeRestartVM(ctx context.Context, cl client.Client, kvvm *kvv1.VirtualMachine, kvvmi *kvv1.VirtualMachineInstance) error { + if kvvm == nil { + return fmt.Errorf("kvvm must not be empty") + } + if kvvmi == nil { + return fmt.Errorf("kvvmi must not be empty") + } + + jp, err := BuildPatchSafeRestart(kvvm, kvvmi) + if err != nil { + return err + } + if jp == nil { + return nil + } + return cl.Status().Patch(ctx, kvvm, client.RawPatch(types.JSONPatchType, jp), &client.SubResourcePatchOptions{}) +} diff --git a/images/virtualization-controller/pkg/controller/powerstate/shutdown_reason.go b/images/virtualization-controller/pkg/controller/powerstate/shutdown_reason.go new file mode 100644 index 000000000..fdbd8646f --- /dev/null +++ b/images/virtualization-controller/pkg/controller/powerstate/shutdown_reason.go @@ -0,0 +1,66 @@ +package powerstate + +import ( + "sort" + "strings" + + corev1 "k8s.io/api/core/v1" + kvv1 "kubevirt.io/api/core/v1" +) + +const ( + // DefaultVMContainerName - a container name with virt-launcher, libvirt and qemu 
processes. + DefaultVMContainerName = "compute" + + // GuestResetReason - a reboot command was issued from inside the VM. + GuestResetReason = "guest-reset" + + // GuestShutdownReason - a poweroff command was issued from inside the VM. + GuestShutdownReason = "guest-shutdown" +) + +// ShutdownReason reports whether the most recent VM Pod is Completed and returns the shutdown reason found in it: +// - guest-reset — reboot was issued inside the VM +// - guest-shutdown — poweroff was issued inside the VM +// - empty string means VM is still Running or exited without an event. +func ShutdownReason(kvvmi *kvv1.VirtualMachineInstance, kvPods *corev1.PodList) (bool, string) { + if kvvmi == nil || kvvmi.Status.Phase != kvv1.Succeeded { + return false, "" + } + if kvPods == nil || len(kvPods.Items) == 0 { + return false, "" + } + + // Sort Pods by creation time in descending order to operate on the most recent Pod. + sort.SliceStable(kvPods.Items, func(i, j int) bool { + return kvPods.Items[i].CreationTimestamp.Compare(kvPods.Items[j].CreationTimestamp.Time) > 0 + }) + recentPod := kvPods.Items[0] + // Power events are not available in Running state, only Completed Pod has termination message. + if recentPod.Status.Phase != corev1.PodSucceeded { + return false, "" + } + + // Extract termination message from the "compute" container. + for _, contStatus := range recentPod.Status.ContainerStatuses { + // "compute" is a default container name for VM Pod. 
+ if contStatus.Name != DefaultVMContainerName { + continue + } + msg := "" + if contStatus.LastTerminationState.Terminated != nil { + msg = contStatus.LastTerminationState.Terminated.Message + } + if contStatus.State.Terminated != nil { + msg = contStatus.State.Terminated.Message + } + if strings.Contains(msg, GuestResetReason) { + return true, GuestResetReason + } + if strings.Contains(msg, GuestShutdownReason) { + return true, GuestShutdownReason + } + } + + return true, "" +} diff --git a/images/virtualization-controller/pkg/controller/vm_reconciler.go b/images/virtualization-controller/pkg/controller/vm_reconciler.go index 7f0c287dc..859754ef8 100644 --- a/images/virtualization-controller/pkg/controller/vm_reconciler.go +++ b/images/virtualization-controller/pkg/controller/vm_reconciler.go @@ -9,6 +9,8 @@ import ( "time" corev1 "k8s.io/api/core/v1" + k8serrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" virtv1 "kubevirt.io/api/core/v1" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller" @@ -21,9 +23,9 @@ import ( "sigs.k8s.io/controller-runtime/pkg/source" virtv2 "github.com/deckhouse/virtualization-controller/api/v1alpha2" - kvvmutil "github.com/deckhouse/virtualization-controller/pkg/common/kvvm" vmutil "github.com/deckhouse/virtualization-controller/pkg/common/vm" "github.com/deckhouse/virtualization-controller/pkg/controller/kvbuilder" + "github.com/deckhouse/virtualization-controller/pkg/controller/powerstate" "github.com/deckhouse/virtualization-controller/pkg/controller/vmchange" "github.com/deckhouse/virtualization-controller/pkg/dvcr" "github.com/deckhouse/virtualization-controller/pkg/sdk/framework/helper" @@ -61,8 +63,50 @@ func (r *VMReconciler) SetupController(_ context.Context, mgr manager.Manager, c &virtv2.VirtualMachine{}, handler.OnlyControllerOwner(), ), + predicate.Funcs{ + CreateFunc: func(e event.CreateEvent) bool { return true }, + DeleteFunc: func(e event.DeleteEvent) 
bool { return true }, + UpdateFunc: func(e event.UpdateEvent) bool { + oldVM := e.ObjectOld.(*virtv1.VirtualMachine) + newVM := e.ObjectNew.(*virtv1.VirtualMachine) + return oldVM.Status.PrintableStatus != newVM.Status.PrintableStatus || + oldVM.Status.Ready != newVM.Status.Ready + }, + }, + ); err != nil { + return fmt.Errorf("error setting watch on VirtualMachine: %w", err) + } + + // Watch for Pods created on behalf of VMs. Handle only changes in status.phase. + // Pod tracking is required to detect when Pod becomes Completed after guest initiated reset or shutdown. + if err := ctr.Watch( + source.Kind(mgr.GetCache(), &corev1.Pod{}), + handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, pod client.Object) []reconcile.Request { + vmName, hasLabel := pod.GetLabels()["vm.kubevirt.io/name"] + if !hasLabel { + return nil + } + + return []reconcile.Request{ + { + NamespacedName: types.NamespacedName{ + Name: vmName, + Namespace: pod.GetNamespace(), + }, + }, + } + }), + predicate.Funcs{ + CreateFunc: func(e event.CreateEvent) bool { return true }, + DeleteFunc: func(e event.DeleteEvent) bool { return true }, + UpdateFunc: func(e event.UpdateEvent) bool { + oldPod := e.ObjectOld.(*corev1.Pod) + newPod := e.ObjectNew.(*corev1.Pod) + return oldPod.Status.Phase != newPod.Status.Phase + }, + }, ); err != nil { - return fmt.Errorf("error setting watch on VirtualMachineInstance: %w", err) + return fmt.Errorf("error setting watch on Pod: %w", err) } return nil @@ -174,8 +218,8 @@ func (r *VMReconciler) syncKVVM(ctx context.Context, state *VMReconcilerState, o lastAppliedSpec = &state.VM.Current().Spec } - // Ensure power state according to the runPolicy if KVVM was changed. - powerErr := r.syncPowerState(ctx, state, lastAppliedSpec) + // Ensure power state according to the runPolicy. 
+ powerErr := r.syncPowerState(ctx, state, opts, lastAppliedSpec) if powerErr != nil { opts.Log.Error(powerErr, "sync power state") } @@ -191,7 +235,6 @@ func (r *VMReconciler) syncKVVM(ctx context.Context, state *VMReconcilerState, o } func (r *VMReconciler) UpdateStatus(_ context.Context, _ reconcile.Request, state *VMReconcilerState, opts two_phase_reconciler.ReconcilerOptions) error { - opts.Log.V(2).Info("VMReconciler.UpdateStatus") if state.isDeletion() { state.VM.Changed().Status.Phase = virtv2.MachineTerminating return nil @@ -636,28 +679,85 @@ func (r *VMReconciler) restartKVVM(ctx context.Context, state *VMReconcilerState return nil } -// syncPowerState enforces runPolicy on underlying KVVM. -// Method ensures desired runStrategy and sets a 'running' field to null. -func (r *VMReconciler) syncPowerState(ctx context.Context, state *VMReconcilerState, effectiveSpec *virtv2.VirtualMachineSpec) error { +// syncPowerState enforces runPolicy on the underlying KVVM. +func (r *VMReconciler) syncPowerState(ctx context.Context, state *VMReconcilerState, opts two_phase_reconciler.ReconcilerOptions, effectiveSpec *virtv2.VirtualMachineSpec) error { if state.KVVM == nil { return nil } vmRunPolicy := effectiveSpec.RunPolicy + isPodCompleted, vmShutdownReason := powerstate.ShutdownReason(state.KVVMI, state.KVPods) + var err error switch vmRunPolicy { - case virtv2.ManualPolicy: - err = state.EnsureRunStrategy(ctx, virtv1.RunStrategyManual) case virtv2.AlwaysOffPolicy: + if state.KVVMI != nil { + // Ensure KVVMI is absent. + err = opts.Client.Delete(ctx, state.KVVMI) + if err != nil && !k8serrors.IsNotFound(err) { + return fmt.Errorf("force AlwaysOff: delete KVVMI: %w", err) + } + } err = state.EnsureRunStrategy(ctx, virtv1.RunStrategyHalted) case virtv2.AlwaysOnPolicy: + // Power state change reason is not significant for AlwaysOn: + // kubevirt restarts VM via re-creation of KVVMI. 
err = state.EnsureRunStrategy(ctx, virtv1.RunStrategyAlways) case virtv2.AlwaysOnUnlessStoppedManualy: - if kvvmutil.GetRunStrategy(state.KVVM) == virtv1.RunStrategyHalted { - return nil + if state.KVVMI != nil && state.KVVMI.DeletionTimestamp == nil { + if state.KVVMI.Status.Phase == virtv1.Succeeded { + if isPodCompleted { + // Request to start new KVVMI if guest was restarted. + // Cleanup KVVMI is enough if VM was stopped from inside. + if vmShutdownReason == "guest-reset" { + opts.Log.Info("Restart for guest initiated reset") + err = powerstate.SafeRestartVM(ctx, opts.Client, state.KVVM, state.KVVMI) + if err != nil { + return fmt.Errorf("restart VM on guest-reset: %w", err) + } + } else { + opts.Log.Info("Cleanup Succeeded KVVMI") + err = opts.Client.Delete(ctx, state.KVVMI) + if err != nil && !k8serrors.IsNotFound(err) { + return fmt.Errorf("delete Succeeded KVVMI: %w", err) + } + } + } + } + if state.KVVMI.Status.Phase == virtv1.Failed { + opts.Log.Info("Restart on Failed KVVMI", "obj", state.KVVMI.GetName()) + err = powerstate.SafeRestartVM(ctx, opts.Client, state.KVVM, state.KVVMI) + if err != nil { + return fmt.Errorf("restart VM on failed: %w", err) + } + } } - err = state.EnsureRunStrategy(ctx, virtv1.RunStrategyAlways) + + err = state.EnsureRunStrategy(ctx, virtv1.RunStrategyManual) + case virtv2.ManualPolicy: + // Manual policy requires to handle only guest-reset event. + // All types of shutdown are a final state. + if state.KVVMI != nil && state.KVVMI.DeletionTimestamp == nil { + if state.KVVMI.Status.Phase == virtv1.Succeeded && isPodCompleted { + // Request to start new KVVMI (with updated settings). + if vmShutdownReason == "guest-reset" { + err = powerstate.SafeRestartVM(ctx, opts.Client, state.KVVM, state.KVVMI) + if err != nil { + return fmt.Errorf("restart VM on guest-reset: %w", err) + } + } else { + // Cleanup old version of KVVMI. 
+ opts.Log.Info("Cleanup Succeeded KVVMI") + err = opts.Client.Delete(ctx, state.KVVMI) + if err != nil && !k8serrors.IsNotFound(err) { + return fmt.Errorf("delete Succeeded KVVMI: %w", err) + } + } + } + } + + err = state.EnsureRunStrategy(ctx, virtv1.RunStrategyManual) } if err != nil { diff --git a/images/virtualization-controller/pkg/controller/vmop/vmop_reconciler.go b/images/virtualization-controller/pkg/controller/vmop/vmop_reconciler.go index 11f0fee07..ef8bb3f9f 100644 --- a/images/virtualization-controller/pkg/controller/vmop/vmop_reconciler.go +++ b/images/virtualization-controller/pkg/controller/vmop/vmop_reconciler.go @@ -15,8 +15,8 @@ import ( "sigs.k8s.io/controller-runtime/pkg/source" virtv2 "github.com/deckhouse/virtualization-controller/api/v1alpha2" - kvvmutil "github.com/deckhouse/virtualization-controller/pkg/common/kvvm" "github.com/deckhouse/virtualization-controller/pkg/controller/common" + "github.com/deckhouse/virtualization-controller/pkg/controller/powerstate" "github.com/deckhouse/virtualization-controller/pkg/sdk/framework/two_phase_reconciler" ) @@ -178,7 +178,7 @@ func (r *Reconciler) doOperationStart(ctx context.Context, state *ReconcilerStat if err != nil { return fmt.Errorf("cannot get kvvm %q. %w", state.VM.Name, err) } - return kvvmutil.StartKVVM(ctx, state.Client, kvvm) + return powerstate.StartVM(ctx, state.Client, kvvm) } func (r *Reconciler) doOperationStop(ctx context.Context, force bool, state *ReconcilerState) error { @@ -186,7 +186,7 @@ func (r *Reconciler) doOperationStop(ctx context.Context, force bool, state *Rec if err != nil { return fmt.Errorf("cannot get kvvmi %q. 
%w", state.VM.Name, err) } - return kvvmutil.StopKVVM(ctx, state.Client, kvvmi, force) + return powerstate.StopVM(ctx, state.Client, kvvmi, force) } func (r *Reconciler) doOperationRestart(ctx context.Context, force bool, state *ReconcilerState) error { @@ -198,7 +198,7 @@ func (r *Reconciler) doOperationRestart(ctx context.Context, force bool, state * if err != nil { return fmt.Errorf("cannot get kvvmi %q. %w", state.VM.Name, err) } - return kvvmutil.RestartKVVM(ctx, state.Client, kvvm, kvvmi, force) + return powerstate.RestartVM(ctx, state.Client, kvvm, kvvmi, force) } func (r *Reconciler) isOperationAllowed(op virtv2.VMOPOperation, state *ReconcilerState) bool { diff --git a/werf.yaml b/werf.yaml index e3c96b3f8..cc3b1b6a6 100644 --- a/werf.yaml +++ b/werf.yaml @@ -60,6 +60,7 @@ shell: ) ) ' > /images_digests.json + cat images_digests.json --- image: python-dependencies from: python:3.9-slim