diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c6e513b..0f7a52e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -14,6 +14,8 @@ jobs: with: go-version: 1.19.3 + - run: sudo apt-get update && sudo apt-get install libaio-dev libsanlock-dev + - run: make test - uses: codecov/codecov-action@v2 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 1f7c166..e3cc83c 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -23,6 +23,30 @@ jobs: username: smartxrocks password: ${{ secrets.DOCKERHUB_PUSH_TOKEN }} + - id: build_lockspace_initializer + uses: docker/build-push-action@v2 + with: + file: build/lockspace-initializer/Dockerfile + tags: smartxworks/lockspace-initializer:${{ steps.get_version.outputs.version }} + platforms: linux/amd64,linux/arm64 + push: true + + - id: build_lockspace_attacher + uses: docker/build-push-action@v2 + with: + file: build/lockspace-attacher/Dockerfile + tags: smartxworks/lockspace-attacher:${{ steps.get_version.outputs.version }} + platforms: linux/amd64,linux/arm64 + push: true + + - id: build_lockspace_detector + uses: docker/build-push-action@v2 + with: + file: build/lockspace-detector/Dockerfile + tags: smartxworks/lockspace-detector:${{ steps.get_version.outputs.version }} + platforms: linux/amd64,linux/arm64 + push: true + - id: build_virt_prerunner uses: docker/build-push-action@v2 with: @@ -34,7 +58,11 @@ jobs: - uses: docker/build-push-action@v2 with: file: build/virt-controller/Dockerfile - build-args: PRERUNNER_IMAGE=smartxworks/virt-prerunner:${{ steps.get_version.outputs.version }}@${{ steps.build_virt_prerunner.outputs.digest }} + build-args: | + PRERUNNER_IMAGE=smartxworks/virt-prerunner:${{ steps.get_version.outputs.version }}@${{ steps.build_virt_prerunner.outputs.digest }} + INITIALIZER_IMAGE=smartxworks/lockspace-initializer:${{ steps.get_version.outputs.version }}@${{ steps.build_lockspace_initializer.outputs.digest }} + ATTACHER_IMAGE=smartxworks/lockspace-attacher:${{ steps.get_version.outputs.version }}@${{ steps.build_lockspace_attacher.outputs.digest }} + DETECTOR_IMAGE=smartxworks/lockspace-detector:${{ steps.get_version.outputs.version }}@${{ steps.build_lockspace_detector.outputs.digest }} tags: smartxworks/virt-controller:${{ steps.get_version.outputs.version }} platforms: linux/amd64,linux/arm64 push: true diff --git a/build/lockspace-attacher/Dockerfile b/build/lockspace-attacher/Dockerfile new file mode 100644 index 0000000..7d2d11f --- /dev/null +++ b/build/lockspace-attacher/Dockerfile @@ -0,0 +1,21 @@ +FROM golang:1.19-alpine AS builder + +RUN apk add --no-cache gcc musl-dev libaio-dev +RUN apk add --no-cache sanlock-dev --repository=http://dl-cdn.alpinelinux.org/alpine/edge/testing/ + +WORKDIR /workspace + +COPY go.mod go.mod +COPY go.sum go.sum +RUN go mod download + +COPY cmd/ cmd/ +COPY pkg/ pkg/ +RUN --mount=type=cache,target=/root/.cache/go-build go build -a cmd/lockspace-attacher/main.go + +FROM alpine + +RUN apk add --no-cache sanlock --repository=http://dl-cdn.alpinelinux.org/alpine/edge/testing/ + +COPY --from=builder /workspace/main /usr/bin/lockspace-attacher +ENTRYPOINT ["lockspace-attacher"] diff --git a/build/lockspace-detector/Dockerfile b/build/lockspace-detector/Dockerfile new file mode 100644 index 0000000..0600ab8 --- /dev/null +++ b/build/lockspace-detector/Dockerfile @@ -0,0 +1,21 @@ +FROM golang:1.19-alpine AS builder + +RUN apk add --no-cache gcc musl-dev libaio-dev +RUN apk add --no-cache 
sanlock-dev --repository=http://dl-cdn.alpinelinux.org/alpine/edge/testing/ + +WORKDIR /workspace + +COPY go.mod go.mod +COPY go.sum go.sum +RUN go mod download + +COPY cmd/ cmd/ +COPY pkg/ pkg/ +RUN --mount=type=cache,target=/root/.cache/go-build go build -a cmd/lockspace-detector/main.go + +FROM alpine + +RUN apk add --no-cache sanlock --repository=http://dl-cdn.alpinelinux.org/alpine/edge/testing/ + +COPY --from=builder /workspace/main /usr/bin/lockspace-detector +ENTRYPOINT ["lockspace-detector"] diff --git a/build/lockspace-initializer/Dockerfile b/build/lockspace-initializer/Dockerfile new file mode 100644 index 0000000..9160593 --- /dev/null +++ b/build/lockspace-initializer/Dockerfile @@ -0,0 +1,21 @@ +FROM golang:1.19-alpine AS builder + +RUN apk add --no-cache gcc musl-dev libaio-dev +RUN apk add --no-cache sanlock-dev --repository=http://dl-cdn.alpinelinux.org/alpine/edge/testing/ + +WORKDIR /workspace + +COPY go.mod go.mod +COPY go.sum go.sum +RUN go mod download + +COPY cmd/ cmd/ +COPY pkg/ pkg/ +RUN --mount=type=cache,target=/root/.cache/go-build go build -a cmd/lockspace-initializer/main.go + +FROM alpine + +RUN apk add --no-cache sanlock --repository=http://dl-cdn.alpinelinux.org/alpine/edge/testing/ + +COPY --from=builder /workspace/main /usr/bin/lockspace-initializer +ENTRYPOINT ["lockspace-initializer"] diff --git a/build/virt-controller/Dockerfile b/build/virt-controller/Dockerfile index e781cf6..f183624 100644 --- a/build/virt-controller/Dockerfile +++ b/build/virt-controller/Dockerfile @@ -14,6 +14,12 @@ FROM alpine ARG PRERUNNER_IMAGE ENV PRERUNNER_IMAGE=$PRERUNNER_IMAGE +ARG DETECTOR_IMAGE +ENV DETECTOR_IMAGE=$DETECTOR_IMAGE +ARG ATTACHER_IMAGE +ENV ATTACHER_IMAGE=$ATTACHER_IMAGE +ARG INITIALIZER_IMAGE +ENV INITIALIZER_IMAGE=$INITIALIZER_IMAGE COPY --from=builder /workspace/main /usr/bin/virt-controller ENTRYPOINT ["virt-controller"] diff --git a/build/virt-prerunner/Dockerfile b/build/virt-prerunner/Dockerfile index daaa947..df11a58 100644 --- a/build/virt-prerunner/Dockerfile +++ b/build/virt-prerunner/Dockerfile @@ -1,6 +1,7 @@ FROM golang:1.19-alpine AS builder -RUN apk add --no-cache gcc musl-dev +RUN apk add --no-cache gcc musl-dev libaio-dev +RUN apk add --no-cache sanlock-dev --repository=http://dl-cdn.alpinelinux.org/alpine/edge/testing/ WORKDIR /workspace @@ -14,7 +15,8 @@ RUN --mount=type=cache,target=/root/.cache/go-build go build -a cmd/virt-prerunn FROM alpine -RUN apk add --no-cache curl screen dnsmasq cdrkit iptables iproute2 qemu-virtiofsd dpkg util-linux s6-overlay nmap-ncat +RUN apk add --no-cache curl screen dnsmasq cdrkit iptables iproute2 qemu-virtiofsd dpkg util-linux tini nmap-ncat +RUN apk add --no-cache sanlock --repository=http://dl-cdn.alpinelinux.org/alpine/edge/testing/ RUN set -eux; \ mkdir /var/lib/cloud-hypervisor; \ @@ -34,19 +36,9 @@ RUN set -eux; \ chmod +x /usr/bin/cloud-hypervisor; \ chmod +x /usr/bin/ch-remote -COPY build/virt-prerunner/cloud-hypervisor-type /etc/s6-overlay/s6-rc.d/cloud-hypervisor/type -COPY build/virt-prerunner/cloud-hypervisor-run.sh /etc/s6-overlay/s6-rc.d/cloud-hypervisor/run -COPY build/virt-prerunner/cloud-hypervisor-finish.sh /etc/s6-overlay/s6-rc.d/cloud-hypervisor/finish -RUN touch /etc/s6-overlay/s6-rc.d/user/contents.d/cloud-hypervisor - COPY --from=builder /workspace/main /usr/bin/virt-prerunner -COPY build/virt-prerunner/virt-prerunner-type /etc/s6-overlay/s6-rc.d/virt-prerunner/type -COPY build/virt-prerunner/virt-prerunner-up /etc/s6-overlay/s6-rc.d/virt-prerunner/up -COPY 
build/virt-prerunner/virt-prerunner-run.sh /etc/s6-overlay/scripts/virt-prerunner-run.sh
-RUN touch /etc/s6-overlay/s6-rc.d/user/contents.d/virt-prerunner
-ENV S6_BEHAVIOUR_IF_STAGE2_FAILS=2
-ENTRYPOINT ["/init"]
+ENTRYPOINT ["tini", "-s", "-g", "--", "/usr/bin/virt-prerunner"]
 
 COPY build/virt-prerunner/iptables-wrapper /sbin/iptables-wrapper
 RUN update-alternatives --install /sbin/iptables iptables /sbin/iptables-wrapper 100
diff --git a/build/virt-prerunner/cloud-hypervisor-finish.sh b/build/virt-prerunner/cloud-hypervisor-finish.sh
deleted file mode 100755
index 6de07ad..0000000
--- a/build/virt-prerunner/cloud-hypervisor-finish.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/sh
-
-if test "$1" -eq 256 ; then
-  e=$((128 + $2))
-else
-  e="$1"
-fi
-
-echo "$e" > /run/s6-linux-init-container-results/exitcode
-/run/s6/basedir/bin/halt
diff --git a/build/virt-prerunner/cloud-hypervisor-run.sh b/build/virt-prerunner/cloud-hypervisor-run.sh
deleted file mode 100755
index b3d1c2c..0000000
--- a/build/virt-prerunner/cloud-hypervisor-run.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/usr/bin/execlineb -P
-
-cloud-hypervisor --api-socket /var/run/virtink/ch.sock
diff --git a/build/virt-prerunner/cloud-hypervisor-type b/build/virt-prerunner/cloud-hypervisor-type
deleted file mode 100644
index 5883cff..0000000
--- a/build/virt-prerunner/cloud-hypervisor-type
+++ /dev/null
@@ -1 +0,0 @@
-longrun
diff --git a/build/virt-prerunner/virt-prerunner-run.sh b/build/virt-prerunner/virt-prerunner-run.sh
deleted file mode 100755
index b5f70c7..0000000
--- a/build/virt-prerunner/virt-prerunner-run.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/usr/bin/with-contenv sh
-
-/usr/bin/virt-prerunner
diff --git a/build/virt-prerunner/virt-prerunner-type b/build/virt-prerunner/virt-prerunner-type
deleted file mode 100644
index bdd22a1..0000000
--- a/build/virt-prerunner/virt-prerunner-type
+++ /dev/null
@@ -1 +0,0 @@
-oneshot
diff --git a/build/virt-prerunner/virt-prerunner-up b/build/virt-prerunner/virt-prerunner-up
deleted file mode 100755
index 103a5e6..0000000
--- a/build/virt-prerunner/virt-prerunner-up
+++ /dev/null
@@ -1 +0,0 @@
-/etc/s6-overlay/scripts/virt-prerunner-run.sh
diff --git a/cmd/lockspace-attacher/main.go b/cmd/lockspace-attacher/main.go
new file mode 100644
index 0000000..1a0505c
--- /dev/null
+++ b/cmd/lockspace-attacher/main.go
@@ -0,0 +1,100 @@
+package main
+
+import (
+	"context"
+	"fmt"
+	"log"
+	"os"
+	"os/exec"
+	"os/signal"
+	"path/filepath"
+	"strconv"
+	"syscall"
+
+	"github.com/namsral/flag"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/tools/clientcmd"
+
+	"github.com/smartxworks/virtink/pkg/sanlock"
+)
+
+func main() {
+	var lockspaceName string
+	var nodeName string
+	flag.StringVar(&lockspaceName, "lockspace-name", "", "")
+	flag.StringVar(&nodeName, "node-name", "", "")
+	flag.Parse()
+
+	cfg, err := clientcmd.BuildConfigFromFlags("", "")
+	if err != nil {
+		log.Fatalf("failed to build kubeconfig: %s", err)
+	}
+	kubeClient, err := kubernetes.NewForConfig(cfg)
+	if err != nil {
+		log.Fatalf("failed to create Kubernetes client: %s", err)
+	}
+
+	hostID, err := getHostID(kubeClient, nodeName)
+	if err != nil {
+		log.Fatalf("failed to get host ID: %s", err)
+	}
+
+	leaseFilePath := filepath.Join("/var/lib/sanlock", lockspaceName, "leases")
+	if err := sanlock.AcquireDeltaLease(lockspaceName, leaseFilePath, hostID); err != nil {
+		log.Fatalf("failed to acquire delta lease: %s", err)
+	}
+	log.Println("successfully acquired delta lease")
+
+	stop := make(chan struct{})
+	c := make(chan os.Signal, 2)
+	signal.Notify(c, os.Interrupt, syscall.SIGTERM)
+	go func() {
+		<-c
+		close(stop)
+		<-c
+		os.Exit(1)
+	}()
+
+	<-stop
+	if err := sanlock.ReleaseDeltaLease(lockspaceName, leaseFilePath, hostID); err != nil {
+		if err != sanlock.ENOENT {
+			log.Fatalf("failed to release delta lease: %s", err)
+		}
+	}
+	if err := umountLeaseVolume(lockspaceName); err != nil {
+		log.Fatalf("failed to unmount lease volume: %s", err)
+	}
+}
+
+//+kubebuilder:rbac:groups="",resources=nodes,verbs=get
+
+func getHostID(client *kubernetes.Clientset, nodeName string) (uint64, error) {
+	node, err := client.CoreV1().Nodes().Get(context.Background(), nodeName, metav1.GetOptions{})
+	if err != nil {
+		return 0, fmt.Errorf("get node: %s", err)
+	}
+
+	if id, exist := node.Annotations["virtink.smartx.com/sanlock-host-id"]; exist {
+		if hostID, err := strconv.ParseUint(id, 10, 64); err != nil {
+			return 0, fmt.Errorf("parse uint: %s", err)
+		} else {
+			return hostID, nil
+		}
+	}
+	return 0, fmt.Errorf("sanlock host ID not found for node %s", nodeName)
+}
+
+func umountLeaseVolume(lockspace string) error {
+	leaseFileDir := filepath.Join("/var/lib/sanlock", lockspace)
+	output, err := exec.Command("sh", "-c", fmt.Sprintf("mount | grep '%s'", leaseFileDir)).CombinedOutput()
+	if err != nil && string(output) == "" {
+		// grep found no matching mount entry, so the volume is already unmounted
+		return nil
+	}
+
+	if _, err := exec.Command("umount", leaseFileDir).CombinedOutput(); err != nil {
+		return fmt.Errorf("unmount volume: %s", err)
+	}
+	return nil
+}
diff --git a/cmd/lockspace-detector/main.go b/cmd/lockspace-detector/main.go
new file mode 100644
index 0000000..3eed20e
--- /dev/null
+++ b/cmd/lockspace-detector/main.go
@@ -0,0 +1,483 @@
+package main
+
+import (
+	"context"
+	"errors"
+	"flag"
+	"fmt"
+	"os"
+	"path/filepath"
+	"reflect"
+	"strconv"
+	"sync"
+	"time"
+
+	"go.uber.org/zap/zapcore"
+	corev1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/types"
+	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
+	clientgoscheme "k8s.io/client-go/kubernetes/scheme"
+	"k8s.io/client-go/tools/record"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
+	"sigs.k8s.io/controller-runtime/pkg/log/zap"
+
+	virtv1alpha1 "github.com/smartxworks/virtink/pkg/apis/virt/v1alpha1"
+	"github.com/smartxworks/virtink/pkg/sanlock"
+)
+
+const (
+	LockProtectionFinalizer = "virtink.smartx.com/lock-protection"
+)
+
+var (
+	scheme = runtime.NewScheme()
+
+	setupLog = ctrl.Log.WithName("setup")
+)
+
+func init() {
+	utilruntime.Must(clientgoscheme.AddToScheme(scheme))
+
+	utilruntime.Must(virtv1alpha1.AddToScheme(scheme))
+}
+
+//+kubebuilder:rbac:groups=coordination.k8s.io,resources=leases,verbs=get;list;watch;create;update;patch;delete
+//+kubebuilder:rbac:groups="",resources=nodes,verbs=get;list;watch;update
+//+kubebuilder:rbac:groups=virt.virtink.smartx.com,resources=lockspaces,verbs=get;list;watch
+//+kubebuilder:rbac:groups=virt.virtink.smartx.com,resources=locks,verbs=get;list;watch;update
+//+kubebuilder:rbac:groups=virt.virtink.smartx.com,resources=locks/status,verbs=get;update
+//+kubebuilder:rbac:groups=virt.virtink.smartx.com,resources=virtualmachines,verbs=get;list;watch
+//+kubebuilder:rbac:groups=virt.virtink.smartx.com,resources=virtualmachines/status,verbs=get;update
+//+kubebuilder:rbac:groups="",resources=pods,verbs=get;list;watch;delete
+//+kubebuilder:rbac:groups="",resources=events,verbs=create;update;patch
+
+func main() {
+	opts := zap.Options{
+		Development: true,
+		TimeEncoder: zapcore.ISO8601TimeEncoder,
+	}
+	opts.BindFlags(flag.CommandLine)
+	flag.Parse()
+
+	lockspace := os.Getenv("LOCKSPACE_NAME")
+
+	ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))
+	mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
+		Scheme:           scheme,
+		LeaderElection:   true,
+		LeaderElectionID: fmt.Sprintf("%s-detector.virtink.smartx.com", lockspace),
+	})
+	if err != nil {
+		setupLog.Error(err, "unable to create manager")
+		os.Exit(1)
+	}
+
+	ioTimeout, err := strconv.Atoi(os.Getenv("IO_TIMEOUT"))
+	if err != nil {
+		// exit instead of continuing: a zero IO timeout would make the
+		// requeue and detection intervals below meaningless
+		setupLog.Error(err, "failed to parse IO_TIMEOUT")
+		os.Exit(1)
+	}
+	if err = (&Detector{
+		Client:            mgr.GetClient(),
+		Scheme:            mgr.GetScheme(),
+		Recorder:          mgr.GetEventRecorderFor("lockspace-detector"),
+		lockspace:         lockspace,
+		ioTimeout:         time.Duration(ioTimeout) * time.Second,
+		freeStateDetector: make(map[string]chan struct{}),
+	}).SetupWithManager(mgr); err != nil {
+		setupLog.Error(err, "unable to create controller", "controller", "Detector")
+		os.Exit(1)
+	}
+
+	if err = (&LockReconciler{
+		Client:    mgr.GetClient(),
+		Scheme:    mgr.GetScheme(),
+		Recorder:  mgr.GetEventRecorderFor("lock-controller"),
+		lockspace: lockspace,
+	}).SetupWithManager(mgr); err != nil {
+		setupLog.Error(err, "unable to create controller", "controller", "Lock")
+		os.Exit(1)
+	}
+
+	setupLog.Info("starting manager")
+	if err := mgr.Start(ctrl.SetupSignalHandler()); err != nil {
+		setupLog.Error(err, "problem running manager")
+		os.Exit(1)
+	}
+}
+
+type Detector struct {
+	client.Client
+	Scheme   *runtime.Scheme
+	Recorder record.EventRecorder
+
+	lockspace string
+	ioTimeout time.Duration
+
+	mutex             sync.Mutex
+	freeStateDetector map[string]chan struct{}
+}
+
+func (d *Detector) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+	ls, err := d.getLockspaceOrNil(ctx, d.lockspace)
+	if err != nil {
+		return ctrl.Result{}, fmt.Errorf("get Lockspace: %s", err)
+	}
+	if ls == nil {
+		return ctrl.Result{}, nil
+	}
+
+	var node corev1.Node
+	if err := d.Get(ctx, req.NamespacedName, &node); err != nil {
+		return ctrl.Result{}, client.IgnoreNotFound(err)
+	}
+
+	if err := d.detectNode(ctx, &node); err != nil {
+		reconcileErr := reconcileError{}
+		if errors.As(err, &reconcileErr) {
+			return reconcileErr.Result, nil
+		}
+		return ctrl.Result{}, fmt.Errorf("reconcile Node: %s", err)
+	}
+	return ctrl.Result{RequeueAfter: 2 * d.ioTimeout}, nil
+}
+
+func (d *Detector) getLockspaceOrNil(ctx context.Context, lockspace string) (*virtv1alpha1.Lockspace, error) {
+	lsKey := types.NamespacedName{
+		Name: lockspace,
+	}
+	var ls virtv1alpha1.Lockspace
+	if err := d.Client.Get(ctx, lsKey, &ls); err != nil {
+		return nil, client.IgnoreNotFound(err)
+	}
+
+	return &ls, nil
+}
+
+func (d *Detector) detectNode(ctx context.Context, node *corev1.Node) error {
+	// guard against a nil node before dereferencing it below
+	if node == nil {
+		return nil
+	}
+	if node.DeletionTimestamp != nil {
+		d.stopFreeStateDetector(node.Name)
+		return nil
+	}
+
+	id, exist := node.Annotations["virtink.smartx.com/sanlock-host-id"]
+	if !exist {
+		return fmt.Errorf("sanlock host ID not found for node %s", node.Name)
+	}
+	hostID, err := strconv.ParseUint(id, 10, 64)
+	if err != nil {
+		return fmt.Errorf("parse uint: %s", err)
+	}
+
+	hostStatus, err := sanlock.GetHostStatus(d.lockspace, hostID)
+	if err != nil {
+		panic(fmt.Errorf("get Sanlock host status: %s", err))
+	}
+
+	switch
hostStatus { + case sanlock.HostStatusLive: + delete(node.Labels, fmt.Sprintf("virtink.smartx.com/dead-lockspace-%s", d.lockspace)) + if err := d.Client.Update(ctx, node); err != nil { + return fmt.Errorf("update Node labels: %s", err) + } + d.stopFreeStateDetector(node.Name) + ctrl.LoggerFrom(ctx).Info("node is alive", "Node", node.Name) + case sanlock.HostStatusDead: + node.Labels[fmt.Sprintf("virtink.smartx.com/dead-lockspace-%s", d.lockspace)] = "" + if err := d.Client.Update(ctx, node); err != nil { + return fmt.Errorf("update Node labels: %s", err) + } + ctrl.LoggerFrom(ctx).Info("node is dead", "Node", node.Name) + + if err := d.updateVMOnDeadNode(ctx, node.Name); err != nil { + return fmt.Errorf("update VM on dead Node: %s", err) + } + case sanlock.HostStatusFree: + if _, exist := node.Labels[fmt.Sprintf("virtink.smartx.com/dead-lockspace-%s", d.lockspace)]; !exist { + go d.startFreeStateDetector(ctx, node) + } + case sanlock.HostStatusFail: + node.Labels[fmt.Sprintf("virtink.smartx.com/dead-lockspace-%s", d.lockspace)] = "" + if err := d.Client.Update(ctx, node); err != nil { + return fmt.Errorf("update Node labels: %s", err) + } + ctrl.LoggerFrom(ctx).Info("node is failed", "Node", node.Name) + + return reconcileError{ctrl.Result{RequeueAfter: d.ioTimeout}} + default: + // ignore + } + + return nil +} + +func (d *Detector) updateVMOnDeadNode(ctx context.Context, node string) error { + var vmList virtv1alpha1.VirtualMachineList + vmSelector := client.MatchingFields{".status.nodeName": node} + if err := d.Client.List(ctx, &vmList, vmSelector); err != nil { + return fmt.Errorf("list VM: %s", err) + } + for _, vm := range vmList.Items { + if vm.Status.Phase != virtv1alpha1.VirtualMachineScheduling && vm.Status.Phase != virtv1alpha1.VirtualMachineScheduled && vm.Status.Phase != virtv1alpha1.VirtualMachineRunning { + continue + } + for _, l := range vm.Spec.Locks { + var lock virtv1alpha1.Lock + lockKey := types.NamespacedName{ + Namespace: vm.Namespace, + Name: l, + } + if err := d.Client.Get(ctx, lockKey, &lock); err != nil { + return fmt.Errorf("get Lock: %s", err) + } + if lock.Spec.LockspaceName == d.lockspace { + vm.Status.Phase = virtv1alpha1.VirtualMachineFailed + if vm.Spec.EnableHA { + vm.Status = virtv1alpha1.VirtualMachineStatus{ + Phase: virtv1alpha1.VirtualMachinePending, + } + } + if err := d.Client.Status().Update(ctx, &vm); err != nil { + return fmt.Errorf("update VM status: %s", err) + } + break + } + } + } + + return nil +} + +func (d *Detector) startFreeStateDetector(ctx context.Context, node *corev1.Node) { + if _, exist := d.freeStateDetector[node.Name]; !exist { + d.mutex.Lock() + stop := make(chan struct{}) + d.freeStateDetector[node.Name] = stop + d.mutex.Unlock() + + timeout := time.NewTicker(8*d.ioTimeout + sanlock.WatchdogFireTimeoutDefaultSeconds*time.Second) + defer timeout.Stop() + + select { + case <-stop: + // no-operation + case <-timeout.C: + nodeKey := types.NamespacedName{ + Name: node.Name, + } + if err := d.Client.Get(ctx, nodeKey, node); err != nil { + ctrl.LoggerFrom(ctx).Error(err, "failed to get Node in free state detector", "Node", node.Name) + } else { + node.Labels[fmt.Sprintf("virtink.smartx.com/dead-lockspace-%s", d.lockspace)] = "" + if err := d.Client.Update(ctx, node); err != nil { + ctrl.LoggerFrom(ctx).Error(err, "failed to update Node in free state detector", "Node", node.Name) + } + ctrl.LoggerFrom(ctx).Info("node is dead, detected by free state detector", "Node", node.Name) + + if err := d.updateVMOnDeadNode(ctx, node.Name); err 
!= nil { + ctrl.LoggerFrom(ctx).Error(err, "failed to update VM on dead Node", "Node", node.Name) + } + } + } + + d.mutex.Lock() + delete(d.freeStateDetector, node.Name) + d.mutex.Unlock() + } +} + +func (d *Detector) stopFreeStateDetector(node string) { + if ch, exist := d.freeStateDetector[node]; exist { + close(ch) + } +} + +func (d *Detector) SetupWithManager(mgr ctrl.Manager) error { + if err := mgr.GetFieldIndexer().IndexField(context.Background(), &virtv1alpha1.VirtualMachine{}, ".status.nodeName", func(obj client.Object) []string { + vm := obj.(*virtv1alpha1.VirtualMachine) + return []string{vm.Status.NodeName} + }); err != nil { + return fmt.Errorf("index VM by Node name: %s", err) + } + + return ctrl.NewControllerManagedBy(mgr). + For(&corev1.Node{}). + Complete(d) +} + +type LockReconciler struct { + client.Client + Scheme *runtime.Scheme + Recorder record.EventRecorder + + lockspace string +} + +func (r *LockReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + var lock virtv1alpha1.Lock + if err := r.Get(ctx, req.NamespacedName, &lock); err != nil { + return ctrl.Result{}, client.IgnoreNotFound(err) + } + + if lock.Spec.LockspaceName != r.lockspace { + return ctrl.Result{}, nil + } + + status := lock.Status.DeepCopy() + if err := r.reconcile(ctx, &lock); err != nil { + reconcileErr := reconcileError{} + if errors.As(err, &reconcileErr) { + return reconcileErr.Result, nil + } + r.Recorder.Eventf(&lock, corev1.EventTypeWarning, "FailedReconcile", "Failed to reconcile Lock: %s", err) + return ctrl.Result{}, err + } + + if !reflect.DeepEqual(lock.Status, status) { + if err := r.Status().Update(ctx, &lock); err != nil { + if apierrors.IsConflict(err) { + return ctrl.Result{Requeue: true}, nil + } + return ctrl.Result{}, fmt.Errorf("update Lock status: %s", err) + } + } + + return ctrl.Result{}, nil +} + +func (r *LockReconciler) reconcile(ctx context.Context, lock *virtv1alpha1.Lock) error { + if lock.DeletionTimestamp != nil { + if err := r.cleanupLock(ctx, lock); err != nil { + return fmt.Errorf("cleanup Lock: %s", err) + } + return nil + } + + if !controllerutil.ContainsFinalizer(lock, LockProtectionFinalizer) { + controllerutil.AddFinalizer(lock, LockProtectionFinalizer) + return r.Client.Update(ctx, lock) + } + + if !lock.Status.Ready { + if err := r.reconcileNotReadyLock(ctx, lock); err != nil { + return fmt.Errorf("reconcile not ready Lock: %s", err) + } + lock.Status.Ready = true + } + + return nil +} + +func (r *LockReconciler) cleanupLock(ctx context.Context, lock *virtv1alpha1.Lock) error { + var vmList virtv1alpha1.VirtualMachineList + vmSelector := client.MatchingFields{".spec.locks": lock.Name} + if err := r.Client.List(ctx, &vmList, vmSelector); err != nil { + return fmt.Errorf("list VM: %s", err) + } + isVMPodFound := false + for _, vm := range vmList.Items { + pod, err := r.getVMPodOrNil(ctx, &vm) + if err != nil { + return fmt.Errorf("get VM Pod: %s", err) + } + if pod != nil { + isVMPodFound = true + // TODO: shutdown the VMM + if err := r.Client.Delete(ctx, pod); err != nil { + return fmt.Errorf("delete VM Pod: %s", err) + } + } + } + if isVMPodFound { + return reconcileError{ctrl.Result{RequeueAfter: time.Second}} + } + + leaseFilePath := filepath.Join("/var/lib/sanlock", lock.Spec.LockspaceName, "leases") + _, err := sanlock.SearchResource(lock.Spec.LockspaceName, leaseFilePath, lock.Name) + if err != nil { + if err == sanlock.ENOENT { + // no-operation + } else { + return fmt.Errorf("search Sanlock resource: %s", err) + } + } 
else { + if err := sanlock.DeleteResource(lock.Spec.LockspaceName, leaseFilePath, lock.Name); err != nil { + return fmt.Errorf("delete Sanlock resource: %s", err) + } + } + + if controllerutil.ContainsFinalizer(lock, LockProtectionFinalizer) { + controllerutil.RemoveFinalizer(lock, LockProtectionFinalizer) + if err := r.Client.Update(ctx, lock); err != nil { + return fmt.Errorf("update Lock finalizer: %s", err) + } + } + + return nil +} + +func (r *LockReconciler) getVMPodOrNil(ctx context.Context, vm *virtv1alpha1.VirtualMachine) (*corev1.Pod, error) { + var pod corev1.Pod + podKey := types.NamespacedName{ + Namespace: vm.Namespace, + Name: vm.Status.VMPodName, + } + if err := r.Client.Get(ctx, podKey, &pod); err != nil { + return nil, client.IgnoreNotFound(err) + } + + if !metav1.IsControlledBy(&pod, vm) { + return nil, fmt.Errorf("pod %q is not controlled by VM %q", namespacedName(&pod), namespacedName(vm)) + } + return &pod, nil +} + +func (r *LockReconciler) reconcileNotReadyLock(ctx context.Context, lock *virtv1alpha1.Lock) error { + leaseFilePath := filepath.Join("/var/lib/sanlock", lock.Spec.LockspaceName, "leases") + offset, err := sanlock.SearchResource(lock.Spec.LockspaceName, leaseFilePath, lock.Name) + if err != nil { + if err == sanlock.ENOENT { + offset, err = sanlock.CreateResource(lock.Spec.LockspaceName, leaseFilePath, lock.Name) + if err != nil { + return fmt.Errorf("create Sanlock resource: %s", err) + } + } else { + return fmt.Errorf("search Sanlock resource: %s", err) + } + } + lock.Status.Offset = offset + return nil +} + +func (r *LockReconciler) SetupWithManager(mgr ctrl.Manager) error { + if err := mgr.GetFieldIndexer().IndexField(context.Background(), &virtv1alpha1.VirtualMachine{}, ".spec.locks", func(obj client.Object) []string { + vm := obj.(*virtv1alpha1.VirtualMachine) + return vm.Spec.Locks + }); err != nil { + return fmt.Errorf("index VM by Lock: %s", err) + } + + return ctrl.NewControllerManagedBy(mgr). + For(&virtv1alpha1.Lock{}). 
+ Complete(r) +} + +func namespacedName(obj metav1.ObjectMetaAccessor) types.NamespacedName { + meta := obj.GetObjectMeta() + return types.NamespacedName{ + Namespace: meta.GetNamespace(), + Name: meta.GetName(), + } +} + +type reconcileError struct { + ctrl.Result +} + +func (rerr reconcileError) Error() string { + return fmt.Sprintf("requeue: %v, requeueAfter: %s", rerr.Requeue, rerr.RequeueAfter) +} diff --git a/cmd/lockspace-initializer/main.go b/cmd/lockspace-initializer/main.go new file mode 100644 index 0000000..36eed3b --- /dev/null +++ b/cmd/lockspace-initializer/main.go @@ -0,0 +1,39 @@ +package main + +import ( + "log" + "os" + "os/exec" + "path/filepath" + + "github.com/namsral/flag" + + "github.com/smartxworks/virtink/pkg/sanlock" +) + +func main() { + var lockspaceName string + var ioTimeoutSeconds int + flag.StringVar(&lockspaceName, "lockspace-name", "", "") + flag.IntVar(&ioTimeoutSeconds, "io-timeout-seconds", 10, "") + flag.Parse() + + leaseFilePath := filepath.Join("/var/lib/sanlock", lockspaceName, "leases") + if _, err := os.Stat(leaseFilePath); err != nil { + if os.IsNotExist(err) { + if _, err := exec.Command("touch", leaseFilePath).CombinedOutput(); err != nil { + log.Fatalf("create lease file: %s", err) + } + } else { + log.Fatalf("check lease file status: %s", err) + } + } + + if err := sanlock.WriteLockspaceWithIOTimeout(lockspaceName, leaseFilePath, uint32(ioTimeoutSeconds)); err != nil { + log.Fatalf("create Sanlock Lockspace: %s", err) + } + + if err := sanlock.FormatRIndex(lockspaceName, leaseFilePath); err != nil { + log.Fatalf("format Sanlock RIndex: %s", err) + } +} diff --git a/cmd/virt-controller/main.go b/cmd/virt-controller/main.go index 5615b78..c34d0d1 100644 --- a/cmd/virt-controller/main.go +++ b/cmd/virt-controller/main.go @@ -84,6 +84,18 @@ func main() { os.Exit(1) } + if err = (&controller.LockspaceReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Recorder: mgr.GetEventRecorderFor("lockspace-controller"), + DetectorImageName: os.Getenv("DETECTOR_IMAGE"), + AttacherImageName: os.Getenv("ATTACHER_IMAGE"), + InitializerImageName: os.Getenv("INITIALIZER_IMAGE"), + }).SetupWithManager(mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "Lockspace") + os.Exit(1) + } + mgr.GetWebhookServer().Register("/mutate-v1alpha1-virtualmachine", &webhook.Admission{Handler: &controller.VMMutator{}}) mgr.GetWebhookServer().Register("/validate-v1alpha1-virtualmachine", &webhook.Admission{Handler: &controller.VMValidator{}}) mgr.GetWebhookServer().Register("/validate-v1alpha1-virtualmachinemigration", &webhook.Admission{Handler: &controller.VMMValidator{Client: mgr.GetClient()}}) diff --git a/cmd/virt-prerunner/main.go b/cmd/virt-prerunner/main.go index d11248c..ee48b57 100644 --- a/cmd/virt-prerunner/main.go +++ b/cmd/virt-prerunner/main.go @@ -13,6 +13,7 @@ import ( "path/filepath" "runtime" "strings" + "syscall" "text/template" "github.com/docker/libnetwork/resolvconf" @@ -26,6 +27,7 @@ import ( virtv1alpha1 "github.com/smartxworks/virtink/pkg/apis/virt/v1alpha1" "github.com/smartxworks/virtink/pkg/cloudhypervisor" "github.com/smartxworks/virtink/pkg/cpuset" + "github.com/smartxworks/virtink/pkg/sanlock" ) func main() { @@ -45,26 +47,35 @@ func main() { log.Fatalf("Failed to unmarshal VM: %s", err) } + if len(vm.Spec.Locks) > 0 { + resources := os.Getenv("LOCKSPACE_RESOURCE") + if err := sanlock.AcquireResourceLease(strings.Split(resources, " "), vm.Name); err != nil { + log.Fatalf("Failed to acquire lock: %s", 
err) + } + } + vmConfig, err := buildVMConfig(context.Background(), &vm) if err != nil { log.Fatalf("Failed to build VM config: %s", err) } - if receiveMigration { - return - } + if !receiveMigration { + vmConfigFile, err := os.Create("/var/run/virtink/vm-config.json") + if err != nil { + log.Fatalf("Failed to create VM config file: %s", err) + } - vmConfigFile, err := os.Create("/var/run/virtink/vm-config.json") - if err != nil { - log.Fatalf("Failed to create VM config file: %s", err) - } + if err := json.NewEncoder(vmConfigFile).Encode(vmConfig); err != nil { + log.Fatalf("Failed to write VM config to file: %s", err) + } + vmConfigFile.Close() - if err := json.NewEncoder(vmConfigFile).Encode(vmConfig); err != nil { - log.Fatalf("Failed to write VM config to file: %s", err) + log.Println("Succeeded to setup") } - vmConfigFile.Close() - log.Println("Succeeded to setup") + if err := syscall.Exec("/usr/bin/cloud-hypervisor", []string{"cloud-hypervisor", "--api-socket", "/var/run/virtink/ch.sock"}, nil); err != nil { + log.Fatalf("failed to start cloud-hypervisor: %s", err) + } } func buildVMConfig(ctx context.Context, vm *virtv1alpha1.VirtualMachine) (*cloudhypervisor.VmConfig, error) { diff --git a/deploy/crd/virt.virtink.smartx.com_locks.yaml b/deploy/crd/virt.virtink.smartx.com_locks.yaml new file mode 100644 index 0000000..3771d95 --- /dev/null +++ b/deploy/crd/virt.virtink.smartx.com_locks.yaml @@ -0,0 +1,60 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.9.0 + creationTimestamp: null + name: locks.virt.virtink.smartx.com +spec: + group: virt.virtink.smartx.com + names: + kind: Lock + listKind: LockList + plural: locks + singular: lock + scope: Namespaced + versions: + - additionalPrinterColumns: + - jsonPath: .spec.lockspaceName + name: Lockspace + type: string + - jsonPath: .status.ready + name: Ready + type: boolean + name: v1alpha1 + schema: + openAPIV3Schema: + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + properties: + lockspaceName: + type: string + required: + - lockspaceName + type: object + status: + properties: + offset: + format: int64 + type: integer + ready: + type: boolean + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/deploy/crd/virt.virtink.smartx.com_lockspaces.yaml b/deploy/crd/virt.virtink.smartx.com_lockspaces.yaml new file mode 100644 index 0000000..5f9f695 --- /dev/null +++ b/deploy/crd/virt.virtink.smartx.com_lockspaces.yaml @@ -0,0 +1,74 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.9.0 + creationTimestamp: null + name: lockspaces.virt.virtink.smartx.com +spec: + group: virt.virtink.smartx.com + names: + kind: Lockspace + listKind: LockspaceList + plural: lockspaces + singular: lockspace + scope: Cluster + versions: + - additionalPrinterColumns: + - jsonPath: .spec.storageClassName + name: StorageClass + type: string + - jsonPath: .status.ready + name: Ready + type: boolean + name: v1alpha1 + schema: + openAPIV3Schema: + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + properties: + ioTimeoutSeconds: + default: 10 + format: int32 + maximum: 15 + minimum: 5 + type: integer + maxLocks: + default: 1000 + description: The maximum number of Lock that can be held in a Lockspace. + maximum: 16384 + minimum: 1 + type: integer + storageClassName: + type: string + volumeMode: + default: Filesystem + description: PersistentVolumeMode describes how a volume is intended + to be consumed, either Block or Filesystem. 
+ type: string + required: + - storageClassName + type: object + status: + properties: + ready: + type: boolean + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/deploy/crd/virt.virtink.smartx.com_virtualmachines.yaml b/deploy/crd/virt.virtink.smartx.com_virtualmachines.yaml index 9d1117f..453ca42 100644 --- a/deploy/crd/virt.virtink.smartx.com_virtualmachines.yaml +++ b/deploy/crd/virt.virtink.smartx.com_virtualmachines.yaml @@ -861,6 +861,8 @@ spec: type: array type: object type: object + enableHA: + type: boolean instance: properties: cpu: @@ -1093,6 +1095,10 @@ spec: format: int32 type: integer type: object + locks: + items: + type: string + type: array networks: items: properties: diff --git a/deploy/kustomization.yaml b/deploy/kustomization.yaml index c9775d4..ae0e609 100644 --- a/deploy/kustomization.yaml +++ b/deploy/kustomization.yaml @@ -1,6 +1,10 @@ resources: - crd/virt.virtink.smartx.com_virtualmachines.yaml - crd/virt.virtink.smartx.com_virtualmachinemigrations.yaml + - crd/virt.virtink.smartx.com_locks.yaml + - crd/virt.virtink.smartx.com_lockspaces.yaml - namespace.yaml - virt-controller - virt-daemon + - lockspace-attacher + - lockspace-detector diff --git a/deploy/lockspace-attacher/kustomization.yaml b/deploy/lockspace-attacher/kustomization.yaml new file mode 100644 index 0000000..5059ee0 --- /dev/null +++ b/deploy/lockspace-attacher/kustomization.yaml @@ -0,0 +1,4 @@ +resources: + - rolebinding.yaml + - role.yaml + - sa.yaml diff --git a/deploy/lockspace-attacher/role.yaml b/deploy/lockspace-attacher/role.yaml new file mode 100644 index 0000000..7e9764f --- /dev/null +++ b/deploy/lockspace-attacher/role.yaml @@ -0,0 +1,13 @@ +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + creationTimestamp: null + name: lockspace-attacher +rules: +- apiGroups: + - "" + resources: + - nodes + verbs: + - get diff --git a/deploy/lockspace-attacher/rolebinding.yaml b/deploy/lockspace-attacher/rolebinding.yaml new file mode 100644 index 0000000..85fb628 --- /dev/null +++ b/deploy/lockspace-attacher/rolebinding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: lockspace-attacher +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: lockspace-attacher +subjects: + - kind: ServiceAccount + name: lockspace-attacher + namespace: virtink-system diff --git a/deploy/lockspace-attacher/sa.yaml b/deploy/lockspace-attacher/sa.yaml new file mode 100644 index 0000000..d2808e7 --- /dev/null +++ b/deploy/lockspace-attacher/sa.yaml @@ -0,0 +1,5 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: lockspace-attacher + namespace: virtink-system diff --git a/deploy/lockspace-detector/kustomization.yaml b/deploy/lockspace-detector/kustomization.yaml new file mode 100644 index 0000000..5059ee0 --- /dev/null +++ b/deploy/lockspace-detector/kustomization.yaml @@ -0,0 +1,4 @@ +resources: + - rolebinding.yaml + - role.yaml + - sa.yaml diff --git a/deploy/lockspace-detector/role.yaml b/deploy/lockspace-detector/role.yaml new file mode 100644 index 0000000..29d0138 --- /dev/null +++ b/deploy/lockspace-detector/role.yaml @@ -0,0 +1,84 @@ +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + creationTimestamp: null + name: lockspace-detector +rules: +- apiGroups: + - "" + resources: + - events + verbs: + - create + - patch + - update +- apiGroups: + - "" + resources: + - nodes + verbs: + - get + - list + - update + - watch +- 
apiGroups:
+  - ""
+  resources:
+  - pods
+  verbs:
+  - delete
+  - get
+  - list
+  - watch
+- apiGroups:
+  - coordination.k8s.io
+  resources:
+  - leases
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - patch
+  - update
+  - watch
+- apiGroups:
+  - virt.virtink.smartx.com
+  resources:
+  - locks
+  verbs:
+  - get
+  - list
+  - update
+  - watch
+- apiGroups:
+  - virt.virtink.smartx.com
+  resources:
+  - locks/status
+  verbs:
+  - get
+  - update
+- apiGroups:
+  - virt.virtink.smartx.com
+  resources:
+  - lockspaces
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - virt.virtink.smartx.com
+  resources:
+  - virtualmachines
+  verbs:
+  - get
+  - list
+  - watch
+- apiGroups:
+  - virt.virtink.smartx.com
+  resources:
+  - virtualmachines/status
+  verbs:
+  - get
+  - update
diff --git a/deploy/lockspace-detector/rolebinding.yaml b/deploy/lockspace-detector/rolebinding.yaml
new file mode 100644
index 0000000..6f03bb6
--- /dev/null
+++ b/deploy/lockspace-detector/rolebinding.yaml
@@ -0,0 +1,12 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+  name: lockspace-detector
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: lockspace-detector
+subjects:
+  - kind: ServiceAccount
+    name: lockspace-detector
+    namespace: virtink-system
diff --git a/deploy/lockspace-detector/sa.yaml b/deploy/lockspace-detector/sa.yaml
new file mode 100644
index 0000000..a03b00f
--- /dev/null
+++ b/deploy/lockspace-detector/sa.yaml
@@ -0,0 +1,5 @@
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: lockspace-detector
+  namespace: virtink-system
diff --git a/deploy/virt-controller/role.yaml b/deploy/virt-controller/role.yaml
index a776f1a..6110821 100644
--- a/deploy/virt-controller/role.yaml
+++ b/deploy/virt-controller/role.yaml
@@ -30,6 +30,8 @@ rules:
   resources:
   - persistentvolumeclaims
   verbs:
+  - create
+  - delete
   - get
   - list
   - watch
@@ -45,6 +47,17 @@ rules:
   - patch
   - update
   - watch
+- apiGroups:
+  - apps
+  resources:
+  - daemonsets
+  verbs:
+  - create
+  - delete
+  - get
+  - list
+  - update
+  - watch
 - apiGroups:
   - cdi.kubevirt.io
   resources:
@@ -73,6 +86,32 @@ rules:
   - get
   - list
   - watch
+- apiGroups:
+  - virt.virtink.smartx.com
+  resources:
+  - locks
+  verbs:
+  - delete
+  - get
+  - list
+  - update
+  - watch
+- apiGroups:
+  - virt.virtink.smartx.com
+  resources:
+  - lockspaces
+  verbs:
+  - get
+  - list
+  - update
+  - watch
+- apiGroups:
+  - virt.virtink.smartx.com
+  resources:
+  - lockspaces/status
+  verbs:
+  - get
+  - update
 - apiGroups:
   - virt.virtink.smartx.com
   resources:
diff --git a/docs/images/vm_ha_architecture.jpg b/docs/images/vm_ha_architecture.jpg
new file mode 100644
index 0000000..6dec486
Binary files /dev/null and b/docs/images/vm_ha_architecture.jpg differ
diff --git a/docs/vm_ha.md b/docs/vm_ha.md
new file mode 100644
index 0000000..f4c5775
--- /dev/null
+++ b/docs/vm_ha.md
@@ -0,0 +1,172 @@
+# VM HA
+
+Virtink uses [sanlock](https://pagure.io/sanlock) for VM HA (high availability), and includes a lock mechanism to avoid the VM "split-brain" situation, which can lead to two active copies of a VM after recovery from a failure. Besides the "split-brain" situation, the lock is also used to prevent two VM processes from having concurrent write access to the same disk image, as this would result in data corruption if the guest is not using a cluster-aware filesystem.
+
+## Goals
+
+- [x] **VM HA with the ability to avoid the VM "split-brain" situation**.
The "split-brain" situation means there would be multiple active copies of a VM after recovery from a failure + +- [x] **VM HA based on the health state of the storage instead of node**. If a node can not acess storage A but can still access storage B, the HA VMs running on this node that have locks in storage A will be rebuilt on the other nodes, but the HA VMs without locks in storage A will still run on this node + +- [x] **Disk lock to prevent multiple VMs from having concurrent access to the same disk image** + +- [ ] **Live migration for the VMs with disk locks, or even enable HA** + +## Architecture + +![vm_ha_architecture](./images/vm_ha_architecture.jpg) + +- `Lockspace` is a no-namespaced CRD object refers to the specific storage, and each storage has its own Lockspace. E.g `nfs-1-lockspace`, `nfs-2-lockspace`, `iscsi-1-lockspace` and etc. + +- `Lock` is a CRD object refers to the disk locks in the Lockspace, the VM with Locks can get start only after the corresponding disk locks are obtained, and Lock can be used to prevent multiple VMs from having concurrent access to the same disk image. + +- `lockspace-volume` is a PVC object refers to the shared block device or file used for Locks in storage. + +- `sanlock` is a per-node daemon, responsible for acquiring/releasing/checking Locks in each Lockspace volume, and there is only one sanlock daemon instance on each node. + +- `lockspace-controller` is a cluster-wide controller, responsible for creating/cleaning volume, initializer, attacher and detector for each Lockspace. This controller is deployed with `virt-controller`. + +- `lockspace-initializer` is a run once Pod, responsible for initializing node lease space (also named delta lease in sanlock) in the corresponding Lockspace volume. + +- `lockspace-attacher` is a per-node daemon, responsible for attaching/detaching node to/from the corresponding Lockspace. And attacher will bind mount the Lockspace volume to host path for sanlock daemon to access. + +- `lockspace-detector` is a cluster-wide controller, responsible for heartbeat detection of each node in the corresponding Lockspace, and rebuild the HA VMs when node is "dead" in the Lockspace. + +- `lock-controller` is a cluster-wide controller, responsible for initializing/cleaning disk lock (also named paxos lease in sanlock) in the corresponding Lockspace volume. This controller is deployed with `lockspace-detector`. + +## Prerequisites + +### Configure Sanlock Host ID + +The sanlock host ID is an unique identifier for each node in the cluster, it's used for acquiring node lease by attacher and for heartbeat detection by detector. You can configure it by appending annotation `virtink.smartx.com/sanlock-host-id: "n"` to the Node object, and `n` should be unique for each node in the cluster. + +### Deploy CSI Plugin + +We currently use CSI driver to create PVC for Lockspace volume, you can try [NFS CSI driver](https://github.com/kubernetes-csi/csi-driver-nfs), [Ceph CSI](https://github.com/ceph/ceph-csi) and etc. + +> **Note**: Currently, only the NFS CSI driver is tested officially. + +After the CSI installation, you should create a StorageClass for the CSI driver. + +### Enable Watchdog Device + +The watchdog device is used to reset the "dead" host when the VM process is not cleaned up by sanlock daemon within the pre-configured time, it's the guarantee to avoid VM "split-brain". + +You can try kernel module `softdog` for test only when hardware watch dog device is missing. 
+
+### Deploy Sanlock Daemon
+
+You can deploy the sanlock daemon directly on the host, or try the [Containerized Sanlock Daemon](https://github.com/carezkh/containerized-sanlock-daemon) project to run the daemon in a container. The watchdog fire timeout should be 60s.
+
+## Usage Guides
+
+### Create Lockspace
+
+You can create a Lockspace with the following YAML:
+
+```yaml
+apiVersion: virt.virtink.smartx.com/v1alpha1
+kind: Lockspace
+metadata:
+  name: nfs-192.168.27.87
+spec:
+  storageClassName: nfs-192.168.27.87
+  volumeMode: Filesystem
+  maxLocks: 1000
+  ioTimeoutSeconds: 10
+```
+
+The details of the fields are as follows:
+
+- `storageClassName` refers to a storage; a Lockspace volume will be created there to hold node leases and disk locks.
+- `volumeMode` is the mode of the Lockspace volume; use `Filesystem` (the default) for NAS and `Block` for SAN.
+- `maxLocks` is the maximum number of Locks that can be held in this Lockspace; a Lockspace volume of size `(n+3)Mi` will be created to hold `n` Locks. Currently, this field is immutable after creation.
+- `ioTimeoutSeconds` can be used to configure the failure recovery time for this Lockspace only; please refer to [Failure Recovery](#failure-recovery) for more details.
+
+After the Lockspace initializer succeeds, the Lockspace status will be updated to `ready`. The attacher will try to attach the node to the Lockspace, and the detector will do heartbeat detection in the Lockspace volume for each node.
+
+### Create Lock
+
+You can create a Lock with the following YAML:
+
+```yaml
+apiVersion: virt.virtink.smartx.com/v1alpha1
+kind: Lock
+metadata:
+  name: ubuntu
+  namespace: default
+spec:
+  lockspaceName: nfs-192.168.27.87
+```
+
+The field `lockspaceName` refers to the Lockspace created above.
+
+The Lock controller will initialize a disk lock in the corresponding Lockspace volume, and the Lock status will be updated to `ready` when it succeeds.
+
+### Create HA VM
+
+You can refer to [ubuntu-ha.yaml](../samples/ubuntu-ha.yaml) to create an HA VM.
+
+The details of the fields related to HA are as follows:
+
+- `enableHA` is a boolean value indicating whether HA is enabled.
+- `locks` is a list of Locks that must be obtained by the VM before it starts. It's recommended to add a Lock from the same storage as the VM OS disk DataVolume: the VM will most likely fail to run properly when its node cannot access this storage, so it makes sense to rebuild the VM on other nodes. You can also add a Lock for a data disk if the state of the corresponding storage is important for the function of the VM. A "dead" VM with Locks but without HA enabled will only be set to the `Failed` phase by the HA components; the run policy will decide whether to rebuild it or not.
+- `runPolicy` can only be `RerunOnFailure` or `Always` for VM HA.
+
+## Failure Recovery
+
+When a failure happens, the HA VMs can be rebuilt within a pre-configured time that depends on the situation; the times below refer to the time elapsed after the failure happens.
+
+> **Note**: We assume that the HA components are running properly when failures happen; otherwise these components need to be restored automatically before the HA VMs can be recovered. E.g. a `virt-controller` running on a node with a power failure will be restored within 300s, before new instances of the HA VMs are created.
+
+### Storage Failures
+
+The storage failures here refer to cases where only part of the nodes in the cluster cannot access the storage; the HA VMs cannot be recovered when no node can access the storage. While diagnosing such failures, you can inspect the lockspace state from the sanlock daemon, as shown below.
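+A quick way to see what the sanlock daemon observes during such a failure (run on the host, or inside the sanlock daemon container):
+
+```bash
+# Show the lockspaces and resources known to the local sanlock daemon;
+# a lockspace in ADD or REM state is still being attached or recovered
+sanlock client status
+```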
+
+When storage failures happen, the HA VMs with Locks in the corresponding storage start to be cleaned up at `T1`, and this cleaning process must be completed **before** `T2`, otherwise the host will be reset by the watchdog device. At `T3`, the label `virtink.smartx.com/dead-lockspace-<lockspace>` will be added to the corresponding Node object, so VMs with Locks in the "dead" Lockspace will not be scheduled to this node. At `T4`, the "dead" HA VMs will be recovered on the other healthy nodes.
+
+The recovery time can be calculated as follows, where `T1` < `T3` < `T2` < `T4`:
+
+- t1 < `T1` < t2, t1 = 6 * io_timeout_seconds, t2 = 8 * io_timeout_seconds;
+- t3 < `T2` < t4, t3 = 6 * io_timeout_seconds + 60s, t4 = 8 * io_timeout_seconds + 60s;
+- t5 < `T3` < t6, t5 = 6 * io_timeout_seconds, t6 = 12 * io_timeout_seconds;
+- t7 < `T4` < t8, t7 = 6 * io_timeout_seconds + 60s, t8 = 11 * io_timeout_seconds + 60s;
+
+E.g. with io_timeout_seconds = 10s: 60s < `T1` < 80s, 120s < `T2` < 140s, 60s < `T3` < 120s, 120s < `T4` < 170s.
+
+> **Note**: When a node cannot access storage A but can still access storage B, the HA VMs running on this node that have Locks in storage A will be rebuilt on the other nodes, but the HA VMs without Locks in storage A will keep running on this node.
+
+> **Note**: When a node cannot access the storage but the management network still works, the HA VMs will be updated to the `Failed` phase by `virt-daemon` at `T1`, and the VMs may be rebuilt on the "dead" node before `T3`.
+
+You can refer to the following steps to recover a node from a "dead" Lockspace:
+
+- Resolve the storage failures, e.g. replace the faulty storage NIC or update the misconfigured firewall rules.
+- Wait for the sanlock daemon to remove the corresponding Lockspace, which is in REM (recover mode) state; you can use the command `sanlock client status` on the host (or in the container) to check the state of the Lockspace.
+- Delete the Lockspace attacher Pod on the "dead" node; it will be recreated by the `lockspace-attacher` DaemonSet.
+
+### Node Power Failures
+
+When a node power failure happens, the HA VMs running on this node will be recovered on the other healthy nodes at `T4`. At `T3`, the label `virtink.smartx.com/dead-lockspace-<lockspace>` will be added to the "dead" Node object, so VMs with Locks in any Lockspace will not be scheduled to this node.
+
+The recovery time can be calculated as follows, where `T3` < `T4`:
+
+- t5 < `T3` < t6, t5 = 6 * io_timeout_seconds, t6 = 12 * io_timeout_seconds;
+- t7 < `T4` < t8, t7 = 6 * io_timeout_seconds + 60s, t8 = 11 * io_timeout_seconds + 60s;
+
+E.g. with io_timeout_seconds = 10s: 60s < `T3` < 120s, 120s < `T4` < 170s.
+
+If the node is manually shut down, you may get a different result, because the sanlock daemon may release the node lease in this situation, and the `free` node lease will be detected by the free state detector in `lockspace-detector`. At `T5`, the label `virtink.smartx.com/dead-lockspace-<lockspace>` will be added to the "dead" Node object, and the HA VMs running on this node will be recovered on the other healthy nodes.
+
+The recovery time can be calculated as follows:
+
+- t9 < `T5` < t10, t9 = 8 * io_timeout_seconds + 60s, t10 = 12 * io_timeout_seconds + 60s;
+
+E.g. with io_timeout_seconds = 10s: 140s < `T5` < 180s.
+
+## Known Issues
+
+- VMs with Locks are not migratable.
+
+- When a node cannot access the storage but the management network still works, the HA VMs will be updated to the `Failed` phase by `virt-daemon`, and the VMs may be scheduled to the "dead" node again before the "dead" Lockspace label is added to the Node object.
+
+- If you delete a Lockspace that is not ready, or while acquiring the node lease is still in progress (you can see the Lockspace still in `ADD` state with the command `sanlock client status`), the mount point `/var/lib/sanlock/` will not be cleaned up on the host, and you should unmount it manually. Otherwise, if you recreate this Lockspace, the new `lockspace-attacher` Pod will fail to run because of the leftover mount point.
diff --git a/go.mod b/go.mod
index 59c0ba6..13560a6 100644
--- a/go.mod
+++ b/go.mod
@@ -20,6 +20,7 @@ require (
 	github.com/stretchr/testify v1.7.0
 	github.com/subgraph/libmacouflage v0.0.1
 	github.com/vishvananda/netlink v1.1.0
+	go.uber.org/zap v1.21.0
 	golang.org/x/sys v0.0.0-20220908164124-27713097b956
 	google.golang.org/grpc v1.47.0
 	gopkg.in/fsnotify.v1 v1.4.7
@@ -91,7 +92,6 @@ require (
 	github.com/vmihailenco/msgpack v4.0.4+incompatible // indirect
 	go.uber.org/atomic v1.7.0 // indirect
 	go.uber.org/multierr v1.6.0 // indirect
-	go.uber.org/zap v1.21.0 // indirect
 	golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3 // indirect
 	golang.org/x/net v0.0.0-20220722155237-a158d28d115b // indirect
 	golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8 // indirect
diff --git a/hack/Dockerfile b/hack/Dockerfile
index 931cb4f..02483db 100644
--- a/hack/Dockerfile
+++ b/hack/Dockerfile
@@ -1,4 +1,7 @@
-FROM golang:1.19
+FROM golang:1.19-alpine
+
+RUN apk add --no-cache bash gcc git musl-dev libaio-dev
+RUN apk add --no-cache sanlock-dev --repository=http://dl-cdn.alpinelinux.org/alpine/edge/testing/
 
 WORKDIR /workspace
diff --git a/hack/generate.sh b/hack/generate.sh
index a5a8116..4890502 100755
--- a/hack/generate.sh
+++ b/hack/generate.sh
@@ -12,5 +12,7 @@ bash $GOPATH/src/k8s.io/code-generator/generate-groups.sh "deepcopy,client,infor
 controller-gen paths=./pkg/apis/... crd output:crd:artifacts:config=deploy/crd
 controller-gen paths=./cmd/virt-controller/... paths=./pkg/controller/... rbac:roleName=virt-controller output:rbac:artifacts:config=deploy/virt-controller webhook output:webhook:artifacts:config=deploy/virt-controller
 controller-gen paths=./cmd/virt-daemon/... paths=./pkg/daemon/... rbac:roleName=virt-daemon output:rbac:artifacts:config=deploy/virt-daemon
+controller-gen paths=./cmd/lockspace-attacher/... rbac:roleName=lockspace-attacher output:rbac:artifacts:config=deploy/lockspace-attacher
+controller-gen paths=./cmd/lockspace-detector/... rbac:roleName=lockspace-detector output:rbac:artifacts:config=deploy/lockspace-detector
 go generate ./...
diff --git a/pkg/apis/virt/v1alpha1/register.go b/pkg/apis/virt/v1alpha1/register.go index 9c34c66..c17effc 100644 --- a/pkg/apis/virt/v1alpha1/register.go +++ b/pkg/apis/virt/v1alpha1/register.go @@ -35,6 +35,10 @@ func addKnownTypes(scheme *runtime.Scheme) error { &VirtualMachineList{}, &VirtualMachineMigration{}, &VirtualMachineMigrationList{}, + &Lockspace{}, + &LockspaceList{}, + &Lock{}, + &LockList{}, ) metav1.AddToGroupVersion(scheme, SchemeGroupVersion) return nil diff --git a/pkg/apis/virt/v1alpha1/types.go b/pkg/apis/virt/v1alpha1/types.go index 30765a4..b52f81a 100644 --- a/pkg/apis/virt/v1alpha1/types.go +++ b/pkg/apis/virt/v1alpha1/types.go @@ -1,6 +1,8 @@ package v1alpha1 import ( + "fmt" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -33,10 +35,12 @@ type VirtualMachineSpec struct { ReadinessProbe *corev1.Probe `json:"readinessProbe,omitempty"` RunPolicy RunPolicy `json:"runPolicy,omitempty"` + EnableHA bool `json:"enableHA,omitempty"` Instance Instance `json:"instance"` Volumes []Volume `json:"volumes,omitempty"` Networks []Network `json:"networks,omitempty"` + Locks []string `json:"locks,omitempty"` } // +kubebuilder:validation:Enum=Always;RerunOnFailure;Once;Manual;Halted @@ -333,3 +337,95 @@ type VirtualMachineMigrationList struct { Items []VirtualMachineMigration `json:"items"` } + +// +genclient +// +genclient:nonNamespaced +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +// +kubebuilder:resource:scope="Cluster" +// +kubebuilder:subresource:status +// +kubebuilder:printcolumn:name="StorageClass",type=string,JSONPath=`.spec.storageClassName` +// +kubebuilder:printcolumn:name="Ready",type=boolean,JSONPath=`.status.ready` + +type Lockspace struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec LockspaceSpec `json:"spec,omitempty"` + Status LockspaceStatus `json:"status,omitempty"` +} + +func (ls *Lockspace) GeneratePVCName() string { + return fmt.Sprintf("lockspace-pvc-%s", ls.Name) +} + +func (ls *Lockspace) GenerateInitializerName() string { + return fmt.Sprintf("lockspace-initializer-%s", ls.Name) +} + +func (ls *Lockspace) GenerateDetectorName() string { + return fmt.Sprintf("lockspace-detector-%s", ls.Name) +} + +func (ls *Lockspace) GenerateAttacherName() string { + return fmt.Sprintf("lockspace-attacher-%s", ls.Name) +} + +type LockspaceSpec struct { + StorageClassName string `json:"storageClassName"` + // +kubebuilder:default=Filesystem + VolumeMode *corev1.PersistentVolumeMode `json:"volumeMode,omitempty"` + // The maximum number of Lock that can be held in a Lockspace. 
+ // +kubebuilder:default=1000 + // +kubebuilder:validation:Maximum=16384 + // +kubebuilder:validation:Minimum=1 + MaxLocks int `json:"maxLocks,omitempty"` + // +kubebuilder:default=10 + // +kubebuilder:validation:Maximum=15 + // +kubebuilder:validation:Minimum=5 + IOTimeoutSeconds uint32 `json:"ioTimeoutSeconds,omitempty"` +} + +type LockspaceStatus struct { + Ready bool `json:"ready,omitempty"` +} + +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object + +type LockspaceList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + + Items []Lockspace `json:"items"` +} + +// +genclient +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object +// +kubebuilder:subresource:status +// +kubebuilder:printcolumn:name="Lockspace",type=string,JSONPath=`.spec.lockspaceName` +// +kubebuilder:printcolumn:name="Ready",type=boolean,JSONPath=`.status.ready` + +type Lock struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec LockSpec `json:"spec,omitempty"` + Status LockStatus `json:"status,omitempty"` +} + +type LockSpec struct { + LockspaceName string `json:"lockspaceName"` +} + +type LockStatus struct { + Ready bool `json:"ready,omitempty"` + Offset uint64 `json:"offset,omitempty"` +} + +// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object + +type LockList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + + Items []Lock `json:"items"` +} diff --git a/pkg/apis/virt/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/virt/v1alpha1/zz_generated.deepcopy.go index 5b80684..8f8be78 100644 --- a/pkg/apis/virt/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/virt/v1alpha1/zz_generated.deepcopy.go @@ -336,6 +336,197 @@ func (in *Kernel) DeepCopy() *Kernel { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Lock) DeepCopyInto(out *Lock) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + out.Spec = in.Spec + out.Status = in.Status + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Lock. +func (in *Lock) DeepCopy() *Lock { + if in == nil { + return nil + } + out := new(Lock) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *Lock) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *LockList) DeepCopyInto(out *LockList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]Lock, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LockList. +func (in *LockList) DeepCopy() *LockList { + if in == nil { + return nil + } + out := new(LockList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. 
+func (in *LockList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *LockSpec) DeepCopyInto(out *LockSpec) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LockSpec. +func (in *LockSpec) DeepCopy() *LockSpec { + if in == nil { + return nil + } + out := new(LockSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *LockStatus) DeepCopyInto(out *LockStatus) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LockStatus. +func (in *LockStatus) DeepCopy() *LockStatus { + if in == nil { + return nil + } + out := new(LockStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Lockspace) DeepCopyInto(out *Lockspace) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + out.Status = in.Status + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Lockspace. +func (in *Lockspace) DeepCopy() *Lockspace { + if in == nil { + return nil + } + out := new(Lockspace) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *Lockspace) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *LockspaceList) DeepCopyInto(out *LockspaceList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]Lockspace, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LockspaceList. +func (in *LockspaceList) DeepCopy() *LockspaceList { + if in == nil { + return nil + } + out := new(LockspaceList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *LockspaceList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *LockspaceSpec) DeepCopyInto(out *LockspaceSpec) { + *out = *in + if in.VolumeMode != nil { + in, out := &in.VolumeMode, &out.VolumeMode + *out = new(v1.PersistentVolumeMode) + **out = **in + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LockspaceSpec. +func (in *LockspaceSpec) DeepCopy() *LockspaceSpec { + if in == nil { + return nil + } + out := new(LockspaceSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *LockspaceStatus) DeepCopyInto(out *LockspaceStatus) {
+	*out = *in
+	return
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LockspaceStatus.
+func (in *LockspaceStatus) DeepCopy() *LockspaceStatus {
+	if in == nil {
+		return nil
+	}
+	out := new(LockspaceStatus)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *Memory) DeepCopyInto(out *Memory) {
 	*out = *in
@@ -651,6 +842,11 @@ func (in *VirtualMachineSpec) DeepCopyInto(out *VirtualMachineSpec) {
 			(*in)[i].DeepCopyInto(&(*out)[i])
 		}
 	}
+	if in.Locks != nil {
+		in, out := &in.Locks, &out.Locks
+		*out = make([]string, len(*in))
+		copy(*out, *in)
+	}
 	return
 }
 
diff --git a/pkg/controller/lockspace_controller.go b/pkg/controller/lockspace_controller.go
new file mode 100644
index 0000000..e5744ce
--- /dev/null
+++ b/pkg/controller/lockspace_controller.go
@@ -0,0 +1,549 @@
+package controller
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"path/filepath"
+	"reflect"
+	"strconv"
+	"time"
+
+	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/api/resource"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/client-go/tools/record"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
+	"sigs.k8s.io/controller-runtime/pkg/reconcile"
+
+	virtv1alpha1 "github.com/smartxworks/virtink/pkg/apis/virt/v1alpha1"
+)
+
+const (
+	LockspaceProtectionFinalizer         = "virtink.smartx.com/lockspace-protection"
+	LockspaceDetectorProtectionFinalizer = "virtink.smartx.com/lockspace-detector-protection"
+	LockspaceAttacherProtectionFinalizer = "virtink.smartx.com/lockspace-attacher-protection"
+
+	VirtinkNamespace = "virtink-system"
+)
+
+type LockspaceReconciler struct {
+	client.Client
+	Scheme   *runtime.Scheme
+	Recorder record.EventRecorder
+
+	DetectorImageName    string
+	AttacherImageName    string
+	InitializerImageName string
+}
+
+//+kubebuilder:rbac:groups=virt.virtink.smartx.com,resources=lockspaces,verbs=get;list;watch;update
+//+kubebuilder:rbac:groups=virt.virtink.smartx.com,resources=lockspaces/status,verbs=get;update
+//+kubebuilder:rbac:groups=virt.virtink.smartx.com,resources=locks,verbs=get;list;watch;update;delete
+//+kubebuilder:rbac:groups="",resources=events,verbs=create;update;patch
+//+kubebuilder:rbac:groups="",resources=persistentvolumeclaims,verbs=get;list;watch;create;delete
+//+kubebuilder:rbac:groups="",resources=pods,verbs=get;list;watch;create;delete
+//+kubebuilder:rbac:groups="apps",resources=daemonsets,verbs=get;list;watch;create;update;delete
+
+func (r *LockspaceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
+	var lockspace virtv1alpha1.Lockspace
+	if err := r.Get(ctx, req.NamespacedName, &lockspace); err != nil {
+		return ctrl.Result{}, client.IgnoreNotFound(err)
+	}
+
+	status := lockspace.Status.DeepCopy()
+	if err := r.reconcile(ctx, &lockspace); err != nil {
+		reconcileErr := reconcileError{}
+		if errors.As(err, &reconcileErr) {
+			return reconcileErr.Result, nil
+		}
+		r.Recorder.Eventf(&lockspace, corev1.EventTypeWarning, "FailedReconcile", "Failed to reconcile Lockspace: %s", err)
+		return ctrl.Result{}, err
+	}
+
+	// status is a deep-copied pointer, so dereference it for the comparison.
+	if !reflect.DeepEqual(lockspace.Status, *status) {
+		if err := r.Status().Update(ctx, &lockspace); err != nil {
+			if apierrors.IsConflict(err) {
+				return ctrl.Result{Requeue: true}, nil
+			}
+			return ctrl.Result{}, fmt.Errorf("update Lockspace status: %s", err)
+		}
+	}
+
+	return reconcile.Result{}, nil
+}
+
+func (r *LockspaceReconciler) reconcile(ctx context.Context, lockspace *virtv1alpha1.Lockspace) error {
+	if lockspace.DeletionTimestamp != nil {
+		// A deleting Lockspace only needs cleanup. The error is returned
+		// unwrapped so that a reconcileError can still request a requeue.
+		return r.cleanupLockspace(ctx, lockspace)
+	}
+
+	if !controllerutil.ContainsFinalizer(lockspace, LockspaceProtectionFinalizer) {
+		controllerutil.AddFinalizer(lockspace, LockspaceProtectionFinalizer)
+		return r.Client.Update(ctx, lockspace)
+	}
+
+	if !lockspace.Status.Ready {
+		if err := r.reconcileNotReadyLockspace(ctx, lockspace); err != nil {
+			return fmt.Errorf("reconcile not ready Lockspace: %s", err)
+		}
+	} else {
+		if err := r.reconcileReadyLockspace(ctx, lockspace); err != nil {
+			return fmt.Errorf("reconcile ready Lockspace: %s", err)
+		}
+	}
+	return nil
+}
+
+func (r *LockspaceReconciler) cleanupLockspace(ctx context.Context, lockspace *virtv1alpha1.Lockspace) error {
+	var lockList virtv1alpha1.LockList
+	lockSelector := client.MatchingFields{".spec.lockspaceName": lockspace.Name}
+	if err := r.Client.List(ctx, &lockList, lockSelector); err != nil {
+		return fmt.Errorf("list Lock: %s", err)
+	}
+	if len(lockList.Items) != 0 {
+		for _, lock := range lockList.Items {
+			if err := r.Client.Delete(ctx, &lock); err != nil {
+				return fmt.Errorf("delete Lock %q: %s", namespacedName(&lock), err)
+			}
+		}
+		return reconcileError{ctrl.Result{RequeueAfter: time.Second}}
+	}
+
+	detector, err := r.getLockspaceDetectorOrNil(ctx, lockspace)
+	if err != nil {
+		return fmt.Errorf("get Lockspace detector: %s", err)
+	}
+	if detector != nil {
+		if controllerutil.ContainsFinalizer(detector, LockspaceDetectorProtectionFinalizer) {
+			controllerutil.RemoveFinalizer(detector, LockspaceDetectorProtectionFinalizer)
+			if err := r.Client.Update(ctx, detector); err != nil {
+				return fmt.Errorf("update Lockspace detector: %s", err)
+			}
+		}
+
+		if err := r.Client.Delete(ctx, detector); err != nil {
+			return fmt.Errorf("delete Lockspace detector: %s", err)
+		}
+		return reconcileError{ctrl.Result{Requeue: true}}
+	}
+
+	attacher, err := r.getLockspaceAttacherOrNil(ctx, lockspace)
+	if err != nil {
+		return fmt.Errorf("get Lockspace attacher: %s", err)
+	}
+	if attacher != nil {
+		if controllerutil.ContainsFinalizer(attacher, LockspaceAttacherProtectionFinalizer) {
+			controllerutil.RemoveFinalizer(attacher, LockspaceAttacherProtectionFinalizer)
+			if err := r.Client.Update(ctx, attacher); err != nil {
+				return fmt.Errorf("update Lockspace attacher: %s", err)
+			}
+		}
+	}
+
+	if controllerutil.ContainsFinalizer(lockspace, LockspaceProtectionFinalizer) {
+		controllerutil.RemoveFinalizer(lockspace, LockspaceProtectionFinalizer)
+		if err := r.Client.Update(ctx, lockspace); err != nil {
+			return fmt.Errorf("update Lockspace: %s", err)
+		}
+	}
+
+	return nil
+}
+
+func (r *LockspaceReconciler) getLockspaceDetectorOrNil(ctx context.Context, lockspace *virtv1alpha1.Lockspace) (*appsv1.DaemonSet, error) {
+	detectorKey := types.NamespacedName{
+		Namespace: VirtinkNamespace,
+		Name:      lockspace.GenerateDetectorName(),
+	}
+	var detector appsv1.DaemonSet
+	if err := r.Client.Get(ctx, detectorKey, &detector); err != nil {
+		return nil, client.IgnoreNotFound(err)
+	}
+
+	if !metav1.IsControlledBy(&detector, lockspace) {
+		return nil, fmt.Errorf("detector %q is not controlled
by Lockspace %q", namespacedName(&detector), namespacedName(lockspace)) + } + return &detector, nil +} + +func (r *LockspaceReconciler) getLockspaceAttacherOrNil(ctx context.Context, lockspace *virtv1alpha1.Lockspace) (*appsv1.DaemonSet, error) { + attacherKey := types.NamespacedName{ + Namespace: VirtinkNamespace, + Name: lockspace.GenerateAttacherName(), + } + var attacher appsv1.DaemonSet + if err := r.Client.Get(ctx, attacherKey, &attacher); err != nil { + return nil, client.IgnoreNotFound(err) + } + + if !metav1.IsControlledBy(&attacher, lockspace) { + return nil, fmt.Errorf("attacher %q is not controlled by Lockspace %q", namespacedName(&attacher), namespacedName(lockspace)) + } + return &attacher, nil +} + +func (r *LockspaceReconciler) reconcileNotReadyLockspace(ctx context.Context, lockspace *virtv1alpha1.Lockspace) error { + pvc, err := r.getLockspacePVCOrNil(ctx, lockspace) + if err != nil { + return fmt.Errorf("get Lockspace PVC: %s", err) + } + if pvc == nil { + if err := r.createLockspacePVC(ctx, lockspace); err != nil { + return fmt.Errorf("create Lockspace PVC: %s", err) + } + return nil + } + + if pvc.Status.Phase == corev1.ClaimBound { + initializer, err := r.getLockspaceInitializerOrNil(ctx, lockspace) + if err != nil { + return fmt.Errorf("get Lockspace initializer: %s", err) + } + if initializer == nil { + if err := r.createLockspaceInitializer(ctx, lockspace, pvc); err != nil { + return fmt.Errorf("create Lockspace initializer: %s", err) + } + return nil + } + + if initializer.Status.Phase == corev1.PodSucceeded { + if err := r.Client.Delete(ctx, initializer); err != nil { + return fmt.Errorf("delete Lockspace initializer: %s", err) + } + lockspace.Status.Ready = true + } + } + + return nil +} + +func (r *LockspaceReconciler) getLockspacePVCOrNil(ctx context.Context, lockspace *virtv1alpha1.Lockspace) (*corev1.PersistentVolumeClaim, error) { + pvcKey := types.NamespacedName{ + Namespace: VirtinkNamespace, + Name: lockspace.GeneratePVCName(), + } + var pvc corev1.PersistentVolumeClaim + if err := r.Client.Get(ctx, pvcKey, &pvc); err != nil { + return nil, client.IgnoreNotFound(err) + } + + if !metav1.IsControlledBy(&pvc, lockspace) { + return nil, fmt.Errorf("PVC %q is not controlled by Lockspace %q", namespacedName(&pvc), namespacedName(lockspace)) + } + + return &pvc, nil +} + +func (r *LockspaceReconciler) createLockspacePVC(ctx context.Context, lockspace *virtv1alpha1.Lockspace) error { + pvc := &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: VirtinkNamespace, + Name: lockspace.GeneratePVCName(), + }, + Spec: corev1.PersistentVolumeClaimSpec{ + StorageClassName: func() *string { str := lockspace.Spec.StorageClassName; return &str }(), + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteMany}, + }, + } + + if pvc.Spec.Resources.Requests == nil { + pvc.Spec.Resources.Requests = corev1.ResourceList{} + } + pvc.Spec.Resources.Requests[corev1.ResourceStorage] = resource.MustParse(fmt.Sprintf("%dMi", 3+lockspace.Spec.MaxLocks)) + + volumeMode := corev1.PersistentVolumeFilesystem + if lockspace.Spec.VolumeMode != nil { + volumeMode = *lockspace.Spec.VolumeMode + } + pvc.Spec.VolumeMode = &volumeMode + + if err := controllerutil.SetControllerReference(lockspace, pvc, r.Scheme); err != nil { + return fmt.Errorf("set PVC controller reference: %s", err) + } + return r.Client.Create(ctx, pvc) +} + +func (r *LockspaceReconciler) getLockspaceInitializerOrNil(ctx context.Context, lockspace *virtv1alpha1.Lockspace) (*corev1.Pod, error) { 
+ podKey := types.NamespacedName{ + Namespace: VirtinkNamespace, + Name: lockspace.GenerateInitializerName(), + } + var pod corev1.Pod + if err := r.Client.Get(ctx, podKey, &pod); err != nil { + return nil, client.IgnoreNotFound(err) + } + + if !metav1.IsControlledBy(&pod, lockspace) { + return nil, fmt.Errorf("pod %q is not controlled by Lockspace %q", namespacedName(&pod), namespacedName(lockspace)) + } + return &pod, nil +} + +func (r *LockspaceReconciler) createLockspaceInitializer(ctx context.Context, lockspace *virtv1alpha1.Lockspace, pvc *corev1.PersistentVolumeClaim) error { + initializerPod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: lockspace.GenerateInitializerName(), + Namespace: VirtinkNamespace, + }, + Spec: corev1.PodSpec{ + RestartPolicy: corev1.RestartPolicyOnFailure, + Containers: []corev1.Container{{ + Name: "lockspace-initializer", + Image: r.InitializerImageName, + Env: []corev1.EnvVar{{ + Name: "LOCKSPACE_NAME", + Value: lockspace.Name, + }, { + Name: "IO_TIMEOUT_SECONDS", + Value: fmt.Sprintf("%d", lockspace.Spec.IOTimeoutSeconds), + }}, + }}, + }, + } + initializerPod.Spec.Volumes = append(initializerPod.Spec.Volumes, corev1.Volume{ + Name: "lockspace-volume", + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: pvc.Name, + }, + }, + }) + container := &initializerPod.Spec.Containers[0] + switch *pvc.Spec.VolumeMode { + case corev1.PersistentVolumeFilesystem: + container.VolumeMounts = append(container.VolumeMounts, corev1.VolumeMount{ + Name: "lockspace-volume", + MountPath: filepath.Join("/var/lib/sanlock/", lockspace.Name), + }) + case corev1.PersistentVolumeBlock: + container.VolumeDevices = append(container.VolumeDevices, corev1.VolumeDevice{ + Name: "lockspace-volume", + DevicePath: filepath.Join("/var/lib/sanlock", lockspace.Name, "leases"), + }) + } + + if err := controllerutil.SetControllerReference(lockspace, initializerPod, r.Scheme); err != nil { + return fmt.Errorf("set initializer controller reference: %s", err) + } + + return r.Client.Create(ctx, initializerPod) +} + +func (r *LockspaceReconciler) reconcileReadyLockspace(ctx context.Context, lockspace *virtv1alpha1.Lockspace) error { + pvc, err := r.getLockspacePVCOrNil(ctx, lockspace) + if err != nil { + return fmt.Errorf("get Lockspace PVC: %s", err) + } + + attacher, err := r.getLockspaceAttacherOrNil(ctx, lockspace) + if err != nil { + return fmt.Errorf("get Lockspace attacher: %s", err) + } + if attacher == nil { + if err := r.createLockspaceAttacher(ctx, lockspace, pvc); err != nil { + return fmt.Errorf("create Lockspace attacher: %s", err) + } + } + + detector, err := r.getLockspaceDetectorOrNil(ctx, lockspace) + if err != nil { + return fmt.Errorf("get Lockspace detector: %s", err) + } + if detector == nil { + if err := r.createLockspaceDetector(ctx, lockspace); err != nil { + return fmt.Errorf("create Lockspace detector: %s", err) + } + } + return nil +} + +func (r *LockspaceReconciler) createLockspaceAttacher(ctx context.Context, lockspace *virtv1alpha1.Lockspace, pvc *corev1.PersistentVolumeClaim) error { + matchLabels := make(map[string]string) + matchLabels["name"] = lockspace.GenerateAttacherName() + + attacher := &appsv1.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: VirtinkNamespace, + Name: lockspace.GenerateAttacherName(), + Finalizers: []string{LockspaceAttacherProtectionFinalizer}, + }, + Spec: appsv1.DaemonSetSpec{ + Selector: &metav1.LabelSelector{MatchLabels: matchLabels}, + Template: 
corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: matchLabels, + }, + Spec: corev1.PodSpec{ + ServiceAccountName: "lockspace-attacher", + Containers: []corev1.Container{{ + Name: "lockspace-attacher", + Image: r.AttacherImageName, + SecurityContext: &corev1.SecurityContext{ + Privileged: func() *bool { b := true; return &b }(), + }, + Env: []corev1.EnvVar{{ + Name: "LOCKSPACE_NAME", + Value: lockspace.Name, + }, { + Name: "NODE_NAME", + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + FieldPath: "spec.nodeName", + }, + }, + }}, + VolumeMounts: []corev1.VolumeMount{{ + Name: "sanlock-run-dir", + MountPath: "/var/run/sanlock", + }, { + Name: "sanlock-lib-dir", + MountPath: "/var/lib/sanlock", + MountPropagation: func() *corev1.MountPropagationMode { p := corev1.MountPropagationBidirectional; return &p }(), + }}, + }}, + Volumes: []corev1.Volume{{ + Name: "sanlock-run-dir", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: "/var/run/sanlock", + }, + }, + }, { + Name: "sanlock-lib-dir", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: "/var/lib/sanlock", + }, + }, + }}, + }, + }, + }, + } + + pod := &attacher.Spec.Template + pod.Spec.Volumes = append(pod.Spec.Volumes, corev1.Volume{ + Name: "lockspace-volume", + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: pvc.Name, + }, + }, + }) + container := &pod.Spec.Containers[0] + switch *pvc.Spec.VolumeMode { + case corev1.PersistentVolumeFilesystem: + container.VolumeMounts = append(container.VolumeMounts, corev1.VolumeMount{ + Name: "lockspace-volume", + MountPath: filepath.Join("/var/lib/sanlock", lockspace.Name), + }) + case corev1.PersistentVolumeBlock: + container.VolumeDevices = append(container.VolumeDevices, corev1.VolumeDevice{ + Name: "lockspace-volume", + DevicePath: filepath.Join("/var/lib/sanlock", lockspace.Name, "leases"), + }) + } + + if err := controllerutil.SetControllerReference(lockspace, attacher, r.Scheme); err != nil { + return fmt.Errorf("set attacher controller reference: %s", err) + } + + return r.Client.Create(ctx, attacher) +} + +func (r *LockspaceReconciler) createLockspaceDetector(ctx context.Context, lockspace *virtv1alpha1.Lockspace) error { + matchLabels := make(map[string]string) + matchLabels["name"] = lockspace.GenerateDetectorName() + + detector := &appsv1.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: VirtinkNamespace, + Name: lockspace.GenerateDetectorName(), + Finalizers: []string{LockspaceDetectorProtectionFinalizer}, + }, + Spec: appsv1.DaemonSetSpec{ + Selector: &metav1.LabelSelector{MatchLabels: matchLabels}, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: matchLabels, + }, + Spec: corev1.PodSpec{ + ServiceAccountName: "lockspace-detector", + Affinity: &corev1.Affinity{ + NodeAffinity: &corev1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{ + NodeSelectorTerms: []corev1.NodeSelectorTerm{{ + MatchExpressions: []corev1.NodeSelectorRequirement{{ + Key: fmt.Sprintf("virtink.smartx.com/dead-lockspace-%s", lockspace.Name), + Operator: corev1.NodeSelectorOpDoesNotExist, + }}, + }}, + }, + }, + }, + Containers: []corev1.Container{{ + Name: "lockspace-detector", + Image: r.DetectorImageName, + Env: []corev1.EnvVar{{ + Name: "LOCKSPACE_NAME", + Value: lockspace.Name, + }, { + Name: "IO_TIMEOUT", + Value: 
strconv.Itoa(int(lockspace.Spec.IOTimeoutSeconds)), + }}, + VolumeMounts: []corev1.VolumeMount{{ + Name: "sanlock-run-dir", + MountPath: "/var/run/sanlock", + }}, + }}, + Volumes: []corev1.Volume{{ + Name: "sanlock-run-dir", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: "/var/run/sanlock", + }, + }, + }}, + }, + }, + }, + } + + if err := controllerutil.SetControllerReference(lockspace, detector, r.Scheme); err != nil { + return fmt.Errorf("set detector controller reference: %s", err) + } + + return r.Client.Create(ctx, detector) +} + +func (r *LockspaceReconciler) SetupWithManager(mgr ctrl.Manager) error { + if err := mgr.GetFieldIndexer().IndexField(context.Background(), &virtv1alpha1.Lock{}, ".spec.lockspaceName", func(obj client.Object) []string { + lock := obj.(*virtv1alpha1.Lock) + return []string{lock.Spec.LockspaceName} + }); err != nil { + return fmt.Errorf("index Lock by lockspaceName: %s", err) + } + + return ctrl.NewControllerManagedBy(mgr). + For(&virtv1alpha1.Lockspace{}). + Owns(&corev1.PersistentVolumeClaim{}). + Owns(&corev1.Pod{}). + Complete(r) +} + +func namespacedName(obj metav1.ObjectMetaAccessor) types.NamespacedName { + meta := obj.GetObjectMeta() + return types.NamespacedName{ + Namespace: meta.GetNamespace(), + Name: meta.GetName(), + } +} diff --git a/pkg/controller/vm_controller.go b/pkg/controller/vm_controller.go index fdd4a37..2f182e9 100644 --- a/pkg/controller/vm_controller.go +++ b/pkg/controller/vm_controller.go @@ -53,6 +53,7 @@ type VMReconciler struct { // +kubebuilder:rbac:groups="",resources=persistentvolumeclaims,verbs=get;list;watch // +kubebuilder:rbac:groups=cdi.kubevirt.io,resources=datavolumes,verbs=get;list;watch // +kubebuilder:rbac:groups=k8s.cni.cncf.io,resources=network-attachment-definitions,verbs=get;list;watch +//+kubebuilder:rbac:groups=virt.virtink.smartx.com,resources=locks,verbs=get;list;watch;update func (r *VMReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { var vm virtv1alpha1.VirtualMachine @@ -123,6 +124,26 @@ func (r *VMReconciler) reconcile(ctx context.Context, vm *virtv1alpha1.VirtualMa vm.Status.VMPodName = names.SimpleNameGenerator.GenerateName(fmt.Sprintf("vm-%s-", vm.Name)) vm.Status.Phase = virtv1alpha1.VirtualMachineScheduling case virtv1alpha1.VirtualMachineScheduling, virtv1alpha1.VirtualMachineScheduled: + if vm.Status.Phase == virtv1alpha1.VirtualMachinePending { + for _, l := range vm.Spec.Locks { + var lock virtv1alpha1.Lock + lockKey := types.NamespacedName{ + Name: l, + Namespace: vm.Namespace, + } + if err := r.Get(ctx, lockKey, &lock); err != nil { + if apierrors.IsNotFound(err) { + return reconcileError{Result: ctrl.Result{RequeueAfter: 30 * time.Second}} + } else { + return fmt.Errorf("get VM Lock: %s", err) + } + } + if lock.DeletionTimestamp != nil || !lock.Status.Ready { + return reconcileError{Result: ctrl.Result{RequeueAfter: 30 * time.Second}} + } + } + } + var vmPod corev1.Pod vmPodKey := types.NamespacedName{ Name: vm.Status.VMPodName, @@ -774,6 +795,77 @@ func (r *VMReconciler) buildVMPod(ctx context.Context, vm *virtv1alpha1.VirtualM vmPod.Annotations["k8s.v1.cni.cncf.io/networks"] = string(networksJSON) } + lockspaces := make(map[string]bool) + resources := []string{} + for _, l := range vm.Spec.Locks { + var lock virtv1alpha1.Lock + lockKey := types.NamespacedName{ + Name: l, + Namespace: vm.Namespace, + } + if err := r.Get(ctx, lockKey, &lock); err != nil { + return nil, fmt.Errorf("get VM Lock: %s", err) + } + lsName := 
lock.Spec.LockspaceName + lockspaces[lsName] = true + resources = append(resources, fmt.Sprintf("%s:%s:/var/lib/sanlock/%s/leases:%d", lsName, lock.Name, lsName, lock.Status.Offset)) + } + if len(vm.Spec.Locks) > 0 { + vmPod.Spec.Containers[0].VolumeMounts = append(vmPod.Spec.Containers[0].VolumeMounts, []corev1.VolumeMount{{ + Name: "sanlock-run-dir", + MountPath: "/var/run/sanlock", + }, { + Name: "sanlock-lib-dir", + MountPath: "/var/lib/sanlock", + MountPropagation: func() *corev1.MountPropagationMode { p := corev1.MountPropagationHostToContainer; return &p }(), + }}...) + + vmPod.Spec.Volumes = append(vmPod.Spec.Volumes, []corev1.Volume{{ + Name: "sanlock-run-dir", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: "/var/run/sanlock", + }, + }, + }, { + Name: "sanlock-lib-dir", + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: "/var/lib/sanlock", + }, + }, + }}...) + + if vmPod.Spec.Affinity == nil { + vmPod.Spec.Affinity = &corev1.Affinity{} + } + affinity := vmPod.Spec.Affinity + if affinity.NodeAffinity == nil { + affinity.NodeAffinity = &corev1.NodeAffinity{} + } + nodeAffinity := affinity.NodeAffinity + if nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution == nil { + nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution = &corev1.NodeSelector{} + } + nodeSelector := nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution + if nodeSelector.NodeSelectorTerms == nil { + nodeSelector.NodeSelectorTerms = []corev1.NodeSelectorTerm{} + } + for ls := range lockspaces { + nodeSelector.NodeSelectorTerms = append(nodeSelector.NodeSelectorTerms, corev1.NodeSelectorTerm{ + MatchExpressions: []corev1.NodeSelectorRequirement{{ + Key: fmt.Sprintf("virtink.smartx.com/dead-lockspace-%s", ls), + Operator: corev1.NodeSelectorOpDoesNotExist, + }}}, + ) + } + + vmPod.Spec.Containers[0].Env = append(vmPod.Spec.Containers[0].Env, corev1.EnvVar{ + Name: "LOCKSPACE_RESOURCE", + Value: strings.Join(resources, " "), + }) + } + vmJSON, err := json.Marshal(vm) if err != nil { return nil, fmt.Errorf("marshal VM: %s", err) @@ -1188,6 +1280,16 @@ func (r *VMReconciler) calculateMigratableCondition(ctx context.Context, vm *vir }, nil } + // TODO: make VM with locks migratable + if len(vm.Spec.Locks) > 0 { + return &metav1.Condition{ + Type: string(virtv1alpha1.VirtualMachineMigratable), + Status: metav1.ConditionFalse, + Reason: "HANotMigratable", + Message: "migration is disabled when VM has locks", + }, nil + } + return &metav1.Condition{ Type: string(virtv1alpha1.VirtualMachineMigratable), Status: metav1.ConditionTrue, diff --git a/pkg/controller/vm_webhook.go b/pkg/controller/vm_webhook.go index 16fe869..c14a0ec 100644 --- a/pkg/controller/vm_webhook.go +++ b/pkg/controller/vm_webhook.go @@ -316,6 +316,16 @@ func ValidateVMSpec(ctx context.Context, spec *virtv1alpha1.VirtualMachineSpec, errs = append(errs, ValidateNetwork(ctx, &network, fieldPath)...) 
} + if spec.EnableHA { + if spec.RunPolicy != virtv1alpha1.RunPolicyAlways && spec.RunPolicy != virtv1alpha1.RunPolicyRerunOnFailure { + errs = append(errs, field.Forbidden(fieldPath.Child("enableHA"), "only \"RerunOnFailure\" and \"Always\" run policy are allowed for VM HA")) + } + } + + if len(spec.Locks) > 8 { + errs = append(errs, field.Forbidden(fieldPath.Child("locks"), "may not use more than 8 Locks for VM")) + } + return errs } diff --git a/pkg/controller/vm_webhook_test.go b/pkg/controller/vm_webhook_test.go index 963765a..753434a 100644 --- a/pkg/controller/vm_webhook_test.go +++ b/pkg/controller/vm_webhook_test.go @@ -2,6 +2,7 @@ package controller import ( "context" + "strconv" "testing" "github.com/stretchr/testify/assert" @@ -472,6 +473,23 @@ func TestValidateVM(t *testing.T) { return vm }(), invalidFields: []string{"spec.networks[0].multus"}, + }, { + vm: func() *virtv1alpha1.VirtualMachine { + vm := validVM.DeepCopy() + vm.Spec.EnableHA = true + vm.Spec.RunPolicy = virtv1alpha1.RunPolicyOnce + return vm + }(), + invalidFields: []string{"spec.enableHA"}, + }, { + vm: func() *virtv1alpha1.VirtualMachine { + vm := validVM.DeepCopy() + for i := 0; i < 9; i++ { + vm.Spec.Locks = append(vm.Spec.Locks, strconv.Itoa(i)) + } + return vm + }(), + invalidFields: []string{"spec.locks"}, }} for _, tc := range tests { diff --git a/pkg/generated/clientset/versioned/typed/virt/v1alpha1/fake/fake_lock.go b/pkg/generated/clientset/versioned/typed/virt/v1alpha1/fake/fake_lock.go new file mode 100644 index 0000000..f1cc86d --- /dev/null +++ b/pkg/generated/clientset/versioned/typed/virt/v1alpha1/fake/fake_lock.go @@ -0,0 +1,126 @@ +// Code generated by client-gen. DO NOT EDIT. + +package fake + +import ( + "context" + + v1alpha1 "github.com/smartxworks/virtink/pkg/apis/virt/v1alpha1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + labels "k8s.io/apimachinery/pkg/labels" + schema "k8s.io/apimachinery/pkg/runtime/schema" + types "k8s.io/apimachinery/pkg/types" + watch "k8s.io/apimachinery/pkg/watch" + testing "k8s.io/client-go/testing" +) + +// FakeLocks implements LockInterface +type FakeLocks struct { + Fake *FakeVirtV1alpha1 + ns string +} + +var locksResource = schema.GroupVersionResource{Group: "virt.virtink.smartx.com", Version: "v1alpha1", Resource: "locks"} + +var locksKind = schema.GroupVersionKind{Group: "virt.virtink.smartx.com", Version: "v1alpha1", Kind: "Lock"} + +// Get takes name of the lock, and returns the corresponding lock object, and an error if there is any. +func (c *FakeLocks) Get(ctx context.Context, name string, options v1.GetOptions) (result *v1alpha1.Lock, err error) { + obj, err := c.Fake. + Invokes(testing.NewGetAction(locksResource, c.ns, name), &v1alpha1.Lock{}) + + if obj == nil { + return nil, err + } + return obj.(*v1alpha1.Lock), err +} + +// List takes label and field selectors, and returns the list of Locks that match those selectors. +func (c *FakeLocks) List(ctx context.Context, opts v1.ListOptions) (result *v1alpha1.LockList, err error) { + obj, err := c.Fake. 
+ Invokes(testing.NewListAction(locksResource, locksKind, c.ns, opts), &v1alpha1.LockList{}) + + if obj == nil { + return nil, err + } + + label, _, _ := testing.ExtractFromListOptions(opts) + if label == nil { + label = labels.Everything() + } + list := &v1alpha1.LockList{ListMeta: obj.(*v1alpha1.LockList).ListMeta} + for _, item := range obj.(*v1alpha1.LockList).Items { + if label.Matches(labels.Set(item.Labels)) { + list.Items = append(list.Items, item) + } + } + return list, err +} + +// Watch returns a watch.Interface that watches the requested locks. +func (c *FakeLocks) Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error) { + return c.Fake. + InvokesWatch(testing.NewWatchAction(locksResource, c.ns, opts)) + +} + +// Create takes the representation of a lock and creates it. Returns the server's representation of the lock, and an error, if there is any. +func (c *FakeLocks) Create(ctx context.Context, lock *v1alpha1.Lock, opts v1.CreateOptions) (result *v1alpha1.Lock, err error) { + obj, err := c.Fake. + Invokes(testing.NewCreateAction(locksResource, c.ns, lock), &v1alpha1.Lock{}) + + if obj == nil { + return nil, err + } + return obj.(*v1alpha1.Lock), err +} + +// Update takes the representation of a lock and updates it. Returns the server's representation of the lock, and an error, if there is any. +func (c *FakeLocks) Update(ctx context.Context, lock *v1alpha1.Lock, opts v1.UpdateOptions) (result *v1alpha1.Lock, err error) { + obj, err := c.Fake. + Invokes(testing.NewUpdateAction(locksResource, c.ns, lock), &v1alpha1.Lock{}) + + if obj == nil { + return nil, err + } + return obj.(*v1alpha1.Lock), err +} + +// UpdateStatus was generated because the type contains a Status member. +// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). +func (c *FakeLocks) UpdateStatus(ctx context.Context, lock *v1alpha1.Lock, opts v1.UpdateOptions) (*v1alpha1.Lock, error) { + obj, err := c.Fake. + Invokes(testing.NewUpdateSubresourceAction(locksResource, "status", c.ns, lock), &v1alpha1.Lock{}) + + if obj == nil { + return nil, err + } + return obj.(*v1alpha1.Lock), err +} + +// Delete takes name of the lock and deletes it. Returns an error if one occurs. +func (c *FakeLocks) Delete(ctx context.Context, name string, opts v1.DeleteOptions) error { + _, err := c.Fake. + Invokes(testing.NewDeleteActionWithOptions(locksResource, c.ns, name, opts), &v1alpha1.Lock{}) + + return err +} + +// DeleteCollection deletes a collection of objects. +func (c *FakeLocks) DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error { + action := testing.NewDeleteCollectionAction(locksResource, c.ns, listOpts) + + _, err := c.Fake.Invokes(action, &v1alpha1.LockList{}) + return err +} + +// Patch applies the patch and returns the patched lock. +func (c *FakeLocks) Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *v1alpha1.Lock, err error) { + obj, err := c.Fake. 
+ Invokes(testing.NewPatchSubresourceAction(locksResource, c.ns, name, pt, data, subresources...), &v1alpha1.Lock{}) + + if obj == nil { + return nil, err + } + return obj.(*v1alpha1.Lock), err +} diff --git a/pkg/generated/clientset/versioned/typed/virt/v1alpha1/fake/fake_lockspace.go b/pkg/generated/clientset/versioned/typed/virt/v1alpha1/fake/fake_lockspace.go new file mode 100644 index 0000000..896aada --- /dev/null +++ b/pkg/generated/clientset/versioned/typed/virt/v1alpha1/fake/fake_lockspace.go @@ -0,0 +1,117 @@ +// Code generated by client-gen. DO NOT EDIT. + +package fake + +import ( + "context" + + v1alpha1 "github.com/smartxworks/virtink/pkg/apis/virt/v1alpha1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + labels "k8s.io/apimachinery/pkg/labels" + schema "k8s.io/apimachinery/pkg/runtime/schema" + types "k8s.io/apimachinery/pkg/types" + watch "k8s.io/apimachinery/pkg/watch" + testing "k8s.io/client-go/testing" +) + +// FakeLockspaces implements LockspaceInterface +type FakeLockspaces struct { + Fake *FakeVirtV1alpha1 +} + +var lockspacesResource = schema.GroupVersionResource{Group: "virt.virtink.smartx.com", Version: "v1alpha1", Resource: "lockspaces"} + +var lockspacesKind = schema.GroupVersionKind{Group: "virt.virtink.smartx.com", Version: "v1alpha1", Kind: "Lockspace"} + +// Get takes name of the lockspace, and returns the corresponding lockspace object, and an error if there is any. +func (c *FakeLockspaces) Get(ctx context.Context, name string, options v1.GetOptions) (result *v1alpha1.Lockspace, err error) { + obj, err := c.Fake. + Invokes(testing.NewRootGetAction(lockspacesResource, name), &v1alpha1.Lockspace{}) + if obj == nil { + return nil, err + } + return obj.(*v1alpha1.Lockspace), err +} + +// List takes label and field selectors, and returns the list of Lockspaces that match those selectors. +func (c *FakeLockspaces) List(ctx context.Context, opts v1.ListOptions) (result *v1alpha1.LockspaceList, err error) { + obj, err := c.Fake. + Invokes(testing.NewRootListAction(lockspacesResource, lockspacesKind, opts), &v1alpha1.LockspaceList{}) + if obj == nil { + return nil, err + } + + label, _, _ := testing.ExtractFromListOptions(opts) + if label == nil { + label = labels.Everything() + } + list := &v1alpha1.LockspaceList{ListMeta: obj.(*v1alpha1.LockspaceList).ListMeta} + for _, item := range obj.(*v1alpha1.LockspaceList).Items { + if label.Matches(labels.Set(item.Labels)) { + list.Items = append(list.Items, item) + } + } + return list, err +} + +// Watch returns a watch.Interface that watches the requested lockspaces. +func (c *FakeLockspaces) Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error) { + return c.Fake. + InvokesWatch(testing.NewRootWatchAction(lockspacesResource, opts)) +} + +// Create takes the representation of a lockspace and creates it. Returns the server's representation of the lockspace, and an error, if there is any. +func (c *FakeLockspaces) Create(ctx context.Context, lockspace *v1alpha1.Lockspace, opts v1.CreateOptions) (result *v1alpha1.Lockspace, err error) { + obj, err := c.Fake. + Invokes(testing.NewRootCreateAction(lockspacesResource, lockspace), &v1alpha1.Lockspace{}) + if obj == nil { + return nil, err + } + return obj.(*v1alpha1.Lockspace), err +} + +// Update takes the representation of a lockspace and updates it. Returns the server's representation of the lockspace, and an error, if there is any. 
+func (c *FakeLockspaces) Update(ctx context.Context, lockspace *v1alpha1.Lockspace, opts v1.UpdateOptions) (result *v1alpha1.Lockspace, err error) { + obj, err := c.Fake. + Invokes(testing.NewRootUpdateAction(lockspacesResource, lockspace), &v1alpha1.Lockspace{}) + if obj == nil { + return nil, err + } + return obj.(*v1alpha1.Lockspace), err +} + +// UpdateStatus was generated because the type contains a Status member. +// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). +func (c *FakeLockspaces) UpdateStatus(ctx context.Context, lockspace *v1alpha1.Lockspace, opts v1.UpdateOptions) (*v1alpha1.Lockspace, error) { + obj, err := c.Fake. + Invokes(testing.NewRootUpdateSubresourceAction(lockspacesResource, "status", lockspace), &v1alpha1.Lockspace{}) + if obj == nil { + return nil, err + } + return obj.(*v1alpha1.Lockspace), err +} + +// Delete takes name of the lockspace and deletes it. Returns an error if one occurs. +func (c *FakeLockspaces) Delete(ctx context.Context, name string, opts v1.DeleteOptions) error { + _, err := c.Fake. + Invokes(testing.NewRootDeleteActionWithOptions(lockspacesResource, name, opts), &v1alpha1.Lockspace{}) + return err +} + +// DeleteCollection deletes a collection of objects. +func (c *FakeLockspaces) DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error { + action := testing.NewRootDeleteCollectionAction(lockspacesResource, listOpts) + + _, err := c.Fake.Invokes(action, &v1alpha1.LockspaceList{}) + return err +} + +// Patch applies the patch and returns the patched lockspace. +func (c *FakeLockspaces) Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *v1alpha1.Lockspace, err error) { + obj, err := c.Fake. 
+ Invokes(testing.NewRootPatchSubresourceAction(lockspacesResource, name, pt, data, subresources...), &v1alpha1.Lockspace{}) + if obj == nil { + return nil, err + } + return obj.(*v1alpha1.Lockspace), err +} diff --git a/pkg/generated/clientset/versioned/typed/virt/v1alpha1/fake/fake_virt_client.go b/pkg/generated/clientset/versioned/typed/virt/v1alpha1/fake/fake_virt_client.go index b5de326..38b4720 100644 --- a/pkg/generated/clientset/versioned/typed/virt/v1alpha1/fake/fake_virt_client.go +++ b/pkg/generated/clientset/versioned/typed/virt/v1alpha1/fake/fake_virt_client.go @@ -12,6 +12,14 @@ type FakeVirtV1alpha1 struct { *testing.Fake } +func (c *FakeVirtV1alpha1) Locks(namespace string) v1alpha1.LockInterface { + return &FakeLocks{c, namespace} +} + +func (c *FakeVirtV1alpha1) Lockspaces() v1alpha1.LockspaceInterface { + return &FakeLockspaces{c} +} + func (c *FakeVirtV1alpha1) VirtualMachines(namespace string) v1alpha1.VirtualMachineInterface { return &FakeVirtualMachines{c, namespace} } diff --git a/pkg/generated/clientset/versioned/typed/virt/v1alpha1/generated_expansion.go b/pkg/generated/clientset/versioned/typed/virt/v1alpha1/generated_expansion.go index 248e348..fe58206 100644 --- a/pkg/generated/clientset/versioned/typed/virt/v1alpha1/generated_expansion.go +++ b/pkg/generated/clientset/versioned/typed/virt/v1alpha1/generated_expansion.go @@ -2,6 +2,10 @@ package v1alpha1 +type LockExpansion interface{} + +type LockspaceExpansion interface{} + type VirtualMachineExpansion interface{} type VirtualMachineMigrationExpansion interface{} diff --git a/pkg/generated/clientset/versioned/typed/virt/v1alpha1/lock.go b/pkg/generated/clientset/versioned/typed/virt/v1alpha1/lock.go new file mode 100644 index 0000000..82fdb65 --- /dev/null +++ b/pkg/generated/clientset/versioned/typed/virt/v1alpha1/lock.go @@ -0,0 +1,179 @@ +// Code generated by client-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + "context" + "time" + + v1alpha1 "github.com/smartxworks/virtink/pkg/apis/virt/v1alpha1" + scheme "github.com/smartxworks/virtink/pkg/generated/clientset/versioned/scheme" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + types "k8s.io/apimachinery/pkg/types" + watch "k8s.io/apimachinery/pkg/watch" + rest "k8s.io/client-go/rest" +) + +// LocksGetter has a method to return a LockInterface. +// A group's client should implement this interface. +type LocksGetter interface { + Locks(namespace string) LockInterface +} + +// LockInterface has methods to work with Lock resources. 
+type LockInterface interface { + Create(ctx context.Context, lock *v1alpha1.Lock, opts v1.CreateOptions) (*v1alpha1.Lock, error) + Update(ctx context.Context, lock *v1alpha1.Lock, opts v1.UpdateOptions) (*v1alpha1.Lock, error) + UpdateStatus(ctx context.Context, lock *v1alpha1.Lock, opts v1.UpdateOptions) (*v1alpha1.Lock, error) + Delete(ctx context.Context, name string, opts v1.DeleteOptions) error + DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error + Get(ctx context.Context, name string, opts v1.GetOptions) (*v1alpha1.Lock, error) + List(ctx context.Context, opts v1.ListOptions) (*v1alpha1.LockList, error) + Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error) + Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *v1alpha1.Lock, err error) + LockExpansion +} + +// locks implements LockInterface +type locks struct { + client rest.Interface + ns string +} + +// newLocks returns a Locks +func newLocks(c *VirtV1alpha1Client, namespace string) *locks { + return &locks{ + client: c.RESTClient(), + ns: namespace, + } +} + +// Get takes name of the lock, and returns the corresponding lock object, and an error if there is any. +func (c *locks) Get(ctx context.Context, name string, options v1.GetOptions) (result *v1alpha1.Lock, err error) { + result = &v1alpha1.Lock{} + err = c.client.Get(). + Namespace(c.ns). + Resource("locks"). + Name(name). + VersionedParams(&options, scheme.ParameterCodec). + Do(ctx). + Into(result) + return +} + +// List takes label and field selectors, and returns the list of Locks that match those selectors. +func (c *locks) List(ctx context.Context, opts v1.ListOptions) (result *v1alpha1.LockList, err error) { + var timeout time.Duration + if opts.TimeoutSeconds != nil { + timeout = time.Duration(*opts.TimeoutSeconds) * time.Second + } + result = &v1alpha1.LockList{} + err = c.client.Get(). + Namespace(c.ns). + Resource("locks"). + VersionedParams(&opts, scheme.ParameterCodec). + Timeout(timeout). + Do(ctx). + Into(result) + return +} + +// Watch returns a watch.Interface that watches the requested locks. +func (c *locks) Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error) { + var timeout time.Duration + if opts.TimeoutSeconds != nil { + timeout = time.Duration(*opts.TimeoutSeconds) * time.Second + } + opts.Watch = true + return c.client.Get(). + Namespace(c.ns). + Resource("locks"). + VersionedParams(&opts, scheme.ParameterCodec). + Timeout(timeout). + Watch(ctx) +} + +// Create takes the representation of a lock and creates it. Returns the server's representation of the lock, and an error, if there is any. +func (c *locks) Create(ctx context.Context, lock *v1alpha1.Lock, opts v1.CreateOptions) (result *v1alpha1.Lock, err error) { + result = &v1alpha1.Lock{} + err = c.client.Post(). + Namespace(c.ns). + Resource("locks"). + VersionedParams(&opts, scheme.ParameterCodec). + Body(lock). + Do(ctx). + Into(result) + return +} + +// Update takes the representation of a lock and updates it. Returns the server's representation of the lock, and an error, if there is any. +func (c *locks) Update(ctx context.Context, lock *v1alpha1.Lock, opts v1.UpdateOptions) (result *v1alpha1.Lock, err error) { + result = &v1alpha1.Lock{} + err = c.client.Put(). + Namespace(c.ns). + Resource("locks"). + Name(lock.Name). + VersionedParams(&opts, scheme.ParameterCodec). + Body(lock). + Do(ctx). 
+ Into(result) + return +} + +// UpdateStatus was generated because the type contains a Status member. +// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). +func (c *locks) UpdateStatus(ctx context.Context, lock *v1alpha1.Lock, opts v1.UpdateOptions) (result *v1alpha1.Lock, err error) { + result = &v1alpha1.Lock{} + err = c.client.Put(). + Namespace(c.ns). + Resource("locks"). + Name(lock.Name). + SubResource("status"). + VersionedParams(&opts, scheme.ParameterCodec). + Body(lock). + Do(ctx). + Into(result) + return +} + +// Delete takes name of the lock and deletes it. Returns an error if one occurs. +func (c *locks) Delete(ctx context.Context, name string, opts v1.DeleteOptions) error { + return c.client.Delete(). + Namespace(c.ns). + Resource("locks"). + Name(name). + Body(&opts). + Do(ctx). + Error() +} + +// DeleteCollection deletes a collection of objects. +func (c *locks) DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error { + var timeout time.Duration + if listOpts.TimeoutSeconds != nil { + timeout = time.Duration(*listOpts.TimeoutSeconds) * time.Second + } + return c.client.Delete(). + Namespace(c.ns). + Resource("locks"). + VersionedParams(&listOpts, scheme.ParameterCodec). + Timeout(timeout). + Body(&opts). + Do(ctx). + Error() +} + +// Patch applies the patch and returns the patched lock. +func (c *locks) Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *v1alpha1.Lock, err error) { + result = &v1alpha1.Lock{} + err = c.client.Patch(pt). + Namespace(c.ns). + Resource("locks"). + Name(name). + SubResource(subresources...). + VersionedParams(&opts, scheme.ParameterCodec). + Body(data). + Do(ctx). + Into(result) + return +} diff --git a/pkg/generated/clientset/versioned/typed/virt/v1alpha1/lockspace.go b/pkg/generated/clientset/versioned/typed/virt/v1alpha1/lockspace.go new file mode 100644 index 0000000..9f0530d --- /dev/null +++ b/pkg/generated/clientset/versioned/typed/virt/v1alpha1/lockspace.go @@ -0,0 +1,168 @@ +// Code generated by client-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + "context" + "time" + + v1alpha1 "github.com/smartxworks/virtink/pkg/apis/virt/v1alpha1" + scheme "github.com/smartxworks/virtink/pkg/generated/clientset/versioned/scheme" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + types "k8s.io/apimachinery/pkg/types" + watch "k8s.io/apimachinery/pkg/watch" + rest "k8s.io/client-go/rest" +) + +// LockspacesGetter has a method to return a LockspaceInterface. +// A group's client should implement this interface. +type LockspacesGetter interface { + Lockspaces() LockspaceInterface +} + +// LockspaceInterface has methods to work with Lockspace resources. 
+type LockspaceInterface interface { + Create(ctx context.Context, lockspace *v1alpha1.Lockspace, opts v1.CreateOptions) (*v1alpha1.Lockspace, error) + Update(ctx context.Context, lockspace *v1alpha1.Lockspace, opts v1.UpdateOptions) (*v1alpha1.Lockspace, error) + UpdateStatus(ctx context.Context, lockspace *v1alpha1.Lockspace, opts v1.UpdateOptions) (*v1alpha1.Lockspace, error) + Delete(ctx context.Context, name string, opts v1.DeleteOptions) error + DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error + Get(ctx context.Context, name string, opts v1.GetOptions) (*v1alpha1.Lockspace, error) + List(ctx context.Context, opts v1.ListOptions) (*v1alpha1.LockspaceList, error) + Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error) + Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *v1alpha1.Lockspace, err error) + LockspaceExpansion +} + +// lockspaces implements LockspaceInterface +type lockspaces struct { + client rest.Interface +} + +// newLockspaces returns a Lockspaces +func newLockspaces(c *VirtV1alpha1Client) *lockspaces { + return &lockspaces{ + client: c.RESTClient(), + } +} + +// Get takes name of the lockspace, and returns the corresponding lockspace object, and an error if there is any. +func (c *lockspaces) Get(ctx context.Context, name string, options v1.GetOptions) (result *v1alpha1.Lockspace, err error) { + result = &v1alpha1.Lockspace{} + err = c.client.Get(). + Resource("lockspaces"). + Name(name). + VersionedParams(&options, scheme.ParameterCodec). + Do(ctx). + Into(result) + return +} + +// List takes label and field selectors, and returns the list of Lockspaces that match those selectors. +func (c *lockspaces) List(ctx context.Context, opts v1.ListOptions) (result *v1alpha1.LockspaceList, err error) { + var timeout time.Duration + if opts.TimeoutSeconds != nil { + timeout = time.Duration(*opts.TimeoutSeconds) * time.Second + } + result = &v1alpha1.LockspaceList{} + err = c.client.Get(). + Resource("lockspaces"). + VersionedParams(&opts, scheme.ParameterCodec). + Timeout(timeout). + Do(ctx). + Into(result) + return +} + +// Watch returns a watch.Interface that watches the requested lockspaces. +func (c *lockspaces) Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error) { + var timeout time.Duration + if opts.TimeoutSeconds != nil { + timeout = time.Duration(*opts.TimeoutSeconds) * time.Second + } + opts.Watch = true + return c.client.Get(). + Resource("lockspaces"). + VersionedParams(&opts, scheme.ParameterCodec). + Timeout(timeout). + Watch(ctx) +} + +// Create takes the representation of a lockspace and creates it. Returns the server's representation of the lockspace, and an error, if there is any. +func (c *lockspaces) Create(ctx context.Context, lockspace *v1alpha1.Lockspace, opts v1.CreateOptions) (result *v1alpha1.Lockspace, err error) { + result = &v1alpha1.Lockspace{} + err = c.client.Post(). + Resource("lockspaces"). + VersionedParams(&opts, scheme.ParameterCodec). + Body(lockspace). + Do(ctx). + Into(result) + return +} + +// Update takes the representation of a lockspace and updates it. Returns the server's representation of the lockspace, and an error, if there is any. +func (c *lockspaces) Update(ctx context.Context, lockspace *v1alpha1.Lockspace, opts v1.UpdateOptions) (result *v1alpha1.Lockspace, err error) { + result = &v1alpha1.Lockspace{} + err = c.client.Put(). + Resource("lockspaces"). 
+ Name(lockspace.Name). + VersionedParams(&opts, scheme.ParameterCodec). + Body(lockspace). + Do(ctx). + Into(result) + return +} + +// UpdateStatus was generated because the type contains a Status member. +// Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). +func (c *lockspaces) UpdateStatus(ctx context.Context, lockspace *v1alpha1.Lockspace, opts v1.UpdateOptions) (result *v1alpha1.Lockspace, err error) { + result = &v1alpha1.Lockspace{} + err = c.client.Put(). + Resource("lockspaces"). + Name(lockspace.Name). + SubResource("status"). + VersionedParams(&opts, scheme.ParameterCodec). + Body(lockspace). + Do(ctx). + Into(result) + return +} + +// Delete takes name of the lockspace and deletes it. Returns an error if one occurs. +func (c *lockspaces) Delete(ctx context.Context, name string, opts v1.DeleteOptions) error { + return c.client.Delete(). + Resource("lockspaces"). + Name(name). + Body(&opts). + Do(ctx). + Error() +} + +// DeleteCollection deletes a collection of objects. +func (c *lockspaces) DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error { + var timeout time.Duration + if listOpts.TimeoutSeconds != nil { + timeout = time.Duration(*listOpts.TimeoutSeconds) * time.Second + } + return c.client.Delete(). + Resource("lockspaces"). + VersionedParams(&listOpts, scheme.ParameterCodec). + Timeout(timeout). + Body(&opts). + Do(ctx). + Error() +} + +// Patch applies the patch and returns the patched lockspace. +func (c *lockspaces) Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *v1alpha1.Lockspace, err error) { + result = &v1alpha1.Lockspace{} + err = c.client.Patch(pt). + Resource("lockspaces"). + Name(name). + SubResource(subresources...). + VersionedParams(&opts, scheme.ParameterCodec). + Body(data). + Do(ctx). 
+ Into(result) + return +} diff --git a/pkg/generated/clientset/versioned/typed/virt/v1alpha1/virt_client.go b/pkg/generated/clientset/versioned/typed/virt/v1alpha1/virt_client.go index 7cccd32..0660ec6 100644 --- a/pkg/generated/clientset/versioned/typed/virt/v1alpha1/virt_client.go +++ b/pkg/generated/clientset/versioned/typed/virt/v1alpha1/virt_client.go @@ -12,6 +12,8 @@ import ( type VirtV1alpha1Interface interface { RESTClient() rest.Interface + LocksGetter + LockspacesGetter VirtualMachinesGetter VirtualMachineMigrationsGetter } @@ -21,6 +23,14 @@ type VirtV1alpha1Client struct { restClient rest.Interface } +func (c *VirtV1alpha1Client) Locks(namespace string) LockInterface { + return newLocks(c, namespace) +} + +func (c *VirtV1alpha1Client) Lockspaces() LockspaceInterface { + return newLockspaces(c) +} + func (c *VirtV1alpha1Client) VirtualMachines(namespace string) VirtualMachineInterface { return newVirtualMachines(c, namespace) } diff --git a/pkg/generated/informers/externalversions/generic.go b/pkg/generated/informers/externalversions/generic.go index fdc2448..92680b8 100644 --- a/pkg/generated/informers/externalversions/generic.go +++ b/pkg/generated/informers/externalversions/generic.go @@ -37,6 +37,10 @@ func (f *genericInformer) Lister() cache.GenericLister { func (f *sharedInformerFactory) ForResource(resource schema.GroupVersionResource) (GenericInformer, error) { switch resource { // Group=virt.virtink.smartx.com, Version=v1alpha1 + case v1alpha1.SchemeGroupVersion.WithResource("locks"): + return &genericInformer{resource: resource.GroupResource(), informer: f.Virt().V1alpha1().Locks().Informer()}, nil + case v1alpha1.SchemeGroupVersion.WithResource("lockspaces"): + return &genericInformer{resource: resource.GroupResource(), informer: f.Virt().V1alpha1().Lockspaces().Informer()}, nil case v1alpha1.SchemeGroupVersion.WithResource("virtualmachines"): return &genericInformer{resource: resource.GroupResource(), informer: f.Virt().V1alpha1().VirtualMachines().Informer()}, nil case v1alpha1.SchemeGroupVersion.WithResource("virtualmachinemigrations"): diff --git a/pkg/generated/informers/externalversions/virt/v1alpha1/interface.go b/pkg/generated/informers/externalversions/virt/v1alpha1/interface.go index 1bd9b5d..4885521 100644 --- a/pkg/generated/informers/externalversions/virt/v1alpha1/interface.go +++ b/pkg/generated/informers/externalversions/virt/v1alpha1/interface.go @@ -8,6 +8,10 @@ import ( // Interface provides access to all the informers in this group version. type Interface interface { + // Locks returns a LockInformer. + Locks() LockInformer + // Lockspaces returns a LockspaceInformer. + Lockspaces() LockspaceInformer // VirtualMachines returns a VirtualMachineInformer. VirtualMachines() VirtualMachineInformer // VirtualMachineMigrations returns a VirtualMachineMigrationInformer. @@ -25,6 +29,16 @@ func New(f internalinterfaces.SharedInformerFactory, namespace string, tweakList return &version{factory: f, namespace: namespace, tweakListOptions: tweakListOptions} } +// Locks returns a LockInformer. +func (v *version) Locks() LockInformer { + return &lockInformer{factory: v.factory, namespace: v.namespace, tweakListOptions: v.tweakListOptions} +} + +// Lockspaces returns a LockspaceInformer. +func (v *version) Lockspaces() LockspaceInformer { + return &lockspaceInformer{factory: v.factory, tweakListOptions: v.tweakListOptions} +} + // VirtualMachines returns a VirtualMachineInformer. 
func (v *version) VirtualMachines() VirtualMachineInformer { return &virtualMachineInformer{factory: v.factory, namespace: v.namespace, tweakListOptions: v.tweakListOptions} diff --git a/pkg/generated/informers/externalversions/virt/v1alpha1/lock.go b/pkg/generated/informers/externalversions/virt/v1alpha1/lock.go new file mode 100644 index 0000000..61b97cc --- /dev/null +++ b/pkg/generated/informers/externalversions/virt/v1alpha1/lock.go @@ -0,0 +1,74 @@ +// Code generated by informer-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + "context" + time "time" + + virtv1alpha1 "github.com/smartxworks/virtink/pkg/apis/virt/v1alpha1" + versioned "github.com/smartxworks/virtink/pkg/generated/clientset/versioned" + internalinterfaces "github.com/smartxworks/virtink/pkg/generated/informers/externalversions/internalinterfaces" + v1alpha1 "github.com/smartxworks/virtink/pkg/generated/listers/virt/v1alpha1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" + watch "k8s.io/apimachinery/pkg/watch" + cache "k8s.io/client-go/tools/cache" +) + +// LockInformer provides access to a shared informer and lister for +// Locks. +type LockInformer interface { + Informer() cache.SharedIndexInformer + Lister() v1alpha1.LockLister +} + +type lockInformer struct { + factory internalinterfaces.SharedInformerFactory + tweakListOptions internalinterfaces.TweakListOptionsFunc + namespace string +} + +// NewLockInformer constructs a new informer for Lock type. +// Always prefer using an informer factory to get a shared informer instead of getting an independent +// one. This reduces memory footprint and number of connections to the server. +func NewLockInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers) cache.SharedIndexInformer { + return NewFilteredLockInformer(client, namespace, resyncPeriod, indexers, nil) +} + +// NewFilteredLockInformer constructs a new informer for Lock type. +// Always prefer using an informer factory to get a shared informer instead of getting an independent +// one. This reduces memory footprint and number of connections to the server. 
+func NewFilteredLockInformer(client versioned.Interface, namespace string, resyncPeriod time.Duration, indexers cache.Indexers, tweakListOptions internalinterfaces.TweakListOptionsFunc) cache.SharedIndexInformer { + return cache.NewSharedIndexInformer( + &cache.ListWatch{ + ListFunc: func(options v1.ListOptions) (runtime.Object, error) { + if tweakListOptions != nil { + tweakListOptions(&options) + } + return client.VirtV1alpha1().Locks(namespace).List(context.TODO(), options) + }, + WatchFunc: func(options v1.ListOptions) (watch.Interface, error) { + if tweakListOptions != nil { + tweakListOptions(&options) + } + return client.VirtV1alpha1().Locks(namespace).Watch(context.TODO(), options) + }, + }, + &virtv1alpha1.Lock{}, + resyncPeriod, + indexers, + ) +} + +func (f *lockInformer) defaultInformer(client versioned.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer { + return NewFilteredLockInformer(client, f.namespace, resyncPeriod, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}, f.tweakListOptions) +} + +func (f *lockInformer) Informer() cache.SharedIndexInformer { + return f.factory.InformerFor(&virtv1alpha1.Lock{}, f.defaultInformer) +} + +func (f *lockInformer) Lister() v1alpha1.LockLister { + return v1alpha1.NewLockLister(f.Informer().GetIndexer()) +} diff --git a/pkg/generated/informers/externalversions/virt/v1alpha1/lockspace.go b/pkg/generated/informers/externalversions/virt/v1alpha1/lockspace.go new file mode 100644 index 0000000..6a0cd1d --- /dev/null +++ b/pkg/generated/informers/externalversions/virt/v1alpha1/lockspace.go @@ -0,0 +1,73 @@ +// Code generated by informer-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + "context" + time "time" + + virtv1alpha1 "github.com/smartxworks/virtink/pkg/apis/virt/v1alpha1" + versioned "github.com/smartxworks/virtink/pkg/generated/clientset/versioned" + internalinterfaces "github.com/smartxworks/virtink/pkg/generated/informers/externalversions/internalinterfaces" + v1alpha1 "github.com/smartxworks/virtink/pkg/generated/listers/virt/v1alpha1" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" + watch "k8s.io/apimachinery/pkg/watch" + cache "k8s.io/client-go/tools/cache" +) + +// LockspaceInformer provides access to a shared informer and lister for +// Lockspaces. +type LockspaceInformer interface { + Informer() cache.SharedIndexInformer + Lister() v1alpha1.LockspaceLister +} + +type lockspaceInformer struct { + factory internalinterfaces.SharedInformerFactory + tweakListOptions internalinterfaces.TweakListOptionsFunc +} + +// NewLockspaceInformer constructs a new informer for Lockspace type. +// Always prefer using an informer factory to get a shared informer instead of getting an independent +// one. This reduces memory footprint and number of connections to the server. +func NewLockspaceInformer(client versioned.Interface, resyncPeriod time.Duration, indexers cache.Indexers) cache.SharedIndexInformer { + return NewFilteredLockspaceInformer(client, resyncPeriod, indexers, nil) +} + +// NewFilteredLockspaceInformer constructs a new informer for Lockspace type. +// Always prefer using an informer factory to get a shared informer instead of getting an independent +// one. This reduces memory footprint and number of connections to the server. 
+func NewFilteredLockspaceInformer(client versioned.Interface, resyncPeriod time.Duration, indexers cache.Indexers, tweakListOptions internalinterfaces.TweakListOptionsFunc) cache.SharedIndexInformer { + return cache.NewSharedIndexInformer( + &cache.ListWatch{ + ListFunc: func(options v1.ListOptions) (runtime.Object, error) { + if tweakListOptions != nil { + tweakListOptions(&options) + } + return client.VirtV1alpha1().Lockspaces().List(context.TODO(), options) + }, + WatchFunc: func(options v1.ListOptions) (watch.Interface, error) { + if tweakListOptions != nil { + tweakListOptions(&options) + } + return client.VirtV1alpha1().Lockspaces().Watch(context.TODO(), options) + }, + }, + &virtv1alpha1.Lockspace{}, + resyncPeriod, + indexers, + ) +} + +func (f *lockspaceInformer) defaultInformer(client versioned.Interface, resyncPeriod time.Duration) cache.SharedIndexInformer { + return NewFilteredLockspaceInformer(client, resyncPeriod, cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}, f.tweakListOptions) +} + +func (f *lockspaceInformer) Informer() cache.SharedIndexInformer { + return f.factory.InformerFor(&virtv1alpha1.Lockspace{}, f.defaultInformer) +} + +func (f *lockspaceInformer) Lister() v1alpha1.LockspaceLister { + return v1alpha1.NewLockspaceLister(f.Informer().GetIndexer()) +} diff --git a/pkg/generated/listers/virt/v1alpha1/expansion_generated.go b/pkg/generated/listers/virt/v1alpha1/expansion_generated.go index 3139e62..bc70955 100644 --- a/pkg/generated/listers/virt/v1alpha1/expansion_generated.go +++ b/pkg/generated/listers/virt/v1alpha1/expansion_generated.go @@ -2,6 +2,18 @@ package v1alpha1 +// LockListerExpansion allows custom methods to be added to +// LockLister. +type LockListerExpansion interface{} + +// LockNamespaceListerExpansion allows custom methods to be added to +// LockNamespaceLister. +type LockNamespaceListerExpansion interface{} + +// LockspaceListerExpansion allows custom methods to be added to +// LockspaceLister. +type LockspaceListerExpansion interface{} + // VirtualMachineListerExpansion allows custom methods to be added to // VirtualMachineLister. type VirtualMachineListerExpansion interface{} diff --git a/pkg/generated/listers/virt/v1alpha1/lock.go b/pkg/generated/listers/virt/v1alpha1/lock.go new file mode 100644 index 0000000..b8f524e --- /dev/null +++ b/pkg/generated/listers/virt/v1alpha1/lock.go @@ -0,0 +1,83 @@ +// Code generated by lister-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + v1alpha1 "github.com/smartxworks/virtink/pkg/apis/virt/v1alpha1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/client-go/tools/cache" +) + +// LockLister helps list Locks. +// All objects returned here must be treated as read-only. +type LockLister interface { + // List lists all Locks in the indexer. + // Objects returned here must be treated as read-only. + List(selector labels.Selector) (ret []*v1alpha1.Lock, err error) + // Locks returns an object that can list and get Locks. + Locks(namespace string) LockNamespaceLister + LockListerExpansion +} + +// lockLister implements the LockLister interface. +type lockLister struct { + indexer cache.Indexer +} + +// NewLockLister returns a new LockLister. +func NewLockLister(indexer cache.Indexer) LockLister { + return &lockLister{indexer: indexer} +} + +// List lists all Locks in the indexer. 
+func (s *lockLister) List(selector labels.Selector) (ret []*v1alpha1.Lock, err error) { + err = cache.ListAll(s.indexer, selector, func(m interface{}) { + ret = append(ret, m.(*v1alpha1.Lock)) + }) + return ret, err +} + +// Locks returns an object that can list and get Locks. +func (s *lockLister) Locks(namespace string) LockNamespaceLister { + return lockNamespaceLister{indexer: s.indexer, namespace: namespace} +} + +// LockNamespaceLister helps list and get Locks. +// All objects returned here must be treated as read-only. +type LockNamespaceLister interface { + // List lists all Locks in the indexer for a given namespace. + // Objects returned here must be treated as read-only. + List(selector labels.Selector) (ret []*v1alpha1.Lock, err error) + // Get retrieves the Lock from the indexer for a given namespace and name. + // Objects returned here must be treated as read-only. + Get(name string) (*v1alpha1.Lock, error) + LockNamespaceListerExpansion +} + +// lockNamespaceLister implements the LockNamespaceLister +// interface. +type lockNamespaceLister struct { + indexer cache.Indexer + namespace string +} + +// List lists all Locks in the indexer for a given namespace. +func (s lockNamespaceLister) List(selector labels.Selector) (ret []*v1alpha1.Lock, err error) { + err = cache.ListAllByNamespace(s.indexer, s.namespace, selector, func(m interface{}) { + ret = append(ret, m.(*v1alpha1.Lock)) + }) + return ret, err +} + +// Get retrieves the Lock from the indexer for a given namespace and name. +func (s lockNamespaceLister) Get(name string) (*v1alpha1.Lock, error) { + obj, exists, err := s.indexer.GetByKey(s.namespace + "/" + name) + if err != nil { + return nil, err + } + if !exists { + return nil, errors.NewNotFound(v1alpha1.Resource("lock"), name) + } + return obj.(*v1alpha1.Lock), nil +} diff --git a/pkg/generated/listers/virt/v1alpha1/lockspace.go b/pkg/generated/listers/virt/v1alpha1/lockspace.go new file mode 100644 index 0000000..c7d6b8e --- /dev/null +++ b/pkg/generated/listers/virt/v1alpha1/lockspace.go @@ -0,0 +1,52 @@ +// Code generated by lister-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + v1alpha1 "github.com/smartxworks/virtink/pkg/apis/virt/v1alpha1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/client-go/tools/cache" +) + +// LockspaceLister helps list Lockspaces. +// All objects returned here must be treated as read-only. +type LockspaceLister interface { + // List lists all Lockspaces in the indexer. + // Objects returned here must be treated as read-only. + List(selector labels.Selector) (ret []*v1alpha1.Lockspace, err error) + // Get retrieves the Lockspace from the index for a given name. + // Objects returned here must be treated as read-only. + Get(name string) (*v1alpha1.Lockspace, error) + LockspaceListerExpansion +} + +// lockspaceLister implements the LockspaceLister interface. +type lockspaceLister struct { + indexer cache.Indexer +} + +// NewLockspaceLister returns a new LockspaceLister. +func NewLockspaceLister(indexer cache.Indexer) LockspaceLister { + return &lockspaceLister{indexer: indexer} +} + +// List lists all Lockspaces in the indexer. +func (s *lockspaceLister) List(selector labels.Selector) (ret []*v1alpha1.Lockspace, err error) { + err = cache.ListAll(s.indexer, selector, func(m interface{}) { + ret = append(ret, m.(*v1alpha1.Lockspace)) + }) + return ret, err +} + +// Get retrieves the Lockspace from the index for a given name. 
+func (s *lockspaceLister) Get(name string) (*v1alpha1.Lockspace, error) {
+	obj, exists, err := s.indexer.GetByKey(name)
+	if err != nil {
+		return nil, err
+	}
+	if !exists {
+		return nil, errors.NewNotFound(v1alpha1.Resource("lockspace"), name)
+	}
+	return obj.(*v1alpha1.Lockspace), nil
+}
diff --git a/pkg/sanlock/sanlock.go b/pkg/sanlock/sanlock.go
new file mode 100644
index 0000000..33b0915
--- /dev/null
+++ b/pkg/sanlock/sanlock.go
@@ -0,0 +1,297 @@
+package sanlock
+
+import (
+	"fmt"
+	"strings"
+	"unsafe"
+)
+
+/*
+#cgo LDFLAGS: -lsanlock -lsanlock_client
+#include <stdint.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <libaio.h>
+#include "sanlock.h"
+#include "sanlock_direct.h"
+#include "sanlock_admin.h"
+#include "sanlock_resource.h"
+
+// aicb and task mirror sanlock's internal definitions so that
+// direct_rindex_format, which is not exposed by the public headers, can be
+// declared and called below.
+struct aicb {
+	int used;
+	char *buf;
+	struct iocb iocb;
+};
+
+#define NAME_ID_SIZE 48
+
+struct task {
+	char name[NAME_ID_SIZE+1]; // for log messages
+
+	unsigned int io_count;     // stats
+	unsigned int to_count;     // stats
+
+	int use_aio;
+	int cb_size;
+	char *iobuf;
+	io_context_t aio_ctx;
+	struct aicb *read_iobuf_timeout_aicb;
+	struct aicb *callbacks;
+};
+
+int direct_rindex_format(struct task *task, struct sanlk_rindex *ri);
+*/
+import "C"
+
+const (
+	OffsetLockspace = 0
+	OffsetRIndex    = 1048576
+
+	WatchdogFireTimeoutDefaultSeconds = 60
+	IOTimeoutDefaultSeconds           = 10
+	IDRenewalDefaultSeconds           = 2 * IOTimeoutDefaultSeconds
+	IDRenewalFailDefaultSeconds       = 4 * IDRenewalDefaultSeconds
+	HostDeadDefaultSeconds            = IDRenewalFailDefaultSeconds + WatchdogFireTimeoutDefaultSeconds
+)
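+
+// Spelling out the arithmetic above with the default 10s I/O timeout: a host
+// renews its delta lease every 20s, the lease is considered failed after 80s
+// without a successful renewal, and only after a further 60s watchdog-fire
+// window (80s + 60s = 140s since the last renewal) may other hosts treat the
+// owner as dead and safely take over its resources.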
+
+type ErrorNumber int
+
+const (
+	// TODO: more errors
+	EPERM       = ErrorNumber(C.EPERM)
+	ENOENT      = ErrorNumber(C.ENOENT)
+	EINVAL      = ErrorNumber(C.EINVAL)
+	EEXIST      = ErrorNumber(C.EEXIST)
+	EINPROGRESS = ErrorNumber(C.EINPROGRESS)
+	EAGAIN      = ErrorNumber(C.EAGAIN)
+)
+
+func (e ErrorNumber) Error() string {
+	var err string
+	switch e {
+	case EPERM:
+		err = "EPERM"
+	case ENOENT:
+		err = "ENOENT"
+	case EINVAL:
+		err = "EINVAL"
+	case EEXIST:
+		err = "EEXIST"
+	case EINPROGRESS:
+		err = "EINPROGRESS"
+	case EAGAIN:
+		err = "EAGAIN"
+	}
+	return fmt.Sprintf("errCode(%d): %s", e, err)
+}
+
+type HostStatusFlag uint32
+
+const (
+	HostStatusUnknown = HostStatusFlag(C.SANLK_HOST_UNKNOWN)
+	HostStatusFree    = HostStatusFlag(C.SANLK_HOST_FREE)
+	HostStatusLive    = HostStatusFlag(C.SANLK_HOST_LIVE)
+	HostStatusFail    = HostStatusFlag(C.SANLK_HOST_FAIL)
+	HostStatusDead    = HostStatusFlag(C.SANLK_HOST_DEAD)
+)
+
+func WriteLockspace(lockspace string, path string) error {
+	return WriteLockspaceWithIOTimeout(lockspace, path, 0)
+}
+
+func WriteLockspaceWithIOTimeout(lockspace string, path string, ioTimeout uint32) error {
+	ls := buildSanlockLockspace(lockspace, path, 0)
+
+	if rv := C.sanlock_direct_write_lockspace(&ls, C.int(2000), 0, C.uint(ioTimeout)); rv < 0 {
+		return ErrorNumber(-rv)
+	}
+	return nil
+}
+
+func FormatRIndex(lockspace string, path string) error {
+	rIndex := buildSanlockRIndex(lockspace, path)
+
+	if rv := C.direct_rindex_format(&C.struct_task{}, &rIndex); rv < 0 {
+		return ErrorNumber(-rv)
+	}
+	return nil
+}
+
+func CreateResource(lockspace string, path string, resource string) (uint64, error) {
+	rIndex := buildSanlockRIndex(lockspace, path)
+	rEntry := buildSanlockREntry(resource)
+
+	if rv := C.sanlock_create_resource(&rIndex, 0, &rEntry, 0, 0); rv < 0 {
+		return 0, ErrorNumber(-rv)
+	}
+	return uint64(rEntry.offset), nil
+}
+
+func DeleteResource(lockspace string, path string, resource string) error {
+	rIndex := buildSanlockRIndex(lockspace, path)
+	rEntry := buildSanlockREntry(resource)
+
+	if rv := C.sanlock_delete_resource(&rIndex, 0, &rEntry); rv < 0 {
+		return ErrorNumber(-rv)
+	}
+	return nil
+}
+
+func SearchResource(lockspace string, path string, resource string) (uint64, error) {
+	rIndex := buildSanlockRIndex(lockspace, path)
+	rEntry := buildSanlockREntry(resource)
+
+	if rv := C.sanlock_lookup_rindex(&rIndex, 0, &rEntry); rv < 0 {
+		return 0, ErrorNumber(-rv)
+	}
+	return uint64(rEntry.offset), nil
+}
+
+// AcquireDeltaLease returns:
+//   nil: the delta lease was acquired successfully
+//   EEXIST: the lockspace already exists
+//   EINPROGRESS: the lockspace is already in the process of being added (the in-progress add may or may not succeed)
+//   EAGAIN: the lockspace is being removed
+func AcquireDeltaLease(lockspace string, path string, id uint64) error {
+	if id < 1 || id > 2000 {
+		return fmt.Errorf("invalid host ID, allowed values are 1~2000")
+	}
+
+	ls := buildSanlockLockspace(lockspace, path, id)
+
+	if rv := C.sanlock_add_lockspace(&ls, 0); rv < 0 {
+		return ErrorNumber(-rv)
+	}
+	return nil
+}
+
+// ReleaseDeltaLease returns:
+//   EINPROGRESS: the lockspace is already in the process of being removed
+//   ENOENT: the lockspace was not found
+//
+// The sanlock daemon will kill any pids using the lockspace when the
+// lockspace is removed.
+func ReleaseDeltaLease(lockspace string, path string, id uint64) error {
+	if id < 1 || id > 2000 {
+		return fmt.Errorf("invalid host ID, allowed values are 1~2000")
+	}
+
+	ls := buildSanlockLockspace(lockspace, path, id)
+
+	if rv := C.sanlock_rem_lockspace(&ls, 0); rv < 0 {
+		return ErrorNumber(-rv)
+	}
+	return nil
+}
+
+func HasDeltaLease(lockspace string, path string, id uint64) bool {
+	if id < 1 || id > 2000 {
+		return false
+	}
+
+	ls := buildSanlockLockspace(lockspace, path, id)
+
+	return C.sanlock_inq_lockspace(&ls, 0) == 0
+}
+
+func GetHostStatus(lockspace string, id uint64) (HostStatusFlag, error) {
+	if id < 1 || id > 2000 {
+		return 0, fmt.Errorf("invalid host ID, allowed values are 1~2000")
+	}
+
+	var host *C.struct_sanlk_host
+	var num C.int
+	lockspaceName := C.CString(lockspace)
+	defer C.free(unsafe.Pointer(lockspaceName))
+	if rv := C.sanlock_get_hosts(lockspaceName, C.ulong(id), &host, &num, 0); rv < 0 {
+		return 0, ErrorNumber(-rv)
+	}
+	// sanlock allocates the host array; free it once the flags have been read.
+	defer C.free(unsafe.Pointer(host))
+	if num < 1 || host == nil {
+		return 0, fmt.Errorf("host %d not found in lockspace %q", id, lockspace)
+	}
+
+	return HostStatusFlag(host.flags & C.SANLK_HOST_MASK), nil
+}
+
+func AcquireResourceLease(resources []string, owner string) error {
+	if len(resources) > C.SANLK_MAX_RESOURCES {
+		return fmt.Errorf("number of requested resources exceeds max %d", C.SANLK_MAX_RESOURCES)
+	}
+
+	sock := C.sanlock_register()
+	if sock < 0 {
+		return fmt.Errorf("failed to register process: %s", ErrorNumber(-sock))
+	}
+
+	if rv := C.sanlock_restrict(sock, C.SANLK_RESTRICT_SIGTERM); rv < 0 {
+		return fmt.Errorf("restrict SIGTERM signal: %s", ErrorNumber(-rv))
+	}
+
+	var res_args **C.struct_sanlk_resource
+	var res_count C.int
+	res := C.CString(strings.Join(resources, " "))
+	defer C.free(unsafe.Pointer(res))
+	if rv := C.sanlock_state_to_args(res, &res_count, &res_args); rv < 0 {
+		return fmt.Errorf("convert sanlock resources: %s", ErrorNumber(-rv))
+	}
+	opt := C.struct_sanlk_options{
+		owner_name: buildSanlockName(owner),
+	}
+
+	if rv := C.sanlock_acquire(sock, -1, 0, res_count, res_args, &opt); rv < 0 {
+		return fmt.Errorf("acquire resource lease: %s", ErrorNumber(-rv))
+	}
+	return nil
+}
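+
+// The entries passed to AcquireResourceLease are sanlock resource state
+// strings as parsed by sanlock_state_to_args, roughly of the form
+// "<lockspace>:<resource>:<path>:<offset>", e.g.
+// "demo-ls:ubuntu:/dev/shared/leases:2097152" (illustrative values only).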
+
+func buildSanlockLockspace(lockspace string, path string, id uint64) C.struct_sanlk_lockspace {
+	diskPath := buildSanlockPath(path)
+	lockspaceName := buildSanlockName(lockspace)
+
+	disk := C.struct_sanlk_disk{
+		path:   diskPath,
+		offset: OffsetLockspace,
+	}
+	return C.struct_sanlk_lockspace{
+		name:         lockspaceName,
+		host_id:      C.ulong(id),
+		host_id_disk: disk,
+		flags:        C.SANLK_LSF_ALIGN1M | C.SANLK_LSF_SECTOR512,
+	}
+}
+
+func buildSanlockPath(path string) [C.SANLK_PATH_LEN]C.char {
+	cPath := [C.SANLK_PATH_LEN]C.char{}
+	// Copy at most SANLK_PATH_LEN-1 bytes so an over-long path is truncated
+	// instead of indexing past the array, keeping the trailing NUL intact.
+	for i := 0; i < len(path) && i < C.SANLK_PATH_LEN-1; i++ {
+		cPath[i] = C.char(path[i])
+	}
+	return cPath
+}
+
+func buildSanlockName(name string) [C.SANLK_NAME_LEN]C.char {
+	cName := [C.SANLK_NAME_LEN]C.char{}
+	// sanlock names are fixed-width; copy at most SANLK_NAME_LEN bytes to
+	// avoid indexing past the array on over-long input.
+	for i := 0; i < len(name) && i < C.SANLK_NAME_LEN; i++ {
+		cName[i] = C.char(name[i])
+	}
+	return cName
+}
+
+func buildSanlockRIndex(lockspace string, path string) C.struct_sanlk_rindex {
+	diskPath := buildSanlockPath(path)
+	lockspaceName := buildSanlockName(lockspace)
+
+	disk := C.struct_sanlk_disk{
+		path:   diskPath,
+		offset: OffsetRIndex,
+	}
+	return C.struct_sanlk_rindex{
+		flags:          C.SANLK_RIF_ALIGN1M | C.SANLK_RIF_SECTOR512,
+		lockspace_name: lockspaceName,
+		disk:           disk,
+	}
+}
+
+func buildSanlockREntry(rEntry string) C.struct_sanlk_rentry {
+	rEntryName := buildSanlockName(rEntry)
+
+	return C.struct_sanlk_rentry{
+		name: rEntryName,
+	}
+}
diff --git a/samples/ubuntu-ha.yaml b/samples/ubuntu-ha.yaml
new file mode 100644
index 0000000..69bdd27
--- /dev/null
+++ b/samples/ubuntu-ha.yaml
@@ -0,0 +1,47 @@
+apiVersion: virt.virtink.smartx.com/v1alpha1
+kind: VirtualMachine
+metadata:
+  name: ubuntu-ha
+spec:
+  runPolicy: RerunOnFailure
+  enableHA: true
+  locks:
+    - ubuntu
+  instance:
+    memory:
+      size: 1Gi
+    disks:
+      - name: ubuntu
+      - name: cloud-init
+    interfaces:
+      - name: pod
+        masquerade: {}
+  volumes:
+    - name: ubuntu
+      dataVolume:
+        volumeName: ubuntu
+    - name: cloud-init
+      cloudInit:
+        userData: |-
+          #cloud-config
+          password: password
+          chpasswd: { expire: False }
+          ssh_pwauth: True
+  networks:
+    - name: pod
+      pod: {}
+---
+apiVersion: cdi.kubevirt.io/v1beta1
+kind: DataVolume
+metadata:
+  name: ubuntu
+spec:
+  source:
+    http:
+      url: https://cloud-images.ubuntu.com/jammy/current/jammy-server-cloudimg-amd64.img
+  pvc:
+    accessModes:
+      - ReadWriteMany
+    resources:
+      requests:
+        storage: 8Gi
diff --git a/skaffold.yaml b/skaffold.yaml
index 79172b6..cdb24eb 100644
--- a/skaffold.yaml
+++ b/skaffold.yaml
@@ -12,12 +12,27 @@ build:
       requires:
         - image: virt-prerunner
           alias: PRERUNNER_IMAGE
+        - image: lockspace-initializer
+          alias: INITIALIZER_IMAGE
+        - image: lockspace-attacher
+          alias: ATTACHER_IMAGE
+        - image: lockspace-detector
+          alias: DETECTOR_IMAGE
   - image: virt-daemon
     docker:
       dockerfile: build/virt-daemon/Dockerfile
   - image: virt-prerunner
     docker:
      dockerfile: build/virt-prerunner/Dockerfile
+  - image: lockspace-initializer
+    docker:
+      dockerfile: build/lockspace-initializer/Dockerfile
+  - image: lockspace-attacher
+    docker:
+      dockerfile: build/lockspace-attacher/Dockerfile
+  - image: lockspace-detector
+    docker:
+      dockerfile: build/lockspace-detector/Dockerfile
 deploy:
   kustomize:
     paths:
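Taken together, the new pkg/sanlock package supplies the primitives behind the three new images. A minimal sketch of how an initializer, attacher and detector might combine them; the lockspace name, disk path, host ID and resource name here are assumptions for illustration, and a running sanlock daemon plus a shared block device are required:

// Illustrative only; not values Virtink itself uses.
package main

import (
	"fmt"
	"log"

	"github.com/smartxworks/virtink/pkg/sanlock"
)

func main() {
	const (
		lockspace = "demo-ls"            // assumed lockspace name
		disk      = "/dev/shared/leases" // assumed shared block device
	)
	const hostID uint64 = 1 // must be within 1~2000

	// Initializer role: lay out the lockspace area on the shared disk, then
	// format the resource index that CreateResource/SearchResource use.
	if err := sanlock.WriteLockspace(lockspace, disk); err != nil {
		log.Fatalf("write lockspace: %v", err)
	}
	if err := sanlock.FormatRIndex(lockspace, disk); err != nil {
		log.Fatalf("format rindex: %v", err)
	}

	// Attacher role: join the lockspace by acquiring this host's delta lease.
	if err := sanlock.AcquireDeltaLease(lockspace, disk, hostID); err != nil {
		log.Fatalf("acquire delta lease: %v", err)
	}

	// Per-VM lock: register a named resource in the rindex.
	offset, err := sanlock.CreateResource(lockspace, disk, "ubuntu")
	if err != nil {
		log.Fatalf("create resource: %v", err)
	}
	fmt.Printf("resource %q created at offset %d\n", "ubuntu", offset)

	// Detector role: ask sanlock what it currently believes about a host.
	status, err := sanlock.GetHostStatus(lockspace, hostID)
	if err != nil {
		log.Fatalf("get host status: %v", err)
	}
	fmt.Printf("host %d status flag: %d\n", hostID, status)
}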