diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index d5d69236..c456dca4 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,11 +1,6 @@ name: Docker build and push -on: - push: - branches: - - '*' - tags: - - 'v*' +on: [push] env: # TEST_TARGET: Name of the testing target in the Dockerfile @@ -14,9 +9,6 @@ env: # DO_TEST - true to build and run unit tests, false to skip the tests DO_TEST: true - # DO_PUSH - true to push to the HPE_DEPLOY_REPO, false to not push - DO_PUSH: true - jobs: build: runs-on: ubuntu-latest @@ -98,3 +90,23 @@ jobs: push: true tags: ${{ steps.meta.outputs.tags }} + create_release: + needs: build + if: startsWith(github.ref, 'refs/tags/v') + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-tags: true + fetch-depth: 0 + - name: Repair tag + run: git fetch -f origin ${{ github.ref }}:${{ github.ref }} + - name: Verify that the tag is annotated + run: if test x$(git for-each-ref ${{ github.ref }} | awk '{print $2}') = xtag; then /bin/true; else echo "\"${{ github.ref }}\" does not look like an annotated tag!"; /bin/false; fi + - name: Release + uses: softprops/action-gh-release@v1 + with: + #prerelease: true + generate_release_notes: true + diff --git a/.github/workflows/verify_tag.yml b/.github/workflows/verify_tag.yml deleted file mode 100644 index 2a75d910..00000000 --- a/.github/workflows/verify_tag.yml +++ /dev/null @@ -1,28 +0,0 @@ -# Pushing a tag triggers this workflow, which verifies that it is an -# annotated tag. -name: Verify tag - -on: - push: - tags: - - "v*" - -jobs: - verify_tag: - runs-on: ubuntu-latest - steps: - - name: "Verify context" - run: | - echo "ref is ${{ github.ref }}" - echo "ref_type is ${{ github.ref_type }}" - - - uses: actions/checkout@v3 - # actions/checkout@v3 breaks annotated tags by converting them into - # lightweight tags, so we need to force fetch the tag again - # See: https://github.com/actions/checkout/issues/290 - - name: "Repair tag" - run: git fetch -f origin ${{ github.ref }}:${{ github.ref }} - - name: "Verify tag is annotated" - run: if test x$(git for-each-ref ${{ github.ref }} | awk '{print $2}') = xtag; then /bin/true; else echo "\"${{ github.ref }}\" does not look like an annotated tag!"; /bin/false; fi - - name: "Echo release tag" - run: echo "TAG=${{ github.repository }}:${{ github.ref }}" diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml index 00bbb33b..b8c4810c 100644 --- a/config/manager/kustomization.yaml +++ b/config/manager/kustomization.yaml @@ -16,7 +16,7 @@ kind: Kustomization images: - name: controller newName: ghcr.io/nearnodeflash/nnf-dm - newTag: 0.0.8 + newTag: 0.0.9 - name: nnf-mfu newName: ghcr.io/nearnodeflash/nnf-mfu newTag: 0.0.3 diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 07138cee..2b2441bc 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -58,6 +58,10 @@ spec: cray.nnf.node: 'true' hostPath: '/mnt/nnf' mountPath: '/mnt/nnf' + updateStrategy: + type: RollingUpdate + rollingUpdate: + maxUnavailable: '25%' template: spec: serviceAccountName: nnf-dm-controller-manager # not sure why 'controller-manager' isn't patched to include the 'nnf-dm-' prefix diff --git a/go.mod b/go.mod index 8ec73641..2ebfcb8c 100644 --- a/go.mod +++ b/go.mod @@ -3,8 +3,8 @@ module github.com/NearNodeFlash/nnf-dm go 1.19 require ( - github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20231031201943-531116c1194e - github.com/NearNodeFlash/nnf-sos v0.0.1-0.20240207194141-d4c5588829a1 + github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20240220213720-51597bca637d + github.com/NearNodeFlash/nnf-sos v0.0.1-0.20240221214302-e7989177289a github.com/onsi/ginkgo/v2 v2.11.0 github.com/onsi/gomega v1.27.10 github.com/prometheus/client_golang v1.16.0 @@ -73,7 +73,7 @@ require ( ) require ( - github.com/DataWorkflowServices/dws v0.0.1-0.20240207192901-62a0958c23f7 + github.com/DataWorkflowServices/dws v0.0.1-0.20240221183421-1a123a9274b6 go.openly.dev/pointy v1.3.0 ) diff --git a/go.sum b/go.sum index 06aef35a..d612f895 100644 --- a/go.sum +++ b/go.sum @@ -1,11 +1,11 @@ -github.com/DataWorkflowServices/dws v0.0.1-0.20240207192901-62a0958c23f7 h1:0WwSoJBPDJIZbM0rgXZk2D+/lBc4EYnM8OE1MRgskjg= -github.com/DataWorkflowServices/dws v0.0.1-0.20240207192901-62a0958c23f7/go.mod h1:vSTBLWbsFjMYxx+sjMDyZpMXLY9m5Bp73cjnmAL30WU= -github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20231031201943-531116c1194e h1:j+MNZYrAcwtaUxqA2CcJFyPLWhfxpO6fsIUXhXljY2U= -github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20231031201943-531116c1194e/go.mod h1:qBcz9p8sXm1qhDf8WUmhxTlD1NCMEjoAD7NoHbQvMiI= +github.com/DataWorkflowServices/dws v0.0.1-0.20240221183421-1a123a9274b6 h1:LYKIIoawsuo+1ByvQaIpgl8vZc2KrE0q7AE7t0YumrI= +github.com/DataWorkflowServices/dws v0.0.1-0.20240221183421-1a123a9274b6/go.mod h1:vSTBLWbsFjMYxx+sjMDyZpMXLY9m5Bp73cjnmAL30WU= +github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20240220213720-51597bca637d h1:AP1TgQlneYZT/AxkYFyvJp1j86+7MTYOoo3I1Zw3L2E= +github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20240220213720-51597bca637d/go.mod h1:qBcz9p8sXm1qhDf8WUmhxTlD1NCMEjoAD7NoHbQvMiI= github.com/NearNodeFlash/nnf-ec v0.0.0-20231010162453-a8168bb6a52f h1:aWtSSQLLk9mUZj94mowirQeVw9saf80gVe10X0rZe8o= github.com/NearNodeFlash/nnf-ec v0.0.0-20231010162453-a8168bb6a52f/go.mod h1:oxdwMqfttOF9dabJhqrWlirCnMk8/8eyLMwl+hducjk= -github.com/NearNodeFlash/nnf-sos v0.0.1-0.20240207194141-d4c5588829a1 h1:1FgQ8cLuy6+qpIbdHv2cFltssde2YTe823v+BAhALpo= -github.com/NearNodeFlash/nnf-sos v0.0.1-0.20240207194141-d4c5588829a1/go.mod h1:YtN3gTkpS4ju1jvBMZTU/UcCbvRt4DFH8w9mFue+diQ= +github.com/NearNodeFlash/nnf-sos v0.0.1-0.20240221214302-e7989177289a h1:m3lPHiWObITk+zp3GAuOeawkaMr+U4aKlHcblYnsP58= +github.com/NearNodeFlash/nnf-sos v0.0.1-0.20240221214302-e7989177289a/go.mod h1:4kJuGEwS46EYIt24NmNweaMryYp8M6RabNc8Nd+GIBE= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/benbjohnson/clock v1.3.0 h1:ip6w0uFQkncKQ979AypyG0ER7mqUSBdKLOgAle/AT8A= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= diff --git a/internal/controller/datamovementmanager_controller.go b/internal/controller/datamovementmanager_controller.go index e08d7a05..b0e401e2 100644 --- a/internal/controller/datamovementmanager_controller.go +++ b/internal/controller/datamovementmanager_controller.go @@ -449,6 +449,7 @@ func (r *NnfDataMovementManagerReconciler) createOrUpdateDaemonSetIfNecessary(ct mutateFn := func() error { podTemplateSpec := manager.Spec.Template.DeepCopy() podTemplateSpec.Labels = manager.Spec.Selector.DeepCopy().MatchLabels + updateStrategy := manager.Spec.UpdateStrategy.DeepCopy() if podTemplateSpec.Labels == nil { podTemplateSpec.Labels = make(map[string]string) @@ -464,8 +465,9 @@ func (r *NnfDataMovementManagerReconciler) createOrUpdateDaemonSetIfNecessary(ct setupLustreVolumes(ctx, manager, podSpec, filesystems.Items) ds.Spec = appsv1.DaemonSetSpec{ - Selector: &manager.Spec.Selector, - Template: *podTemplateSpec, + Selector: &manager.Spec.Selector, + Template: *podTemplateSpec, + UpdateStrategy: *updateStrategy, } if err := ctrl.SetControllerReference(manager, ds, r.Scheme); err != nil { diff --git a/internal/controller/datamovementmanager_controller_test.go b/internal/controller/datamovementmanager_controller_test.go index a5bcd909..9b05fd4d 100644 --- a/internal/controller/datamovementmanager_controller_test.go +++ b/internal/controller/datamovementmanager_controller_test.go @@ -28,6 +28,7 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/client-go/util/retry" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -46,6 +47,9 @@ var _ = Describe("Data Movement Manager Test" /*Ordered, (Ginkgo v2)*/, func() { mgr := &nnfv1alpha1.NnfDataMovementManager{} labels := map[string]string{"control-plane": "controller-manager"} + maxUnavailStr := "50%" + maxSurgeStr := "0%" + /* BeforeAll (Ginkgo v2)*/ BeforeEach(func() { ns = &corev1.Namespace{ @@ -89,6 +93,8 @@ var _ = Describe("Data Movement Manager Test" /*Ordered, (Ginkgo v2)*/, func() { }) BeforeEach(func() { + maxUnavailable := intstr.FromString(maxUnavailStr) + maxSurge := intstr.FromString(maxSurgeStr) mgr = &nnfv1alpha1.NnfDataMovementManager{ ObjectMeta: metav1.ObjectMeta{ Name: "nnf-dm-manager-controller-manager", @@ -110,6 +116,13 @@ var _ = Describe("Data Movement Manager Test" /*Ordered, (Ginkgo v2)*/, func() { }, }, }, + UpdateStrategy: appsv1.DaemonSetUpdateStrategy{ + Type: appsv1.RollingUpdateDaemonSetStrategyType, + RollingUpdate: &appsv1.RollingUpdateDaemonSet{ + MaxUnavailable: &maxUnavailable, + MaxSurge: &maxSurge, + }, + }, }, } @@ -145,6 +158,14 @@ var _ = Describe("Data Movement Manager Test" /*Ordered, (Ginkgo v2)*/, func() { g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(mgr), mgr)).Should(Succeed()) return mgr.Status.Ready }, "5s").Should(BeTrue()) + + By("The updateStrategy appears in the daemon set") + Eventually(func(g Gomega) error { + g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(daemonset), daemonset)).Should(Succeed()) + g.Expect(daemonset.Spec.UpdateStrategy.RollingUpdate.MaxUnavailable.StrVal).Should(Equal(maxUnavailStr)) + g.Expect(daemonset.Spec.UpdateStrategy.RollingUpdate.MaxSurge.StrVal).Should(Equal(maxSurgeStr)) + return nil + }).Should(Succeed()) }) It("Adds and removes global lustre volumes", func() { diff --git a/vendor/github.com/DataWorkflowServices/dws/api/v1alpha2/clientmount_types.go b/vendor/github.com/DataWorkflowServices/dws/api/v1alpha2/clientmount_types.go index ce0f0513..a7a2ea0f 100644 --- a/vendor/github.com/DataWorkflowServices/dws/api/v1alpha2/clientmount_types.go +++ b/vendor/github.com/DataWorkflowServices/dws/api/v1alpha2/clientmount_types.go @@ -189,6 +189,9 @@ type ClientMountStatus struct { //+kubebuilder:object:root=true //+kubebuilder:storageversion //+kubebuilder:subresource:status +//+kubebuilder:printcolumn:name="DESIREDSTATE",type="string",JSONPath=".status.desiredState",description="mounted/unmounted" +//+kubebuilder:printcolumn:name="STATE",type="string",JSONPath=".status.state",description="mounted/unmounted" +//+kubebuilder:printcolumn:name="READY",type="boolean",JSONPath=".status.ready",description="True if current state is achieved" //+kubebuilder:printcolumn:name="ERROR",type="string",JSONPath=".status.error.severity" //+kubebuilder:printcolumn:name="AGE",type="date",JSONPath=".metadata.creationTimestamp" diff --git a/vendor/github.com/NearNodeFlash/lustre-fs-operator/api/v1beta1/zz_generated.deepcopy.go b/vendor/github.com/NearNodeFlash/lustre-fs-operator/api/v1beta1/zz_generated.deepcopy.go index fa8f4646..7d39cd67 100644 --- a/vendor/github.com/NearNodeFlash/lustre-fs-operator/api/v1beta1/zz_generated.deepcopy.go +++ b/vendor/github.com/NearNodeFlash/lustre-fs-operator/api/v1beta1/zz_generated.deepcopy.go @@ -1,7 +1,7 @@ //go:build !ignore_autogenerated /* - * Copyright 2023 Hewlett Packard Enterprise Development LP + * Copyright 2024 Hewlett Packard Enterprise Development LP * Other additional copyright holders may be indicated within. * * The entirety of this work is licensed under the Apache License, diff --git a/vendor/github.com/NearNodeFlash/nnf-sos/api/v1alpha1/nnf_datamovementmanager_types.go b/vendor/github.com/NearNodeFlash/nnf-sos/api/v1alpha1/nnf_datamovementmanager_types.go index 091a033f..db0dc96a 100644 --- a/vendor/github.com/NearNodeFlash/nnf-sos/api/v1alpha1/nnf_datamovementmanager_types.go +++ b/vendor/github.com/NearNodeFlash/nnf-sos/api/v1alpha1/nnf_datamovementmanager_types.go @@ -20,6 +20,7 @@ package v1alpha1 import ( + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -50,6 +51,10 @@ type NnfDataMovementManagerSpec struct { // manages the per node data movement operations. Template corev1.PodTemplateSpec `json:"template"` + // UpdateStrategy defines the UpdateStrategy that is used for the basis of the worker Daemon Set + // that manages the per node data movement operations. + UpdateStrategy appsv1.DaemonSetUpdateStrategy `json:"updateStrategy"` + // Host Path defines the directory location of shared mounts on an individual worker node. HostPath string `json:"hostPath"` diff --git a/vendor/github.com/NearNodeFlash/nnf-sos/api/v1alpha1/nnf_storage_types.go b/vendor/github.com/NearNodeFlash/nnf-sos/api/v1alpha1/nnf_storage_types.go index 0468be73..c26ee1b8 100644 --- a/vendor/github.com/NearNodeFlash/nnf-sos/api/v1alpha1/nnf_storage_types.go +++ b/vendor/github.com/NearNodeFlash/nnf-sos/api/v1alpha1/nnf_storage_types.go @@ -129,6 +129,7 @@ type NnfStorageStatus struct { //+kubebuilder:object:root=true //+kubebuilder:subresource:status +//+kubebuilder:printcolumn:name="READY",type="string",JSONPath=".status.ready" //+kubebuilder:printcolumn:name="AGE",type="date",JSONPath=".metadata.creationTimestamp" //+kubebuilder:printcolumn:name="ERROR",type="string",JSONPath=".status.error.severity" diff --git a/vendor/github.com/NearNodeFlash/nnf-sos/api/v1alpha1/zz_generated.deepcopy.go b/vendor/github.com/NearNodeFlash/nnf-sos/api/v1alpha1/zz_generated.deepcopy.go index 96f3c58a..4f0fce8c 100644 --- a/vendor/github.com/NearNodeFlash/nnf-sos/api/v1alpha1/zz_generated.deepcopy.go +++ b/vendor/github.com/NearNodeFlash/nnf-sos/api/v1alpha1/zz_generated.deepcopy.go @@ -429,6 +429,7 @@ func (in *NnfDataMovementManagerSpec) DeepCopyInto(out *NnfDataMovementManagerSp *out = *in in.Selector.DeepCopyInto(&out.Selector) in.Template.DeepCopyInto(&out.Template) + in.UpdateStrategy.DeepCopyInto(&out.UpdateStrategy) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NnfDataMovementManagerSpec. diff --git a/vendor/github.com/NearNodeFlash/nnf-sos/config/crd/bases/nnf.cray.hpe.com_nnfdatamovementmanagers.yaml b/vendor/github.com/NearNodeFlash/nnf-sos/config/crd/bases/nnf.cray.hpe.com_nnfdatamovementmanagers.yaml index ce62f193..823ed564 100644 --- a/vendor/github.com/NearNodeFlash/nnf-sos/config/crd/bases/nnf.cray.hpe.com_nnfdatamovementmanagers.yaml +++ b/vendor/github.com/NearNodeFlash/nnf-sos/config/crd/bases/nnf.cray.hpe.com_nnfdatamovementmanagers.yaml @@ -7626,11 +7626,75 @@ spec: - containers type: object type: object + updateStrategy: + description: UpdateStrategy defines the UpdateStrategy that is used + for the basis of the worker Daemon Set that manages the per node + data movement operations. + properties: + rollingUpdate: + description: 'Rolling update config params. Present only if type + = "RollingUpdate". --- TODO: Update this to follow our convention + for oneOf, whatever we decide it to be. Same as Deployment `strategy.rollingUpdate`. + See https://github.com/kubernetes/kubernetes/issues/35345' + properties: + maxSurge: + anyOf: + - type: integer + - type: string + description: 'The maximum number of nodes with an existing + available DaemonSet pod that can have an updated DaemonSet + pod during during an update. Value can be an absolute number + (ex: 5) or a percentage of desired pods (ex: 10%). This + can not be 0 if MaxUnavailable is 0. Absolute number is + calculated from percentage by rounding up to a minimum of + 1. Default value is 0. Example: when this is set to 30%, + at most 30% of the total number of nodes that should be + running the daemon pod (i.e. status.desiredNumberScheduled) + can have their a new pod created before the old pod is marked + as deleted. The update starts by launching new pods on 30% + of nodes. Once an updated pod is available (Ready for at + least minReadySeconds) the old DaemonSet pod on that node + is marked deleted. If the old pod becomes unavailable for + any reason (Ready transitions to false, is evicted, or is + drained) an updated pod is immediatedly created on that + node without considering surge limits. Allowing surge implies + the possibility that the resources consumed by the daemonset + on any given node can double if the readiness check fails, + and so resource intensive daemonsets should take into account + that they may cause evictions during disruption.' + x-kubernetes-int-or-string: true + maxUnavailable: + anyOf: + - type: integer + - type: string + description: 'The maximum number of DaemonSet pods that can + be unavailable during the update. Value can be an absolute + number (ex: 5) or a percentage of total number of DaemonSet + pods at the start of the update (ex: 10%). Absolute number + is calculated from percentage by rounding up. This cannot + be 0 if MaxSurge is 0 Default value is 1. Example: when + this is set to 30%, at most 30% of the total number of nodes + that should be running the daemon pod (i.e. status.desiredNumberScheduled) + can have their pods stopped for an update at any given time. + The update starts by stopping at most 30% of those DaemonSet + pods and then brings up new DaemonSet pods in their place. + Once the new pods are available, it then proceeds onto other + DaemonSet pods, thus ensuring that at least 70% of original + number of DaemonSet pods are available at all times during + the update.' + x-kubernetes-int-or-string: true + type: object + type: + description: Type of daemon set update. Can be "RollingUpdate" + or "OnDelete". Default is RollingUpdate. + type: string + type: object required: - hostPath - mountPath - selector - template + - updateStrategy type: object status: description: NnfDataMovementManagerStatus defines the observed state of diff --git a/vendor/github.com/NearNodeFlash/nnf-sos/config/crd/bases/nnf.cray.hpe.com_nnfstorages.yaml b/vendor/github.com/NearNodeFlash/nnf-sos/config/crd/bases/nnf.cray.hpe.com_nnfstorages.yaml index d506756f..bd6e0c03 100644 --- a/vendor/github.com/NearNodeFlash/nnf-sos/config/crd/bases/nnf.cray.hpe.com_nnfstorages.yaml +++ b/vendor/github.com/NearNodeFlash/nnf-sos/config/crd/bases/nnf.cray.hpe.com_nnfstorages.yaml @@ -15,6 +15,9 @@ spec: scope: Namespaced versions: - additionalPrinterColumns: + - jsonPath: .status.ready + name: READY + type: string - jsonPath: .metadata.creationTimestamp name: AGE type: date diff --git a/vendor/modules.txt b/vendor/modules.txt index 23966db2..2ac5028d 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1,16 +1,16 @@ -# github.com/DataWorkflowServices/dws v0.0.1-0.20240207192901-62a0958c23f7 +# github.com/DataWorkflowServices/dws v0.0.1-0.20240221183421-1a123a9274b6 ## explicit; go 1.19 github.com/DataWorkflowServices/dws/api/v1alpha2 github.com/DataWorkflowServices/dws/utils/dwdparse github.com/DataWorkflowServices/dws/utils/updater -# github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20231031201943-531116c1194e +# github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20240220213720-51597bca637d ## explicit; go 1.19 github.com/NearNodeFlash/lustre-fs-operator/api/v1beta1 github.com/NearNodeFlash/lustre-fs-operator/config/crd/bases # github.com/NearNodeFlash/nnf-ec v0.0.0-20231010162453-a8168bb6a52f ## explicit; go 1.19 github.com/NearNodeFlash/nnf-ec/pkg/rfsf/pkg/models -# github.com/NearNodeFlash/nnf-sos v0.0.1-0.20240207194141-d4c5588829a1 +# github.com/NearNodeFlash/nnf-sos v0.0.1-0.20240221214302-e7989177289a ## explicit; go 1.19 github.com/NearNodeFlash/nnf-sos/api/v1alpha1 github.com/NearNodeFlash/nnf-sos/config/crd/bases