From a7c28f38a32396de212f60f204aea746ba441382 Mon Sep 17 00:00:00 2001 From: Dean Roehrich Date: Thu, 15 Jun 2023 12:01:44 -0500 Subject: [PATCH 01/19] Fix and use verifyPinnedContainerProfile() in tests This affected some related functions, so I ended up reworking their function signatures and collecting them in a new nnfcontainerprofile_helpers.go. Signed-off-by: Dean Roehrich --- controllers/integration_test.go | 3 +- controllers/nnf_workflow_controller.go | 2 +- .../nnf_workflow_controller_helpers.go | 115 +-------------- controllers/nnf_workflow_controller_test.go | 4 +- controllers/nnfcontainerprofile_helpers.go | 138 ++++++++++++++++++ controllers/nnfcontainerprofile_test.go | 5 +- 6 files changed, 153 insertions(+), 114 deletions(-) create mode 100644 controllers/nnfcontainerprofile_helpers.go diff --git a/controllers/integration_test.go b/controllers/integration_test.go index 301bcbef2..0414ab990 100644 --- a/controllers/integration_test.go +++ b/controllers/integration_test.go @@ -1263,6 +1263,7 @@ var _ = Describe("Integration Test", func() { }).Should(Succeed()) advanceStateAndCheckReady("Proposal", workflow) + Expect(verifyPinnedContainerProfile(context.TODO(), k8sClient, workflow, 0)).To(Succeed()) }) AfterEach(func() { @@ -1307,7 +1308,7 @@ var _ = Describe("Integration Test", func() { matchLabels[nnfv1alpha1.DirectiveIndexLabel] = "0" jobList := &batchv1.JobList{} - Eventually(func(g Gomega) int { + Eventually(func() int { Expect(k8sClient.List(context.TODO(), jobList, matchLabels)).To(Succeed()) return len(jobList.Items) }).Should(Equal(2)) diff --git a/controllers/nnf_workflow_controller.go b/controllers/nnf_workflow_controller.go index a19bb76ad..df1ec3850 100644 --- a/controllers/nnf_workflow_controller.go +++ b/controllers/nnf_workflow_controller.go @@ -287,7 +287,7 @@ func (r *NnfWorkflowReconciler) startProposalState(ctx context.Context, workflow // only jobdw, persistentdw, and create_persistent need a directive breakdown switch dwArgs["command"] { case "container": - return nil, r.createPinnedContainerProfileIfNecessary(ctx, workflow, index) + return nil, createPinnedContainerProfileIfNecessary(ctx, r.Client, r.Scheme, workflow, index, r.Log) case "jobdw", "persistentdw", "create_persistent": break default: diff --git a/controllers/nnf_workflow_controller_helpers.go b/controllers/nnf_workflow_controller_helpers.go index 993494188..97ed43777 100644 --- a/controllers/nnf_workflow_controller_helpers.go +++ b/controllers/nnf_workflow_controller_helpers.go @@ -22,7 +22,6 @@ package controllers import ( "context" "fmt" - "os" "reflect" "strconv" "strings" @@ -236,8 +235,8 @@ func (r *NnfWorkflowReconciler) validateContainerDirective(ctx context.Context, return nnfv1alpha1.NewWorkflowError("invalid DW directive: " + workflow.Spec.DWDirectives[index]).WithFatal() } - // Ensure the supplied profile exists or use the default - profile, err := r.findContainerProfile(ctx, workflow, index) + // Ensure the supplied profile exists + profile, err := findContainerProfile(ctx, r.Client, workflow, index) if err != nil { return nnfv1alpha1.NewWorkflowError(err.Error()).WithFatal() } @@ -1052,7 +1051,7 @@ func (r *NnfWorkflowReconciler) removeAllPersistentStorageReferences(ctx context } func (r *NnfWorkflowReconciler) containerHandler(ctx context.Context, workflow *dwsv1alpha2.Workflow, dwArgs map[string]string, index int, log logr.Logger) (*result, error) { - profile, err := r.getContainerProfile(ctx, workflow, index) + profile, err := getContainerProfile(ctx, r.Client, workflow, 
index) if err != nil { return nil, err } @@ -1563,7 +1562,7 @@ func (r *NnfWorkflowReconciler) getNnfNodesFromComputes(ctx context.Context, wor func (r *NnfWorkflowReconciler) waitForContainersToStart(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) (*result, error) { // Get profile to determine container job type (MPI or not) - profile, err := r.getContainerProfile(ctx, workflow, index) + profile, err := getContainerProfile(ctx, r.Client, workflow, index) if err != nil { return nil, err } @@ -1635,7 +1634,7 @@ func (r *NnfWorkflowReconciler) getMPIJobConditions(ctx context.Context, workflo func (r *NnfWorkflowReconciler) waitForContainersToFinish(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) (*result, error) { // Get profile to determine container job type (MPI or not) - profile, err := r.getContainerProfile(ctx, workflow, index) + profile, err := getContainerProfile(ctx, r.Client, workflow, index) if err != nil { return nil, err } @@ -1740,7 +1739,7 @@ func (r *NnfWorkflowReconciler) waitForContainersToFinish(ctx context.Context, w func (r *NnfWorkflowReconciler) checkContainersResults(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) (*result, error) { // Get profile to determine container job type (MPI or not) - profile, err := r.getContainerProfile(ctx, workflow, index) + profile, err := getContainerProfile(ctx, r.Client, workflow, index) if err != nil { return nil, err } @@ -1822,108 +1821,6 @@ func (r *NnfWorkflowReconciler) getContainerJobs(ctx context.Context, workflow * return jobList, nil } -func (r *NnfWorkflowReconciler) getContainerProfile(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) (*nnfv1alpha1.NnfContainerProfile, error) { - profile, err := r.findPinnedContainerProfile(ctx, workflow, index) - if err != nil { - return nil, err - } - - if profile == nil { - return nil, nnfv1alpha1.NewWorkflowErrorf("container profile '%s' not found", indexedResourceName(workflow, index)).WithFatal() - } - - if !profile.Data.Pinned { - return nil, nnfv1alpha1.NewWorkflowErrorf("expected pinned container profile '%s'", indexedResourceName(workflow, index)).WithFatal() - } - - return profile, nil -} - -func (r *NnfWorkflowReconciler) findPinnedContainerProfile(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) (*nnfv1alpha1.NnfContainerProfile, error) { - profile := &nnfv1alpha1.NnfContainerProfile{ - ObjectMeta: metav1.ObjectMeta{ - Name: indexedResourceName(workflow, index), - Namespace: workflow.Namespace, - }, - } - - if err := r.Get(ctx, client.ObjectKeyFromObject(profile), profile); err != nil { - return nil, err - } - - return profile, nil -} - -func (r *NnfWorkflowReconciler) findContainerProfile(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) (*nnfv1alpha1.NnfContainerProfile, error) { - args, err := dwdparse.BuildArgsMap(workflow.Spec.DWDirectives[index]) - if err != nil { - return nil, err - } - - name, found := args["profile"] - if !found { - return nil, fmt.Errorf("container directive '%s' has no profile key", workflow.Spec.DWDirectives[index]) - } - - profile := &nnfv1alpha1.NnfContainerProfile{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: os.Getenv("NNF_CONTAINER_PROFILE_NAMESPACE"), - }, - } - - if err := r.Get(ctx, client.ObjectKeyFromObject(profile), profile); err != nil { - return nil, err - } - - return profile, nil -} - -func (r *NnfWorkflowReconciler) createPinnedContainerProfileIfNecessary(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) error { - 
profile, err := r.findPinnedContainerProfile(ctx, workflow, index) - if err != nil && !apierrors.IsNotFound(err) { - return err - } - - if profile != nil { - if !profile.Data.Pinned { - return fmt.Errorf("expected pinned container profile, but it was not pinned: %s", profile.Name) - } - - return nil - } - - profile, err = r.findContainerProfile(ctx, workflow, index) - if err != nil { - return err - } - - pinnedProfile := &nnfv1alpha1.NnfContainerProfile{ - ObjectMeta: metav1.ObjectMeta{ - Name: indexedResourceName(workflow, index), - Namespace: workflow.Namespace, - }, - } - - profile.Data.DeepCopyInto(&pinnedProfile.Data) - - pinnedProfile.Data.Pinned = true - - dwsv1alpha2.AddOwnerLabels(pinnedProfile, workflow) - - if err := controllerutil.SetControllerReference(workflow, pinnedProfile, r.Scheme); err != nil { - r.Log.Error(err, "failed to set controller reference on profile", "profile", pinnedProfile) - return fmt.Errorf("failed to set controller reference on profile %s", client.ObjectKeyFromObject(pinnedProfile)) - } - - r.Log.Info("Creating pinned container profile", "resource", client.ObjectKeyFromObject(pinnedProfile)) - if err := r.Create(ctx, pinnedProfile); err != nil { - return err - } - - return nil -} - // Create a list of volumes to be mounted inside of the containers based on the DW_JOB/DW_PERSISTENT arguments func (r *NnfWorkflowReconciler) getContainerVolumes(ctx context.Context, workflow *dwsv1alpha2.Workflow, dwArgs map[string]string) ([]nnfContainerVolume, *result, error) { volumes := []nnfContainerVolume{} diff --git a/controllers/nnf_workflow_controller_test.go b/controllers/nnf_workflow_controller_test.go index 859453794..c40277c92 100644 --- a/controllers/nnf_workflow_controller_test.go +++ b/controllers/nnf_workflow_controller_test.go @@ -1094,6 +1094,7 @@ var _ = Describe("NNF Workflow Unit Tests", func() { g.Expect(k8sClient.Get(context.TODO(), key, workflow)).To(Succeed()) return workflow.Status.Ready && workflow.Status.State == dwsv1alpha2.StateProposal }).Should(BeTrue(), "reach desired Proposal state") + Expect(verifyPinnedContainerProfile(context.TODO(), k8sClient, workflow, 2)).To(Succeed()) }) }) @@ -1118,6 +1119,7 @@ var _ = Describe("NNF Workflow Unit Tests", func() { g.Expect(k8sClient.Get(context.TODO(), key, workflow)).To(Succeed()) return workflow.Status.Ready && workflow.Status.State == dwsv1alpha2.StateProposal }).Should(BeTrue(), "reach desired Proposal state") + Expect(verifyPinnedContainerProfile(context.TODO(), k8sClient, workflow, 1)).To(Succeed()) }) }) @@ -1159,7 +1161,7 @@ var _ = Describe("NNF Workflow Unit Tests", func() { }) }) - Context("when a argument is not in the container profile", func() { + Context("when an argument is not in the container profile", func() { BeforeEach(func() { containerProfileStorages = []nnfv1alpha1.NnfContainerProfileStorage{ {Name: "DW_PERSISTENT_foo_persistent_storage", Optional: true}, diff --git a/controllers/nnfcontainerprofile_helpers.go b/controllers/nnfcontainerprofile_helpers.go new file mode 100644 index 000000000..2caf0bcb9 --- /dev/null +++ b/controllers/nnfcontainerprofile_helpers.go @@ -0,0 +1,138 @@ +/* + * Copyright 2023 Hewlett Packard Enterprise Development LP + * Other additional copyright holders may be indicated within. + * + * The entirety of this work is licensed under the Apache License, + * Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. 
+ * + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package controllers + +import ( + "context" + "fmt" + "os" + + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + kruntime "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + + dwsv1alpha2 "github.com/HewlettPackard/dws/api/v1alpha2" + "github.com/HewlettPackard/dws/utils/dwdparse" + nnfv1alpha1 "github.com/NearNodeFlash/nnf-sos/api/v1alpha1" + "github.com/go-logr/logr" +) + +func getContainerProfile(ctx context.Context, clnt client.Client, workflow *dwsv1alpha2.Workflow, index int) (*nnfv1alpha1.NnfContainerProfile, error) { + profile, err := findPinnedContainerProfile(ctx, clnt, workflow, index) + if err != nil { + return nil, err + } + + if profile == nil { + return nil, nnfv1alpha1.NewWorkflowErrorf("container profile '%s' not found", indexedResourceName(workflow, index)).WithFatal() + } + + return profile, nil +} + +func findPinnedContainerProfile(ctx context.Context, clnt client.Client, workflow *dwsv1alpha2.Workflow, index int) (*nnfv1alpha1.NnfContainerProfile, error) { + profile := &nnfv1alpha1.NnfContainerProfile{ + ObjectMeta: metav1.ObjectMeta{ + Name: indexedResourceName(workflow, index), + Namespace: workflow.Namespace, + }, + } + + if err := clnt.Get(ctx, client.ObjectKeyFromObject(profile), profile); err != nil { + return nil, err + } + + if !profile.Data.Pinned { + return nil, nnfv1alpha1.NewWorkflowErrorf("expected a pinned container profile '%s', but found one that is not pinned", indexedResourceName(workflow, index)).WithFatal() + } + + return profile, nil +} + +func findContainerProfile(ctx context.Context, clnt client.Client, workflow *dwsv1alpha2.Workflow, index int) (*nnfv1alpha1.NnfContainerProfile, error) { + args, err := dwdparse.BuildArgsMap(workflow.Spec.DWDirectives[index]) + if err != nil { + return nil, err + } + + name, found := args["profile"] + if !found { + return nil, fmt.Errorf("container directive '%s' has no profile key", workflow.Spec.DWDirectives[index]) + } + + profile := &nnfv1alpha1.NnfContainerProfile{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: os.Getenv("NNF_CONTAINER_PROFILE_NAMESPACE"), + }, + } + + if err := clnt.Get(ctx, client.ObjectKeyFromObject(profile), profile); err != nil { + return nil, err + } + + if profile.Data.Pinned { + return nil, nnfv1alpha1.NewWorkflowErrorf("expected container profile that is not pinned '%s', but found one that is pinned", indexedResourceName(workflow, index)).WithFatal() + } + + return profile, nil +} + +func createPinnedContainerProfileIfNecessary(ctx context.Context, clnt client.Client, scheme *kruntime.Scheme, workflow *dwsv1alpha2.Workflow, index int, log logr.Logger) error { + profile, err := findPinnedContainerProfile(ctx, clnt, workflow, index) + if err != nil && !apierrors.IsNotFound(err) { + return err + } + if profile != nil { + return nil + } + + profile, err = findContainerProfile(ctx, clnt, workflow, index) + if err != nil { + return err + } + + pinnedProfile := &nnfv1alpha1.NnfContainerProfile{ + ObjectMeta: 
metav1.ObjectMeta{ + Name: indexedResourceName(workflow, index), + Namespace: workflow.Namespace, + }, + } + + profile.Data.DeepCopyInto(&pinnedProfile.Data) + + pinnedProfile.Data.Pinned = true + + dwsv1alpha2.AddOwnerLabels(pinnedProfile, workflow) + + if err := controllerutil.SetControllerReference(workflow, pinnedProfile, scheme); err != nil { + log.Error(err, "failed to set controller reference on profile", "profile", pinnedProfile) + return fmt.Errorf("failed to set controller reference on profile %s", client.ObjectKeyFromObject(pinnedProfile)) + } + + if err := clnt.Create(ctx, pinnedProfile); err != nil { + return err + } + log.Info("Created pinned container profile", "resource", client.ObjectKeyFromObject(pinnedProfile)) + + return nil +} diff --git a/controllers/nnfcontainerprofile_test.go b/controllers/nnfcontainerprofile_test.go index 6064d85a1..0d647039b 100644 --- a/controllers/nnfcontainerprofile_test.go +++ b/controllers/nnfcontainerprofile_test.go @@ -29,6 +29,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" + dwsv1alpha2 "github.com/HewlettPackard/dws/api/v1alpha2" nnfv1alpha1 "github.com/NearNodeFlash/nnf-sos/api/v1alpha1" ) @@ -90,9 +91,9 @@ func createBasicNnfContainerProfile(storages []nnfv1alpha1.NnfContainerProfileSt return createNnfContainerProfile(containerProfile, true) } -func verifyPinnedContainerProfile(ctx context.Context, clnt client.Client, namespace string, profileName string) error { +func verifyPinnedContainerProfile(ctx context.Context, clnt client.Client, workflow *dwsv1alpha2.Workflow, index int) error { - nnfContainerProfile, err := findPinnedProfile(ctx, clnt, namespace, profileName) + nnfContainerProfile, err := findPinnedContainerProfile(ctx, clnt, workflow, index) ExpectWithOffset(1, err).NotTo(HaveOccurred()) ExpectWithOffset(1, nnfContainerProfile.Data.Pinned).To(BeTrue()) refs := nnfContainerProfile.GetOwnerReferences() From de880bd44e1f4558e2dbd007611048340acf7a72 Mon Sep 17 00:00:00 2001 From: Dean Roehrich Date: Thu, 15 Jun 2023 14:16:51 -0500 Subject: [PATCH 02/19] Fix a negative NnfContainerProfile test (#206) As the Ginkgo book says, "Declare in container nodes, initialize in setup nodes". I was stumped by why this test wasn't failing. When I corrected the declare-before-initialize, it became more clear. 
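In miniature, the pattern looks like this (an illustrative sketch only, using one of the variables touched in the test below):

    var _ = Describe("container directives", func() {
        // Declare in the container node.
        var createContainerProfile bool

        // Initialize in a setup node, so every spec starts from a known value
        // and an inner Context can still override it in its own BeforeEach.
        BeforeEach(func() {
            createContainerProfile = true
        })
    })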
Signed-off-by: Dean Roehrich --- controllers/nnf_workflow_controller_test.go | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/controllers/nnf_workflow_controller_test.go b/controllers/nnf_workflow_controller_test.go index c40277c92..c801adffd 100644 --- a/controllers/nnf_workflow_controller_test.go +++ b/controllers/nnf_workflow_controller_test.go @@ -1030,14 +1030,20 @@ var _ = Describe("NNF Workflow Unit Tests", func() { When("Using container directives", func() { var ns *corev1.Namespace - persistentStorageName := "container-persistent" - createPersistent := true + var persistentStorageName string + var createPersistent bool var containerProfile *nnfv1alpha1.NnfContainerProfile - var containerProfileStorages []nnfv1alpha1.NnfContainerProfileStorage = nil - createContainerProfile := true + var containerProfileStorages []nnfv1alpha1.NnfContainerProfileStorage + var createContainerProfile bool BeforeEach(func() { + persistentStorageName = "container-persistent" + createPersistent = true + containerProfile = nil + containerProfileStorages = nil + createContainerProfile = true + // Create/Delete the "nnf-system" namespace as part of the test life-cycle; the persistent storage instances are // placed in the "nnf-system" namespace so it must be present. // EnvTest does not support namespace deletion, so this could already exist. Ignore any errors. @@ -1143,6 +1149,12 @@ var _ = Describe("NNF Workflow Unit Tests", func() { }) Context("when a required storage in the container profile is not present in the arguments", func() { + BeforeEach(func() { + containerProfileStorages = []nnfv1alpha1.NnfContainerProfileStorage{ + {Name: "DW_JOB_foo_local_storage", Optional: false}, + {Name: "DW_PERSISTENT_foo_persistent_storage", Optional: true}, + } + }) It("should go to error", func() { workflow.Spec.DWDirectives = []string{ "#DW jobdw name=container-storage type=gfs2 capacity=1GB", From 10f0db081d3207bdd32a9a94491508adea6570f3 Mon Sep 17 00:00:00 2001 From: Blake Devcich <89158881+bdevcich-hpe@users.noreply.github.com> Date: Fri, 16 Jun 2023 13:11:40 -0500 Subject: [PATCH 03/19] Refactored containerHandler() (#207) `containerHandler()` was much too large. This change moves all of the underlying User Container code into it's own helper file. To reduce the number of function parameters (e.g. workflow, profile, ctx) passed into these functions, a new struct was added so that the helper functions could make use it. Also renamed it to `userContainerHandler()`. 
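The resulting call site in userContainerHandler() looks roughly like this (abridged; the full version is in the diff below):

    c := nnfUserContainer{
        workflow: workflow, profile: profile, nnfNodes: nnfNodes, volumes: volumes,
        username: nnfv1alpha1.ContainerUser,
        uid:      int64(workflow.Spec.UserID), gid: int64(workflow.Spec.GroupID),
        index:    index, client: r.Client, log: r.Log, scheme: r.Scheme, ctx: ctx,
    }
    if mpiJob {
        err = c.createMPIJob()
    } else {
        err = c.createNonMPIJob()
    }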
Signed-off-by: Blake Devcich --- controllers/nnf_workflow_controller.go | 2 +- ...f_workflow_controller_container_helpers.go | 439 ++++++++++++++++++ .../nnf_workflow_controller_helpers.go | 409 +--------------- 3 files changed, 457 insertions(+), 393 deletions(-) create mode 100644 controllers/nnf_workflow_controller_container_helpers.go diff --git a/controllers/nnf_workflow_controller.go b/controllers/nnf_workflow_controller.go index df1ec3850..cae1fc244 100644 --- a/controllers/nnf_workflow_controller.go +++ b/controllers/nnf_workflow_controller.go @@ -772,7 +772,7 @@ func (r *NnfWorkflowReconciler) startPreRunState(ctx context.Context, workflow * // Create container service and jobs if dwArgs["command"] == "container" { - return r.containerHandler(ctx, workflow, dwArgs, index, log) + return r.userContainerHandler(ctx, workflow, dwArgs, index, log) } // Create an NNFAccess for the compute clients diff --git a/controllers/nnf_workflow_controller_container_helpers.go b/controllers/nnf_workflow_controller_container_helpers.go new file mode 100644 index 000000000..221d96970 --- /dev/null +++ b/controllers/nnf_workflow_controller_container_helpers.go @@ -0,0 +1,439 @@ +/* + * Copyright 2023 Hewlett Packard Enterprise Development LP + * Other additional copyright holders may be indicated within. + * + * The entirety of this work is licensed under the Apache License, + * Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. + * + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package controllers + +import ( + "context" + "fmt" + "strconv" + "strings" + + dwsv1alpha2 "github.com/HewlettPackard/dws/api/v1alpha2" + nnfv1alpha1 "github.com/NearNodeFlash/nnf-sos/api/v1alpha1" + "github.com/go-logr/logr" + mpicommonv1 "github.com/kubeflow/common/pkg/apis/common/v1" + mpiv2beta1 "github.com/kubeflow/mpi-operator/pkg/apis/kubeflow/v2beta1" + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +type nnfUserContainer struct { + workflow *dwsv1alpha2.Workflow + profile *nnfv1alpha1.NnfContainerProfile + nnfNodes []string + volumes []nnfContainerVolume + username string + uid, gid int64 + client client.Client + log logr.Logger + scheme *runtime.Scheme + ctx context.Context + index int +} + +// This struct contains all the necessary information for mounting container storages +type nnfContainerVolume struct { + name string + command string + directiveName string + directiveIndex int + mountPath string + envVarName string +} + +// MPI container workflow. In this model, we use mpi-operator to create an MPIJob, which creates +// a job for the launcher (to run mpirun) and a replicaset for the worker pods. The worker nodes +// run an ssh server tn listen for mpirun operations from the launcher pod. 
+func (c *nnfUserContainer) createMPIJob() error { + mpiJob := &mpiv2beta1.MPIJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: c.workflow.Name, + Namespace: c.workflow.Namespace, + }, + } + c.profile.Data.MPISpec.DeepCopyInto(&mpiJob.Spec) + c.username = nnfv1alpha1.ContainerMPIUser + + if err := c.applyLabels(&mpiJob.ObjectMeta); err != nil { + return err + } + + // Use the profile's backoff limit if not set + if mpiJob.Spec.RunPolicy.BackoffLimit == nil { + mpiJob.Spec.RunPolicy.BackoffLimit = &c.profile.Data.RetryLimit + } + + // MPIJobs have two pod specs: one for the launcher and one for the workers + launcher := mpiJob.Spec.MPIReplicaSpecs[mpiv2beta1.MPIReplicaTypeLauncher] + launcherSpec := &launcher.Template.Spec + worker := mpiJob.Spec.MPIReplicaSpecs[mpiv2beta1.MPIReplicaTypeWorker] + workerSpec := &worker.Template.Spec + + // Keep failed pods around for log inspection + launcher.RestartPolicy = mpicommonv1.RestartPolicyNever + worker.RestartPolicy = mpicommonv1.RestartPolicyNever + + // Add NNF node tolerations + c.applyTolerations(launcherSpec) + c.applyTolerations(workerSpec) + + // Run the launcher on the first NNF node + launcherSpec.NodeSelector = map[string]string{"kubernetes.io/hostname": c.nnfNodes[0]} + + // Use initContainers to ensure the workers are up and discoverable before running the launcher command + for i := range c.nnfNodes { + c.addInitContainerWorkerWait(launcherSpec, i) + } + + // Target all the NNF nodes for the workers + replicas := int32(len(c.nnfNodes)) + worker.Replicas = &replicas + workerSpec.Affinity = &corev1.Affinity{ + // Ensure we run a worker on every NNF node + NodeAffinity: &corev1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{ + NodeSelectorTerms: []corev1.NodeSelectorTerm{{ + MatchExpressions: []corev1.NodeSelectorRequirement{{ + Key: "kubernetes.io/hostname", + Operator: corev1.NodeSelectorOpIn, + Values: c.nnfNodes, + }}, + }}, + }, + }, + // But make sure it's only 1 per node + PodAntiAffinity: &corev1.PodAntiAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{{ + TopologyKey: "kubernetes.io/hostname", + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "training.kubeflow.org/job-name", + Operator: metav1.LabelSelectorOpIn, + Values: []string{c.workflow.Name}, + }, + { + Key: "training.kubeflow.org/job-role", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"worker"}, + }, + }, + }}, + }, + }, + } + + // Set the appropriate permissions (UID/GID) from the workflow + c.applyPermissions(launcherSpec, &mpiJob.Spec, false) + c.applyPermissions(workerSpec, &mpiJob.Spec, true) + + c.addNnfVolumes(launcherSpec) + c.addNnfVolumes(workerSpec) + c.addEnvVars(launcherSpec, true) + c.addEnvVars(workerSpec, true) + + err := c.client.Create(c.ctx, mpiJob) + if err != nil { + if !apierrors.IsAlreadyExists(err) { + return err + } + } else { + c.log.Info("Created MPIJob", "name", mpiJob.Name, "namespace", mpiJob.Namespace) + } + + return nil +} + +// Non-MPI container workflow. In this model, a job is created for each NNF node which ensures +// that a pod is executed successfully (or the backOffLimit) is hit. Each container in this model +// runs the same image. +func (c *nnfUserContainer) createNonMPIJob() error { + // Use one job that we'll use as a base to create all jobs. Each NNF node will get its own job. 
+ job := &batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: c.workflow.Namespace, + }, + } + c.profile.Data.Spec.DeepCopyInto(&job.Spec.Template.Spec) + podSpec := &job.Spec.Template.Spec + + if err := c.applyLabels(&job.ObjectMeta); err != nil { + return err + } + + // Use the same labels as the job for the pods + job.Spec.Template.Labels = job.DeepCopy().Labels + + job.Spec.BackoffLimit = &c.profile.Data.RetryLimit + + podSpec.RestartPolicy = corev1.RestartPolicyNever + podSpec.Subdomain = c.workflow.Name // service name == workflow name + + c.applyTolerations(podSpec) + c.applyPermissions(podSpec, nil, false) + c.addNnfVolumes(podSpec) + c.addEnvVars(podSpec, false) + + // Using the base job, create a job for each nnfNode. Only the name, hostname, and node selector is different for each node + for _, nnfNode := range c.nnfNodes { + job.ObjectMeta.Name = c.workflow.Name + "-" + nnfNode + podSpec.Hostname = nnfNode + + // In our case, the target is only 1 node for the job, so a restartPolicy of Never + // is ok because any retry (i.e. new pod) will land on the same node. + podSpec.NodeSelector = map[string]string{"kubernetes.io/hostname": nnfNode} + + newJob := &batchv1.Job{} + job.DeepCopyInto(newJob) + + err := c.client.Create(c.ctx, newJob) + if err != nil { + if !apierrors.IsAlreadyExists(err) { + return err + } + } else { + c.log.Info("Created non-MPI job", "name", newJob.Name, "namespace", newJob.Namespace) + } + } + + return nil +} + +func (c *nnfUserContainer) applyLabels(job metav1.Object) error { + // Apply Job Labels/Owners + dwsv1alpha2.InheritParentLabels(job, c.workflow) + dwsv1alpha2.AddOwnerLabels(job, c.workflow) + dwsv1alpha2.AddWorkflowLabels(job, c.workflow) + + labels := job.GetLabels() + labels[nnfv1alpha1.ContainerLabel] = c.workflow.Name + labels[nnfv1alpha1.PinnedContainerProfileLabelName] = c.profile.GetName() + labels[nnfv1alpha1.PinnedContainerProfileLabelNameSpace] = c.profile.GetNamespace() + labels[nnfv1alpha1.DirectiveIndexLabel] = strconv.Itoa(c.index) + job.SetLabels(labels) + + if err := ctrl.SetControllerReference(c.workflow, job, c.scheme); err != nil { + return nnfv1alpha1.NewWorkflowErrorf("setting Job controller reference failed for '%s':", job.GetName()).WithError(err) + } + + return nil +} + +func (c *nnfUserContainer) applyTolerations(spec *corev1.PodSpec) { + spec.Tolerations = append(spec.Tolerations, corev1.Toleration{ + Effect: corev1.TaintEffectNoSchedule, + Key: "cray.nnf.node", + Operator: corev1.TolerationOpEqual, + Value: "true", + }) +} + +func (c *nnfUserContainer) addInitContainerPasswd(spec *corev1.PodSpec, image string) { + // This script creates an entry in /etc/passwd to map the user to the given UID/GID using an + // InitContainer. This is necessary for mpirun because it uses ssh to communicate with the + // worker nodes. ssh itself requires that the UID is tied to a username in the container. + // Since the launcher container is running as non-root, we need to make use of an InitContainer + // to edit /etc/passwd and copy it to a volume which can then be mounted into the non-root + // container to replace /etc/passwd. 
+ script := `# tie the UID/GID to the user +sed -i '/^$USER/d' /etc/passwd +echo "$USER:x:$UID:$GID::/home/$USER:/bin/sh" >> /etc/passwd +cp /etc/passwd /config/ +exit 0 +` + // Replace the user and UID/GID + script = strings.ReplaceAll(script, "$USER", c.username) + script = strings.ReplaceAll(script, "$UID", fmt.Sprintf("%d", c.uid)) + script = strings.ReplaceAll(script, "$GID", fmt.Sprintf("%d", c.gid)) + + spec.InitContainers = append(spec.InitContainers, corev1.Container{ + Name: "mpi-init-passwd", + Image: image, + Command: []string{ + "/bin/sh", + "-c", + script, + }, + VolumeMounts: []corev1.VolumeMount{ + {Name: "passwd", MountPath: "/config"}, + }, + }) +} + +func (c *nnfUserContainer) addInitContainerWorkerWait(spec *corev1.PodSpec, worker int) { + // Add an initContainer to ensure that a worker pod is up and discoverable via dns. This + // assumes nslookup is available in the container. The nnf-mfu image provides this. + script := `# use nslookup to contact workers +echo "contacting $HOST..." +for i in $(seq 1 100); do + sleep 1 + echo "attempt $i of 100..." + nslookup $HOST + if [ $? -eq 0 ]; then + echo "successfully contacted $HOST; done" + exit 0 + fi +done +echo "failed to contact $HOST" +exit 1 +` + // Build the worker's hostname.domain (e.g. nnf-container-example-worker-0.nnf-container-example-worker.default.svc) + // This name comes from mpi-operator. + host := strings.ToLower(fmt.Sprintf( + "%s-worker-%d.%s-worker.%s.svc", c.workflow.Name, worker, c.workflow.Name, c.workflow.Namespace)) + script = strings.ReplaceAll(script, "$HOST", host) + + spec.InitContainers = append(spec.InitContainers, corev1.Container{ + Name: fmt.Sprintf("mpi-wait-for-worker-%d", worker), + Image: spec.Containers[0].Image, + Command: []string{ + "/bin/sh", + "-c", + script, + }, + }) +} + +func (c *nnfUserContainer) applyPermissions(spec *corev1.PodSpec, mpiJobSpec *mpiv2beta1.MPIJobSpec, worker bool) { + + // Add volume for /etc/passwd to map user to UID/GID + spec.Volumes = append(spec.Volumes, corev1.Volume{ + Name: "passwd", + VolumeSource: corev1.VolumeSource{ + EmptyDir: &corev1.EmptyDirVolumeSource{}, + }, + }) + + if !worker { + // Add SecurityContext if necessary + if spec.SecurityContext == nil { + spec.SecurityContext = &corev1.PodSecurityContext{} + } + + // Add spec level security context to apply FSGroup to all containers. This keeps the + // volumes safe from root actions. + spec.SecurityContext.FSGroup = &c.gid + + // Set the ssh key path for non-root users. Defaults to root. + if mpiJobSpec != nil { + mpiJobSpec.SSHAuthMountPath = fmt.Sprintf("/home/%s/.ssh", c.username) + } + } + + // Add user permissions to each container. This needs to be done for each container because + // we do not want these permissions on the init container. + for idx := range spec.Containers { + container := &spec.Containers[idx] + + // Add an InitContainer to map the user to the provided uid/gid using /etc/passwd + c.addInitContainerPasswd(spec, container.Image) + + // Add a mount to copy the modified /etc/passwd to + container.VolumeMounts = append(container.VolumeMounts, corev1.VolumeMount{ + Name: "passwd", + MountPath: "/etc/passwd", + SubPath: "passwd", + }) + + // Add non-root permissions from the workflow's user/group ID for the launcher, but not + // the worker. The worker needs to run an ssh daemon, which requires root. Commands on + // the worker are executed via the launcher as the `mpiuser` and not root. 
+ if !worker { + if container.SecurityContext == nil { + container.SecurityContext = &corev1.SecurityContext{} + } + container.SecurityContext.RunAsUser = &c.uid + container.SecurityContext.RunAsGroup = &c.gid + nonRoot := true + container.SecurityContext.RunAsNonRoot = &nonRoot + su := false + container.SecurityContext.AllowPrivilegeEscalation = &su + } + } +} + +func (c *nnfUserContainer) addNnfVolumes(spec *corev1.PodSpec) { + for _, vol := range c.volumes { + // Volumes + hostPathType := corev1.HostPathDirectory + spec.Volumes = append(spec.Volumes, corev1.Volume{ + Name: vol.name, + VolumeSource: corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: vol.mountPath, + Type: &hostPathType, + }, + }, + }) + + // Add VolumeMounts and Volume environment variables for all containers + for idx := range spec.Containers { + container := &spec.Containers[idx] + + container.VolumeMounts = append(container.VolumeMounts, corev1.VolumeMount{ + Name: vol.name, + MountPath: vol.mountPath, + }) + + container.Env = append(container.Env, corev1.EnvVar{ + Name: vol.envVarName, + Value: vol.mountPath, + }) + } + } +} + +func (c *nnfUserContainer) addEnvVars(spec *corev1.PodSpec, mpi bool) { + // Add in non-volume environment variables for all containers + for idx := range spec.Containers { + container := &spec.Containers[idx] + + // Jobs/hostnames and services/subdomains are named differently based on mpi or not. For + // MPI, there are launcher/worker pods and the service is named after the worker. For + // non-MPI, the jobs are named after the rabbit node. + subdomain := "" + domain := c.workflow.Namespace + ".svc.cluster.local" + hosts := []string{} + + if mpi { + launcher := c.workflow.Name + "-launcher" + worker := c.workflow.Name + "-worker" + subdomain = worker + + hosts = append(hosts, launcher) + for i, _ := range c.nnfNodes { + hosts = append(hosts, fmt.Sprintf("%s-%d", worker, i)) + } + } else { + subdomain = spec.Subdomain + hosts = append(hosts, c.nnfNodes...) + } + + container.Env = append(container.Env, + corev1.EnvVar{Name: "NNF_CONTAINER_SUBDOMAIN", Value: subdomain}, + corev1.EnvVar{Name: "NNF_CONTAINER_DOMAIN", Value: domain}, + corev1.EnvVar{Name: "NNF_CONTAINER_HOSTNAMES", Value: strings.Join(hosts, " ")}) + } +} diff --git a/controllers/nnf_workflow_controller_helpers.go b/controllers/nnf_workflow_controller_helpers.go index 97ed43777..d6aa45535 100644 --- a/controllers/nnf_workflow_controller_helpers.go +++ b/controllers/nnf_workflow_controller_helpers.go @@ -33,7 +33,6 @@ import ( nnfv1alpha1 "github.com/NearNodeFlash/nnf-sos/api/v1alpha1" "github.com/go-logr/logr" - mpicommonv1 "github.com/kubeflow/common/pkg/apis/common/v1" mpiv2beta1 "github.com/kubeflow/mpi-operator/pkg/apis/kubeflow/v2beta1" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" @@ -55,16 +54,6 @@ type result struct { deleteStatus *dwsv1alpha2.DeleteStatus } -// This struct contains all the necessary information for mounting container storages -type nnfContainerVolume struct { - name string - command string - directiveName string - directiveIndex int - mountPath string - envVarName string -} - // When workflow stages cannot advance they return a Requeue result with a particular reason. 
func Requeue(reason string) *result { return &result{Result: ctrl.Result{}, reason: reason} @@ -1050,15 +1039,13 @@ func (r *NnfWorkflowReconciler) removeAllPersistentStorageReferences(ctx context return nil } -func (r *NnfWorkflowReconciler) containerHandler(ctx context.Context, workflow *dwsv1alpha2.Workflow, dwArgs map[string]string, index int, log logr.Logger) (*result, error) { +func (r *NnfWorkflowReconciler) userContainerHandler(ctx context.Context, workflow *dwsv1alpha2.Workflow, dwArgs map[string]string, index int, log logr.Logger) (*result, error) { profile, err := getContainerProfile(ctx, r.Client, workflow, index) if err != nil { return nil, err } mpiJob := profile.Data.MPISpec != nil - username := nnfv1alpha1.ContainerUser - // Get the targeted NNF nodes for the container jobs nnfNodes, err := r.getNnfNodesFromComputes(ctx, workflow) if err != nil || len(nnfNodes) <= 0 { @@ -1074,394 +1061,32 @@ func (r *NnfWorkflowReconciler) containerHandler(ctx context.Context, workflow * return result, nil } - applyLabels := func(job metav1.Object) error { - - // Apply Job Labels/Owners - dwsv1alpha2.InheritParentLabels(job, workflow) - dwsv1alpha2.AddOwnerLabels(job, workflow) - dwsv1alpha2.AddWorkflowLabels(job, workflow) - - labels := job.GetLabels() - labels[nnfv1alpha1.ContainerLabel] = workflow.Name - labels[nnfv1alpha1.PinnedContainerProfileLabelName] = profile.GetName() - labels[nnfv1alpha1.PinnedContainerProfileLabelNameSpace] = profile.GetNamespace() - labels[nnfv1alpha1.DirectiveIndexLabel] = strconv.Itoa(index) - job.SetLabels(labels) - - if err := ctrl.SetControllerReference(workflow, job, r.Scheme); err != nil { - return nnfv1alpha1.NewWorkflowErrorf("setting Job controller reference failed for '%s':", job.GetName()).WithError(err) - } - - return nil - } - - applyTolerations := func(spec *corev1.PodSpec) { - spec.Tolerations = append(spec.Tolerations, corev1.Toleration{ - Effect: corev1.TaintEffectNoSchedule, - Key: "cray.nnf.node", - Operator: corev1.TolerationOpEqual, - Value: "true", - }) - } - - addInitContainerPasswd := func(spec *corev1.PodSpec, user string, uid, gid int64, image string) { - // This script creates an entry in /etc/passwd to map the user to the given UID/GID using an - // InitContainer. This is necessary for mpirun because it uses ssh to communicate with the - // worker nodes. ssh itself requires that the UID is tied to a username in the container. - // Since the launcher container is running as non-root, we need to make use of an InitContainer - // to edit /etc/passwd and copy it to a volume which can then be mounted into the non-root - // container to replace /etc/passwd. - script := `# tie the UID/GID to the user -sed -i '/^$USER/d' /etc/passwd -echo "$USER:x:$UID:$GID::/home/$USER:/bin/sh" >> /etc/passwd -cp /etc/passwd /config/ -exit 0 -` - // Replace the user and UID/GID - script = strings.ReplaceAll(script, "$USER", user) - script = strings.ReplaceAll(script, "$UID", fmt.Sprintf("%d", uid)) - script = strings.ReplaceAll(script, "$GID", fmt.Sprintf("%d", gid)) - - spec.InitContainers = append(spec.InitContainers, corev1.Container{ - Name: "mpi-init-passwd", - Image: image, - Command: []string{ - "/bin/sh", - "-c", - script, - }, - VolumeMounts: []corev1.VolumeMount{ - {Name: "passwd", MountPath: "/config"}, - }, - }) - } - - addInitContainerWorkerWait := func(spec *corev1.PodSpec, worker int) { - // Add an initContainer to ensure that a worker pod is up and discoverable via dns. This - // assumes nslookup is available in the container. 
The nnf-mfu image provides this. - script := `# use nslookup to contact workers -echo "contacting $HOST..." -for i in $(seq 1 100); do - sleep 1 - echo "attempt $i of 100..." - nslookup $HOST - if [ $? -eq 0 ]; then - echo "successfully contacted $HOST; done" - exit 0 - fi -done -echo "failed to contact $HOST" -exit 1 -` - // Build the worker's hostname.domain (e.g. nnf-container-example-worker-0.nnf-container-example-worker.default.svc) - // This name comes from mpi-operator. - host := strings.ToLower(fmt.Sprintf( - "%s-worker-%d.%s-worker.%s.svc", workflow.Name, worker, workflow.Name, workflow.Namespace)) - script = strings.ReplaceAll(script, "$HOST", host) - - spec.InitContainers = append(spec.InitContainers, corev1.Container{ - Name: fmt.Sprintf("mpi-wait-for-worker-%d", worker), - Image: spec.Containers[0].Image, - Command: []string{ - "/bin/sh", - "-c", - script, - }, - }) - } - - applyPermissions := func(spec *corev1.PodSpec, mpiJobSpec *mpiv2beta1.MPIJobSpec, user string, worker bool) { - uid := int64(workflow.Spec.UserID) - gid := int64(workflow.Spec.GroupID) - - // Add volume for /etc/passwd to map user to UID/GID - spec.Volumes = append(spec.Volumes, corev1.Volume{ - Name: "passwd", - VolumeSource: corev1.VolumeSource{ - EmptyDir: &corev1.EmptyDirVolumeSource{}, - }, - }) - - if !worker { - // Add SecurityContext if necessary - if spec.SecurityContext == nil { - spec.SecurityContext = &corev1.PodSecurityContext{} - } - - // Add spec level security context to apply FSGroup to all containers. This keeps the - // volumes safe from root actions. - spec.SecurityContext.FSGroup = &gid - - // Set the ssh key path for non-root users. Defaults to root. - if mpiJobSpec != nil { - mpiJobSpec.SSHAuthMountPath = fmt.Sprintf("/home/%s/.ssh", username) - } - } - - // Add user permissions to each container. This needs to be done for each container because - // we do not want these permissions on the init container. - for idx := range spec.Containers { - container := &spec.Containers[idx] - - // Add an InitContainer to map the user to the provided uid/gid using /etc/passwd - addInitContainerPasswd(spec, user, uid, gid, container.Image) - - // Add a mount to copy the modified /etc/passwd to - container.VolumeMounts = append(container.VolumeMounts, corev1.VolumeMount{ - Name: "passwd", - MountPath: "/etc/passwd", - SubPath: "passwd", - }) - - // Add non-root permissions from the workflow's user/group ID for the launcher, but not - // the worker. The worker needs to run an ssh daemon, which requires root. Commands on - // the worker are executed via the launcher as the `mpiuser` and not root. 
- if !worker { - if container.SecurityContext == nil { - container.SecurityContext = &corev1.SecurityContext{} - } - container.SecurityContext.RunAsUser = &uid - container.SecurityContext.RunAsGroup = &gid - nonRoot := true - container.SecurityContext.RunAsNonRoot = &nonRoot - su := false - container.SecurityContext.AllowPrivilegeEscalation = &su - } - } - } - - addNNFVolumes := func(spec *corev1.PodSpec) { - for _, vol := range volumes { - // Volumes - hostPathType := corev1.HostPathDirectory - spec.Volumes = append(spec.Volumes, corev1.Volume{ - Name: vol.name, - VolumeSource: corev1.VolumeSource{ - HostPath: &corev1.HostPathVolumeSource{ - Path: vol.mountPath, - Type: &hostPathType, - }, - }, - }) - - // Add VolumeMounts and Volume environment variables for all containers - for idx := range spec.Containers { - container := &spec.Containers[idx] - - container.VolumeMounts = append(container.VolumeMounts, corev1.VolumeMount{ - Name: vol.name, - MountPath: vol.mountPath, - }) - - container.Env = append(container.Env, corev1.EnvVar{ - Name: vol.envVarName, - Value: vol.mountPath, - }) - } - } - } - - addEnvVars := func(spec *corev1.PodSpec, mpi bool) { - // Add in non-volume environment variables for all containers - for idx := range spec.Containers { - container := &spec.Containers[idx] - - // Jobs/hostnames and services/subdomains are named differently based on mpi or not. For - // MPI, there are launcher/worker pods and the service is named after the worker. For - // non-MPI, the jobs are named after the rabbit node. - subdomain := "" - domain := workflow.Namespace + ".svc.cluster.local" - hosts := []string{} - - if mpi { - launcher := workflow.Name + "-launcher" - worker := workflow.Name + "-worker" - subdomain = worker - - hosts = append(hosts, launcher) - for i, _ := range nnfNodes { - hosts = append(hosts, fmt.Sprintf("%s-%d", worker, i)) - } - } else { - subdomain = spec.Subdomain - hosts = append(hosts, nnfNodes...) - } - - container.Env = append(container.Env, - corev1.EnvVar{Name: "NNF_CONTAINER_SUBDOMAIN", Value: subdomain}, - corev1.EnvVar{Name: "NNF_CONTAINER_DOMAIN", Value: domain}, - corev1.EnvVar{Name: "NNF_CONTAINER_HOSTNAMES", Value: strings.Join(hosts, " ")}) - } - } - - // MPI container workflow. In this model, we use mpi-operator to create an MPIJob, which creates - // a job for the launcher (to run mpirun) and a replicaset for the worker pods. The worker nodes - // run an ssh server to listen for mpirun operations from the launcher pod. 
- createMPIJob := func() error { - mpiJob := &mpiv2beta1.MPIJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: workflow.Name, - Namespace: workflow.Namespace, - }, - } - profile.Data.MPISpec.DeepCopyInto(&mpiJob.Spec) - username = nnfv1alpha1.ContainerMPIUser - - if err := applyLabels(&mpiJob.ObjectMeta); err != nil { - return err - } - - // Use the profile's backoff limit if not set - if mpiJob.Spec.RunPolicy.BackoffLimit == nil { - mpiJob.Spec.RunPolicy.BackoffLimit = &profile.Data.RetryLimit - } - - // MPIJobs have two pod specs: one for the launcher and one for the workers - launcher := mpiJob.Spec.MPIReplicaSpecs[mpiv2beta1.MPIReplicaTypeLauncher] - launcherSpec := &launcher.Template.Spec - worker := mpiJob.Spec.MPIReplicaSpecs[mpiv2beta1.MPIReplicaTypeWorker] - workerSpec := &worker.Template.Spec - - // Keep failed pods around for log inspection - launcher.RestartPolicy = mpicommonv1.RestartPolicyNever - worker.RestartPolicy = mpicommonv1.RestartPolicyNever - - // Add NNF node tolerations - applyTolerations(launcherSpec) - applyTolerations(workerSpec) - - // Run the launcher on the first NNF node - launcherSpec.NodeSelector = map[string]string{"kubernetes.io/hostname": nnfNodes[0]} - - // Use initContainers to ensure the workers are up and discoverable before running the launcher command - for i := range nnfNodes { - addInitContainerWorkerWait(launcherSpec, i) - } - - // Target all the NNF nodes for the workers - replicas := int32(len(nnfNodes)) - worker.Replicas = &replicas - workerSpec.Affinity = &corev1.Affinity{ - // Ensure we run a worker on every NNF node - NodeAffinity: &corev1.NodeAffinity{ - RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{ - NodeSelectorTerms: []corev1.NodeSelectorTerm{{ - MatchExpressions: []corev1.NodeSelectorRequirement{{ - Key: "kubernetes.io/hostname", - Operator: corev1.NodeSelectorOpIn, - Values: nnfNodes, - }}, - }}, - }, - }, - // But make sure it's only 1 per node - PodAntiAffinity: &corev1.PodAntiAffinity{ - RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{{ - TopologyKey: "kubernetes.io/hostname", - LabelSelector: &metav1.LabelSelector{ - MatchExpressions: []metav1.LabelSelectorRequirement{ - { - Key: "training.kubeflow.org/job-name", - Operator: metav1.LabelSelectorOpIn, - Values: []string{workflow.Name}, - }, - { - Key: "training.kubeflow.org/job-role", - Operator: metav1.LabelSelectorOpIn, - Values: []string{"worker"}, - }, - }, - }}, - }, - }, - } - - // Set the appropriate permissions (UID/GID) from the workflow - applyPermissions(launcherSpec, &mpiJob.Spec, username, false) - applyPermissions(workerSpec, &mpiJob.Spec, username, true) - - addNNFVolumes(launcherSpec) - addNNFVolumes(workerSpec) - addEnvVars(launcherSpec, true) - addEnvVars(workerSpec, true) - - err = r.Create(ctx, mpiJob) - if err != nil { - if !apierrors.IsAlreadyExists(err) { - return err - } - } else { - log.Info("Created MPIJob", "name", mpiJob.Name, "namespace", mpiJob.Namespace) - } - - return nil - } - - // Non-MPI container workflow. In this model, a job is created for each NNF node which ensures - // that a pod is executed successfully (or the backOffLimit) is hit. Each container in this model - // runs the same image. - createNonMPIJob := func() error { - // Use one job that we'll use as a base to create all jobs. Each NNF node will get its own job. 
- job := &batchv1.Job{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: workflow.Namespace, - }, - } - profile.Data.Spec.DeepCopyInto(&job.Spec.Template.Spec) - podSpec := &job.Spec.Template.Spec - - if err := applyLabels(&job.ObjectMeta); err != nil { - return err - } - - // Use the same labels as the job for the pods - job.Spec.Template.Labels = job.DeepCopy().Labels - - job.Spec.BackoffLimit = &profile.Data.RetryLimit - - podSpec.RestartPolicy = corev1.RestartPolicyNever - podSpec.Subdomain = workflow.Name // service name == workflow name - - applyTolerations(podSpec) - applyPermissions(podSpec, nil, username, false) - addNNFVolumes(podSpec) - addEnvVars(podSpec, false) - - // Using the base job, create a job for each nnfNode. Only the name, hostname, and node selector is different for each node - for _, nnfNode := range nnfNodes { - job.ObjectMeta.Name = workflow.Name + "-" + nnfNode - podSpec.Hostname = nnfNode - - // In our case, the target is only 1 node for the job, so a restartPolicy of Never - // is ok because any retry (i.e. new pod) will land on the same node. - podSpec.NodeSelector = map[string]string{"kubernetes.io/hostname": nnfNode} - - newJob := &batchv1.Job{} - job.DeepCopyInto(newJob) - - err = r.Create(ctx, newJob) - if err != nil { - if !apierrors.IsAlreadyExists(err) { - return err - } - } else { - log.Info("Created non-MPI job", "name", newJob.Name, "namespace", newJob.Namespace) - } - } - - return nil + c := nnfUserContainer{ + workflow: workflow, + profile: profile, + nnfNodes: nnfNodes, + volumes: volumes, + username: nnfv1alpha1.ContainerUser, + uid: int64(workflow.Spec.UserID), + gid: int64(workflow.Spec.GroupID), + index: index, + client: r.Client, + log: r.Log, + scheme: r.Scheme, + ctx: ctx, } if mpiJob { - if err := createMPIJob(); err != nil { + if err := c.createMPIJob(); err != nil { return nil, nnfv1alpha1.NewWorkflowError("Unable to create/update MPIJob").WithFatal().WithError(err) } } else { + // For non-MPI jobs, we need to create a service ourselves if err := r.createContainerService(ctx, workflow); err != nil { return nil, nnfv1alpha1.NewWorkflowError("Unable to create/update Container Service").WithFatal().WithError(err) } - if err := createNonMPIJob(); err != nil { + if err := c.createNonMPIJob(); err != nil { return nil, nnfv1alpha1.NewWorkflowError("Unable to create/update Container Jobs").WithFatal().WithError(err) } } From 691d86d13432af42977900140105e195293b8f2b Mon Sep 17 00:00:00 2001 From: Dean Roehrich Date: Wed, 21 Jun 2023 12:26:41 -0500 Subject: [PATCH 04/19] Allow NnfContainerProfiles to be restricted to a UserID/GroupID (#208) Add an optional UserID/GroupID field to NnfContainerProfileData. If either of these is specified, then the chosen container profile must match the user ID or group ID specified in the workflow resource that references the profile. Add tests. 
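For illustration, the shape of the new check when the profile is looked up is roughly the following (a sketch only; the exact code lives in the nnfcontainerprofile_helpers.go hunk, which also produces the "is restricted to" error that the tests below look for):

    // Hypothetical placement in findContainerProfile(); field names match the new API fields.
    if profile.Data.UserID != nil && *profile.Data.UserID != workflow.Spec.UserID {
        return nil, fmt.Errorf("container profile '%s' is restricted to user ID %d", profile.Name, *profile.Data.UserID)
    }
    if profile.Data.GroupID != nil && *profile.Data.GroupID != workflow.Spec.GroupID {
        return nil, fmt.Errorf("container profile '%s' is restricted to group ID %d", profile.Name, *profile.Data.GroupID)
    }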
Signed-off-by: Dean Roehrich --- api/v1alpha1/nnfcontainerprofile_types.go | 10 ++ api/v1alpha1/zz_generated.deepcopy.go | 10 ++ ...nnf.cray.hpe.com_nnfcontainerprofiles.yaml | 12 ++ controllers/nnf_workflow_controller_test.go | 111 ++++++++++++------ controllers/nnfcontainerprofile_helpers.go | 10 ++ 5 files changed, 116 insertions(+), 37 deletions(-) diff --git a/api/v1alpha1/nnfcontainerprofile_types.go b/api/v1alpha1/nnfcontainerprofile_types.go index 7803f792b..4fc7d1df1 100644 --- a/api/v1alpha1/nnfcontainerprofile_types.go +++ b/api/v1alpha1/nnfcontainerprofile_types.go @@ -52,6 +52,16 @@ type NnfContainerProfileData struct { // +kubebuilder:default:=6 RetryLimit int32 `json:"retryLimit"` + // UserID specifies the user ID that is allowed to use this profile. If this + // is specified, only Workflows that have a matching user ID can select + // this profile. + UserID *uint32 `json:"userID,omitempty"` + + // GroupID specifies the group ID that is allowed to use this profile. If this + // is specified, only Workflows that have a matching group ID can select + // this profile. + GroupID *uint32 `json:"groupID,omitempty"` + // Spec to define the containers created from container profile. This is used for non-MPI // containers. // Either this or MPISpec must be provided, but not both. diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index bb75f7897..ee905d733 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -207,6 +207,16 @@ func (in *NnfContainerProfileData) DeepCopyInto(out *NnfContainerProfileData) { *out = make([]NnfContainerProfileStorage, len(*in)) copy(*out, *in) } + if in.UserID != nil { + in, out := &in.UserID, &out.UserID + *out = new(uint32) + **out = **in + } + if in.GroupID != nil { + in, out := &in.GroupID, &out.GroupID + *out = new(uint32) + **out = **in + } if in.Spec != nil { in, out := &in.Spec, &out.Spec *out = new(corev1.PodSpec) diff --git a/config/crd/bases/nnf.cray.hpe.com_nnfcontainerprofiles.yaml b/config/crd/bases/nnf.cray.hpe.com_nnfcontainerprofiles.yaml index 2182ba5dd..ea7cf2880 100644 --- a/config/crd/bases/nnf.cray.hpe.com_nnfcontainerprofiles.yaml +++ b/config/crd/bases/nnf.cray.hpe.com_nnfcontainerprofiles.yaml @@ -28,6 +28,12 @@ spec: data: description: NnfContainerProfileSpec defines the desired state of NnfContainerProfile properties: + groupID: + description: GroupID specifies the group ID that is allowed to use + this profile. If this is specified, only Workflows that have a matching + group ID can select this profile. + format: int32 + type: integer mpiSpec: description: MPIJobSpec to define the containers created from container profile. This is used for MPI containers via MPIJobs. See mpi-operator @@ -15735,6 +15741,12 @@ spec: - optional type: object type: array + userID: + description: UserID specifies the user ID that is allowed to use this + profile. If this is specified, only Workflows that have a matching + user ID can select this profile. 
+ format: int32 + type: integer required: - retryLimit type: object diff --git a/controllers/nnf_workflow_controller_test.go b/controllers/nnf_workflow_controller_test.go index c801adffd..ed502fc10 100644 --- a/controllers/nnf_workflow_controller_test.go +++ b/controllers/nnf_workflow_controller_test.go @@ -47,16 +47,26 @@ import ( // BeforeEach - initialize the workflow // AfterEach - destroy the workflow +var ( + baseWorkflowUserID uint32 = 1042 + baseWorkflowGroupID uint32 = 1043 + + altWorkflowUserID uint32 = 1044 + altWorkflowGroupID uint32 = 1045 +) + var _ = Describe("NNF Workflow Unit Tests", func() { var ( - key types.NamespacedName - workflow *dwsv1alpha2.Workflow - storageProfile *nnfv1alpha1.NnfStorageProfile + key types.NamespacedName + workflow *dwsv1alpha2.Workflow + storageProfile *nnfv1alpha1.NnfStorageProfile + persistentStorageName string ) BeforeEach(func() { wfid := uuid.NewString()[0:8] + persistentStorageName = "persistent-" + uuid.NewString()[:8] key = types.NamespacedName{ Name: "nnf-workflow-" + wfid, @@ -72,6 +82,8 @@ var _ = Describe("NNF Workflow Unit Tests", func() { DesiredState: dwsv1alpha2.StateProposal, JobID: intstr.FromString("job 1244"), WLMID: uuid.NewString(), + UserID: baseWorkflowUserID, + GroupID: baseWorkflowGroupID, }, } @@ -155,23 +167,12 @@ var _ = Describe("NNF Workflow Unit Tests", func() { FileSystemType: "lustre", AllocationSets: []nnfv1alpha1.NnfStorageAllocationSetSpec{}, }, - Status: nnfv1alpha1.NnfStorageStatus{ - MgsNode: "", - AllocationSets: []nnfv1alpha1.NnfStorageAllocationSetStatus{{ - Status: "Ready", - Health: "OK", - Error: "", - AllocationCount: 0, - }}, - }, } Expect(k8sClient.Create(context.TODO(), nnfStorage)).To(Succeed()) } deletePersistentStorageInstance := func(name string) { - By("Fabricate the nnfStorage as if the persistent storage instance exists") - - // Delete persistent storage instance + By("delete persistent storage instance") psi := &dwsv1alpha2.PersistentStorageInstance{ ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: workflow.Namespace}, } @@ -490,8 +491,6 @@ var _ = Describe("NNF Workflow Unit Tests", func() { }) When("using $DW_PERSISTENT_ references", func() { - persistentStorageName := "my-persistent-storage" - BeforeEach(func() { workflow.Spec.DWDirectives = []string{ fmt.Sprintf("#DW persistentdw name=%s", persistentStorageName), @@ -1030,7 +1029,6 @@ var _ = Describe("NNF Workflow Unit Tests", func() { When("Using container directives", func() { var ns *corev1.Namespace - var persistentStorageName string var createPersistent bool var containerProfile *nnfv1alpha1.NnfContainerProfile @@ -1038,7 +1036,6 @@ var _ = Describe("NNF Workflow Unit Tests", func() { var createContainerProfile bool BeforeEach(func() { - persistentStorageName = "container-persistent" createPersistent = true containerProfile = nil containerProfileStorages = nil @@ -1068,6 +1065,7 @@ var _ = Describe("NNF Workflow Unit Tests", func() { AfterEach(func() { if containerProfile != nil { + By("delete NnfContainerProfile") Expect(k8sClient.Delete(context.TODO(), containerProfile)).Should(Succeed()) Eventually(func() error { return k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(containerProfile), containerProfile) @@ -1079,29 +1077,68 @@ var _ = Describe("NNF Workflow Unit Tests", func() { } }) - Context("when a container workflow has everything in order", func() { - // This means that: - // - A persistent instance is available prior to the container workflow - // - The provided storage arguments are included in the 
preceding directives - // - The supplied container profile exists and the supplied storage arguments are in the profiles list of required storages + Context("with container restrictions", func() { + BeforeEach(func() { + createContainerProfile = false // We'll make a custom version. + }) + + // buildRestrictedContainerProfile will create a NnfContainerProfile that + // is restricted to a specific user ID or group ID. + buildRestrictedContainerProfile := func(userID *uint32, groupID *uint32) { + By("Create a restricted NnfContainerProfile") + tempProfile := basicNnfContainerProfile("restricted-"+uuid.NewString()[:8], containerProfileStorages) + if userID != nil { + tempProfile.Data.UserID = userID + } + if groupID != nil { + tempProfile.Data.GroupID = groupID + } - It("should go to Proposal Ready with required storages present", func() { + containerProfile = createNnfContainerProfile(tempProfile, true) + } + + buildWorkflowWithCorrectDirectives := func() { + By("creating the workflow") workflow.Spec.DWDirectives = []string{ "#DW jobdw name=container-storage type=gfs2 capacity=1GB", "#DW persistentdw name=" + persistentStorageName, fmt.Sprintf("#DW container name=container profile=%s "+ "DW_JOB_foo_local_storage=container-storage "+ - "DW_PERSISTENT_foo_persistent_storage=container-persistent", - containerProfile.Name), + "DW_PERSISTENT_foo_persistent_storage=%s", + containerProfile.Name, persistentStorageName), } Expect(k8sClient.Create(context.TODO(), workflow)).Should(Succeed()) + } - Eventually(func(g Gomega) bool { - g.Expect(k8sClient.Get(context.TODO(), key, workflow)).To(Succeed()) - return workflow.Status.Ready && workflow.Status.State == dwsv1alpha2.StateProposal - }).Should(BeTrue(), "reach desired Proposal state") - Expect(verifyPinnedContainerProfile(context.TODO(), k8sClient, workflow, 2)).To(Succeed()) - }) + DescribeTable("should go to Proposal Ready when everything is in order", + func(containerUserID *uint32, containerGroupID *uint32) { + buildRestrictedContainerProfile(containerUserID, containerGroupID) + buildWorkflowWithCorrectDirectives() + Eventually(func(g Gomega) bool { + g.Expect(k8sClient.Get(context.TODO(), key, workflow)).To(Succeed()) + return workflow.Status.Ready && workflow.Status.State == dwsv1alpha2.StateProposal + }).Should(BeTrue(), "reach desired Proposal state") + Expect(verifyPinnedContainerProfile(context.TODO(), k8sClient, workflow, 2)).To(Succeed()) + }, + Entry("when not restricted to a user ID or group ID", nil, nil), + Entry("when restricted to a matching user ID", &baseWorkflowUserID, nil), + Entry("when restricted to a matching group ID", nil, &baseWorkflowGroupID), + Entry("when restricted to a matching user ID and group ID", &baseWorkflowUserID, &baseWorkflowGroupID), + ) + + DescribeTable("should not go to Proposal Ready when profile restriction is not satisfied", + func(containerUserID *uint32, containerGroupID *uint32) { + buildRestrictedContainerProfile(containerUserID, containerGroupID) + buildWorkflowWithCorrectDirectives() + Eventually(func(g Gomega) bool { + g.Expect(k8sClient.Get(context.TODO(), key, workflow)).To(Succeed()) + return workflow.Status.Status == dwsv1alpha2.StatusError && strings.Contains(workflow.Status.Message, "container profile") && strings.Contains(workflow.Status.Message, "is restricted to") + }).Should(BeTrue(), "does not reach desired Proposal state") + }, + Entry("when restricted to non-matching user ID", &altWorkflowUserID, nil), + Entry("when restricted to non-matching group ID", nil, &altWorkflowGroupID), + 
Entry("when restricted to non-matching user ID and group ID", &altWorkflowUserID, &altWorkflowGroupID), + ) }) Context("when an optional storage in the container profile is not present in the container arguments", func() { @@ -1136,8 +1173,8 @@ var _ = Describe("NNF Workflow Unit Tests", func() { "#DW jobdw name=container-storage type=gfs2 capacity=1GB", fmt.Sprintf("#DW container name=container profile=%s "+ "DW_JOB_foo_local_storage=container-storage "+ - "DW_PERSISTENT_foo_persistent_storage=container-persistent", - containerProfile.Name), + "DW_PERSISTENT_foo_persistent_storage=%s", + containerProfile.Name, persistentStorageName), } Expect(k8sClient.Create(context.TODO(), workflow)).Should(Succeed()) @@ -1161,8 +1198,8 @@ var _ = Describe("NNF Workflow Unit Tests", func() { "#DW persistentdw name=" + persistentStorageName, fmt.Sprintf("#DW container name=container profile=%s "+ // local storage is missing - "DW_PERSISTENT_foo_persistent_storage=container-persistent", - containerProfile.Name), + "DW_PERSISTENT_foo_persistent_storage=%s", + containerProfile.Name, persistentStorageName), } Expect(k8sClient.Create(context.TODO(), workflow)).Should(Succeed()) diff --git a/controllers/nnfcontainerprofile_helpers.go b/controllers/nnfcontainerprofile_helpers.go index 2caf0bcb9..6a0a9c6a5 100644 --- a/controllers/nnfcontainerprofile_helpers.go +++ b/controllers/nnfcontainerprofile_helpers.go @@ -94,6 +94,16 @@ func findContainerProfile(ctx context.Context, clnt client.Client, workflow *dws return nil, nnfv1alpha1.NewWorkflowErrorf("expected container profile that is not pinned '%s', but found one that is pinned", indexedResourceName(workflow, index)).WithFatal() } + // Determine whether the profile is restricted to a UserID/GroupID. + restrictedMsg := "container profile '%s' is restricted to %s %d" + if profile.Data.UserID != nil && *profile.Data.UserID != workflow.Spec.UserID { + return nil, fmt.Errorf(restrictedMsg, profile.Name, "UserID", *profile.Data.UserID) + } + if profile.Data.GroupID != nil && *profile.Data.GroupID != workflow.Spec.GroupID { + return nil, fmt.Errorf(restrictedMsg, profile.Name, "GroupID", *profile.Data.GroupID) + + } + return profile, nil } From afdc192660598c136147f33bb0977991889f2c25 Mon Sep 17 00:00:00 2001 From: Dean Roehrich Date: Mon, 26 Jun 2023 10:24:21 -0500 Subject: [PATCH 05/19] Tighten the tests that verify the profile webhooks are hooked up. (#210) The NnfContainerProfile test was targeting a failure in the kube-apiserver, by tripping a kubebuilder:validation for RetryLimit. That has been changed to something that will pass the kube-apiserver, and then will proceed on to fail in the webhook. Now we can distinguish when Create() errors are responses from the webhook, versus when they are errors about the webhook not being installed. Signed-off-by: Dean Roehrich --- controllers/nnf_workflow_controller_test.go | 11 ++++++----- controllers/nnfcontainerprofile_test.go | 11 ++++++++--- controllers/nnfstorageprofile_test.go | 11 ++++++++--- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/controllers/nnf_workflow_controller_test.go b/controllers/nnf_workflow_controller_test.go index ed502fc10..0e98915d2 100644 --- a/controllers/nnf_workflow_controller_test.go +++ b/controllers/nnf_workflow_controller_test.go @@ -1238,9 +1238,10 @@ var _ = Describe("NnfContainerProfile Webhook test", func() { // The nnfcontainer_webhook_test.go covers testing of the webhook. 
// This spec exists only to verify that the webhook is also running for // the controller tests. - It("Fails to create an invalid profile, to verify that the webhook is installed", func() { - profileInvalid := basicNnfContainerProfile("an-invalid-profile", nil) - profileInvalid.Data.RetryLimit = -100 + It("fails to create an invalid profile to verify that the webhook is installed", func() { + profileInvalid := basicNnfContainerProfile("invalid-"+uuid.NewString()[:8], nil) + profileInvalid.Data.Spec = nil + profileInvalid.Data.MPISpec = nil Expect(createNnfContainerProfile(profileInvalid, false)).To(BeNil()) }) }) @@ -1249,8 +1250,8 @@ var _ = Describe("NnfStorageProfile Webhook test", func() { // The nnfstorageprofile_webhook_test.go covers testing of the webhook. // This spec exists only to verify that the webhook is also running for // the controller tests. - It("Fails to create an invalid profile, to verify that the webhook is installed", func() { - profileInvalid := basicNnfStorageProfile("an-invalid-profile") + It("fails to create an invalid profile to verify that the webhook is installed", func() { + profileInvalid := basicNnfStorageProfile("invalid-" + uuid.NewString()[:8]) profileInvalid.Data.LustreStorage.ExternalMGS = "10.0.0.1@tcp" profileInvalid.Data.LustreStorage.CombinedMGTMDT = true Expect(createNnfStorageProfile(profileInvalid, false)).To(BeNil()) diff --git a/controllers/nnfcontainerprofile_test.go b/controllers/nnfcontainerprofile_test.go index 0d647039b..1fcf04658 100644 --- a/controllers/nnfcontainerprofile_test.go +++ b/controllers/nnfcontainerprofile_test.go @@ -26,6 +26,7 @@ import ( "github.com/google/uuid" corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" @@ -34,22 +35,26 @@ import ( ) // createNnfContainerProfile creates the given profile in the "default" namespace. +// When expectSuccess=false, we expect to find that it was failed by the webhook. func createNnfContainerProfile(containerProfile *nnfv1alpha1.NnfContainerProfile, expectSuccess bool) *nnfv1alpha1.NnfContainerProfile { // Place NnfContainerProfiles in "default" for the test environment. 
containerProfile.ObjectMeta.Namespace = corev1.NamespaceDefault profKey := client.ObjectKeyFromObject(containerProfile) profExpected := &nnfv1alpha1.NnfContainerProfile{} - Expect(k8sClient.Get(context.TODO(), profKey, profExpected)).ToNot(Succeed()) + err := k8sClient.Get(context.TODO(), profKey, profExpected) + Expect(err).ToNot(BeNil()) + Expect(apierrors.IsNotFound(err)).To(BeTrue()) if expectSuccess { Expect(k8sClient.Create(context.TODO(), containerProfile)).To(Succeed(), "create nnfcontainerprofile") - //err := k8sClient.Create(context.TODO(), containerProfile) Eventually(func(g Gomega) { g.Expect(k8sClient.Get(context.TODO(), profKey, profExpected)).To(Succeed()) }, "3s", "1s").Should(Succeed(), "wait for create of NnfContainerProfile") } else { - Expect(k8sClient.Create(context.TODO(), containerProfile)).ToNot(Succeed(), "expect to fail to create nnfcontainerprofile") + err = k8sClient.Create(context.TODO(), containerProfile) + Expect(err).ToNot(BeNil()) + Expect(err.Error()).To(MatchRegexp("webhook .* denied the request")) containerProfile = nil } diff --git a/controllers/nnfstorageprofile_test.go b/controllers/nnfstorageprofile_test.go index 07e20d20d..19e5bae3b 100644 --- a/controllers/nnfstorageprofile_test.go +++ b/controllers/nnfstorageprofile_test.go @@ -7,6 +7,7 @@ import ( "github.com/google/uuid" corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" @@ -14,22 +15,26 @@ import ( ) // createNnfStorageProfile creates the given profile in the "default" namespace. +// When expectSuccess=false, we expect to find that it was failed by the webhook. func createNnfStorageProfile(storageProfile *nnfv1alpha1.NnfStorageProfile, expectSuccess bool) *nnfv1alpha1.NnfStorageProfile { // Place NnfStorageProfiles in "default" for the test environment. storageProfile.ObjectMeta.Namespace = corev1.NamespaceDefault profKey := client.ObjectKeyFromObject(storageProfile) profExpected := &nnfv1alpha1.NnfStorageProfile{} - Expect(k8sClient.Get(context.TODO(), profKey, profExpected)).ToNot(Succeed()) + err := k8sClient.Get(context.TODO(), profKey, profExpected) + Expect(err).ToNot(BeNil()) + Expect(apierrors.IsNotFound(err)).To(BeTrue()) if expectSuccess { Expect(k8sClient.Create(context.TODO(), storageProfile)).To(Succeed(), "create nnfstorageprofile") - //err := k8sClient.Create(context.TODO(), storageProfile) Eventually(func(g Gomega) { g.Expect(k8sClient.Get(context.TODO(), profKey, profExpected)).To(Succeed()) }, "3s", "1s").Should(Succeed(), "wait for create of NnfStorageProfile") } else { - Expect(k8sClient.Create(context.TODO(), storageProfile)).ToNot(Succeed(), "expect to fail to create nnfstorageprofile") + err = k8sClient.Create(context.TODO(), storageProfile) + Expect(err).ToNot(BeNil()) + Expect(err.Error()).To(MatchRegexp("webhook .* denied the request")) storageProfile = nil } From aa17b1c61f4b95d1d5eb95b64cfde97b3b1fe32a Mon Sep 17 00:00:00 2001 From: Blake Devcich <89158881+bdevcich-hpe@users.noreply.github.com> Date: Mon, 26 Jun 2023 12:56:52 -0500 Subject: [PATCH 06/19] Containers: Add specific linux capabilities for MPI workers (#209) On LLNL systems, the host's default capabilities are different than HPE systems due to the difference in container runtime and also configuration of those runtimes. This change drops all capabilities and then adds in the required ones to ensure mpirun can access the worker pods. 
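As a rough sketch, the worker containers end up with a SecurityContext like the one below (the helper name is illustrative and not part of the change; the capability list and AllowPrivilegeEscalation setting match the applyPermissions() hunk later in this patch, and they are only applied when the profile has not already set Capabilities):

```
// Illustrative sketch only: mirrors the worker branch of applyPermissions() in this patch.
package sketch

import corev1 "k8s.io/api/core/v1"

// workerSecurityContext drops every Linux capability and adds back only what the
// worker's ssh daemon needs so that mpirun on the launcher can reach the workers.
func workerSecurityContext() *corev1.SecurityContext {
	allowEscalation := true // the worker runs sshd as root
	return &corev1.SecurityContext{
		AllowPrivilegeEscalation: &allowEscalation,
		Capabilities: &corev1.Capabilities{
			Drop: []corev1.Capability{"ALL"},
			Add:  []corev1.Capability{"NET_BIND_SERVICE", "SYS_CHROOT", "AUDIT_WRITE", "SETUID", "SETGID"},
		},
	}
}
```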
Tested capabilities by: - Drop ALL capabilities without the specific Adds (and failing) - Then adding in the specific Adds (and passing) Signed-off-by: Blake Devcich --- api/v1alpha1/nnfcontainerprofile_types.go | 2 +- .../nnfcontainerprofile_webhook_test.go | 34 ++++++++++++++++++- ...nnf.cray.hpe.com_nnfcontainerprofiles.yaml | 2 ++ ...f_workflow_controller_container_helpers.go | 24 ++++++++++--- 4 files changed, 56 insertions(+), 6 deletions(-) diff --git a/api/v1alpha1/nnfcontainerprofile_types.go b/api/v1alpha1/nnfcontainerprofile_types.go index 4fc7d1df1..bfd0705ab 100644 --- a/api/v1alpha1/nnfcontainerprofile_types.go +++ b/api/v1alpha1/nnfcontainerprofile_types.go @@ -92,7 +92,7 @@ type NnfContainerProfile struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` - Data NnfContainerProfileData `json:"data,omitempty"` + Data NnfContainerProfileData `json:"data"` } // +kubebuilder:object:root=true diff --git a/api/v1alpha1/nnfcontainerprofile_webhook_test.go b/api/v1alpha1/nnfcontainerprofile_webhook_test.go index a4e1f7ecc..fe2a1cc00 100644 --- a/api/v1alpha1/nnfcontainerprofile_webhook_test.go +++ b/api/v1alpha1/nnfcontainerprofile_webhook_test.go @@ -116,7 +116,7 @@ var _ = Describe("NnfContainerProfile Webhook", func() { nnfProfile = nil }) - It("Should not allow an empty Launcher and Worker ReplicaSpecs", func() { + It("Should not allow both an empty Launcher and Worker ReplicaSpecs", func() { nnfProfile.ObjectMeta.Name = pinnedResourceName nnfProfile.Data.MPISpec = &mpiv2beta1.MPIJobSpec{ MPIReplicaSpecs: map[mpiv2beta1.MPIReplicaType]*mpicommonv1.ReplicaSpec{ @@ -128,6 +128,38 @@ var _ = Describe("NnfContainerProfile Webhook", func() { nnfProfile = nil }) + It("Should not allow an empty Launcher ReplicaSpec", func() { + nnfProfile.ObjectMeta.Name = pinnedResourceName + nnfProfile.Data.MPISpec = &mpiv2beta1.MPIJobSpec{ + MPIReplicaSpecs: map[mpiv2beta1.MPIReplicaType]*mpicommonv1.ReplicaSpec{ + mpiv2beta1.MPIReplicaTypeLauncher: nil, + mpiv2beta1.MPIReplicaTypeWorker: { + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{}, + }, + }, + }, + } + Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed()) + nnfProfile = nil + }) + + It("Should not allow an empty Worker ReplicaSpec", func() { + nnfProfile.ObjectMeta.Name = pinnedResourceName + nnfProfile.Data.MPISpec = &mpiv2beta1.MPIJobSpec{ + MPIReplicaSpecs: map[mpiv2beta1.MPIReplicaType]*mpicommonv1.ReplicaSpec{ + mpiv2beta1.MPIReplicaTypeLauncher: { + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{}, + }, + }, + mpiv2beta1.MPIReplicaTypeWorker: nil, + }, + } + Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed()) + nnfProfile = nil + }) + It("Should not allow an empty Launcher and Worker PodSpecs", func() { nnfProfile.ObjectMeta.Name = pinnedResourceName nnfProfile.Data.MPISpec = &mpiv2beta1.MPIJobSpec{ diff --git a/config/crd/bases/nnf.cray.hpe.com_nnfcontainerprofiles.yaml b/config/crd/bases/nnf.cray.hpe.com_nnfcontainerprofiles.yaml index ea7cf2880..3d4cce9b0 100644 --- a/config/crd/bases/nnf.cray.hpe.com_nnfcontainerprofiles.yaml +++ b/config/crd/bases/nnf.cray.hpe.com_nnfcontainerprofiles.yaml @@ -15757,6 +15757,8 @@ spec: type: string metadata: type: object + required: + - data type: object served: true storage: true diff --git a/controllers/nnf_workflow_controller_container_helpers.go b/controllers/nnf_workflow_controller_container_helpers.go index 221d96970..ff5112285 100644 --- a/controllers/nnf_workflow_controller_container_helpers.go +++ 
b/controllers/nnf_workflow_controller_container_helpers.go @@ -73,6 +73,7 @@ func (c *nnfUserContainer) createMPIJob() error { Namespace: c.workflow.Namespace, }, } + c.profile.Data.MPISpec.DeepCopyInto(&mpiJob.Spec) c.username = nnfv1alpha1.ContainerMPIUser @@ -85,7 +86,8 @@ func (c *nnfUserContainer) createMPIJob() error { mpiJob.Spec.RunPolicy.BackoffLimit = &c.profile.Data.RetryLimit } - // MPIJobs have two pod specs: one for the launcher and one for the workers + // MPIJobs have two pod specs: one for the launcher and one for the workers. The webhook ensures + // that the launcher/worker specs exist launcher := mpiJob.Spec.MPIReplicaSpecs[mpiv2beta1.MPIReplicaTypeLauncher] launcherSpec := &launcher.Template.Spec worker := mpiJob.Spec.MPIReplicaSpecs[mpiv2beta1.MPIReplicaTypeWorker] @@ -357,19 +359,33 @@ func (c *nnfUserContainer) applyPermissions(spec *corev1.PodSpec, mpiJobSpec *mp SubPath: "passwd", }) + // Create SecurityContext if necessary + if container.SecurityContext == nil { + container.SecurityContext = &corev1.SecurityContext{} + } + // Add non-root permissions from the workflow's user/group ID for the launcher, but not // the worker. The worker needs to run an ssh daemon, which requires root. Commands on // the worker are executed via the launcher as the `mpiuser` and not root. if !worker { - if container.SecurityContext == nil { - container.SecurityContext = &corev1.SecurityContext{} - } container.SecurityContext.RunAsUser = &c.uid container.SecurityContext.RunAsGroup = &c.gid nonRoot := true container.SecurityContext.RunAsNonRoot = &nonRoot su := false container.SecurityContext.AllowPrivilegeEscalation = &su + } else { + // For the worker nodes, we need to ensure we have the appropriate linux capabilities to + // allow for ssh access for mpirun. Drop all capabilities and only add what is + // necessary. Only do this if the Capabilities have not been set by the user. + su := true + container.SecurityContext.AllowPrivilegeEscalation = &su + if container.SecurityContext.Capabilities == nil { + container.SecurityContext.Capabilities = &corev1.Capabilities{ + Drop: []corev1.Capability{"ALL"}, + Add: []corev1.Capability{"NET_BIND_SERVICE", "SYS_CHROOT", "AUDIT_WRITE", "SETUID", "SETGID"}, + } + } } } } From e5a7951f8f4898fb43925c4e8fe8655a70688373 Mon Sep 17 00:00:00 2001 From: Dean Roehrich Date: Thu, 6 Jul 2023 10:14:08 -0500 Subject: [PATCH 07/19] Handle stale kustomize or controller-gen in bin dir. (#211) Signed-off-by: Dean Roehrich --- Makefile | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index a336c4bd7..c0f63899b 100644 --- a/Makefile +++ b/Makefile @@ -282,6 +282,12 @@ LOCALBIN ?= $(shell pwd)/bin $(LOCALBIN): mkdir -p $(LOCALBIN) +.PHONY: clean-bin +clean-bin: + if [[ -d $(LOCALBIN) ]]; then \ + chmod -R u+w $(LOCALBIN) && rm -rf $(LOCALBIN); \ + fi + ## Tool Binaries KUSTOMIZE ?= $(LOCALBIN)/kustomize CONTROLLER_GEN ?= $(LOCALBIN)/controller-gen @@ -293,14 +299,17 @@ CONTROLLER_TOOLS_VERSION ?= v0.12.0 KUSTOMIZE_INSTALL_SCRIPT ?= "https://raw.githubusercontent.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh" .PHONY: kustomize -kustomize: $(KUSTOMIZE) ## Download kustomize locally if necessary. -$(KUSTOMIZE): $(LOCALBIN) - test -s $(LOCALBIN)/kustomize || { curl -s $(KUSTOMIZE_INSTALL_SCRIPT) | bash -s -- $(subst v,,$(KUSTOMIZE_VERSION)) $(LOCALBIN); } +kustomize: $(LOCALBIN) ## Download kustomize locally if necessary. + if [[ ! 
-s $(LOCALBIN)/kustomize || $$($(LOCALBIN)/kustomize version | awk '{print $$1}' | awk -F/ '{print $$2}') != $(KUSTOMIZE_VERSION) ]]; then \ + rm -f $(LOCALBIN)/kustomize && \ + { curl -s $(KUSTOMIZE_INSTALL_SCRIPT) | bash -s -- $(subst v,,$(KUSTOMIZE_VERSION)) $(LOCALBIN); }; \ + fi .PHONY: controller-gen -controller-gen: $(CONTROLLER_GEN) ## Download controller-gen locally if necessary. -$(CONTROLLER_GEN): $(LOCALBIN) - test -s $(LOCALBIN)/controller-gen || GOBIN=$(LOCALBIN) go install sigs.k8s.io/controller-tools/cmd/controller-gen@$(CONTROLLER_TOOLS_VERSION) +controller-gen: $(LOCALBIN) ## Download controller-gen locally if necessary. + if [[ ! -s $(LOCALBIN)/controller-gen || $$($(LOCALBIN)/controller-gen --version | awk '{print $$2}') != $(CONTROLLER_TOOLS_VERSION) ]]; then \ + rm -f $(LOCALBIN)/controller-gen && GOBIN=$(LOCALBIN) go install sigs.k8s.io/controller-tools/cmd/controller-gen@$(CONTROLLER_TOOLS_VERSION); \ + fi .PHONY: envtest envtest: $(ENVTEST) ## Download envtest-setup locally if necessary. From 3180a517d5ab8ea262bbcc807bee60c9175d9a4d Mon Sep 17 00:00:00 2001 From: Dean Roehrich Date: Fri, 7 Jul 2023 09:02:02 -0500 Subject: [PATCH 08/19] Enforce namespace for system NnfStorageProfile and NnfContainerProfile resources (#212) The system profiles must be in the NNF_STORAGE_PROFILE_NAMESPACE and NNF_CONTAINER_PROFILE_NAMESPACE namespaces, as specified in the Deployment spec. Make the 'pinned' flag immutable. Signed-off-by: Dean Roehrich --- api/v1alpha1/nnfcontainerprofile_webhook.go | 15 +++ .../nnfcontainerprofile_webhook_test.go | 91 +++++++++++++------ api/v1alpha1/nnfstorageprofile_webhook.go | 16 +++- .../nnfstorageprofile_webhook_test.go | 69 +++++++++++++- api/v1alpha1/webhook_suite_test.go | 4 + 5 files changed, 162 insertions(+), 33 deletions(-) diff --git a/api/v1alpha1/nnfcontainerprofile_webhook.go b/api/v1alpha1/nnfcontainerprofile_webhook.go index 73e786a73..bdf7bbcb3 100644 --- a/api/v1alpha1/nnfcontainerprofile_webhook.go +++ b/api/v1alpha1/nnfcontainerprofile_webhook.go @@ -21,6 +21,7 @@ package v1alpha1 import ( "fmt" + "os" "reflect" "github.com/kubeflow/mpi-operator/pkg/apis/kubeflow/v2beta1" @@ -48,6 +49,15 @@ var _ webhook.Validator = &NnfContainerProfile{} func (r *NnfContainerProfile) ValidateCreate() error { nnfcontainerprofilelog.Info("validate create", "name", r.Name) + // If it's not pinned, then it's being made available for users to select + // and it must be in the correct namespace. 
+ profileNamespace := os.Getenv("NNF_CONTAINER_PROFILE_NAMESPACE") + if !r.Data.Pinned && r.GetNamespace() != profileNamespace { + err := fmt.Errorf("incorrect namespace for profile that is intended to be selected by users; the namespace should be '%s'", profileNamespace) + nnfstorageprofilelog.Error(err, "invalid") + return err + } + mpiJob := r.Data.MPISpec != nil nonmpiJob := r.Data.Spec != nil @@ -97,6 +107,11 @@ func (r *NnfContainerProfile) ValidateUpdate(old runtime.Object) error { nnfcontainerprofilelog.Info("validate update", "name", r.Name) obj := old.(*NnfContainerProfile) + if obj.Data.Pinned != r.Data.Pinned { + err := fmt.Errorf("the pinned flag is immutable") + nnfcontainerprofilelog.Error(err, "invalid") + return err + } if obj.Data.Pinned { // Allow metadata to be updated, for things like finalizers, // ownerReferences, and labels, but do not allow Data to be diff --git a/api/v1alpha1/nnfcontainerprofile_webhook_test.go b/api/v1alpha1/nnfcontainerprofile_webhook_test.go index fe2a1cc00..61084abcd 100644 --- a/api/v1alpha1/nnfcontainerprofile_webhook_test.go +++ b/api/v1alpha1/nnfcontainerprofile_webhook_test.go @@ -23,6 +23,7 @@ import ( "context" "os" + "github.com/google/uuid" mpicommonv1 "github.com/kubeflow/common/pkg/apis/common/v1" mpiv2beta1 "github.com/kubeflow/mpi-operator/pkg/apis/kubeflow/v2beta1" . "github.com/onsi/ginkgo/v2" @@ -37,16 +38,21 @@ import ( var _ = Describe("NnfContainerProfile Webhook", func() { var ( - namespaceName = os.Getenv("NNF_CONTAINER_PROFILE_NAMESPACE") - pinnedResourceName = "test-pinned" - nnfProfile *NnfContainerProfile = nil + namespaceName = os.Getenv("NNF_CONTAINER_PROFILE_NAMESPACE") + otherNamespaceName string + otherNamespace *corev1.Namespace + + pinnedResourceName string + nnfProfile *NnfContainerProfile newProfile *NnfContainerProfile ) BeforeEach(func() { + pinnedResourceName = "test-pinned-" + uuid.NewString()[:8] + nnfProfile = &NnfContainerProfile{ ObjectMeta: metav1.ObjectMeta{ - Name: "test", + Name: "test-" + uuid.NewString()[:8], Namespace: namespaceName, }, Data: NnfContainerProfileData{ @@ -61,6 +67,21 @@ var _ = Describe("NnfContainerProfile Webhook", func() { newProfile = &NnfContainerProfile{} }) + BeforeEach(func() { + otherNamespaceName = "other-" + uuid.NewString()[:8] + + otherNamespace = &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: otherNamespaceName, + }, + } + Expect(k8sClient.Create(context.TODO(), otherNamespace)).To(Succeed()) + }) + + AfterEach(func() { + Expect(k8sClient.Delete(context.TODO(), otherNamespace)).To(Succeed()) + }) + AfterEach(func() { if nnfProfile != nil { Expect(k8sClient.Delete(context.TODO(), nnfProfile)).To(Succeed()) @@ -71,28 +92,35 @@ var _ = Describe("NnfContainerProfile Webhook", func() { } }) + It("should accept system profiles in the designated namespace", func() { + Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) + }) + + It("should not accept system profiles that are not in the designated namespace", func() { + nnfProfile.ObjectMeta.Namespace = otherNamespaceName + err := k8sClient.Create(context.TODO(), nnfProfile) + Expect(err.Error()).To(MatchRegexp("webhook .* denied the request: incorrect namespace")) + nnfProfile = nil + }) + It("Should not allow a negative retryLimit", func() { - nnfProfile.ObjectMeta.Name = pinnedResourceName nnfProfile.Data.RetryLimit = -1 Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed()) nnfProfile = nil }) It("Should allow a zero retryLimit", func() { - nnfProfile.ObjectMeta.Name = 
pinnedResourceName nnfProfile.Data.RetryLimit = 0 Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) }) It("Should not allow a negative postRunTimeoutSeconds", func() { - nnfProfile.ObjectMeta.Name = pinnedResourceName nnfProfile.Data.PostRunTimeoutSeconds = -1 Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed()) nnfProfile = nil }) It("Should not allow setting both Spec and MPISpec", func() { - nnfProfile.ObjectMeta.Name = pinnedResourceName nnfProfile.Data.Spec = &corev1.PodSpec{} nnfProfile.Data.MPISpec = &mpiv2beta1.MPIJobSpec{} Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed()) @@ -100,7 +128,6 @@ var _ = Describe("NnfContainerProfile Webhook", func() { }) It("Should fail when both Spec and MPISpec are unset", func() { - nnfProfile.ObjectMeta.Name = pinnedResourceName nnfProfile.Data.Spec = nil nnfProfile.Data.MPISpec = nil Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed()) @@ -108,7 +135,6 @@ var _ = Describe("NnfContainerProfile Webhook", func() { }) It("Should not allow an empty MPIReplicaSpecs", func() { - nnfProfile.ObjectMeta.Name = pinnedResourceName nnfProfile.Data.MPISpec = &mpiv2beta1.MPIJobSpec{ MPIReplicaSpecs: map[mpiv2beta1.MPIReplicaType]*mpicommonv1.ReplicaSpec{}, } @@ -117,7 +143,6 @@ var _ = Describe("NnfContainerProfile Webhook", func() { }) It("Should not allow both an empty Launcher and Worker ReplicaSpecs", func() { - nnfProfile.ObjectMeta.Name = pinnedResourceName nnfProfile.Data.MPISpec = &mpiv2beta1.MPIJobSpec{ MPIReplicaSpecs: map[mpiv2beta1.MPIReplicaType]*mpicommonv1.ReplicaSpec{ mpiv2beta1.MPIReplicaTypeLauncher: nil, @@ -129,7 +154,6 @@ var _ = Describe("NnfContainerProfile Webhook", func() { }) It("Should not allow an empty Launcher ReplicaSpec", func() { - nnfProfile.ObjectMeta.Name = pinnedResourceName nnfProfile.Data.MPISpec = &mpiv2beta1.MPIJobSpec{ MPIReplicaSpecs: map[mpiv2beta1.MPIReplicaType]*mpicommonv1.ReplicaSpec{ mpiv2beta1.MPIReplicaTypeLauncher: nil, @@ -145,7 +169,6 @@ var _ = Describe("NnfContainerProfile Webhook", func() { }) It("Should not allow an empty Worker ReplicaSpec", func() { - nnfProfile.ObjectMeta.Name = pinnedResourceName nnfProfile.Data.MPISpec = &mpiv2beta1.MPIJobSpec{ MPIReplicaSpecs: map[mpiv2beta1.MPIReplicaType]*mpicommonv1.ReplicaSpec{ mpiv2beta1.MPIReplicaTypeLauncher: { @@ -161,7 +184,6 @@ var _ = Describe("NnfContainerProfile Webhook", func() { }) It("Should not allow an empty Launcher and Worker PodSpecs", func() { - nnfProfile.ObjectMeta.Name = pinnedResourceName nnfProfile.Data.MPISpec = &mpiv2beta1.MPIJobSpec{ MPIReplicaSpecs: map[mpiv2beta1.MPIReplicaType]*mpicommonv1.ReplicaSpec{ mpiv2beta1.MPIReplicaTypeLauncher: { @@ -181,7 +203,6 @@ var _ = Describe("NnfContainerProfile Webhook", func() { }) It("Should not allow setting both PostRunTimeoutSeconds and MPISpec.RunPolicy.ActiveDeadlineSeconds", func() { - nnfProfile.ObjectMeta.Name = pinnedResourceName nnfProfile.Data.Spec = nil nnfProfile.Data.MPISpec = &mpiv2beta1.MPIJobSpec{} @@ -194,8 +215,6 @@ var _ = Describe("NnfContainerProfile Webhook", func() { }) It("Should not allow setting both PostRunTimeoutSeconds and Spec.ActiveDeadlineSeconds", func() { - nnfProfile.ObjectMeta.Name = pinnedResourceName - timeout := int64(10) nnfProfile.Data.PostRunTimeoutSeconds = timeout nnfProfile.Data.Spec.ActiveDeadlineSeconds = &timeout @@ -205,7 +224,6 @@ var _ = Describe("NnfContainerProfile Webhook", func() { }) It("Should not allow setting MPISpec.RunPolicy.BackoffLimit directly", func() { - 
nnfProfile.ObjectMeta.Name = pinnedResourceName nnfProfile.Data.Spec = nil nnfProfile.Data.MPISpec = &mpiv2beta1.MPIJobSpec{} @@ -217,22 +235,19 @@ var _ = Describe("NnfContainerProfile Webhook", func() { }) It("Should allow a zero postRunTimeoutSeconds", func() { - nnfProfile.ObjectMeta.Name = pinnedResourceName nnfProfile.Data.PostRunTimeoutSeconds = 0 Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) }) It("Should not allow modification of Data in a pinned resource", func() { nnfProfile.ObjectMeta.Name = pinnedResourceName + nnfProfile.ObjectMeta.Namespace = otherNamespaceName + nnfProfile.Data.Pinned = true Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) Eventually(func() error { return k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(nnfProfile), nnfProfile) }).Should(Succeed()) - // Set it as pinned with an Update - nnfProfile.Data.Pinned = true - Expect(k8sClient.Update(context.TODO(), nnfProfile)).To(Succeed()) - // Verify pinned Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(nnfProfile), newProfile)).To(Succeed()) Expect(newProfile.Data.Pinned).To(BeTrue()) @@ -244,15 +259,13 @@ var _ = Describe("NnfContainerProfile Webhook", func() { It("Should allow modification of Meta in a pinned resource", func() { nnfProfile.ObjectMeta.Name = pinnedResourceName + nnfProfile.ObjectMeta.Namespace = otherNamespaceName + nnfProfile.Data.Pinned = true Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) Eventually(func() error { return k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(nnfProfile), nnfProfile) }).Should(Succeed()) - // Set it as pinned with an Update - nnfProfile.Data.Pinned = true - Expect(k8sClient.Update(context.TODO(), nnfProfile)).To(Succeed()) - // Verify pinned Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(nnfProfile), newProfile)).To(Succeed()) Expect(newProfile.Data.Pinned).To(BeTrue()) @@ -267,4 +280,28 @@ var _ = Describe("NnfContainerProfile Webhook", func() { newProfile.SetLabels(labels) Expect(k8sClient.Update(context.TODO(), newProfile)).To(Succeed()) }) + + It("Should not allow an unpinned profile to become pinned", func() { + Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) + Eventually(func() error { + return k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(nnfProfile), newProfile) + }).Should(Succeed()) + + newProfile.Data.Pinned = true + Expect(k8sClient.Update(context.TODO(), newProfile)).ToNot(Succeed()) + }) + + It("Should not allow a pinned profile to become unpinned", func() { + nnfProfile.ObjectMeta.Name = pinnedResourceName + nnfProfile.ObjectMeta.Namespace = otherNamespaceName + nnfProfile.Data.Pinned = true + + Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) + Eventually(func() error { + return k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(nnfProfile), newProfile) + }).Should(Succeed()) + + newProfile.Data.Pinned = false + Expect(k8sClient.Update(context.TODO(), newProfile)).ToNot(Succeed()) + }) }) diff --git a/api/v1alpha1/nnfstorageprofile_webhook.go b/api/v1alpha1/nnfstorageprofile_webhook.go index 23906e2c5..1ecec5978 100644 --- a/api/v1alpha1/nnfstorageprofile_webhook.go +++ b/api/v1alpha1/nnfstorageprofile_webhook.go @@ -1,5 +1,5 @@ /* - * Copyright 2022 Hewlett Packard Enterprise Development LP + * Copyright 2022-2023 Hewlett Packard Enterprise Development LP * Other additional copyright holders may be indicated within. 
* * The entirety of this work is licensed under the Apache License, @@ -21,6 +21,7 @@ package v1alpha1 import ( "fmt" + "os" "reflect" "k8s.io/apimachinery/pkg/runtime" @@ -47,6 +48,14 @@ var _ webhook.Validator = &NnfStorageProfile{} func (r *NnfStorageProfile) ValidateCreate() error { nnfstorageprofilelog.V(1).Info("validate create", "name", r.Name) + // If it's not pinned, then it's being made available for users to select + // and it must be in the correct namespace. + profileNamespace := os.Getenv("NNF_STORAGE_PROFILE_NAMESPACE") + if !r.Data.Pinned && r.GetNamespace() != profileNamespace { + err := fmt.Errorf("incorrect namespace for profile that is intended to be selected by users; the namespace should be '%s'", profileNamespace) + nnfstorageprofilelog.Error(err, "invalid") + return err + } if err := r.validateContent(); err != nil { nnfstorageprofilelog.Error(err, "invalid NnfStorageProfile resource") return err @@ -59,6 +68,11 @@ func (r *NnfStorageProfile) ValidateUpdate(old runtime.Object) error { nnfstorageprofilelog.V(1).Info("validate update", "name", r.Name) obj := old.(*NnfStorageProfile) + if obj.Data.Pinned != r.Data.Pinned { + err := fmt.Errorf("the pinned flag is immutable") + nnfcontainerprofilelog.Error(err, "invalid") + return err + } if obj.Data.Pinned { // Allow metadata to be updated, for things like finalizers, // ownerReferences, and labels, but do not allow Data to be diff --git a/api/v1alpha1/nnfstorageprofile_webhook_test.go b/api/v1alpha1/nnfstorageprofile_webhook_test.go index e09177c99..04be1ec1a 100644 --- a/api/v1alpha1/nnfstorageprofile_webhook_test.go +++ b/api/v1alpha1/nnfstorageprofile_webhook_test.go @@ -1,5 +1,5 @@ /* - * Copyright 2022 Hewlett Packard Enterprise Development LP + * Copyright 2022-2023 Hewlett Packard Enterprise Development LP * Other additional copyright holders may be indicated within. * * The entirety of this work is licensed under the Apache License, @@ -23,8 +23,10 @@ import ( "context" "os" + "github.com/google/uuid" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -34,16 +36,21 @@ import ( var _ = Describe("NnfStorageProfile Webhook", func() { var ( - namespaceName = os.Getenv("NNF_STORAGE_PROFILE_NAMESPACE") - pinnedResourceName = "test-pinned" - nnfProfile *NnfStorageProfile = nil + namespaceName = os.Getenv("NNF_STORAGE_PROFILE_NAMESPACE") + otherNamespaceName string + otherNamespace *corev1.Namespace + + pinnedResourceName string + nnfProfile *NnfStorageProfile newProfile *NnfStorageProfile ) BeforeEach(func() { + pinnedResourceName = "test-pinned-" + uuid.NewString()[:8] + nnfProfile = &NnfStorageProfile{ ObjectMeta: metav1.ObjectMeta{ - Name: "test", + Name: "test-" + uuid.NewString()[:8], Namespace: namespaceName, }, } @@ -51,6 +58,21 @@ var _ = Describe("NnfStorageProfile Webhook", func() { newProfile = &NnfStorageProfile{} }) + BeforeEach(func() { + otherNamespaceName = "other-" + uuid.NewString()[:8] + + otherNamespace = &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: otherNamespaceName, + }, + } + Expect(k8sClient.Create(context.TODO(), otherNamespace)).To(Succeed()) + }) + + AfterEach(func() { + Expect(k8sClient.Delete(context.TODO(), otherNamespace)).To(Succeed()) + }) + AfterEach(func() { if nnfProfile != nil { Expect(k8sClient.Delete(context.TODO(), nnfProfile)).To(Succeed()) @@ -61,6 +83,17 @@ var _ = Describe("NnfStorageProfile Webhook", func() { } }) + It("should accept system profiles in the designated namespace", func() { + Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) + }) + + It("should not accept system profiles that are not in the designated namespace", func() { + nnfProfile.ObjectMeta.Namespace = otherNamespaceName + err := k8sClient.Create(context.TODO(), nnfProfile) + Expect(err.Error()).To(MatchRegexp("webhook .* denied the request: incorrect namespace")) + nnfProfile = nil + }) + It("should accept default=true", func() { nnfProfile.Data.Default = true Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) @@ -120,6 +153,7 @@ var _ = Describe("NnfStorageProfile Webhook", func() { It("Should not allow modification of Data in a pinned resource", func() { nnfProfile.ObjectMeta.Name = pinnedResourceName + nnfProfile.ObjectMeta.Namespace = otherNamespaceName nnfProfile.Data.Pinned = true Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) @@ -135,6 +169,7 @@ var _ = Describe("NnfStorageProfile Webhook", func() { It("Should allow modification of Meta in a pinned resource", func() { nnfProfile.ObjectMeta.Name = pinnedResourceName + nnfProfile.ObjectMeta.Namespace = otherNamespaceName nnfProfile.Data.Pinned = true Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) @@ -199,4 +234,28 @@ var _ = Describe("NnfStorageProfile Webhook", func() { Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed()) nnfProfile = nil }) + + It("Should not allow an unpinned profile to become pinned", func() { + Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) + Eventually(func() error { + return k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(nnfProfile), newProfile) + }).Should(Succeed()) + + newProfile.Data.Pinned = true + Expect(k8sClient.Update(context.TODO(), newProfile)).ToNot(Succeed()) + }) + + It("Should not allow a pinned profile to become unpinned", func() { + nnfProfile.ObjectMeta.Name = pinnedResourceName + nnfProfile.ObjectMeta.Namespace = otherNamespaceName + 
nnfProfile.Data.Pinned = true + + Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) + Eventually(func() error { + return k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(nnfProfile), newProfile) + }).Should(Succeed()) + + newProfile.Data.Pinned = false + Expect(k8sClient.Update(context.TODO(), newProfile)).ToNot(Succeed()) + }) }) diff --git a/api/v1alpha1/webhook_suite_test.go b/api/v1alpha1/webhook_suite_test.go index 98a25206f..2ee3d8164 100644 --- a/api/v1alpha1/webhook_suite_test.go +++ b/api/v1alpha1/webhook_suite_test.go @@ -34,6 +34,7 @@ import ( admissionv1beta1 "k8s.io/api/admission/v1beta1" //+kubebuilder:scaffold:imports + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/rest" ctrl "sigs.k8s.io/controller-runtime" @@ -98,6 +99,9 @@ var _ = BeforeSuite(func() { err = admissionv1beta1.AddToScheme(scheme) Expect(err).NotTo(HaveOccurred()) + err = corev1.AddToScheme(scheme) + Expect(err).NotTo(HaveOccurred()) + //+kubebuilder:scaffold:scheme k8sClient, err = client.New(cfg, client.Options{Scheme: scheme}) From 3d439e4f39d325bfe4cf186028da22152148e476 Mon Sep 17 00:00:00 2001 From: Blake Devcich <89158881+bdevcich-hpe@users.noreply.github.com> Date: Wed, 12 Jul 2023 11:10:46 -0500 Subject: [PATCH 09/19] Containers: Add global lustre support (#213) This adds `DW_GLOBAL_*` storage support to container workflows/profiles, which are backed by the `LustreFilesystem` resource. This allows users to mount global lustre fileystems into user containers. An administrator must first edit the `LustreFilesystem` resource to add the workflow's namespace (e.g. `default`) to the list of namespaces along with a mode. The mode defaults to `ReadWriteMany` if not set. Doing so will create a PVC that can be used to mount the fileystem. A `DW_GLOBAL_*` storage must also be added to the NnfContainer Profile. See `config/examples` and `config/samples` for more detail. The value for the `DW_GLOBAL_*` parameter is the path of the lustre filesystem, e.g.: ``` #DW jobdw type=gfs2 name=my-local-storage capacity=100GB #DW container name=my-container-workflow profile=example-mpi \ DW_JOB_foo_local_storage=my-local-storage \ DW_GLOBAL_foo_global_lustre=/lus/sawbill ``` Signed-off-by: Blake Devcich --- api/v1alpha1/nnfcontainerprofile_types.go | 5 + api/v1alpha1/nnfcontainerprofile_webhook.go | 70 ++++-- .../nnfcontainerprofile_webhook_test.go | 29 +++ ...nnf.cray.hpe.com_nnfcontainerprofiles.yaml | 6 + config/dws/nnf-ruleset.yaml | 2 +- .../nnf_v1alpha1_nnfcontainerprofiles.yaml | 3 + .../nnf_v1alpha1_nnfcontainerprofile.yaml | 8 +- ...f_workflow_controller_container_helpers.go | 25 ++- .../nnf_workflow_controller_helpers.go | 85 +++++-- controllers/nnf_workflow_controller_test.go | 209 +++++++++++++++--- controllers/nnfcontainerprofile_test.go | 1 + 11 files changed, 364 insertions(+), 79 deletions(-) diff --git a/api/v1alpha1/nnfcontainerprofile_types.go b/api/v1alpha1/nnfcontainerprofile_types.go index bfd0705ab..b193e2aaf 100644 --- a/api/v1alpha1/nnfcontainerprofile_types.go +++ b/api/v1alpha1/nnfcontainerprofile_types.go @@ -83,6 +83,11 @@ type NnfContainerProfileStorage struct { // the user not supplying this filesystem in the #DW directives //+kubebuilder:default:=false Optional bool `json:"optional"` + + // For DW_GLOBAL_ (global lustre) storages, the access mode must match what is configured in + // the LustreFilesystem resource for the namespace. Defaults to `ReadWriteMany` for global + // lustre, otherwise empty. 
+ PVCMode corev1.PersistentVolumeAccessMode `json:"pvcMode,omitempty"` } // +kubebuilder:object:root=true diff --git a/api/v1alpha1/nnfcontainerprofile_webhook.go b/api/v1alpha1/nnfcontainerprofile_webhook.go index bdf7bbcb3..1e69b1509 100644 --- a/api/v1alpha1/nnfcontainerprofile_webhook.go +++ b/api/v1alpha1/nnfcontainerprofile_webhook.go @@ -23,6 +23,7 @@ import ( "fmt" "os" "reflect" + "strings" "github.com/kubeflow/mpi-operator/pkg/apis/kubeflow/v2beta1" "k8s.io/apimachinery/pkg/runtime" @@ -58,6 +59,46 @@ func (r *NnfContainerProfile) ValidateCreate() error { return err } + if err := r.validateContent(); err != nil { + nnfcontainerprofilelog.Error(err, "invalid NnfContainerProfile resource") + return err + } + + return nil +} + +// ValidateUpdate implements webhook.Validator so a webhook will be registered for the type +func (r *NnfContainerProfile) ValidateUpdate(old runtime.Object) error { + nnfcontainerprofilelog.Info("validate update", "name", r.Name) + + obj := old.(*NnfContainerProfile) + + if obj.Data.Pinned != r.Data.Pinned { + err := fmt.Errorf("the pinned flag is immutable") + nnfcontainerprofilelog.Error(err, "invalid") + return err + } + + if obj.Data.Pinned { + // Allow metadata to be updated, for things like finalizers, + // ownerReferences, and labels, but do not allow Data to be + // updated. + if !reflect.DeepEqual(r.Data, obj.Data) { + err := fmt.Errorf("update on pinned resource not allowed") + nnfcontainerprofilelog.Error(err, "invalid") + return err + } + } + + if err := r.validateContent(); err != nil { + nnfcontainerprofilelog.Error(err, "invalid NnfContainerProfile resource") + return err + } + + return nil +} + +func (r *NnfContainerProfile) validateContent() error { mpiJob := r.Data.MPISpec != nil nonmpiJob := r.Data.Spec != nil @@ -99,27 +140,12 @@ func (r *NnfContainerProfile) ValidateCreate() error { } } - return nil -} - -// ValidateUpdate implements webhook.Validator so a webhook will be registered for the type -func (r *NnfContainerProfile) ValidateUpdate(old runtime.Object) error { - nnfcontainerprofilelog.Info("validate update", "name", r.Name) - - obj := old.(*NnfContainerProfile) - if obj.Data.Pinned != r.Data.Pinned { - err := fmt.Errorf("the pinned flag is immutable") - nnfcontainerprofilelog.Error(err, "invalid") - return err - } - if obj.Data.Pinned { - // Allow metadata to be updated, for things like finalizers, - // ownerReferences, and labels, but do not allow Data to be - // updated. - if !reflect.DeepEqual(r.Data, obj.Data) { - err := fmt.Errorf("update on pinned resource not allowed") - nnfcontainerprofilelog.Error(err, "invalid") - return err + // Ensure only DW_GLOBAL_ storages have PVCMode + for _, storage := range r.Data.Storages { + if !strings.HasPrefix(storage.Name, "DW_GLOBAL_") { + if storage.PVCMode != "" { + return fmt.Errorf("PVCMode is only supported for global lustre storages (DW_GLOBAL_)") + } } } @@ -129,7 +155,5 @@ func (r *NnfContainerProfile) ValidateUpdate(old runtime.Object) error { // ValidateDelete implements webhook.Validator so a webhook will be registered for the type func (r *NnfContainerProfile) ValidateDelete() error { nnfcontainerprofilelog.Info("validate delete", "name", r.Name) - - // TODO(user): fill in your validation logic upon object deletion. 
return nil } diff --git a/api/v1alpha1/nnfcontainerprofile_webhook_test.go b/api/v1alpha1/nnfcontainerprofile_webhook_test.go index 61084abcd..a4d990492 100644 --- a/api/v1alpha1/nnfcontainerprofile_webhook_test.go +++ b/api/v1alpha1/nnfcontainerprofile_webhook_test.go @@ -61,6 +61,11 @@ var _ = Describe("NnfContainerProfile Webhook", func() { {Name: "test"}, }, }, + Storages: []NnfContainerProfileStorage{ + {Name: "DW_JOB_storage", Optional: true}, + {Name: "DW_PERSISTENT_storage", Optional: true}, + {Name: "DW_GLOBAL_storage", Optional: true}, + }, }, } @@ -304,4 +309,28 @@ var _ = Describe("NnfContainerProfile Webhook", func() { newProfile.Data.Pinned = false Expect(k8sClient.Update(context.TODO(), newProfile)).ToNot(Succeed()) }) + + DescribeTable("when modes are set for storages on creation", + func(storageName string, mode corev1.PersistentVolumeAccessMode, result bool) { + for i, storage := range nnfProfile.Data.Storages { + if storage.Name == storageName && mode != "" { + nnfProfile.Data.Storages[i].PVCMode = mode + } + } + if result { + Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) + } else { + Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed()) + nnfProfile = nil + } + }, + // Only nil modes should pass for JOB/PERSISTENT + Entry("should pass when DW_JOB has no mode", "DW_JOB_storage", corev1.PersistentVolumeAccessMode(""), true), + Entry("should fail when DW_JOB has a mode", "DW_JOB_storage", corev1.ReadWriteMany, false), + Entry("should pass when DW_PERSISTENT has no mode", "DW_PERSISTENT_storage", corev1.PersistentVolumeAccessMode(""), true), + Entry("should fail when DW_PERSISTENT has a mode", "DW_PERSISTENT_storage", corev1.ReadWriteMany, false), + // Both should pass + Entry("should pass when DW_GLOBAL has no mode (defaults)", "DW_GLOBAL_storage", corev1.PersistentVolumeAccessMode(""), true), + Entry("should pass when DW_GLOBAL has a mode", "DW_GLOBAL_storage", corev1.ReadWriteMany, true), + ) }) diff --git a/config/crd/bases/nnf.cray.hpe.com_nnfcontainerprofiles.yaml b/config/crd/bases/nnf.cray.hpe.com_nnfcontainerprofiles.yaml index 3d4cce9b0..1ab85f2dc 100644 --- a/config/crd/bases/nnf.cray.hpe.com_nnfcontainerprofiles.yaml +++ b/config/crd/bases/nnf.cray.hpe.com_nnfcontainerprofiles.yaml @@ -15736,6 +15736,12 @@ spec: to be mounted, but can be ignored by the user not supplying this filesystem in the #DW directives' type: boolean + pvcMode: + description: For DW_GLOBAL_ (global lustre) storages, the access + mode must match what is configured in the LustreFilesystem + resource for the namespace. Defaults to `ReadWriteMany` for + global lustre, otherwise empty. 
+ type: string required: - name - optional diff --git a/config/dws/nnf-ruleset.yaml b/config/dws/nnf-ruleset.yaml index 6563e99e5..359453589 100644 --- a/config/dws/nnf-ruleset.yaml +++ b/config/dws/nnf-ruleset.yaml @@ -105,7 +105,7 @@ spec: pattern: "^[a-z][a-z0-9-]+$" isRequired: true isValueRequired: true - - key: '^(DW_JOB_|DW_PERSISTENT_)[a-z][a-z0-9_]+$' + - key: '^(DW_JOB_|DW_PERSISTENT_|DW_GLOBAL_)[a-z][a-z0-9_]+$' type: "string" isRequired: false isValueRequired: true diff --git a/config/examples/nnf_v1alpha1_nnfcontainerprofiles.yaml b/config/examples/nnf_v1alpha1_nnfcontainerprofiles.yaml index 8a60bbd7b..01527afdd 100644 --- a/config/examples/nnf_v1alpha1_nnfcontainerprofiles.yaml +++ b/config/examples/nnf_v1alpha1_nnfcontainerprofiles.yaml @@ -74,6 +74,9 @@ data: optional: false - name: DW_PERSISTENT_foo_persistent_storage optional: true + - name: DW_GLOBAL_foo_global_lustre + optional: true + pvcMode: ReadWriteMany mpiSpec: runPolicy: cleanPodPolicy: Running diff --git a/config/samples/nnf_v1alpha1_nnfcontainerprofile.yaml b/config/samples/nnf_v1alpha1_nnfcontainerprofile.yaml index 2d95a99d8..e64e3d143 100644 --- a/config/samples/nnf_v1alpha1_nnfcontainerprofile.yaml +++ b/config/samples/nnf_v1alpha1_nnfcontainerprofile.yaml @@ -16,7 +16,7 @@ data: # List of possible filesystems supported by this container profile. These # storages are mounted inside of the container. Any non-optional storage must # be supplied with the container directive as an argument and must reference - # a valid jobdw or persistentdw directive's name. + # a valid jobdw/persistentdw directive's name or refer to a LustreFilesystem path. # # Example: # DW jobdw name=my-gfs2 type=gfs2 capacity=50GB @@ -26,9 +26,15 @@ data: optional: false - name: DW_PERSISTENT_foo_persistent_storage optional: true + # For Global lustre pvcMode is supported and must match the mode configured in the + # LustreFilesystem Resource + - name: DW_GLOBAL_foo_global_lustre + optional: true + pvcMode: ReadWriteMany # Template defines the containers that will be created from container profile. template: + # TODO: Update for mpiSpec spec: containers: - name: sample-nnfcontainerprofile diff --git a/controllers/nnf_workflow_controller_container_helpers.go b/controllers/nnf_workflow_controller_container_helpers.go index ff5112285..9d4c50f36 100644 --- a/controllers/nnf_workflow_controller_container_helpers.go +++ b/controllers/nnf_workflow_controller_container_helpers.go @@ -61,6 +61,7 @@ type nnfContainerVolume struct { directiveIndex int mountPath string envVarName string + pvcName string } // MPI container workflow. In this model, we use mpi-operator to create an MPIJob, which creates @@ -392,17 +393,27 @@ func (c *nnfUserContainer) applyPermissions(spec *corev1.PodSpec, mpiJobSpec *mp func (c *nnfUserContainer) addNnfVolumes(spec *corev1.PodSpec) { for _, vol := range c.volumes { - // Volumes - hostPathType := corev1.HostPathDirectory - spec.Volumes = append(spec.Volumes, corev1.Volume{ - Name: vol.name, - VolumeSource: corev1.VolumeSource{ + + var volSource corev1.VolumeSource + + // If global lustre, use a PVC, otherwise use a HostPath on the rabbit to the mounts that + // already exist. 
+ if vol.command == "globaldw" { + volSource = corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: vol.pvcName, + }, + } + } else { + hostPathType := corev1.HostPathDirectory + volSource = corev1.VolumeSource{ HostPath: &corev1.HostPathVolumeSource{ Path: vol.mountPath, Type: &hostPathType, }, - }, - }) + } + } + spec.Volumes = append(spec.Volumes, corev1.Volume{Name: vol.name, VolumeSource: volSource}) // Add VolumeMounts and Volume environment variables for all containers for idx := range spec.Containers { diff --git a/controllers/nnf_workflow_controller_helpers.go b/controllers/nnf_workflow_controller_helpers.go index d6aa45535..efe9413f1 100644 --- a/controllers/nnf_workflow_controller_helpers.go +++ b/controllers/nnf_workflow_controller_helpers.go @@ -291,7 +291,7 @@ func (r *NnfWorkflowReconciler) validateContainerDirective(ctx context.Context, suppliedStorageArguments = append(suppliedStorageArguments, arg) } else if strings.HasPrefix(arg, "DW_PERSISTENT_") { if err := r.validatePersistentInstanceForStaging(ctx, storageName, workflow.Namespace); err != nil { - return nnfv1alpha1.NewWorkflowError(fmt.Sprintf("persistent storage instance '%s' not found", storageName)).WithFatal() + return nnfv1alpha1.NewWorkflowError(fmt.Sprintf("persistent storage instance '%s' not found: %v", storageName, err)).WithFatal() } idx := findDirectiveIndexByName(workflow, storageName, "persistentdw") if idx == -1 { @@ -304,6 +304,15 @@ func (r *NnfWorkflowReconciler) validateContainerDirective(ctx context.Context, return nnfv1alpha1.NewWorkflowError(err.Error()).WithFatal() } suppliedStorageArguments = append(suppliedStorageArguments, arg) + } else if strings.HasPrefix(arg, "DW_GLOBAL_") { + // Look up the global lustre fs by path rather than LustreFilesystem name + if globalLustre := r.findLustreFileSystemForPath(ctx, storageName, r.Log); globalLustre == nil { + return nnfv1alpha1.NewWorkflowError(fmt.Sprintf("global Lustre file system containing '%s' not found", storageName)).WithFatal() + } + if err := checkStorageIsInProfile(arg); err != nil { + return nnfv1alpha1.NewWorkflowError(err.Error()).WithFatal() + } + suppliedStorageArguments = append(suppliedStorageArguments, arg) } else { return nnfv1alpha1.NewWorkflowError(fmt.Sprintf("unrecognized container argument: %s", arg)).WithFatal() } @@ -1053,7 +1062,7 @@ func (r *NnfWorkflowReconciler) userContainerHandler(ctx context.Context, workfl } // Get the NNF volumes to mount into the containers - volumes, result, err := r.getContainerVolumes(ctx, workflow, dwArgs) + volumes, result, err := r.getContainerVolumes(ctx, workflow, dwArgs, profile) if err != nil { return nil, nnfv1alpha1.NewWorkflowErrorf("could not determine the list of volumes need to create container job for workflow: %s", workflow.Name).WithError(err).WithFatal() } @@ -1447,7 +1456,7 @@ func (r *NnfWorkflowReconciler) getContainerJobs(ctx context.Context, workflow * } // Create a list of volumes to be mounted inside of the containers based on the DW_JOB/DW_PERSISTENT arguments -func (r *NnfWorkflowReconciler) getContainerVolumes(ctx context.Context, workflow *dwsv1alpha2.Workflow, dwArgs map[string]string) ([]nnfContainerVolume, *result, error) { +func (r *NnfWorkflowReconciler) getContainerVolumes(ctx context.Context, workflow *dwsv1alpha2.Workflow, dwArgs map[string]string, profile *nnfv1alpha1.NnfContainerProfile) ([]nnfContainerVolume, *result, error) { volumes := []nnfContainerVolume{} // TODO: ssh is necessary for mpi see 
setupSSHAuthVolumes(manager, podSpec) in nnf-dm @@ -1462,6 +1471,9 @@ func (r *NnfWorkflowReconciler) getContainerVolumes(ctx context.Context, workflo } else if strings.HasPrefix(arg, "DW_PERSISTENT_") { volName = strings.TrimPrefix(arg, "DW_PERSISTENT_") cmd = "persistentdw" + } else if strings.HasPrefix(arg, "DW_GLOBAL_") { + volName = strings.TrimPrefix(arg, "DW_GLOBAL_") + cmd = "globaldw" } else { continue } @@ -1478,27 +1490,58 @@ func (r *NnfWorkflowReconciler) getContainerVolumes(ctx context.Context, workflo envVarName: strings.ReplaceAll(arg, "-", "_"), } - // Find the directive index for the given name so we can retrieve its NnfAccess - vol.directiveIndex = findDirectiveIndexByName(workflow, vol.directiveName, vol.command) - if vol.directiveIndex < 0 { - return nil, nil, nnfv1alpha1.NewWorkflowErrorf("could not retrieve the directive breakdown for '%s'", vol.directiveName) - } + // For global lustre, a namespace that matches the workflow's namespace must be present in + // the LustreFilesystem's Spec.Namespaces list. This results in a matching PVC that can + // then be mounted into containers in that namespace. + if cmd == "globaldw" { + globalLustre := r.findLustreFileSystemForPath(ctx, val, r.Log) + if globalLustre == nil { + return nil, nil, nnfv1alpha1.NewWorkflowError(fmt.Sprintf( + "global Lustre file system containing '%s' not found", val)).WithFatal() + } - nnfAccess := &nnfv1alpha1.NnfAccess{ - ObjectMeta: metav1.ObjectMeta{ - Name: workflow.Name + "-" + strconv.Itoa(vol.directiveIndex) + "-servers", - Namespace: workflow.Namespace, - }, - } - if err := r.Get(ctx, client.ObjectKeyFromObject(nnfAccess), nnfAccess); err != nil { - return nil, nil, nnfv1alpha1.NewWorkflowErrorf("could not retrieve the NnfAccess '%s'", nnfAccess.Name) - } + ns, nsFound := globalLustre.Spec.Namespaces[workflow.Namespace] + if !nsFound || len(ns.Modes) < 1 { + return nil, nil, nnfv1alpha1.NewWorkflowError(fmt.Sprintf( + "global Lustre file system containing '%s' is not configured for the '%s' namespace", val, workflow.Namespace)).WithFatal() + } - if !nnfAccess.Status.Ready { - return nil, Requeue(fmt.Sprintf("NnfAccess '%s' is not ready to be mounted into container", nnfAccess.Name)).after(2 * time.Second), nil - } + // Retrieve the desired PVC mode from the container profile. Default to readwritemany. + modeStr := strings.ToLower(string(corev1.ReadWriteMany)) + if profile != nil { + for _, storage := range profile.Data.Storages { + if storage.Name == arg && storage.PVCMode != "" { + modeStr = strings.ToLower(string(storage.PVCMode)) + } + } + } + + // e.g. 
PVC name: global-default-readwritemany-pvc + vol.pvcName = strings.ToLower(fmt.Sprintf("%s-%s-%s-pvc", globalLustre.Name, globalLustre.Namespace, modeStr)) + vol.mountPath = globalLustre.Spec.MountRoot + } else { + // Find the directive index for the given name so we can retrieve its NnfAccess + vol.directiveIndex = findDirectiveIndexByName(workflow, vol.directiveName, vol.command) + if vol.directiveIndex < 0 { + return nil, nil, nnfv1alpha1.NewWorkflowErrorf("could not retrieve the directive breakdown for '%s'", vol.directiveName) + } + + nnfAccess := &nnfv1alpha1.NnfAccess{ + ObjectMeta: metav1.ObjectMeta{ + Name: workflow.Name + "-" + strconv.Itoa(vol.directiveIndex) + "-servers", + Namespace: workflow.Namespace, + }, + } + if err := r.Get(ctx, client.ObjectKeyFromObject(nnfAccess), nnfAccess); err != nil { + return nil, nil, nnfv1alpha1.NewWorkflowErrorf("could not retrieve the NnfAccess '%s'", nnfAccess.Name) + } - vol.mountPath = nnfAccess.Spec.MountPath + if !nnfAccess.Status.Ready { + return nil, Requeue(fmt.Sprintf("NnfAccess '%s' is not ready to be mounted into container", nnfAccess.Name)).after(2 * time.Second), nil + } + + vol.mountPath = nnfAccess.Spec.MountPath + } volumes = append(volumes, vol) } diff --git a/controllers/nnf_workflow_controller_test.go b/controllers/nnf_workflow_controller_test.go index 0e98915d2..02401854d 100644 --- a/controllers/nnf_workflow_controller_test.go +++ b/controllers/nnf_workflow_controller_test.go @@ -134,7 +134,7 @@ var _ = Describe("NNF Workflow Unit Tests", func() { return nil } - createPersistentStorageInstance := func(name string) { + createPersistentStorageInstance := func(name, fsType string) { By("Fabricate the persistent storage instance") // Create a persistent storage instance to be found @@ -143,7 +143,7 @@ var _ = Describe("NNF Workflow Unit Tests", func() { ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: workflow.Namespace}, Spec: dwsv1alpha2.PersistentStorageInstanceSpec{ Name: name, - FsType: "lustre", + FsType: fsType, // DWDirective: workflow.Spec.DWDirectives[0], DWDirective: "#DW persistentdw name=" + name, State: dwsv1alpha2.PSIStateActive, @@ -164,7 +164,7 @@ var _ = Describe("NNF Workflow Unit Tests", func() { Namespace: workflow.Namespace, }, Spec: nnfv1alpha1.NnfStorageSpec{ - FileSystemType: "lustre", + FileSystemType: fsType, AllocationSets: []nnfv1alpha1.NnfStorageAllocationSetSpec{}, }, } @@ -497,7 +497,7 @@ var _ = Describe("NNF Workflow Unit Tests", func() { fmt.Sprintf("#DW copy_in source=/lus/maui/my-file.in destination=$DW_PERSISTENT_%s/my-persistent-file.out", strings.ReplaceAll(persistentStorageName, "-", "_")), } - createPersistentStorageInstance(persistentStorageName) + createPersistentStorageInstance(persistentStorageName, "lustre") }) // Create/Delete the "nnf-system" namespace as part of the test life-cycle; the persistent storage instances are @@ -1027,16 +1027,24 @@ var _ = Describe("NNF Workflow Unit Tests", func() { }) When("Using container directives", func() { - var ns *corev1.Namespace + var ( + ns *corev1.Namespace + + createPersistent bool + createPersistentType string - var createPersistent bool + createGlobalLustre bool + globalLustre *lusv1beta1.LustreFileSystem - var containerProfile *nnfv1alpha1.NnfContainerProfile - var containerProfileStorages []nnfv1alpha1.NnfContainerProfileStorage - var createContainerProfile bool + containerProfile *nnfv1alpha1.NnfContainerProfile + containerProfileStorages []nnfv1alpha1.NnfContainerProfileStorage + createContainerProfile bool + ) 
BeforeEach(func() { createPersistent = true + createPersistentType = "lustre" + createGlobalLustre = false containerProfile = nil containerProfileStorages = nil createContainerProfile = true @@ -1051,9 +1059,6 @@ var _ = Describe("NNF Workflow Unit Tests", func() { } k8sClient.Create(context.TODO(), ns) - if createPersistent { - createPersistentStorageInstance(persistentStorageName) - } }) JustBeforeEach(func() { @@ -1061,6 +1066,25 @@ var _ = Describe("NNF Workflow Unit Tests", func() { if createContainerProfile { containerProfile = createBasicNnfContainerProfile(containerProfileStorages) } + + if createPersistent { + createPersistentStorageInstance(persistentStorageName, createPersistentType) + } + + if createGlobalLustre { + globalLustre = &lusv1beta1.LustreFileSystem{ + ObjectMeta: metav1.ObjectMeta{ + Name: "sawbill", + Namespace: corev1.NamespaceDefault, + }, + Spec: lusv1beta1.LustreFileSystemSpec{ + Name: "sawbill", + MountRoot: "/lus/sawbill", + MgsNids: "10.0.0.2@tcp", + }, + } + Expect(k8sClient.Create(context.TODO(), globalLustre)).To(Succeed()) + } }) AfterEach(func() { @@ -1075,11 +1099,17 @@ var _ = Describe("NNF Workflow Unit Tests", func() { if createPersistent { deletePersistentStorageInstance(persistentStorageName) } + + if createGlobalLustre { + Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(globalLustre), globalLustre)).To(Succeed()) + Expect(k8sClient.Delete(context.TODO(), globalLustre)).To(Succeed()) + } }) Context("with container restrictions", func() { BeforeEach(func() { createContainerProfile = false // We'll make a custom version. + createGlobalLustre = true }) // buildRestrictedContainerProfile will create a NnfContainerProfile that @@ -1104,8 +1134,9 @@ var _ = Describe("NNF Workflow Unit Tests", func() { "#DW persistentdw name=" + persistentStorageName, fmt.Sprintf("#DW container name=container profile=%s "+ "DW_JOB_foo_local_storage=container-storage "+ - "DW_PERSISTENT_foo_persistent_storage=%s", - containerProfile.Name, persistentStorageName), + "DW_PERSISTENT_foo_persistent_storage=%s "+ + "DW_GLOBAL_foo_global_lustre=%s", + containerProfile.Name, persistentStorageName, globalLustre.Spec.MountRoot), } Expect(k8sClient.Create(context.TODO(), workflow)).Should(Succeed()) } @@ -1146,6 +1177,7 @@ var _ = Describe("NNF Workflow Unit Tests", func() { containerProfileStorages = []nnfv1alpha1.NnfContainerProfileStorage{ {Name: "DW_JOB_foo_local_storage", Optional: false}, {Name: "DW_PERSISTENT_foo_persistent_storage", Optional: true}, + {Name: "DW_GLOBAL_foo_global_lustre", Optional: true}, } }) @@ -1210,26 +1242,151 @@ var _ = Describe("NNF Workflow Unit Tests", func() { }) }) - Context("when an argument is not in the container profile", func() { + Context("when an argument is present in the container directive but not in the container profile", func() { + var storageArgsList []string + localStorageName := "local-storage" + BeforeEach(func() { - containerProfileStorages = []nnfv1alpha1.NnfContainerProfileStorage{ - {Name: "DW_PERSISTENT_foo_persistent_storage", Optional: true}, + createContainerProfile = false // We'll make a custom version. + createGlobalLustre = true + }) + + JustBeforeEach(func() { + // Build a list of storage arguments for the test. This is necessary because things + // like persistentStorageName are not initialized until the parent's BeforeEach() + // block, and the Entry() in the DescribeTable() will be translated well before + // then. So create a list of canned directive arguments for use in the Entries. 
+ storageArgsList = []string{ + fmt.Sprintf("DW_JOB_foo_local_storage=%s", localStorageName), + fmt.Sprintf("DW_PERSISTENT_foo_persistent_storage=%s", persistentStorageName), + fmt.Sprintf("DW_GLOBAL_foo_global_lustre=%s", globalLustre.Spec.MountRoot), } }) - It("should go to error", func() { + + buildContainerProfile := func(storages []nnfv1alpha1.NnfContainerProfileStorage) { + By("Creating a profile with specific storages") + tempProfile := basicNnfContainerProfile("restricted-"+uuid.NewString()[:8], storages) + containerProfile = createNnfContainerProfile(tempProfile, true) + } + + buildContainerWorkflowWithArgs := func(args string) { + By("creating the workflow") workflow.Spec.DWDirectives = []string{ - "#DW jobdw name=container-storage type=gfs2 capacity=1GB", - fmt.Sprintf("#DW container name=container profile=%s "+ - "DW_JOB_foo_local_storage=container-storage ", - containerProfile.Name), + fmt.Sprintf("#DW jobdw name=%s type=gfs2 capacity=1GB", localStorageName), + fmt.Sprintf("#DW persistentdw name=%s", persistentStorageName), + fmt.Sprintf("#DW container name=container profile=%s %s", containerProfile.Name, args), } Expect(k8sClient.Create(context.TODO(), workflow)).Should(Succeed()) + } - Eventually(func(g Gomega) bool { - g.Expect(k8sClient.Get(context.TODO(), key, workflow)).To(Succeed()) - return !workflow.Status.Ready && workflow.Status.Status == dwsv1alpha2.StatusError - }).Should(BeTrue(), "be in error state") + DescribeTable("should not go to Proposal Ready", + func(argIdx int, storages []nnfv1alpha1.NnfContainerProfileStorage) { + buildContainerProfile(storages) + buildContainerWorkflowWithArgs(storageArgsList[argIdx]) + Eventually(func(g Gomega) bool { + g.Expect(k8sClient.Get(context.TODO(), key, workflow)).To(Succeed()) + return workflow.Status.Status == dwsv1alpha2.StatusError && + strings.Contains(workflow.Status.Message, "not found in container profile") + }).Should(BeTrue(), "does not reach desired Proposal state") + }, + + Entry("when DW_JOB_ not present in the container profile", 0, + []nnfv1alpha1.NnfContainerProfileStorage{ + {Name: "DW_PERSISTENT_foo_persistent_storage", Optional: true}, + {Name: "DW_GLOBAL_foo_global_lustre", Optional: true}, + }, + ), + Entry("when DW_PERSISTENT_ not present in the container profile", 1, + []nnfv1alpha1.NnfContainerProfileStorage{ + {Name: "DW_JOB_foo_local_storage", Optional: true}, + {Name: "DW_GLOBAL_foo_global_lustre", Optional: true}, + }, + ), + Entry("when DW_GLOBAL_ not present in the container profile", 2, + []nnfv1alpha1.NnfContainerProfileStorage{ + {Name: "DW_JOB_foo_local_storage", Optional: true}, + {Name: "DW_PERSISTENT_foo_persistent_storage", Optional: true}, + }, + ), + ) + }) + + Context("when an unsupported jobdw container filesystem type is specified", func() { + localStorageName := "local-storage" + + buildContainerWorkflowWithJobDWType := func(fsType string) { + By("creating the workflow") + workflow.Spec.DWDirectives = []string{ + fmt.Sprintf("#DW jobdw name=%s type=%s capacity=1GB", localStorageName, fsType), + fmt.Sprintf("#DW container name=container profile=%s DW_JOB_foo_local_storage=%s", + containerProfile.Name, localStorageName), + } + Expect(k8sClient.Create(context.TODO(), workflow)).Should(Succeed()) + } + + DescribeTable("should reach the desired Proposal state", + func(fsType string, shouldError bool) { + buildContainerWorkflowWithJobDWType(fsType) + Eventually(func(g Gomega) bool { + g.Expect(k8sClient.Get(context.TODO(), key, workflow)).To(Succeed()) + if shouldError { + return 
workflow.Status.Status == dwsv1alpha2.StatusError && + strings.Contains(workflow.Status.Message, "unsupported container filesystem") + } else { + return workflow.Status.Ready == true + } + }).Should(BeTrue(), "should reach desired Proposal state") + + }, + Entry("when gfs2 jobdw storage is used", "gfs2", false), + Entry("when lustre jobdw storage is used", "lustre", false), + Entry("when xfs jobdw storage is used", "xfs", true), + Entry("when raw jobdw storage is used", "raw", true), + ) + }) + + Context("when an unsupported persistentdw container filesystem type is specified", func() { + + BeforeEach(func() { + createPersistent = false }) + + buildContainerWorkflowWithPersistentDWType := func(fsType string) { + By("creating the workflow") + workflow.Spec.DWDirectives = []string{ + fmt.Sprintf("#DW persistentdw name=%s", persistentStorageName), + fmt.Sprintf("#DW container name=container profile=%s DW_PERSISTENT_foo_persistent_storage=%s", + containerProfile.Name, persistentStorageName), + } + Expect(k8sClient.Create(context.TODO(), workflow)).Should(Succeed()) + } + + DescribeTable("should reach the desired Proposal state", + func(fsType string, shouldError bool) { + createPersistentStorageInstance(persistentStorageName, fsType) + buildContainerWorkflowWithPersistentDWType(fsType) + Eventually(func(g Gomega) bool { + g.Expect(k8sClient.Get(context.TODO(), key, workflow)).To(Succeed()) + if shouldError { + // Raw isn't supported for persistent storage, make sure that error gets + // reported properly + if fsType == "raw" { + return workflow.Status.Status == dwsv1alpha2.StatusError && + strings.Contains(workflow.Status.Message, "can not be used with raw allocations") + } + return workflow.Status.Status == dwsv1alpha2.StatusError && + strings.Contains(workflow.Status.Message, "unsupported container filesystem: "+fsType) + } else { + return workflow.Status.Ready == true + } + }).Should(BeTrue(), "should reach desired Proposal state") + + }, + Entry("when gfs2 persistentdw storage is used", "gfs2", false), + Entry("when lustre persistentdw storage is used", "lustre", false), + Entry("when xfs persistentdw storage is used", "xfs", true), + Entry("when raw persistentdw storage is used", "raw", true), + ) }) }) }) diff --git a/controllers/nnfcontainerprofile_test.go b/controllers/nnfcontainerprofile_test.go index 1fcf04658..fa1277cdd 100644 --- a/controllers/nnfcontainerprofile_test.go +++ b/controllers/nnfcontainerprofile_test.go @@ -69,6 +69,7 @@ func basicNnfContainerProfile(name string, storages []nnfv1alpha1.NnfContainerPr storages = []nnfv1alpha1.NnfContainerProfileStorage{ {Name: "DW_JOB_foo_local_storage", Optional: true}, {Name: "DW_PERSISTENT_foo_persistent_storage", Optional: true}, + {Name: "DW_GLOBAL_foo_global_lustre", Optional: true}, } } From 7b17a96bf2de11469cb4b1dacdee25925b90b0ce Mon Sep 17 00:00:00 2001 From: matthew-richerson <82597529+matthew-richerson@users.noreply.github.com> Date: Wed, 2 Aug 2023 10:34:26 -0500 Subject: [PATCH 10/19] github-46: Use ResourceError when returning errors (#215) * github-46: Use ResourceError when returning errors This commit uses the new ResourceError struct embedded in the status section of the DWS/nnf-sos resources. When returning an error, use the NewResourceError() call to return a ResourceError and fill it in with the correct information. This allows the end user and WLM to make informed decisions about what to do when there's an error. 
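As a quick illustration for reviewers, the intended pattern looks roughly like the following. This is condensed from the hunks below; the exact messages, severities, and surrounding functions are illustrative rather than exhaustive:

    // Record any returned error on the resource's status (as done in directivebreakdown_controller.go).
    defer func() { dbd.Status.SetResourceErrorAndLog(err, log) }()

    // Wrap a low-level failure with debug context, a user-facing message, and a severity.
    if err != nil {
        return ctrl.Result{}, dwsv1alpha2.NewResourceError("invalid DW directive: %s", dbd.Spec.Directive).
            WithError(err).WithUserMessage("invalid DW directive").WithFatal()
    }

    // Severities other than Fatal mark errors that may clear on a later reconcile.
    return nil, dwsv1alpha2.NewResourceError("could not create storage pool").WithError(err).WithMajor()

The CRDs drop the old boolean 'recoverable' field in favor of a severity (Minor/Major/Fatal) and a type (Internal/User), and the WorkflowError helper in api/v1alpha1/workflow_error.go is removed in favor of this pattern.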
Signed-off-by: Matt Richerson * copyrights and print column ordering Signed-off-by: Matt Richerson * re-vendor Signed-off-by: Matt Richerson --------- Signed-off-by: Matt Richerson --- api/v1alpha1/nnf_access_types.go | 3 +- api/v1alpha1/nnf_datamovement_types.go | 5 +- api/v1alpha1/nnf_node_storage_types.go | 6 +- api/v1alpha1/nnf_storage_types.go | 7 +- api/v1alpha1/workflow_error.go | 105 -------- api/v1alpha1/zz_generated.deepcopy.go | 15 +- .../bases/nnf.cray.hpe.com_nnfaccesses.yaml | 24 +- .../nnf.cray.hpe.com_nnfdatamovements.yaml | 32 +++ .../nnf.cray.hpe.com_nnfnodestorages.yaml | 94 ++----- .../bases/nnf.cray.hpe.com_nnfstorages.yaml | 33 ++- controllers/directivebreakdown_controller.go | 17 +- controllers/dws_servers_controller.go | 12 +- controllers/nnf_access_controller.go | 61 +++-- controllers/nnf_access_controller_test.go | 6 +- controllers/nnf_clientmount_controller.go | 37 ++- controllers/nnf_node_storage_controller.go | 159 +++-------- ...nf_persistentstorageinstance_controller.go | 2 +- controllers/nnf_storage_controller.go | 21 +- controllers/nnf_workflow_controller.go | 161 +++++------ ...f_workflow_controller_container_helpers.go | 2 +- .../nnf_workflow_controller_helpers.go | 252 +++++++++++------- controllers/nnf_workflow_controller_test.go | 6 - controllers/nnfcontainerprofile_helpers.go | 10 +- controllers/nnfstorageprofile_helpers.go | 15 +- go.mod | 2 +- go.sum | 4 +- .../dws/api/v1alpha2/clientmount_types.go | 2 + .../api/v1alpha2/directivebreakdown_types.go | 1 + .../persistentstorageinstance_types.go | 2 + .../dws/api/v1alpha2/resource_error.go | 167 ++++++++++-- .../dws/api/v1alpha2/servers_types.go | 10 + .../dws/api/v1alpha2/workflow_types.go | 25 +- .../dws/api/v1alpha2/zz_generated.deepcopy.go | 1 + .../bases/dws.cray.hpe.com_clientmounts.yaml | 30 ++- .../dws.cray.hpe.com_directivebreakdowns.yaml | 24 +- ...ay.hpe.com_persistentstorageinstances.yaml | 30 ++- .../crd/bases/dws.cray.hpe.com_servers.yaml | 32 +++ .../crd/bases/dws.cray.hpe.com_workflows.yaml | 13 +- .../dws/controllers/clientmount_controller.go | 1 + .../dws/controllers/workflow_controller.go | 73 ++++- vendor/modules.txt | 2 +- 41 files changed, 823 insertions(+), 681 deletions(-) delete mode 100644 api/v1alpha1/workflow_error.go diff --git a/api/v1alpha1/nnf_access_types.go b/api/v1alpha1/nnf_access_types.go index fa63b98bb..103a22947 100644 --- a/api/v1alpha1/nnf_access_types.go +++ b/api/v1alpha1/nnf_access_types.go @@ -1,5 +1,5 @@ /* - * Copyright 2021, 2022 Hewlett Packard Enterprise Development LP + * Copyright 2021-2023 Hewlett Packard Enterprise Development LP * Other additional copyright holders may be indicated within. 
* * The entirety of this work is licensed under the Apache License, @@ -83,6 +83,7 @@ type NnfAccessStatus struct { //+kubebuilder:printcolumn:name="DESIREDSTATE",type="string",JSONPath=".spec.desiredState",description="The desired state" //+kubebuilder:printcolumn:name="STATE",type="string",JSONPath=".status.state",description="The current state" //+kubebuilder:printcolumn:name="READY",type="boolean",JSONPath=".status.ready",description="Whether the state has been achieved" +//+kubebuilder:printcolumn:name="ERROR",type="string",JSONPath=".status.error.severity" //+kubebuilder:printcolumn:name="AGE",type="date",JSONPath=".metadata.creationTimestamp" // NnfAccess is the Schema for the nnfaccesses API diff --git a/api/v1alpha1/nnf_datamovement_types.go b/api/v1alpha1/nnf_datamovement_types.go index 09cea91dc..e5fc744e6 100644 --- a/api/v1alpha1/nnf_datamovement_types.go +++ b/api/v1alpha1/nnf_datamovement_types.go @@ -1,5 +1,5 @@ /* - * Copyright 2021, 2022 Hewlett Packard Enterprise Development LP + * Copyright 2021-2023 Hewlett Packard Enterprise Development LP * Other additional copyright holders may be indicated within. * * The entirety of this work is licensed under the Apache License, @@ -153,6 +153,8 @@ type NnfDataMovementStatus struct { // as it executes. The command status is polled at a certain frequency to avoid excessive // updates to the Data Movement resource. CommandStatus *NnfDataMovementCommandStatus `json:"commandStatus,omitempty"` + + dwsv1alpha2.ResourceError `json:",inline"` } // Types describing the various data movement status conditions. @@ -175,6 +177,7 @@ const ( //+kubebuilder:subresource:status //+kubebuilder:printcolumn:name="STATE",type="string",JSONPath=".status.state",description="Current state" //+kubebuilder:printcolumn:name="STATUS",type="string",JSONPath=".status.status",description="Status of current state" +//+kubebuilder:printcolumn:name="ERROR",type="string",JSONPath=".status.error.severity" //+kubebuilder:printcolumn:name="AGE",type="date",JSONPath=".metadata.creationTimestamp" // NnfDataMovement is the Schema for the datamovements API diff --git a/api/v1alpha1/nnf_node_storage_types.go b/api/v1alpha1/nnf_node_storage_types.go index 13d305671..579d8dca5 100644 --- a/api/v1alpha1/nnf_node_storage_types.go +++ b/api/v1alpha1/nnf_node_storage_types.go @@ -1,5 +1,5 @@ /* - * Copyright 2021, 2022 Hewlett Packard Enterprise Development LP + * Copyright 2021-2023 Hewlett Packard Enterprise Development LP * Other additional copyright holders may be indicated within. * * The entirety of this work is licensed under the Apache License, @@ -169,8 +169,6 @@ type NnfNodeStorageAllocationStatus struct { StoragePool NnfResourceStatus `json:"storagePool,omitempty"` FileSystem NnfResourceStatus `json:"fileSystem,omitempty"` - - Conditions []metav1.Condition `json:"conditions,omitempty"` } // LustreStorageStatus describes the Lustre target created here. 
@@ -196,6 +194,8 @@ func (ns *NnfNodeStorage) GetStatus() updater.Status[*NnfNodeStorageStatus] { } //+kubebuilder:object:root=true +//+kubebuilder:printcolumn:name="ERROR",type="string",JSONPath=".status.error.severity" +//+kubebuilder:printcolumn:name="AGE",type="date",JSONPath=".metadata.creationTimestamp" // NnfNodeStorageList contains a list of NNF Nodes type NnfNodeStorageList struct { diff --git a/api/v1alpha1/nnf_storage_types.go b/api/v1alpha1/nnf_storage_types.go index 0ce009a1e..e3f579177 100644 --- a/api/v1alpha1/nnf_storage_types.go +++ b/api/v1alpha1/nnf_storage_types.go @@ -1,5 +1,5 @@ /* - * Copyright 2021, 2022 Hewlett Packard Enterprise Development LP + * Copyright 2021-2023 Hewlett Packard Enterprise Development LP * Other additional copyright holders may be indicated within. * * The entirety of this work is licensed under the Apache License, @@ -106,9 +106,6 @@ type NnfStorageAllocationSetStatus struct { // Health reflects the health of this allocation set Health NnfResourceHealthType `json:"health,omitempty"` - // Error is the human readable error string - Error string `json:"error,omitempty"` - // AllocationCount is the total number of allocations that currently // exist AllocationCount int `json:"allocationCount"` @@ -135,6 +132,8 @@ type NnfStorageStatus struct { //+kubebuilder:object:root=true //+kubebuilder:subresource:status +//+kubebuilder:printcolumn:name="AGE",type="date",JSONPath=".metadata.creationTimestamp" +//+kubebuilder:printcolumn:name="ERROR",type="string",JSONPath=".status.error.severity" // NnfStorage is the Schema for the storages API type NnfStorage struct { diff --git a/api/v1alpha1/workflow_error.go b/api/v1alpha1/workflow_error.go deleted file mode 100644 index e3602e194..000000000 --- a/api/v1alpha1/workflow_error.go +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright 2022 Hewlett Packard Enterprise Development LP - * Other additional copyright holders may be indicated within. - * - * The entirety of this work is licensed under the Apache License, - * Version 2.0 (the "License"); you may not use this file except - * in compliance with the License. - * - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package v1alpha1 - -import ( - "fmt" - - dwsv1alpha2 "github.com/HewlettPackard/dws/api/v1alpha2" -) - -// +kubebuilder:object:generate=false -type WorkflowError struct { - message string - recoverable bool - err error -} - -func NewWorkflowError(message string) *WorkflowError { - return &WorkflowError{ - message: message, - recoverable: true, - } -} - -func NewWorkflowErrorf(format string, a ...any) *WorkflowError { - return NewWorkflowError(fmt.Sprintf(format, a...)) -} - -func (e *WorkflowError) GetMessage() string { - return e.message -} - -func (e *WorkflowError) GetRecoverable() bool { - return e.recoverable -} - -func (e *WorkflowError) GetError() error { - return e.err -} - -func (e *WorkflowError) Error() string { - if e.err == nil { - return e.message - } - - return e.message + ": " + e.err.Error() -} - -func (e *WorkflowError) Unwrap() error { - return e.err -} - -func (e *WorkflowError) Inject(driverStatus *dwsv1alpha2.WorkflowDriverStatus) { - driverStatus.Message = e.GetMessage() - if e.GetRecoverable() { - driverStatus.Status = dwsv1alpha2.StatusRunning - } else { - driverStatus.Status = dwsv1alpha2.StatusError - } - - if e.Unwrap() != nil { - driverStatus.Error = e.Unwrap().Error() - } else { - driverStatus.Error = e.Error() - } -} - -func (e *WorkflowError) WithFatal() *WorkflowError { - e.recoverable = false - return e -} - -func (e *WorkflowError) WithError(err error) *WorkflowError { - // if the error is already a WorkflowError, then return it unmodified - workflowError, ok := err.(*WorkflowError) - if ok { - return workflowError - } - - resourceError, ok := err.(*dwsv1alpha2.ResourceErrorInfo) - if ok { - e.message = resourceError.UserMessage - e.recoverable = resourceError.Recoverable - } - - e.err = err - return e -} diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index ee905d733..9321abb9b 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -26,8 +26,7 @@ package v1alpha1 import ( "github.com/kubeflow/mpi-operator/pkg/apis/kubeflow/v2beta1" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" ) @@ -219,7 +218,7 @@ func (in *NnfContainerProfileData) DeepCopyInto(out *NnfContainerProfileData) { } if in.Spec != nil { in, out := &in.Spec, &out.Spec - *out = new(corev1.PodSpec) + *out = new(v1.PodSpec) (*in).DeepCopyInto(*out) } if in.MPISpec != nil { @@ -444,6 +443,7 @@ func (in *NnfDataMovementStatus) DeepCopyInto(out *NnfDataMovementStatus) { *out = new(NnfDataMovementCommandStatus) (*in).DeepCopyInto(*out) } + in.ResourceError.DeepCopyInto(&out.ResourceError) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NnfDataMovementStatus. @@ -745,13 +745,6 @@ func (in *NnfNodeStorageAllocationStatus) DeepCopyInto(out *NnfNodeStorageAlloca out.FileShare = in.FileShare out.StoragePool = in.StoragePool out.FileSystem = in.FileSystem - if in.Conditions != nil { - in, out := &in.Conditions, &out.Conditions - *out = make([]v1.Condition, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NnfNodeStorageAllocationStatus. 
@@ -906,7 +899,7 @@ func (in *NnfPortManagerAllocationStatus) DeepCopyInto(out *NnfPortManagerAlloca *out = *in if in.Requester != nil { in, out := &in.Requester, &out.Requester - *out = new(corev1.ObjectReference) + *out = new(v1.ObjectReference) **out = **in } if in.Ports != nil { diff --git a/config/crd/bases/nnf.cray.hpe.com_nnfaccesses.yaml b/config/crd/bases/nnf.cray.hpe.com_nnfaccesses.yaml index ff2278cca..b3e32f2e6 100644 --- a/config/crd/bases/nnf.cray.hpe.com_nnfaccesses.yaml +++ b/config/crd/bases/nnf.cray.hpe.com_nnfaccesses.yaml @@ -27,6 +27,9 @@ spec: jsonPath: .status.ready name: READY type: boolean + - jsonPath: .status.error.severity + name: ERROR + type: string - jsonPath: .metadata.creationTimestamp name: AGE type: date @@ -189,17 +192,28 @@ spec: debugMessage: description: Internal debug message for the error type: string - recoverable: - description: Indication if the error is likely recoverable or - not - type: boolean + severity: + description: Indication of how severe the error is. Minor will + likely succeed, Major may succeed, and Fatal will never succeed. + enum: + - Minor + - Major + - Fatal + type: string + type: + description: Internal or user error + enum: + - Internal + - User + type: string userMessage: description: Optional user facing message if the error is relevant to an end user type: string required: - debugMessage - - recoverable + - severity + - type type: object ready: description: Ready signifies whether status.state has been achieved diff --git a/config/crd/bases/nnf.cray.hpe.com_nnfdatamovements.yaml b/config/crd/bases/nnf.cray.hpe.com_nnfdatamovements.yaml index 30ea8fa04..96661f84b 100644 --- a/config/crd/bases/nnf.cray.hpe.com_nnfdatamovements.yaml +++ b/config/crd/bases/nnf.cray.hpe.com_nnfdatamovements.yaml @@ -23,6 +23,9 @@ spec: jsonPath: .status.status name: STATUS type: string + - jsonPath: .status.error.severity + name: ERROR + type: string - jsonPath: .metadata.creationTimestamp name: AGE type: date @@ -232,6 +235,35 @@ spec: operation ended. format: date-time type: string + error: + description: Error information + properties: + debugMessage: + description: Internal debug message for the error + type: string + severity: + description: Indication of how severe the error is. Minor will + likely succeed, Major may succeed, and Fatal will never succeed. + enum: + - Minor + - Major + - Fatal + type: string + type: + description: Internal or user error + enum: + - Internal + - User + type: string + userMessage: + description: Optional user facing message if the error is relevant + to an end user + type: string + required: + - debugMessage + - severity + - type + type: object message: description: Message contains any text that explains the Status. If Data Movement failed or storeStdout is enabled, this will contain diff --git a/config/crd/bases/nnf.cray.hpe.com_nnfnodestorages.yaml b/config/crd/bases/nnf.cray.hpe.com_nnfnodestorages.yaml index b9807ce68..60365f77f 100644 --- a/config/crd/bases/nnf.cray.hpe.com_nnfnodestorages.yaml +++ b/config/crd/bases/nnf.cray.hpe.com_nnfnodestorages.yaml @@ -154,79 +154,6 @@ spec: storage requirements (i.e. block size / stripe size). format: int64 type: integer - conditions: - items: - description: "Condition contains details for one aspect of - the current state of this API Resource. --- This struct - is intended for direct use as an array at the field path - .status.conditions. For example, \n type FooStatus struct{ - // Represents the observations of a foo's current state. 
- // Known .status.conditions.type are: \"Available\", \"Progressing\", - and \"Degraded\" // +patchMergeKey=type // +patchStrategy=merge - // +listType=map // +listMapKey=type Conditions []metav1.Condition - `json:\"conditions,omitempty\" patchStrategy:\"merge\" patchMergeKey:\"type\" - protobuf:\"bytes,1,rep,name=conditions\"` \n // other fields - }" - properties: - lastTransitionTime: - description: lastTransitionTime is the last time the condition - transitioned from one status to another. This should - be when the underlying condition changed. If that is - not known, then using the time when the API field changed - is acceptable. - format: date-time - type: string - message: - description: message is a human readable message indicating - details about the transition. This may be an empty string. - maxLength: 32768 - type: string - observedGeneration: - description: observedGeneration represents the .metadata.generation - that the condition was set based upon. For instance, - if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration - is 9, the condition is out of date with respect to the - current state of the instance. - format: int64 - minimum: 0 - type: integer - reason: - description: reason contains a programmatic identifier - indicating the reason for the condition's last transition. - Producers of specific condition types may define expected - values and meanings for this field, and whether the - values are considered a guaranteed API. The value should - be a CamelCase string. This field may not be empty. - maxLength: 1024 - minLength: 1 - pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ - type: string - status: - description: status of the condition, one of True, False, - Unknown. - enum: - - "True" - - "False" - - Unknown - type: string - type: - description: type of condition in CamelCase or in foo.example.com/CamelCase. - --- Many .condition.type values are consistent across - resources like Available, but because arbitrary conditions - can be useful (see .node.status.conditions), the ability - to deconflict is important. The regex it matches is - (dns1123SubdomainFmt/)?(qualifiedNameFmt) - maxLength: 316 - pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ - type: string - required: - - lastTransitionTime - - message - - reason - - status - - type - type: object - type: array creationTime: description: Represents the time when the storage was created by the controller It is represented in RFC3339 form and is @@ -364,17 +291,28 @@ spec: debugMessage: description: Internal debug message for the error type: string - recoverable: - description: Indication if the error is likely recoverable or - not - type: boolean + severity: + description: Indication of how severe the error is. Minor will + likely succeed, Major may succeed, and Fatal will never succeed. 
+ enum: + - Minor + - Major + - Fatal + type: string + type: + description: Internal or user error + enum: + - Internal + - User + type: string userMessage: description: Optional user facing message if the error is relevant to an end user type: string required: - debugMessage - - recoverable + - severity + - type type: object lustreStorage: description: LustreStorageStatus describes the Lustre targets created diff --git a/config/crd/bases/nnf.cray.hpe.com_nnfstorages.yaml b/config/crd/bases/nnf.cray.hpe.com_nnfstorages.yaml index 1c584ac7d..07dd1b985 100644 --- a/config/crd/bases/nnf.cray.hpe.com_nnfstorages.yaml +++ b/config/crd/bases/nnf.cray.hpe.com_nnfstorages.yaml @@ -14,7 +14,14 @@ spec: singular: nnfstorage scope: Namespaced versions: - - name: v1alpha1 + - additionalPrinterColumns: + - jsonPath: .metadata.creationTimestamp + name: AGE + type: date + - jsonPath: .status.error.severity + name: ERROR + type: string + name: v1alpha1 schema: openAPIV3Schema: description: NnfStorage is the Schema for the storages API @@ -144,9 +151,6 @@ spec: description: AllocationCount is the total number of allocations that currently exist type: integer - error: - description: Error is the human readable error string - type: string health: description: Health reflects the health of this allocation set type: string @@ -163,17 +167,28 @@ spec: debugMessage: description: Internal debug message for the error type: string - recoverable: - description: Indication if the error is likely recoverable or - not - type: boolean + severity: + description: Indication of how severe the error is. Minor will + likely succeed, Major may succeed, and Fatal will never succeed. + enum: + - Minor + - Major + - Fatal + type: string + type: + description: Internal or user error + enum: + - Internal + - User + type: string userMessage: description: Optional user facing message if the error is relevant to an end user type: string required: - debugMessage - - recoverable + - severity + - type type: object mgsNode: description: MgsNode is the NID of the MGS. diff --git a/controllers/directivebreakdown_controller.go b/controllers/directivebreakdown_controller.go index a2d249992..43e58ad00 100644 --- a/controllers/directivebreakdown_controller.go +++ b/controllers/directivebreakdown_controller.go @@ -107,7 +107,7 @@ func (r *DirectiveBreakdownReconciler) Reconcile(ctx context.Context, req ctrl.R statusUpdater := updater.NewStatusUpdater[*dwsv1alpha2.DirectiveBreakdownStatus](dbd) defer func() { err = statusUpdater.CloseWithStatusUpdate(ctx, r.Client.Status(), err) }() - defer func() { dbd.Status.SetResourceError(err) }() + defer func() { dbd.Status.SetResourceErrorAndLog(err, log) }() // Check if the object is being deleted if !dbd.GetDeletionTimestamp().IsZero() { @@ -117,7 +117,7 @@ func (r *DirectiveBreakdownReconciler) Reconcile(ctx context.Context, req ctrl.R return ctrl.Result{}, nil } - // Delete all children that are owned by this DirectiveBreakdown. + // Delete all children that are owned by this DirectiveBreakdown. 
deleteStatus, err := dwsv1alpha2.DeleteChildren(ctx, r.Client, r.ChildObjects, dbd) if err != nil { return ctrl.Result{}, err @@ -155,7 +155,7 @@ func (r *DirectiveBreakdownReconciler) Reconcile(ctx context.Context, req ctrl.R argsMap, err := dwdparse.BuildArgsMap(dbd.Spec.Directive) if err != nil { - return ctrl.Result{}, err + return ctrl.Result{}, dwsv1alpha2.NewResourceError("invalid DW directive: %s", dbd.Spec.Directive).WithError(err).WithUserMessage("invalid DW directive").WithFatal() } commonResourceName, commonResourceNamespace := getStorageReferenceNameFromDBD(dbd) @@ -376,7 +376,7 @@ func (r *DirectiveBreakdownReconciler) createOrUpdatePersistentStorageInstance(c } } else { if psi.Spec.UserID != dbd.Spec.UserID { - return dwsv1alpha2.NewResourceError(fmt.Sprintf("Existing persistent storage user ID %v does not match user ID %v", psi.Spec.UserID, dbd.Spec.UserID), nil).WithUserMessage("User ID does not match existing persistent storage").WithFatal() + return dwsv1alpha2.NewResourceError("existing persistent storage user ID %v does not match user ID %v", psi.Spec.UserID, dbd.Spec.UserID).WithUserMessage("User ID does not match existing persistent storage").WithFatal().WithUser() } } @@ -456,8 +456,7 @@ func (r *DirectiveBreakdownReconciler) populateStorageBreakdown(ctx context.Cont // The pinned profile will be named for the NnfStorage. nnfStorageProfile, err := findPinnedProfile(ctx, r.Client, dbd.GetNamespace(), commonResourceName) if err != nil { - log.Error(err, "Unable to find pinned NnfStorageProfile", "name", commonResourceName) - return err + return dwsv1alpha2.NewResourceError("unable to find pinned NnfStorageProfile: %s/%s", commonResourceName, dbd.GetNamespace()).WithError(err).WithUserMessage("Unable to find pinned NnfStorageProfile").WithFatal() } // The directive has been validated by the webhook, so we can assume the pieces we need are in the map. @@ -519,9 +518,7 @@ func (r *DirectiveBreakdownReconciler) populateStorageBreakdown(ctx context.Cont } default: - err := fmt.Errorf("failed to populate directiveBreakdown") - log.Error(err, "populate directiveBreakdown", "directiveBreakdown", dbd.Name, "filesystem", filesystem) - return err + return dwsv1alpha2.NewResourceError("invalid DW directive file system type: %s", filesystem).WithUserMessage("invalid DW directive").WithFatal() } if dbd.Status.Storage == nil { @@ -558,7 +555,7 @@ func getCapacityInBytes(capacity string) (int64, error) { // matches[0] is the entire string, we want the parts. val, err := strconv.ParseFloat(matches[1], 64) if err != nil { - return 0, fmt.Errorf("invalid capacity string, %s", capacity) + return 0, dwsv1alpha2.NewResourceError("invalid capacity string, %s", capacity) } return int64(math.Round(val * powers[matches[3]])), nil diff --git a/controllers/dws_servers_controller.go b/controllers/dws_servers_controller.go index badb1b875..51c438ed9 100644 --- a/controllers/dws_servers_controller.go +++ b/controllers/dws_servers_controller.go @@ -1,5 +1,5 @@ /* - * Copyright 2021, 2022 Hewlett Packard Enterprise Development LP + * Copyright 2021-2023 Hewlett Packard Enterprise Development LP * Other additional copyright holders may be indicated within. 
* * The entirety of this work is licensed under the Apache License, @@ -21,7 +21,6 @@ package controllers import ( "context" - "fmt" "os" "reflect" "runtime" @@ -43,6 +42,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/source" dwsv1alpha2 "github.com/HewlettPackard/dws/api/v1alpha2" + "github.com/HewlettPackard/dws/utils/updater" nnfv1alpha1 "github.com/NearNodeFlash/nnf-sos/api/v1alpha1" "github.com/NearNodeFlash/nnf-sos/controllers/metrics" ) @@ -84,7 +84,7 @@ const ( // For more details, check Reconcile and its Result here: // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.9.2/pkg/reconcile func (r *DWSServersReconciler) Reconcile(ctx context.Context, req ctrl.Request) (res ctrl.Result, err error) { - + log := r.Log.WithValues("Servers", req.NamespacedName) metrics.NnfServersReconcilesTotal.Inc() servers := &dwsv1alpha2.Servers{} @@ -95,6 +95,10 @@ func (r *DWSServersReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, client.IgnoreNotFound(err) } + statusUpdater := updater.NewStatusUpdater[*dwsv1alpha2.ServersStatus](servers) + defer func() { err = statusUpdater.CloseWithStatusUpdate(ctx, r.Client.Status(), err) }() + defer func() { servers.Status.SetResourceErrorAndLog(err, log) }() + // Check if the object is being deleted if !servers.GetDeletionTimestamp().IsZero() { if !controllerutil.ContainsFinalizer(servers, finalizerNnfServers) { @@ -217,7 +221,7 @@ func (r *DWSServersReconciler) updateCapacityUsed(ctx context.Context, servers * // If the nnfStorage was created using information from the Servers resource, then // we should always find a match. if serversIndex == -1 { - return ctrl.Result{}, fmt.Errorf("Unable to find allocation label %s", label) + return ctrl.Result{}, dwsv1alpha2.NewResourceError("unable to find allocation label %s", label).WithFatal() } // Loop through the nnfNodeStorages corresponding to each of the Rabbit nodes and find diff --git a/controllers/nnf_access_controller.go b/controllers/nnf_access_controller.go index 3afe297a6..78321c56b 100644 --- a/controllers/nnf_access_controller.go +++ b/controllers/nnf_access_controller.go @@ -1,5 +1,5 @@ /* - * Copyright 2021, 2022 Hewlett Packard Enterprise Development LP + * Copyright 2021-2023 Hewlett Packard Enterprise Development LP * Other additional copyright holders may be indicated within. * * The entirety of this work is licensed under the Apache License, @@ -92,6 +92,11 @@ func (r *NnfAccessReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( statusUpdater := updater.NewStatusUpdater[*nnfv1alpha1.NnfAccessStatus](access) defer func() { err = statusUpdater.CloseWithStatusUpdate(ctx, r.Client.Status(), err) }() + defer func() { + if err != nil || (!res.Requeue && res.RequeueAfter == 0) { + access.Status.SetResourceErrorAndLog(err, log) + } + }() // Create a list of names of the client nodes. 
This is pulled from either // the Computes resource specified in the ClientReference or the NnfStorage @@ -174,13 +179,16 @@ func (r *NnfAccessReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( if access.Status.State == "mounted" { result, err = r.mount(ctx, access, clientList, storageMapping) + if err != nil { + return ctrl.Result{}, dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("unable to mount file system on client nodes") + } } else { result, err = r.unmount(ctx, access, clientList, storageMapping) + if err != nil { + return ctrl.Result{}, dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("unable to unmount file system from client nodes") + } } - if err != nil { - return ctrl.Result{}, err - } if result != nil { return *result, nil } @@ -201,7 +209,7 @@ func (r *NnfAccessReconciler) mount(ctx context.Context, access *nnfv1alpha1.Nnf // from a single host. wait, err := r.lockStorage(ctx, access) if err != nil { - return nil, err + return nil, dwsv1alpha2.NewResourceError("unable to lock storage").WithError(err) } if wait { @@ -211,18 +219,26 @@ func (r *NnfAccessReconciler) mount(ctx context.Context, access *nnfv1alpha1.Nnf // Add compute node information to the storage map, if necessary. err = r.addNodeStorageEndpoints(ctx, access, storageMapping) if err != nil { - return nil, err + if apierrors.IsConflict(err) { + return &ctrl.Result{}, nil + } + + return nil, dwsv1alpha2.NewResourceError("unable to add endpoints to NnfNodeStorage").WithError(err) } // Create the ClientMount resources. One ClientMount resource is created per client - err = r.createClientMounts(ctx, access, storageMapping) + err = r.manageClientMounts(ctx, access, storageMapping) if err != nil { - return nil, err + if apierrors.IsConflict(err) { + return &ctrl.Result{}, nil + } + + return nil, dwsv1alpha2.NewResourceError("unable to create ClientMount resources").WithError(err) } ready, err := r.getNodeStorageEndpointStatus(ctx, access, storageMapping) if err != nil { - return nil, err + return nil, dwsv1alpha2.NewResourceError("unable to check endpoints for NnfNodeStorage").WithError(err) } if ready == false { @@ -232,7 +248,7 @@ func (r *NnfAccessReconciler) mount(ctx context.Context, access *nnfv1alpha1.Nnf // Aggregate the status from all the ClientMount resources ready, err = r.getClientMountStatus(ctx, access, clientList) if err != nil { - return nil, err + return nil, dwsv1alpha2.NewResourceError("unable to check ClientMount status").WithError(err) } // Wait for all of the ClientMounts to be ready @@ -245,15 +261,15 @@ func (r *NnfAccessReconciler) mount(ctx context.Context, access *nnfv1alpha1.Nnf func (r *NnfAccessReconciler) unmount(ctx context.Context, access *nnfv1alpha1.NnfAccess, clientList []string, storageMapping map[string][]dwsv1alpha2.ClientMountInfo) (*ctrl.Result, error) { // Create the ClientMount resources. 
One ClientMount resource is created per client - err := r.createClientMounts(ctx, access, storageMapping) + err := r.manageClientMounts(ctx, access, storageMapping) if err != nil { - return nil, err + return nil, dwsv1alpha2.NewResourceError("unable to update ClientMount resources").WithError(err) } // Aggregate the status from all the ClientMount resources ready, err := r.getClientMountStatus(ctx, access, clientList) if err != nil { - return nil, err + return nil, dwsv1alpha2.NewResourceError("unable to get ClientMount status").WithError(err) } // Wait for all of the ClientMounts to be ready @@ -263,12 +279,12 @@ func (r *NnfAccessReconciler) unmount(ctx context.Context, access *nnfv1alpha1.N err = r.removeNodeStorageEndpoints(ctx, access, storageMapping) if err != nil { - return nil, err + return nil, dwsv1alpha2.NewResourceError("unable to remove NnfNodeStorage endpoints").WithError(err) } // Unlock the NnfStorage so it can be used by another NnfAccess if err = r.unlockStorage(ctx, access); err != nil { - return nil, err + return nil, dwsv1alpha2.NewResourceError("unable to unlock storage").WithError(err) } return nil, nil @@ -280,7 +296,7 @@ func (r *NnfAccessReconciler) unmount(ctx context.Context, access *nnfv1alpha1.N func (r *NnfAccessReconciler) lockStorage(ctx context.Context, access *nnfv1alpha1.NnfAccess) (bool, error) { if access.Spec.StorageReference.Kind != reflect.TypeOf(nnfv1alpha1.NnfStorage{}).Name() { - return false, fmt.Errorf("Invalid StorageReference kind %s", access.Spec.StorageReference.Kind) + return false, fmt.Errorf("invalid StorageReference kind %s", access.Spec.StorageReference.Kind) } namespacedName := types.NamespacedName{ @@ -580,7 +596,7 @@ func (r *NnfAccessReconciler) mapClientLocalStorage(ctx context.Context, access // Check that the correct number of NnfNodeStorage resources were found for this // Rabbit. if len(nnfNodeStorageList.Items) != storageCount.instanceCount { - return nil, fmt.Errorf("Incorrect number of NnfNodeStorages. found %d. Needed %d.", len(nnfNodeStorageList.Items), storageCount.instanceCount) + return nil, dwsv1alpha2.NewResourceError("incorrect number of NnfNodeStorages. found %d. Needed %d.", len(nnfNodeStorageList.Items), storageCount.instanceCount).WithMajor() } for _, nnfNodeStorage := range nnfNodeStorageList.Items { @@ -684,7 +700,7 @@ func (r *NnfAccessReconciler) mapClientLocalStorage(ctx context.Context, access } if len(existingStorage[storageName]) == 0 { - return nil, fmt.Errorf("Invalid matching between clients and storage. Too many clients for storage %s", storageName) + return nil, dwsv1alpha2.NewResourceError("").WithUserMessage("invalid matching between clients and storage. Too many clients for storage").WithWLM().WithFatal() } // If target==all, then the client wants to access all the storage it can see @@ -897,8 +913,8 @@ func (r *NnfAccessReconciler) removeNodeStorageEndpoints(ctx context.Context, ac return nil } -// createClientMounts creates the ClientMount resources based on the information in the storageMapping map. -func (r *NnfAccessReconciler) createClientMounts(ctx context.Context, access *nnfv1alpha1.NnfAccess, storageMapping map[string][]dwsv1alpha2.ClientMountInfo) error { +// manageClientMounts creates or updates the ClientMount resources based on the information in the storageMapping map. 
+func (r *NnfAccessReconciler) manageClientMounts(ctx context.Context, access *nnfv1alpha1.NnfAccess, storageMapping map[string][]dwsv1alpha2.ClientMountInfo) error { log := r.Log.WithValues("NnfAccess", client.ObjectKeyFromObject(access)) g := new(errgroup.Group) @@ -928,7 +944,10 @@ func (r *NnfAccessReconciler) createClientMounts(ctx context.Context, access *nn namespacedName := client.ObjectKeyFromObject(clientMount).String() if err != nil { - log.Error(err, "failed to create or update ClientMount", "name", namespacedName) + if !apierrors.IsConflict(err) { + log.Error(err, "failed to create or update ClientMount", "name", namespacedName) + } + return err } if result == controllerutil.OperationResultCreated { diff --git a/controllers/nnf_access_controller_test.go b/controllers/nnf_access_controller_test.go index 9b33bfb02..9101beb54 100644 --- a/controllers/nnf_access_controller_test.go +++ b/controllers/nnf_access_controller_test.go @@ -1,5 +1,5 @@ /* - * Copyright 2021, 2022 Hewlett Packard Enterprise Development LP + * Copyright 2021-2023 Hewlett Packard Enterprise Development LP * Other additional copyright holders may be indicated within. * * The entirety of this work is licensed under the Apache License, @@ -69,7 +69,7 @@ var _ = Describe("Access Controller Test", func() { FileSystemType: "lustre", AllocationSets: []nnfv1alpha1.NnfStorageAllocationSetSpec{ { - Name: "MGTMDT", + Name: "mgtmdt", NnfStorageLustreSpec: nnfv1alpha1.NnfStorageLustreSpec{ FileSystemName: "MGTMDT", TargetType: "MGTMDT", @@ -82,7 +82,7 @@ var _ = Describe("Access Controller Test", func() { }, }, { - Name: "OST", + Name: "ost", NnfStorageLustreSpec: nnfv1alpha1.NnfStorageLustreSpec{ FileSystemName: "OST", TargetType: "OST", diff --git a/controllers/nnf_clientmount_controller.go b/controllers/nnf_clientmount_controller.go index d5557dac3..8a8b3198c 100644 --- a/controllers/nnf_clientmount_controller.go +++ b/controllers/nnf_clientmount_controller.go @@ -1,5 +1,5 @@ /* - * Copyright 2021, 2022 Hewlett Packard Enterprise Development LP + * Copyright 2021-2023 Hewlett Packard Enterprise Development LP * Other additional copyright holders may be indicated within. * * The entirety of this work is licensed under the Apache License, @@ -21,7 +21,6 @@ package controllers import ( "context" - "fmt" "os" "runtime" "strings" @@ -79,23 +78,23 @@ func (r *NnfClientMountReconciler) Reconcile(ctx context.Context, req ctrl.Reque // on deleted requests. return ctrl.Result{}, client.IgnoreNotFound(err) } + // Create a status updater that handles the call to status().Update() if any of the fields + // in clientMount.Status change + statusUpdater := updater.NewStatusUpdater[*dwsv1alpha2.ClientMountStatus](clientMount) + defer func() { err = statusUpdater.CloseWithStatusUpdate(ctx, r.Client.Status(), err) }() + defer func() { clientMount.Status.SetResourceErrorAndLog(err, log) }() // Ensure the NNF Storage Service is running prior to taking any action. 
ss := nnf.NewDefaultStorageService() storageService := &sf.StorageServiceV150StorageService{} if err := ss.StorageServiceIdGet(ss.Id(), storageService); err != nil { - return ctrl.Result{}, err + return ctrl.Result{}, dwsv1alpha2.NewResourceError("unable to get redfish storage service status").WithError(err).WithMajor() } if storageService.Status.State != sf.ENABLED_RST { return ctrl.Result{RequeueAfter: 1 * time.Second}, nil } - // Create a status updater that handles the call to status().Update() if any of the fields - // in clientMount.Status change - statusUpdater := updater.NewStatusUpdater[*dwsv1alpha2.ClientMountStatus](clientMount) - defer func() { err = statusUpdater.CloseWithStatusUpdate(ctx, r.Client.Status(), err) }() - // Handle cleanup if the resource is being deleted if !clientMount.GetDeletionTimestamp().IsZero() { if !controllerutil.ContainsFinalizer(clientMount, finalizerNnfClientMount) { @@ -152,7 +151,7 @@ func (r *NnfClientMountReconciler) Reconcile(ctx context.Context, req ctrl.Reque clientMount.Status.Error = nil if err := r.changeMountAll(ctx, clientMount, clientMount.Spec.DesiredState); err != nil { - resourceError := dwsv1alpha2.NewResourceError("Mount/Unmount failed", err) + resourceError := dwsv1alpha2.NewResourceError("mount/unmount failed").WithError(err) log.Info(resourceError.Error()) clientMount.Status.Error = resourceError @@ -176,7 +175,7 @@ func (r *NnfClientMountReconciler) changeMountAll(ctx context.Context, clientMou case dwsv1alpha2.ClientMountStateUnmounted: err = r.changeMount(ctx, mount, false, log) default: - return fmt.Errorf("Invalid desired state %s", state) + return dwsv1alpha2.NewResourceError("invalid desired state %s", state).WithFatal() } if err != nil { @@ -198,7 +197,7 @@ func (r *NnfClientMountReconciler) changeMount(ctx context.Context, clientMountI if os.Getenv("ENVIRONMENT") == "kind" { if shouldMount { if err := os.MkdirAll(clientMountInfo.MountPath, 0755); err != nil { - return dwsv1alpha2.NewResourceError(fmt.Sprintf("Make directory failed: %s", clientMountInfo.MountPath), err) + return dwsv1alpha2.NewResourceError("make directory failed: %s", clientMountInfo.MountPath).WithError(err).WithMajor() } log.Info("Fake mounted file system", "Mount path", clientMountInfo.MountPath) @@ -209,7 +208,7 @@ func (r *NnfClientMountReconciler) changeMount(ctx context.Context, clientMountI } if err := os.RemoveAll(clientMountInfo.MountPath); err != nil { - return dwsv1alpha2.NewResourceError(fmt.Sprintf("Remove directory failed: %s", clientMountInfo.MountPath), err) + return dwsv1alpha2.NewResourceError("remove directory failed: %s", clientMountInfo.MountPath).WithError(err).WithMajor() } log.Info("Fake unmounted file system", "Mount path", clientMountInfo.MountPath) @@ -217,7 +216,7 @@ func (r *NnfClientMountReconciler) changeMount(ctx context.Context, clientMountI if clientMountInfo.SetPermissions { if err := os.Chown(clientMountInfo.MountPath, int(clientMountInfo.UserID), int(clientMountInfo.GroupID)); err != nil { - return dwsv1alpha2.NewResourceError(fmt.Sprintf("Chown failed: %s", clientMountInfo.MountPath), err) + return dwsv1alpha2.NewResourceError("chown failed: %s", clientMountInfo.MountPath).WithError(err).WithMajor() } } @@ -248,18 +247,18 @@ func (r *NnfClientMountReconciler) changeMount(ctx context.Context, clientMountI if !testEnv { if err := os.MkdirAll(mountPath, 0755); err != nil { - return dwsv1alpha2.NewResourceError(fmt.Sprintf("Make directory failed: %s", mountPath), err) + return dwsv1alpha2.NewResourceError("make 
directory failed: %s", mountPath).WithError(err).WithMajor() } } if err := mounter.Mount(mountSource, mountPath, "lustre", nil); err != nil { - return err + return dwsv1alpha2.NewResourceError("unable to mount file system").WithError(err).WithMajor() } } } else { if !isNotMountPoint { if err := mounter.Unmount(mountPath); err != nil { - return err + return dwsv1alpha2.NewResourceError("unable to unmount file system").WithError(err).WithMajor() } } } @@ -279,7 +278,7 @@ func (r *NnfClientMountReconciler) changeMount(ctx context.Context, clientMountI allocationStatus := nodeStorage.Status.Allocations[clientMountInfo.Device.DeviceReference.Data] fileShare, err := r.getFileShare(allocationStatus.FileSystem.ID, allocationStatus.FileShare.ID) if err != nil { - return dwsv1alpha2.NewResourceError("Could not get file share", err).WithFatal() + return dwsv1alpha2.NewResourceError("could not get file share").WithError(err).WithMajor() } if shouldMount { @@ -290,11 +289,11 @@ func (r *NnfClientMountReconciler) changeMount(ctx context.Context, clientMountI fileShare, err = r.updateFileShare(allocationStatus.FileSystem.ID, fileShare) if err != nil { - return dwsv1alpha2.NewResourceError("Could not update file share", err) + return dwsv1alpha2.NewResourceError("could not update file share").WithError(err).WithMajor() } default: - return dwsv1alpha2.NewResourceError(fmt.Sprintf("Invalid device type %s", clientMountInfo.Device.Type), nil).WithFatal() + return dwsv1alpha2.NewResourceError("invalid device type %s", clientMountInfo.Device.Type).WithFatal() } if shouldMount { diff --git a/controllers/nnf_node_storage_controller.go b/controllers/nnf_node_storage_controller.go index a807b7473..f036436bf 100644 --- a/controllers/nnf_node_storage_controller.go +++ b/controllers/nnf_node_storage_controller.go @@ -1,5 +1,5 @@ /* - * Copyright 2021, 2022 Hewlett Packard Enterprise Development LP + * Copyright 2021-2023 Hewlett Packard Enterprise Development LP * Other additional copyright holders may be indicated within. * * The entirety of this work is licensed under the Apache License, @@ -22,7 +22,6 @@ package controllers import ( "context" "crypto/md5" - "errors" "fmt" "net/http" "os" @@ -82,7 +81,7 @@ type NnfNodeStorageReconciler struct { // For more details, check Reconcile and its Result here: // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.7.2/pkg/reconcile func (r *NnfNodeStorageReconciler) Reconcile(ctx context.Context, req ctrl.Request) (res ctrl.Result, err error) { - + log := r.Log.WithValues("NnfNodeStorage", req.NamespacedName) metrics.NnfNodeStorageReconcilesTotal.Inc() nodeStorage := &nnfv1alpha1.NnfNodeStorage{} @@ -114,6 +113,7 @@ func (r *NnfNodeStorageReconciler) Reconcile(ctx context.Context, req ctrl.Reque // the r.Update() statusUpdater := updater.NewStatusUpdater[*nnfv1alpha1.NnfNodeStorageStatus](nodeStorage) defer func() { err = statusUpdater.CloseWithUpdate(ctx, r, err) }() + defer func() { nodeStorage.Status.SetResourceErrorAndLog(err, log) }() // Check if the object is being deleted. Deletion is carefully coordinated around // the NNF resources being managed by this NNF Node Storage resource. For a @@ -151,7 +151,7 @@ func (r *NnfNodeStorageReconciler) Reconcile(ctx context.Context, req ctrl.Reque } // First time setup requires programming of the storage status such that the resource - // is labeled as "Starting" and all Conditions are initialized. After this is done, + // is labeled as "Starting". 
After this is done, // the resource obtains a finalizer to manage the resource lifetime. if !controllerutil.ContainsFinalizer(nodeStorage, finalizerNnfNodeStorage) { controllerutil.AddFinalizer(nodeStorage, finalizerNnfNodeStorage) @@ -173,7 +173,6 @@ func (r *NnfNodeStorageReconciler) Reconcile(ctx context.Context, req ctrl.Reque for i := range nodeStorage.Status.Allocations { allocation := &nodeStorage.Status.Allocations[i] - allocation.Conditions = nnfv1alpha1.NewConditions() allocation.StoragePool.Status = nnfv1alpha1.ResourceStarting allocation.StorageGroup.Status = nnfv1alpha1.ResourceStarting allocation.FileSystem.Status = nnfv1alpha1.ResourceStarting @@ -183,14 +182,12 @@ func (r *NnfNodeStorageReconciler) Reconcile(ctx context.Context, req ctrl.Reque return ctrl.Result{}, nil } - nodeStorage.Status.Error = nil - // Loop through each allocation and create the storage for i := 0; i < nodeStorage.Spec.Count; i++ { // Allocate physical storage result, err := r.allocateStorage(nodeStorage, i) if err != nil { - return ctrl.Result{}, err + return ctrl.Result{}, dwsv1alpha2.NewResourceError("unable to allocate NVMe namespaces for allocation %v", i).WithError(err).WithMajor() } if result != nil { return *result, nil @@ -199,7 +196,7 @@ func (r *NnfNodeStorageReconciler) Reconcile(ctx context.Context, req ctrl.Reque // Create a block device in /dev that is accessible on the Rabbit node result, err = r.createBlockDevice(ctx, nodeStorage, i) if err != nil { - return ctrl.Result{}, err + return ctrl.Result{}, dwsv1alpha2.NewResourceError("unable to attach NVMe namespace to Rabbit node for allocation %v", i).WithError(err).WithMajor() } if result != nil { return *result, nil @@ -208,7 +205,7 @@ func (r *NnfNodeStorageReconciler) Reconcile(ctx context.Context, req ctrl.Reque // Format the block device from the Rabbit with a file system (if needed) result, err = r.formatFileSystem(ctx, nodeStorage, i) if err != nil { - return ctrl.Result{}, err + return ctrl.Result{}, dwsv1alpha2.NewResourceError("unable to format file system for allocation %v", i).WithError(err).WithMajor() } if result != nil { return *result, nil @@ -223,7 +220,7 @@ func (r *NnfNodeStorageReconciler) Reconcile(ctx context.Context, req ctrl.Reque } if err := r.setLustreOwnerGroup(nodeStorage); err != nil { - return ctrl.Result{}, err + return ctrl.Result{}, dwsv1alpha2.NewResourceError("unable to set owner and group for file system").WithError(err).WithMajor() } nodeStorage.Status.OwnerGroupStatus = nnfv1alpha1.ResourceReady @@ -239,17 +236,12 @@ func (r *NnfNodeStorageReconciler) allocateStorage(nodeStorage *nnfv1alpha1.NnfN allocationStatus := &nodeStorage.Status.Allocations[index] - condition := &allocationStatus.Conditions[nnfv1alpha1.ConditionIndexCreateStoragePool] - if len(allocationStatus.StoragePool.ID) == 0 { - condition.LastTransitionTime = metav1.Now() - condition.Status = metav1.ConditionTrue - } - storagePoolID := fmt.Sprintf("%s-%d", nodeStorage.Name, index) sp, err := r.createStoragePool(ss, storagePoolID, nodeStorage.Spec.Capacity) if err != nil { - updateError(condition, &allocationStatus.StoragePool, err) - return r.handleCreateError(nodeStorage, "could not create storage pool", err) + allocationStatus.StoragePool.Status = nnfv1alpha1.ResourceFailed + return &ctrl.Result{}, dwsv1alpha2.NewResourceError("could not create storage pool").WithError(err).WithMajor() + } allocationStatus.StoragePool.Status = nnfv1alpha1.ResourceStatus(sp.Status) @@ -260,9 +252,6 @@ func (r *NnfNodeStorageReconciler) 
allocateStorage(nodeStorage *nnfv1alpha1.NnfN if len(allocationStatus.StoragePool.ID) == 0 { log.Info("Created storage pool", "Id", sp.Id) allocationStatus.StoragePool.ID = sp.Id - condition.Status = metav1.ConditionFalse - condition.Reason = nnfv1alpha1.ConditionSuccess - condition.Message = "" return &ctrl.Result{}, nil } @@ -275,24 +264,16 @@ func (r *NnfNodeStorageReconciler) createBlockDevice(ctx context.Context, nodeSt ss := nnf.NewDefaultStorageService() allocationStatus := &nodeStorage.Status.Allocations[index] - condition := &allocationStatus.Conditions[nnfv1alpha1.ConditionIndexCreateStorageGroup] // Create a Storage Group if none is currently present. Recall that a Storage Group // is a mapping from the Storage Pool to a Server Endpoint. Establishing a Storage // Group makes block storage available on the server, which itself is a prerequisite to // any file system built on top of the block storage. - if len(allocationStatus.StorageGroup.ID) == 0 { - condition.LastTransitionTime = metav1.Now() - condition.Status = metav1.ConditionTrue - } // Retrieve the collection of endpoints for us to map serverEndpointCollection := &sf.EndpointCollectionEndpointCollection{} if err := ss.StorageServiceIdEndpointsGet(ss.Id(), serverEndpointCollection); err != nil { - nodeStorage.Status.Error = dwsv1alpha2.NewResourceError("Could not get service endpoint", err).WithFatal() - log.Info(nodeStorage.Status.Error.Error()) - - return &ctrl.Result{Requeue: true}, nil + return nil, dwsv1alpha2.NewResourceError("could not get service endpoint").WithError(err).WithFatal() } // Get the Storage resource to map between compute node name and @@ -305,10 +286,7 @@ func (r *NnfNodeStorageReconciler) createBlockDevice(ctx context.Context, nodeSt storage := &dwsv1alpha2.Storage{} err := r.Get(ctx, namespacedName, storage) if err != nil { - nodeStorage.Status.Error = dwsv1alpha2.NewResourceError("Could not read storage resource", err) - log.Info(nodeStorage.Status.Error.Error()) - - return &ctrl.Result{Requeue: true}, nil + return nil, dwsv1alpha2.NewResourceError("could not read storage resource").WithError(err) } // Build a list of all nodes with access to the storage @@ -346,10 +324,7 @@ func (r *NnfNodeStorageReconciler) createBlockDevice(ctx context.Context, nodeSt } if err := r.deleteStorageGroup(ss, storageGroupID); err != nil { - nodeStorage.Status.Error = dwsv1alpha2.NewResourceError("Could not delete storage group", err).WithFatal() - log.Info(nodeStorage.Status.Error.Error()) - - return &ctrl.Result{Requeue: true}, nil + return nil, dwsv1alpha2.NewResourceError("could not delete storage group").WithError(err).WithMajor() } log.Info("Deleted storage group", "storageGroupID", storageGroupID) @@ -361,10 +336,7 @@ func (r *NnfNodeStorageReconciler) createBlockDevice(ctx context.Context, nodeSt endPoint, err := r.getEndpoint(ss, endpointID) if err != nil { - nodeStorage.Status.Error = dwsv1alpha2.NewResourceError("Could not get endpoint", err).WithFatal() - log.Info(nodeStorage.Status.Error.Error()) - - return &ctrl.Result{Requeue: true}, nil + return nil, dwsv1alpha2.NewResourceError("could not get endpoint").WithError(err).WithFatal() } // Skip the endpoints that are not ready @@ -374,8 +346,8 @@ func (r *NnfNodeStorageReconciler) createBlockDevice(ctx context.Context, nodeSt sg, err := r.createStorageGroup(ss, storageGroupID, allocationStatus.StoragePool.ID, endpointID) if err != nil { - updateError(condition, &allocationStatus.StorageGroup, err) - return r.handleCreateError(nodeStorage, "could not create 
storage group", err) + allocationStatus.StorageGroup.Status = nnfv1alpha1.ResourceFailed + return &ctrl.Result{}, dwsv1alpha2.NewResourceError("could not create storage group").WithError(err).WithMajor() } allocationStatus.StorageGroup.Status = nnfv1alpha1.ResourceStatus(sg.Status) @@ -385,10 +357,6 @@ func (r *NnfNodeStorageReconciler) createBlockDevice(ctx context.Context, nodeSt if len(allocationStatus.StorageGroup.ID) == 0 { log.Info("Created storage group", "Id", storageGroupID) allocationStatus.StorageGroup.ID = sg.Id - condition.LastTransitionTime = metav1.Now() - condition.Status = metav1.ConditionFalse // we are finished with this state - condition.Reason = nnfv1alpha1.ConditionSuccess - condition.Message = "" return &ctrl.Result{}, nil } @@ -414,29 +382,22 @@ func (r *NnfNodeStorageReconciler) formatFileSystem(ctx context.Context, nodeSto // Find the Rabbit node endpoint to collect LNet information endpoint, err := r.getEndpoint(ss, os.Getenv("RABBIT_NODE")) if err != nil { - nnfv1alpha1.SetGetResourceFailureCondition(allocationStatus.Conditions, err) - nodeStorage.Status.Error = dwsv1alpha2.NewResourceError("Could not get endpoint", err).WithFatal() + nodeStorage.Status.Error = dwsv1alpha2.NewResourceError("could not get endpoint").WithError(err).WithFatal() log.Info(nodeStorage.Status.Error.Error()) - return &ctrl.Result{}, nil + return &ctrl.Result{}, dwsv1alpha2.NewResourceError("could not get endpoint").WithError(err).WithMajor() } nnfStorageProfile, err := getPinnedStorageProfileFromLabel(ctx, r.Client, nodeStorage) if err != nil { - nnfv1alpha1.SetGetResourceFailureCondition(allocationStatus.Conditions, err) - nodeStorage.Status.Error = dwsv1alpha2.NewResourceError("Could not find pinned storage profile", err).WithFatal() + allocationStatus.FileSystem.Status = nnfv1alpha1.ResourceFailed + nodeStorage.Status.Error = dwsv1alpha2.NewResourceError("could not find pinned storage profile").WithError(err).WithFatal() log.Info(nodeStorage.Status.Error.Error()) return &ctrl.Result{}, nil } // Create the FileSystem - condition := &allocationStatus.Conditions[nnfv1alpha1.ConditionIndexCreateFileSystem] - if len(allocationStatus.FileSystem.ID) == 0 { - condition.Status = metav1.ConditionTrue - condition.LastTransitionTime = metav1.Now() - } - oem := nnfserver.FileSystemOem{ Type: nodeStorage.Spec.FileSystemType, } @@ -517,9 +478,9 @@ func (r *NnfNodeStorageReconciler) formatFileSystem(ctx context.Context, nodeSto fileSystemID := fmt.Sprintf("%s-%d", nodeStorage.Name, index) fs, err := r.createFileSystem(ss, fileSystemID, allocationStatus.StoragePool.ID, oem) if err != nil { - updateError(condition, &allocationStatus.FileSystem, err) + allocationStatus.FileSystem.Status = nnfv1alpha1.ResourceFailed - return r.handleCreateError(nodeStorage, "could not create file system", err) + return &ctrl.Result{}, dwsv1alpha2.NewResourceError("could not create file system").WithError(err).WithMajor() } allocationStatus.FileSystem.Status = nnfv1alpha1.ResourceReady @@ -529,21 +490,11 @@ func (r *NnfNodeStorageReconciler) formatFileSystem(ctx context.Context, nodeSto if len(allocationStatus.FileSystem.ID) == 0 { log.Info("Created filesystem", "Id", fs.Id) allocationStatus.FileSystem.ID = fs.Id - condition.LastTransitionTime = metav1.Now() - condition.Status = metav1.ConditionFalse - condition.Reason = nnfv1alpha1.ConditionSuccess - condition.Message = "" return &ctrl.Result{}, nil } // Create the FileShare - condition = &allocationStatus.Conditions[nnfv1alpha1.ConditionIndexCreateFileShare] - if 
len(allocationStatus.FileShare.ID) == 0 { - condition.Status = metav1.ConditionTrue - condition.LastTransitionTime = metav1.Now() - } - fileShareID := fmt.Sprintf("%s-%d", nodeStorage.Name, index) mountPath := "" @@ -560,11 +511,8 @@ func (r *NnfNodeStorageReconciler) formatFileSystem(ctx context.Context, nodeSto } else { volumeGroupName, logicalVolumeName, err = r.lvmNames(ctx, nodeStorage, index) if err != nil { - updateError(condition, &allocationStatus.FileShare, err) - nodeStorage.Status.Error = dwsv1alpha2.NewResourceError("could not get VG/LV names", err).WithFatal() - log.Info(nodeStorage.Status.Error.Error()) - - return &ctrl.Result{RequeueAfter: time.Minute * 2}, nil + allocationStatus.FileShare.Status = nnfv1alpha1.ResourceFailed + return &ctrl.Result{}, dwsv1alpha2.NewResourceError("could not get VG/LV names").WithError(err).WithFatal() } shareOptions["volumeGroupName"] = volumeGroupName @@ -575,8 +523,8 @@ func (r *NnfNodeStorageReconciler) formatFileSystem(ctx context.Context, nodeSto sh, err = r.createFileShare(ss, fileShareID, allocationStatus.FileSystem.ID, os.Getenv("RABBIT_NODE"), mountPath, shareOptions) if err != nil { - updateError(condition, &allocationStatus.FileShare, err) - return r.handleCreateError(nodeStorage, "could not create file share", err) + allocationStatus.FileShare.Status = nnfv1alpha1.ResourceFailed + return &ctrl.Result{}, dwsv1alpha2.NewResourceError("could not create file share").WithError(err).WithMajor() } nid := "" @@ -599,10 +547,6 @@ func (r *NnfNodeStorageReconciler) formatFileSystem(ctx context.Context, nodeSto allocationStatus.FileShare.ID = sh.Id allocationStatus.VolumeGroup = volumeGroupName allocationStatus.LogicalVolume = logicalVolumeName - condition.LastTransitionTime = metav1.Now() - condition.Status = metav1.ConditionFalse - condition.Reason = nnfv1alpha1.ConditionSuccess - condition.Message = "" return &ctrl.Result{}, nil } @@ -668,11 +612,6 @@ func (r *NnfNodeStorageReconciler) deleteStorage(nodeStorage *nnfv1alpha1.NnfNod return nil, nil } - condition := &allocationStatus.Conditions[nnfv1alpha1.ConditionIndexDeleteStoragePool] - - condition.Status = metav1.ConditionTrue - condition.LastTransitionTime = metav1.Now() - log.Info("Deleting storage pool", "Id", allocationStatus.StoragePool.ID) err := r.deleteStoragePool(ss, allocationStatus.StoragePool.ID) @@ -682,8 +621,8 @@ func (r *NnfNodeStorageReconciler) deleteStorage(nodeStorage *nnfv1alpha1.NnfNod // If the error is from a 404 error, then there's nothing to clean up and we // assume everything has been deleted if !ok || ecErr.StatusCode() != http.StatusNotFound { - updateError(condition, &allocationStatus.FileShare, err) - nodeStorage.Status.Error = dwsv1alpha2.NewResourceError("Could not delete storage pool", err).WithFatal() + allocationStatus.FileShare.Status = nnfv1alpha1.ResourceFailed + nodeStorage.Status.Error = dwsv1alpha2.NewResourceError("could not delete storage pool").WithError(err).WithFatal() log.Info(nodeStorage.Status.Error.Error()) return &ctrl.Result{Requeue: true}, nil @@ -730,7 +669,7 @@ func (r *NnfNodeStorageReconciler) lvmNames(ctx context.Context, nodeStorage *nn }, } if err := r.Get(ctx, client.ObjectKeyFromObject(workflow), workflow); err != nil { - return "", "", dwsv1alpha2.NewResourceError("could get workflow", err) + return "", "", dwsv1alpha2.NewResourceError("could not get workflow").WithError(err) } return fmt.Sprintf("%s_%s_%d", workflow.GetUID(), directiveIndex, index), "lv", nil @@ -763,18 +702,18 @@ func (r *NnfNodeStorageReconciler) 
createStoragePool(ss nnf.StorageServiceApi, i } if err := ss.StorageServiceIdStoragePoolIdPut(ss.Id(), id, sp); err != nil { + resourceErr := dwsv1alpha2.NewResourceError("could not allocate storage pool").WithError(err) ecErr, ok := err.(*ec.ControllerError) if ok { - resourceErr := dwsv1alpha2.NewResourceError("", err) switch ecErr.Cause() { case "Insufficient capacity available": - return nil, resourceErr.WithUserMessage("Insufficient capacity available").WithFatal() + return nil, resourceErr.WithUserMessage("insufficient capacity available").WithWLM().WithFatal() default: - return nil, err + return nil, resourceErr } } - return nil, err + return nil, resourceErr } return sp, nil @@ -926,36 +865,6 @@ func (r *NnfNodeStorageReconciler) getFileSystem(ss nnf.StorageServiceApi, id st return fs, nil } -func (r *NnfNodeStorageReconciler) handleCreateError(storage *nnfv1alpha1.NnfNodeStorage, message string, err error) (*ctrl.Result, error) { - - resourceError := dwsv1alpha2.NewResourceError(message, err) - defer func() { - r.Log.WithValues("NnfNodeStorage", client.ObjectKeyFromObject(storage).String()).Info(resourceError.Error()) - storage.Status.Error = resourceError - }() - - controllerError := &ec.ControllerError{} - if errors.As(err, &controllerError) && controllerError.IsRetryable() { - return &ctrl.Result{RequeueAfter: controllerError.RetryDelay()}, nil - } - - resourceError = resourceError.WithFatal() - - // If this is really Fatal, we should not retry. But not all of nnf-ec supports the - // retryable classification of errors. Instead we mark the error as Fatal() but continue - // to retry with a modest delay. If the resource creation error occurs perpetually, an - // external entity should timeout the operation and therefore prevent future create attempts. - // Once nnf-ec has correctly classified all errors, there should be no need to requeue. - - return &ctrl.Result{RequeueAfter: time.Minute}, nil -} - -func updateError(condition *metav1.Condition, status *nnfv1alpha1.NnfResourceStatus, err error) { - status.Status = nnfv1alpha1.ResourceFailed - condition.Reason = nnfv1alpha1.ConditionFailed - condition.Message = err.Error() -} - // SetupWithManager sets up the controller with the Manager. func (r *NnfNodeStorageReconciler) SetupWithManager(mgr ctrl.Manager) error { // nnf-ec is not thread safe, so we are limited to a single reconcile thread. diff --git a/controllers/nnf_persistentstorageinstance_controller.go b/controllers/nnf_persistentstorageinstance_controller.go index f355bd47b..3ece24302 100644 --- a/controllers/nnf_persistentstorageinstance_controller.go +++ b/controllers/nnf_persistentstorageinstance_controller.go @@ -1,5 +1,5 @@ /* - * Copyright 2022 Hewlett Packard Enterprise Development LP + * Copyright 2022, 2023 Hewlett Packard Enterprise Development LP * Other additional copyright holders may be indicated within. * * The entirety of this work is licensed under the Apache License, diff --git a/controllers/nnf_storage_controller.go b/controllers/nnf_storage_controller.go index 14ea534cd..86b2eb9ed 100644 --- a/controllers/nnf_storage_controller.go +++ b/controllers/nnf_storage_controller.go @@ -1,5 +1,5 @@ /* - * Copyright 2021, 2022 Hewlett Packard Enterprise Development LP + * Copyright 2021-2023 Hewlett Packard Enterprise Development LP * Other additional copyright holders may be indicated within. 
* * The entirety of this work is licensed under the Apache License, @@ -23,6 +23,7 @@ import ( "context" "runtime" "strconv" + "time" "github.com/go-logr/logr" @@ -85,7 +86,7 @@ const ( // Reconcile is part of the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. func (r *NnfStorageReconciler) Reconcile(ctx context.Context, req ctrl.Request) (res ctrl.Result, err error) { - + log := r.Log.WithValues("NnfStorage", req.NamespacedName) metrics.NnfStorageReconcilesTotal.Inc() storage := &nnfv1alpha1.NnfStorage{} @@ -101,6 +102,11 @@ func (r *NnfStorageReconciler) Reconcile(ctx context.Context, req ctrl.Request) // occuring on the on function exit. statusUpdater := updater.NewStatusUpdater[*nnfv1alpha1.NnfStorageStatus](storage) defer func() { err = statusUpdater.CloseWithStatusUpdate(ctx, r.Client.Status(), err) }() + defer func() { + if err != nil || (!res.Requeue && res.RequeueAfter == 0) { + storage.Status.SetResourceErrorAndLog(err, log) + } + }() // Check if the object is being deleted if !storage.GetDeletionTimestamp().IsZero() { @@ -187,7 +193,7 @@ func (r *NnfStorageReconciler) Reconcile(ctx context.Context, req ctrl.Request) // Wait for all the allocation sets to be ready for _, allocationSet := range storage.Status.AllocationSets { if allocationSet.Status != nnfv1alpha1.ResourceReady { - return ctrl.Result{}, nil + return ctrl.Result{RequeueAfter: time.Minute}, nil } } @@ -279,7 +285,7 @@ func (r *NnfStorageReconciler) createNodeStorage(ctx context.Context, storage *n if err != nil { if !apierrors.IsConflict(err) { - storage.Status.AllocationSets[allocationSetIndex].Error = err.Error() + return nil, err } return &ctrl.Result{Requeue: true}, nil @@ -306,7 +312,6 @@ func (r *NnfStorageReconciler) aggregateNodeStorageStatus(ctx context.Context, s var status nnfv1alpha1.NnfResourceStatusType = nnfv1alpha1.ResourceReady allocationSet.AllocationCount = 0 - allocationSet.Error = "" nnfNodeStorageList := &nnfv1alpha1.NnfNodeStorageList{} matchLabels := dwsv1alpha2.MatchingOwner(storage) @@ -356,12 +361,6 @@ func (r *NnfStorageReconciler) aggregateNodeStorageStatus(ctx context.Context, s nodeAllocation.StorageGroup.Status.UpdateIfWorseThan(&status) nodeAllocation.FileSystem.Status.UpdateIfWorseThan(&status) nodeAllocation.FileShare.Status.UpdateIfWorseThan(&status) - - for _, condition := range nodeAllocation.Conditions { - if condition.Reason == nnfv1alpha1.ConditionFailed { - allocationSet.Error = condition.Message - } - } } if nnfNodeStorage.Status.Error != nil { diff --git a/controllers/nnf_workflow_controller.go b/controllers/nnf_workflow_controller.go index cae1fc244..821660587 100644 --- a/controllers/nnf_workflow_controller.go +++ b/controllers/nnf_workflow_controller.go @@ -37,7 +37,6 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" kruntime "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller" @@ -205,6 +204,10 @@ func (r *NnfWorkflowReconciler) Reconcile(ctx context.Context, req ctrl.Request) log := log.WithValues("state", workflow.Status.State, "index", driverStatus.DWDIndex) log.Info("Start", "directive", workflow.Spec.DWDirectives[driverStatus.DWDIndex]) + driverStatus.Status = dwsv1alpha2.StatusRunning + driverStatus.Message = "" + driverStatus.Error = "" + result, err := startFunctions[workflow.Status.State](r, 
ctx, workflow, driverStatus.DWDIndex) if err != nil { handleWorkflowError(err, driverStatus) @@ -213,10 +216,6 @@ func (r *NnfWorkflowReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, err } - driverStatus.Status = dwsv1alpha2.StatusRunning - driverStatus.Message = "" - driverStatus.Error = "" - if result != nil { log.Info("Start wait", result.info()...) driverStatus.Message = result.reason @@ -243,6 +242,10 @@ func (r *NnfWorkflowReconciler) Reconcile(ctx context.Context, req ctrl.Request) log := log.WithValues("state", workflow.Status.State, "index", driverStatus.DWDIndex) log.Info("Finish", "directive", workflow.Spec.DWDirectives[driverStatus.DWDIndex]) + driverStatus.Status = dwsv1alpha2.StatusRunning + driverStatus.Message = "" + driverStatus.Error = "" + result, err := finishFunctions[workflow.Status.State](r, ctx, workflow, driverStatus.DWDIndex) if err != nil { handleWorkflowError(err, driverStatus) @@ -252,13 +255,11 @@ func (r *NnfWorkflowReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, err } - driverStatus.Status = dwsv1alpha2.StatusRunning - driverStatus.Message = "" - driverStatus.Error = "" - if result != nil { log.Info("Finish wait", result.info()...) - driverStatus.Message = result.reason + if driverStatus.Message == "" { + driverStatus.Message = result.reason + } return result.Result, nil } @@ -280,8 +281,7 @@ func (r *NnfWorkflowReconciler) startProposalState(ctx context.Context, workflow dwArgs, _ := dwdparse.BuildArgsMap(workflow.Spec.DWDirectives[index]) if err := r.validateWorkflow(ctx, workflow); err != nil { - log.Error(err, "Unable to validate workflow") - return nil, nnfv1alpha1.NewWorkflowError("Unable to validate DW directives").WithFatal().WithError(err) + return nil, dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("unable to validate DW directives") } // only jobdw, persistentdw, and create_persistent need a directive breakdown @@ -296,7 +296,7 @@ func (r *NnfWorkflowReconciler) startProposalState(ctx context.Context, workflow directiveBreakdown, err := r.generateDirectiveBreakdown(ctx, index, workflow, log) if err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Unable to start parsing DW directive").WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not generate DirectiveBreakdown").WithError(err).WithUserMessage("unable to start parsing DW directive") } if directiveBreakdown == nil { @@ -324,7 +324,6 @@ func (r *NnfWorkflowReconciler) startProposalState(ctx context.Context, workflow } func (r *NnfWorkflowReconciler) finishProposalState(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) (*result, error) { - log := r.Log.WithValues("Workflow", client.ObjectKeyFromObject(workflow), "Index", index) dwArgs, _ := dwdparse.BuildArgsMap(workflow.Spec.DWDirectives[index]) // only jobdw, persistentdw, and create_persistent have a directive breakdown @@ -344,12 +343,13 @@ func (r *NnfWorkflowReconciler) finishProposalState(ctx context.Context, workflo err := r.Get(ctx, client.ObjectKeyFromObject(directiveBreakdown), directiveBreakdown) if err != nil { - log.Info("Failed to get DirectiveBreakdown", "name", directiveBreakdown.GetName(), "error", err.Error()) - return nil, nnfv1alpha1.NewWorkflowError("Unable to finish parsing DW directive").WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not get DirectiveBreakdown: %v", client.ObjectKeyFromObject(directiveBreakdown)).WithError(err).WithUserMessage("unable to finish parsing DW directive") } if 
directiveBreakdown.Status.Error != nil { - return nil, nnfv1alpha1.NewWorkflowError("").WithError(directiveBreakdown.Status.Error) + handleWorkflowErrorByIndex(directiveBreakdown.Status.Error, workflow, index) + + return Requeue("error").withObject(directiveBreakdown), nil } // Wait for the breakdown to be ready @@ -379,9 +379,7 @@ func (r *NnfWorkflowReconciler) startSetupState(ctx context.Context, workflow *d } err := r.Get(ctx, client.ObjectKeyFromObject(dbd), dbd) if err != nil { - log.Info("Unable to get directiveBreakdown", "dbd", client.ObjectKeyFromObject(dbd), "Message", err) - err = fmt.Errorf("Unable to get DirectiveBreakdown %v: %w", client.ObjectKeyFromObject(dbd), err) - return nil, nnfv1alpha1.NewWorkflowError("Could not read allocation request").WithError(err) + return nil, dwsv1alpha2.NewResourceError("unable to get DirectiveBreakdown: %v", client.ObjectKeyFromObject(dbd)).WithError(err).WithUserMessage("could not read allocation request") } s := &dwsv1alpha2.Servers{ @@ -392,14 +390,12 @@ func (r *NnfWorkflowReconciler) startSetupState(ctx context.Context, workflow *d } err = r.Get(ctx, client.ObjectKeyFromObject(s), s) if err != nil { - log.Info("Unable to get servers", "servers", client.ObjectKeyFromObject(s), "Message", err) - err = fmt.Errorf("Unable to get Servers %v: %w", client.ObjectKeyFromObject(s), err) - return nil, nnfv1alpha1.NewWorkflowError("Could not read allocation request").WithError(err) + return nil, dwsv1alpha2.NewResourceError("unable to get Servers: %v", client.ObjectKeyFromObject(s)).WithError(err).WithUserMessage("could not read allocation request") } if _, present := os.LookupEnv("RABBIT_TEST_ENV_BYPASS_SERVER_STORAGE_CHECK"); !present { if err := r.validateServerAllocations(ctx, dbd, s); err != nil { - return nil, err + return nil, dwsv1alpha2.NewResourceError("invalid Rabbit allocations for servers: %v", client.ObjectKeyFromObject(s)).WithError(err).WithUserMessage("invalid Rabbit allocations") } } @@ -408,9 +404,7 @@ func (r *NnfWorkflowReconciler) startSetupState(ctx context.Context, workflow *d return Requeue("conflict").withObject(storage), nil } - log.Info("Failed to create nnf storage", "Message", err) - err = fmt.Errorf("Could not create NnfStorage %w", err) - return nil, nnfv1alpha1.NewWorkflowError("Could not create allocation").WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not create NnfStorage").WithError(err).WithUserMessage("could not create allocation") } } @@ -428,7 +422,7 @@ func (r *NnfWorkflowReconciler) finishSetupState(ctx context.Context, workflow * }, } if err := r.Get(ctx, client.ObjectKeyFromObject(nnfStorage), nnfStorage); err != nil { - return nil, nnfv1alpha1.NewWorkflowErrorf("failed to get NNF storage resource '%s", client.ObjectKeyFromObject(nnfStorage)).WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not get NnfStorage: %v", client.ObjectKeyFromObject(nnfStorage)).WithError(err).WithUserMessage("could not allocate storage") } // If the Status section has not been filled in yet, exit and wait. 
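The hunks above introduce the new error-propagation pattern for child resources: instead of wrapping a child's status error in a WorkflowError and returning it up through Reconcile (which drives the workflow into controller-runtime's error backoff), the error is recorded on the workflow's driver status entry for the directive and the reconciler requeues while keeping the offending object in view. A minimal sketch of that shape, relying only on the helpers as they are called in this patch (handleWorkflowErrorByIndex, Requeue("...").withObject()); the childResource variable and the user message below are placeholders, not code from this change:

    // Sketch: surface a child resource's ResourceError on the workflow's
    // driver status for this directive index, then requeue with the child
    // as the object of interest instead of returning the error.
    if childResource.Status.Error != nil {
        handleWorkflowErrorByIndex(
            dwsv1alpha2.NewResourceError("child resource error: %v", client.ObjectKeyFromObject(childResource)).
                WithError(childResource.Status.Error).
                WithUserMessage("could not set up storage"), // placeholder message
            workflow, index)
        return Requeue("error").withObject(childResource), nil
    }

The concrete resources (DirectiveBreakdown, NnfStorage, NnfDataMovement) and user messages used in the patch itself are the authoritative ones.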
@@ -438,7 +432,8 @@ func (r *NnfWorkflowReconciler) finishSetupState(ctx context.Context, workflow * } if nnfStorage.Status.Error != nil { - return nil, nnfv1alpha1.NewWorkflowErrorf("storage resource '%s' has error", client.ObjectKeyFromObject(nnfStorage)).WithError(nnfStorage.Status.Error) + handleWorkflowErrorByIndex(dwsv1alpha2.NewResourceError("storage resource error: %v", client.ObjectKeyFromObject(nnfStorage)).WithError(nnfStorage.Status.Error).WithUserMessage("could not allocate storage"), workflow, index) + return Requeue("error").withObject(nnfStorage), nil } if nnfStorage.Status.Status != nnfv1alpha1.ResourceReady { @@ -454,7 +449,7 @@ func (r *NnfWorkflowReconciler) startDataInOutState(ctx context.Context, workflo dwArgs, err := dwdparse.BuildArgsMap(workflow.Spec.DWDirectives[index]) if err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Invalid DW directive: " + workflow.Spec.DWDirectives[index]).WithFatal() + return nil, dwsv1alpha2.NewResourceError("").WithUserMessage("Invalid DW directive: %v", workflow.Spec.DWDirectives[index]).WithFatal().WithUser() } // NOTE: We don't need to check for the occurrence of a source or destination parameters since these are required fields and validated through the webhook @@ -488,7 +483,7 @@ func (r *NnfWorkflowReconciler) startDataInOutState(ctx context.Context, workflo } if parentDwIndex < 0 { - return nil, nil, nil, nnfv1alpha1.NewWorkflowError("No directive matching '" + name + "' found in workflow").WithFatal() + return nil, nil, nil, dwsv1alpha2.NewResourceError("").WithUserMessage("no directive matching '%v' found in workflow", name).WithFatal().WithUser() } // If directive specifies a persistent storage instance, `name` will be the nnfStorageName @@ -508,7 +503,7 @@ func (r *NnfWorkflowReconciler) startDataInOutState(ctx context.Context, workflo } if err := r.Get(ctx, client.ObjectKeyFromObject(storage), storage); err != nil { - return nil, nil, nil, fmt.Errorf("Could not get NnfStorage %v: %w", client.ObjectKeyFromObject(storage), err) + return nil, nil, nil, dwsv1alpha2.NewResourceError("could not get NnfStorage %v", client.ObjectKeyFromObject(storage)).WithError(err).WithUserMessage("could not find storage allocation") } storageReference = &corev1.ObjectReference{ @@ -539,7 +534,7 @@ func (r *NnfWorkflowReconciler) startDataInOutState(ctx context.Context, workflo // Setup NNF Access for the NNF Servers so we can run data movement on them. 
access, err := r.setupNnfAccessForServers(ctx, storage, workflow, index, parentDwIndex, teardownState, log) if err != nil { - return storageReference, access, nil, nnfv1alpha1.NewWorkflowError("Could not create data movement mount points").WithError(err) + return storageReference, access, nil, dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("could not create data movement mount points") } // Wait for accesses to go ready @@ -559,19 +554,19 @@ func (r *NnfWorkflowReconciler) startDataInOutState(ctx context.Context, workflo return storageReference, nil, nil, nil } - return nil, nil, nil, nnfv1alpha1.NewWorkflowError(fmt.Sprintf("Staging parameter '%s' is invalid", param)).WithFatal() + return nil, nil, nil, dwsv1alpha2.NewResourceError("").WithUserMessage("Staging parameter '%s' is invalid", param).WithFatal().WithUser() } sourceStorage, sourceAccess, result, err := prepareStagingArgumentFn(dwArgs["source"]) if err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Could not prepare data movement resources").WithError(err) + return nil, dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("could not prepare data movement resources") } else if result != nil { return result, nil } destStorage, destAccess, result, err := prepareStagingArgumentFn(dwArgs["destination"]) if err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Could not prepare data movement resources").WithError(err) + return nil, dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("Could not prepare data movement resources") } else if result != nil { return result, nil } @@ -580,7 +575,7 @@ func (r *NnfWorkflowReconciler) startDataInOutState(ctx context.Context, workflo for _, access := range []*nnfv1alpha1.NnfAccess{sourceAccess, destAccess} { if access != nil { if err := r.Get(ctx, client.ObjectKeyFromObject(access), access); err != nil { - return nil, fmt.Errorf("Could not get NnfAccess %v: %w", client.ObjectKeyFromObject(access), err) + return nil, dwsv1alpha2.NewResourceError("could not get NnfAccess %v", client.ObjectKeyFromObject(access)).WithError(err).WithUserMessage("could not create data movement mount points") } if access.Status.State != "mounted" || !access.Status.Ready { @@ -600,9 +595,14 @@ func (r *NnfWorkflowReconciler) startDataInOutState(ctx context.Context, workflo targetStorageRef = sourceStorage } - targetStorage := &nnfv1alpha1.NnfStorage{} - if err := r.Get(ctx, types.NamespacedName{Name: targetStorageRef.Name, Namespace: targetStorageRef.Namespace}, targetStorage); err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Data Movement: Failed to retrieve NNF Storage").WithError(err) + targetStorage := &nnfv1alpha1.NnfStorage{ + ObjectMeta: metav1.ObjectMeta{ + Name: targetStorageRef.Name, + Namespace: targetStorageRef.Namespace, + }, + } + if err := r.Get(ctx, client.ObjectKeyFromObject(targetStorage), targetStorage); err != nil { + return nil, dwsv1alpha2.NewResourceError("could not get NnfStorage: %v", client.ObjectKeyFromObject(targetStorage)).WithError(err).WithUserMessage("could not find storage allocations") } _, source := splitStagingArgumentIntoNameAndPath(dwArgs["source"]) @@ -630,8 +630,7 @@ func (r *NnfWorkflowReconciler) startDataInOutState(ctx context.Context, workflo // XFS & GFS2 require the individual rabbit nodes are performing the data movement. 
if len(targetStorage.Spec.AllocationSets) != 1 { - msg := fmt.Sprintf("Data Movement: File System %s has unexpected allocation sets %d", fsType, len(targetStorage.Spec.AllocationSets)) - return nil, nnfv1alpha1.NewWorkflowError(msg).WithFatal() + return nil, dwsv1alpha2.NewResourceError("file system %s has unexpected allocation sets %d", fsType, len(targetStorage.Spec.AllocationSets)).WithUserMessage("unexpected allocation count").WithFatal() } nodes := targetStorage.Spec.AllocationSets[0].Nodes @@ -666,7 +665,7 @@ func (r *NnfWorkflowReconciler) startDataInOutState(ctx context.Context, workflo log.Info("Creating NNF Data Movement", "name", client.ObjectKeyFromObject(dm).String()) if err := r.Create(ctx, dm); err != nil { if !errors.IsAlreadyExists(err) { - return nil, nnfv1alpha1.NewWorkflowError("Data Movement failed to create").WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not create DataMovement: %v", client.ObjectKeyFromObject(dm)).WithError(err).WithUserMessage("could not start data movement") } } } @@ -702,7 +701,7 @@ func (r *NnfWorkflowReconciler) startDataInOutState(ctx context.Context, workflo log.Info("Creating NNF Data Movement", "name", client.ObjectKeyFromObject(dm).String()) if err := r.Create(ctx, dm); err != nil { if !errors.IsAlreadyExists(err) { - return nil, nnfv1alpha1.NewWorkflowError("Data Movement failed to create").WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not create DataMovement: %v", client.ObjectKeyFromObject(dm)).WithError(err).WithUserMessage("could not start data movement") } } } @@ -721,7 +720,7 @@ func (r *NnfWorkflowReconciler) finishDataInOutState(ctx context.Context, workfl dataMovementList := &nnfv1alpha1.NnfDataMovementList{} if err := r.List(ctx, dataMovementList, matchingLabels); err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Could not retrieve data movements").WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not list DataMovements with labels: %v", matchingLabels).WithError(err).WithUserMessage("could not find data movement information") } // Since the Finish state is only called when copy_in / copy_out directives are present - the lack of any items @@ -740,7 +739,8 @@ func (r *NnfWorkflowReconciler) finishDataInOutState(ctx context.Context, workfl // TODO: Detailed Fail Message? for _, dm := range dataMovementList.Items { if dm.Status.Status != nnfv1alpha1.DataMovementConditionReasonSuccess { - return nil, nnfv1alpha1.NewWorkflowError(fmt.Sprintf("Staging operation failed")).WithFatal() + handleWorkflowErrorByIndex(dwsv1alpha2.NewResourceError("").WithUserMessage("data movement operation failed").WithFatal(), workflow, index) + return Requeue("error").withObject(&dm), nil } } @@ -756,7 +756,7 @@ func (r *NnfWorkflowReconciler) startPreRunState(ctx context.Context, workflow * // time. 
unmountResult, err := r.unmountNnfAccessIfNecessary(ctx, workflow, index, "servers") if err != nil { - return nil, err + return nil, dwsv1alpha2.NewResourceError("could not unmount NnfAccess index: %v", index).WithError(err).WithUserMessage("could not unmount on Rabbit nodes") } if unmountResult != nil { @@ -772,7 +772,16 @@ func (r *NnfWorkflowReconciler) startPreRunState(ctx context.Context, workflow * // Create container service and jobs if dwArgs["command"] == "container" { - return r.userContainerHandler(ctx, workflow, dwArgs, index, log) + result, err := r.userContainerHandler(ctx, workflow, dwArgs, index, log) + + if err != nil { + return nil, dwsv1alpha2.NewResourceError("").WithError(err).WithFatal().WithUserMessage("unable to create/update Container Jobs") + } + if result != nil { + return result, nil + } + + return nil, nil } // Create an NNFAccess for the compute clients @@ -807,8 +816,7 @@ func (r *NnfWorkflowReconciler) startPreRunState(ctx context.Context, workflow * return ctrl.SetControllerReference(workflow, access, r.Scheme) }) if err != nil { - err = fmt.Errorf("Could not CreateOrUpdate compute node NnfAccess %v: %w", client.ObjectKeyFromObject(access), err) - return nil, nnfv1alpha1.NewWorkflowError("Could not mount file system on compute nodes").WithError(err) + return nil, dwsv1alpha2.NewResourceError("Could not CreateOrUpdate compute node NnfAccess: %v", client.ObjectKeyFromObject(access)).WithError(err).WithUserMessage("could not mount file system on compute nodes") } if result == controllerutil.OperationResultCreated { @@ -826,7 +834,7 @@ func (r *NnfWorkflowReconciler) startPreRunState(ctx context.Context, workflow * fsType, err := r.getDirectiveFileSystemType(ctx, workflow, index) if err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Unable to determine directive file system type").WithError(err) + return nil, dwsv1alpha2.NewResourceError("").WithError(err).WithFatal().WithUser().WithUserMessage("Unable to determine directive file system type") } if fsType == "gfs2" || fsType == "lustre" { @@ -849,7 +857,7 @@ func (r *NnfWorkflowReconciler) startPreRunState(ctx context.Context, workflow * _, err := r.setupNnfAccessForServers(ctx, storage, workflow, index, index, teardownState, log) if err != nil { - return nil, nnfv1alpha1.NewWorkflowError(fmt.Sprintf("Could not setup NNF Access in state %s", workflow.Status.State)).WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not setup NNF Access in state %s", workflow.Status.State).WithError(err).WithUserMessage("could not mount file system on Rabbit nodes") } } @@ -875,7 +883,7 @@ func (r *NnfWorkflowReconciler) finishPreRunState(ctx context.Context, workflow case "container": return r.waitForContainersToStart(ctx, workflow, index) default: - return nil, nnfv1alpha1.NewWorkflowErrorf("Unexpected directive %v", dwArgs["command"]) + return nil, dwsv1alpha2.NewResourceError("unexpected directive: %v", dwArgs["command"]).WithFatal().WithUserMessage("could not mount file system on compute nodes") } workflow.Status.Env[envName] = buildMountPath(workflow, index) @@ -883,7 +891,7 @@ func (r *NnfWorkflowReconciler) finishPreRunState(ctx context.Context, workflow // Containers do not have NNFAccesses, so only do this after r.waitForContainersToStart() would have returned result, err := r.waitForNnfAccessStateAndReady(ctx, workflow, index, "mounted") if err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Failed to achieve NnfAccess 'mounted' state").WithError(err).WithFatal() + return nil, 
dwsv1alpha2.NewResourceError("could not mount rabbit NnfAccess for index %v", index).WithError(err).WithUserMessage("could not mount file system on compute nodes") } else if result != nil { return result, nil } @@ -902,7 +910,11 @@ func (r *NnfWorkflowReconciler) startPostRunState(ctx context.Context, workflow // Unmount the NnfAccess for the compute nodes. This will free the compute nodes to be used // in a different job even if there is data movement happening on the Rabbits. if result, err := r.unmountNnfAccessIfNecessary(ctx, workflow, index, "computes"); result != nil || err != nil { - return result, err + if err != nil { + return nil, dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("could not unmount file system from compute nodes") + } + + return result, nil } // Wait for data movement resources to complete @@ -911,7 +923,7 @@ func (r *NnfWorkflowReconciler) startPostRunState(ctx context.Context, workflow dataMovementList := &nnfv1alpha1.NnfDataMovementList{} if err := r.List(ctx, dataMovementList, matchingLabels); err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Could not retrieve data movements").WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not list DataMovements with labels: %v", matchingLabels).WithError(err).WithUserMessage("could not find data movement information") } for _, dm := range dataMovementList.Items { @@ -923,7 +935,7 @@ func (r *NnfWorkflowReconciler) startPostRunState(ctx context.Context, workflow // Unmount the NnfAccess for the servers resource if necessary. fsType, err := r.getDirectiveFileSystemType(ctx, workflow, index) if err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Unable to determine directive file system type").WithError(err) + return nil, dwsv1alpha2.NewResourceError("").WithError(err).WithFatal().WithUser().WithUserMessage("Unable to determine directive file system type") } if fsType == "gfs2" || fsType == "lustre" { @@ -944,7 +956,7 @@ func (r *NnfWorkflowReconciler) finishPostRunState(ctx context.Context, workflow result, err := r.waitForNnfAccessStateAndReady(ctx, workflow, index, "unmounted") if err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Failed to achieve NnfAccess 'unmounted' state").WithError(err).WithFatal() + return nil, dwsv1alpha2.NewResourceError("could not unmount compute NnfAccess for index %v", index).WithError(err).WithUserMessage("could not unmount file system on compute nodes") } else if result != nil { return result, nil } @@ -956,7 +968,7 @@ func (r *NnfWorkflowReconciler) finishPostRunState(ctx context.Context, workflow dataMovementList := &nnfv1alpha1.NnfDataMovementList{} if err := r.List(ctx, dataMovementList, matchingLabels); err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Could not retrieve data movements").WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not list DataMovements with labels: %v", matchingLabels).WithError(err).WithUserMessage("could not find data movement information") } for _, dm := range dataMovementList.Items { @@ -965,8 +977,8 @@ func (r *NnfWorkflowReconciler) finishPostRunState(ctx context.Context, workflow } if dm.Status.Status == nnfv1alpha1.DataMovementConditionReasonFailed { - err := fmt.Errorf("Data movement %s failed", client.ObjectKeyFromObject(&dm).String()) - return nil, nnfv1alpha1.NewWorkflowError("Data movement unsuccessful").WithError(err).WithFatal() + handleWorkflowErrorByIndex(dwsv1alpha2.NewResourceError("data movement %v failed", client.ObjectKeyFromObject(&dm)).WithUserMessage("data movement 
failed").WithFatal(), workflow, index) + return Requeue("error").withObject(&dm), nil } } @@ -985,8 +997,7 @@ func (r *NnfWorkflowReconciler) startTeardownState(ctx context.Context, workflow deleteStatus, err := dwsv1alpha2.DeleteChildrenWithLabels(ctx, r.Client, childObjects, workflow, client.MatchingLabels{nnfv1alpha1.DirectiveIndexLabel: strconv.Itoa(index)}) if err != nil { - err = fmt.Errorf("Could not delete NnfDataMovement and NnfAccess children: %w", err) - return nil, nnfv1alpha1.NewWorkflowError("Could not stop data movement and unmount file systems").WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not delete NnfDataMovement and NnfAccess children").WithError(err).WithUserMessage("could not stop data movement and unmount file systems") } if !deleteStatus.Complete() { @@ -1014,7 +1025,7 @@ func (r *NnfWorkflowReconciler) finishTeardownState(ctx context.Context, workflo persistentStorage, err := r.findPersistentInstance(ctx, workflow, dwArgs["name"]) if err != nil { - return nil, nnfv1alpha1.NewWorkflowError(fmt.Sprintf("Could not find persistent storage %v", dwArgs["name"])).WithError(err) + return nil, dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("could not find persistent storage %v", dwArgs["name"]) } persistentStorage.SetOwnerReferences([]metav1.OwnerReference{}) @@ -1025,30 +1036,27 @@ func (r *NnfWorkflowReconciler) finishTeardownState(ctx context.Context, workflo err = r.Update(ctx, persistentStorage) if err != nil { - err = fmt.Errorf("Could not update PersistentStorage %v: %w", client.ObjectKeyFromObject(persistentStorage), err) - return nil, nnfv1alpha1.NewWorkflowError("Could not finalize peristent storage").WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not update PersistentStorage: %v", client.ObjectKeyFromObject(persistentStorage)).WithError(err).WithUserMessage("could not finalize persistent storage") } log.Info("Removed owner reference from persistent storage", "psi", persistentStorage) case "destroy_persistent": persistentStorage, err := r.findPersistentInstance(ctx, workflow, dwArgs["name"]) if err != nil { if !apierrors.IsNotFound(err) { - return nil, nnfv1alpha1.NewWorkflowError(fmt.Sprintf("Could not find peristent storage %v", dwArgs["name"])).WithError(err) + return nil, dwsv1alpha2.NewResourceError("").WithError(err).WithFatal().WithUser().WithUserMessage("could not find persistent storage %v", dwArgs["name"]) } return nil, nil } if persistentStorage.Spec.UserID != workflow.Spec.UserID { - err = fmt.Errorf("Existing persistent storage user ID %v does not match user ID %v", persistentStorage.Spec.UserID, workflow.Spec.UserID) - log.Info(err.Error()) - return nil, nnfv1alpha1.NewWorkflowError("user ID does not match existing persistent storage").WithError(err).WithFatal() + return nil, dwsv1alpha2.NewResourceError("Existing persistent storage user ID %v does not match user ID %v", persistentStorage.Spec.UserID, workflow.Spec.UserID).WithError(err).WithUserMessage("user ID does not match existing persistent storage").WithFatal().WithUser() } if len(persistentStorage.Spec.ConsumerReferences) != 0 { err = fmt.Errorf("PersistentStorage cannot be deleted with %v consumers", len(persistentStorage.Spec.ConsumerReferences)) log.Info(err.Error()) - return nil, nnfv1alpha1.NewWorkflowError("PersistentStorage cannot be deleted while in use").WithError(err).WithFatal() + return nil, dwsv1alpha2.NewResourceError("persistent storage cannot be deleted with %v consumers", 
len(persistentStorage.Spec.ConsumerReferences)).WithError(err).WithUserMessage("persistent storage cannot be deleted while in use").WithFatal().WithUser() } persistentStorage.Spec.State = dwsv1alpha2.PSIStateDestroying @@ -1057,21 +1065,18 @@ func (r *NnfWorkflowReconciler) finishTeardownState(ctx context.Context, workflo addDirectiveIndexLabel(persistentStorage, index) if err := controllerutil.SetControllerReference(workflow, persistentStorage, r.Scheme); err != nil { - log.Info("Unable to assign workflow as owner of persistentInstance", "psi", persistentStorage) - err = fmt.Errorf("Could not assign workflow as owner of PersistentInstance %v: %w", client.ObjectKeyFromObject(persistentStorage), err) - return nil, nnfv1alpha1.NewWorkflowError(fmt.Sprintf("Could not delete peristent storage %v", dwArgs["name"])).WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not assign workflow as owner of PersistentInstance: %v", client.ObjectKeyFromObject(persistentStorage)).WithError(err).WithUserMessage("could not delete persistent storage %v", dwArgs["name"]) } err = r.Update(ctx, persistentStorage) if err != nil { - err = fmt.Errorf("Could not update PersistentInstance %v: %w", client.ObjectKeyFromObject(persistentStorage), err) - return nil, nnfv1alpha1.NewWorkflowError(fmt.Sprintf("Could not delete peristent storage %v", dwArgs["name"])).WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not update PersistentInstance: %v", client.ObjectKeyFromObject(persistentStorage)).WithError(err).WithUserMessage("could not delete persistent storage %v", dwArgs["name"]) } log.Info("Add owner reference for persistent storage for deletion", "psi", persistentStorage) case "persistentdw": err := r.removePersistentStorageReference(ctx, workflow, index) if err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Could not remove persistent storage reference").WithError(err) + return nil, dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("Could not remove persistent storage reference") } default: } @@ -1083,7 +1088,7 @@ func (r *NnfWorkflowReconciler) finishTeardownState(ctx context.Context, workflo deleteStatus, err := dwsv1alpha2.DeleteChildrenWithLabels(ctx, r.Client, childObjects, workflow, client.MatchingLabels{nnfv1alpha1.DirectiveIndexLabel: strconv.Itoa(index)}) if err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Could not delete storage allocations").WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not delete NnfStorage and PersistentStorageInstance children").WithError(err).WithUserMessage("could not delete storage allocations") } if !deleteStatus.Complete() { diff --git a/controllers/nnf_workflow_controller_container_helpers.go b/controllers/nnf_workflow_controller_container_helpers.go index 9d4c50f36..f9631c5a0 100644 --- a/controllers/nnf_workflow_controller_container_helpers.go +++ b/controllers/nnf_workflow_controller_container_helpers.go @@ -238,7 +238,7 @@ func (c *nnfUserContainer) applyLabels(job metav1.Object) error { job.SetLabels(labels) if err := ctrl.SetControllerReference(c.workflow, job, c.scheme); err != nil { - return nnfv1alpha1.NewWorkflowErrorf("setting Job controller reference failed for '%s':", job.GetName()).WithError(err) + return err } return nil diff --git a/controllers/nnf_workflow_controller_helpers.go b/controllers/nnf_workflow_controller_helpers.go index efe9413f1..32ced98d2 100644 --- a/controllers/nnf_workflow_controller_helpers.go +++ b/controllers/nnf_workflow_controller_helpers.go @@ -22,6 +22,7 @@ package 
controllers import ( "context" "fmt" + "os" "reflect" "strconv" "strings" @@ -103,6 +104,8 @@ func (r *result) info() []interface{} { // Validate the workflow and return any error found func (r *NnfWorkflowReconciler) validateWorkflow(ctx context.Context, wf *dwsv1alpha2.Workflow) error { + log := r.Log.WithValues("Workflow", types.NamespacedName{Name: wf.Name, Namespace: wf.Namespace}) + var createPersistentCount, deletePersistentCount, directiveCount, containerCount int for index, directive := range wf.Spec.DWDirectives { @@ -116,7 +119,7 @@ func (r *NnfWorkflowReconciler) validateWorkflow(ctx context.Context, wf *dwsv1a case "copy_in", "copy_out": if err := r.validateStagingDirective(ctx, wf, directive); err != nil { - return nnfv1alpha1.NewWorkflowError("Invalid staging Directive: " + directive).WithFatal().WithError(err) + return dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("invalid staging Directive: '%v'", directive) } case "create_persistent": @@ -127,27 +130,28 @@ func (r *NnfWorkflowReconciler) validateWorkflow(ctx context.Context, wf *dwsv1a case "persistentdw": if err := r.validatePersistentInstanceDirective(ctx, wf, directive); err != nil { - return nnfv1alpha1.NewWorkflowError("Could not validate persistent instance: " + directive).WithFatal().WithError(err) + return dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("could not validate persistent instance: '%s'", directive) } case "container": containerCount++ if err := r.validateContainerDirective(ctx, wf, index); err != nil { - return nnfv1alpha1.NewWorkflowError("Could not validate container directive: " + directive).WithFatal().WithError(err) + return dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("could not validate container directive: '%s'", directive) } } } + log.Info("counts", "directive", directiveCount, "create", createPersistentCount, "delete", deletePersistentCount) if directiveCount > 1 { // Ensure create_persistent or destroy_persistent are singletons in the workflow if createPersistentCount+deletePersistentCount > 0 { - return nnfv1alpha1.NewWorkflowError("Only a single create_persistent or destroy_persistent directive is allowed per workflow").WithFatal() + return dwsv1alpha2.NewResourceError("").WithUserMessage("only a single create_persistent or destroy_persistent directive is allowed per workflow").WithFatal().WithUser() } // Only allow 1 container directive (for now) if containerCount > 1 { - return nnfv1alpha1.NewWorkflowError("Only a single container directive is supported per workflow").WithFatal() + return dwsv1alpha2.NewResourceError("").WithUserMessage("only a single container directive is supported per workflow").WithFatal().WithUser() } } @@ -169,32 +173,32 @@ func (r *NnfWorkflowReconciler) validateStagingDirective(ctx context.Context, wf if strings.HasPrefix(arg, "$DW_JOB_") { index := findDirectiveIndexByName(wf, name, "jobdw") if index == -1 { - return nnfv1alpha1.NewWorkflowError(fmt.Sprintf("Job storage instance '%s' not found", name)).WithFatal() + return dwsv1alpha2.NewResourceError("").WithUserMessage("job storage instance '%s' not found", name).WithFatal().WithUser() } args, err := dwdparse.BuildArgsMap(wf.Spec.DWDirectives[index]) if err != nil { - return nnfv1alpha1.NewWorkflowError("Invalid DW directive: " + wf.Spec.DWDirectives[index]).WithFatal() + return dwsv1alpha2.NewResourceError("").WithUserMessage("invalid DW directive: '%s'", wf.Spec.DWDirectives[index]).WithFatal() } fsType, exists := args["type"] if !exists { - return 
nnfv1alpha1.NewWorkflowError("Invalid DW directive match for staging argument") + return dwsv1alpha2.NewResourceError("").WithUserMessage("invalid DW directive match for staging argument").WithFatal() } if fsType == "raw" { - return nnfv1alpha1.NewWorkflowError("Data movement can not be used with raw allocations").WithFatal() + return dwsv1alpha2.NewResourceError("").WithUserMessage("data movement can not be used with raw allocations").WithFatal().WithUser() } } else if strings.HasPrefix(arg, "$DW_PERSISTENT_") { if err := r.validatePersistentInstanceForStaging(ctx, name, wf.Namespace); err != nil { - return nnfv1alpha1.NewWorkflowError(fmt.Sprintf("Persistent storage instance '%s' not found", name)).WithFatal() + return dwsv1alpha2.NewResourceError("").WithUserMessage("persistent storage instance '%s' not found", name).WithFatal().WithUser() } if findDirectiveIndexByName(wf, name, "persistentdw") == -1 { - return nnfv1alpha1.NewWorkflowError(fmt.Sprintf("persistentdw directive mentioning '%s' not found", name)).WithFatal() + return dwsv1alpha2.NewResourceError("").WithUserMessage("persistentdw directive mentioning '%s' not found", name).WithFatal().WithUser() } } else { if r.findLustreFileSystemForPath(ctx, arg, r.Log) == nil { - return nnfv1alpha1.NewWorkflowError(fmt.Sprintf("global Lustre file system containing '%s' not found", arg)).WithFatal() + return dwsv1alpha2.NewResourceError("").WithUserMessage("global Lustre file system containing '%s' not found", arg).WithFatal().WithUser() } } @@ -203,15 +207,15 @@ func (r *NnfWorkflowReconciler) validateStagingDirective(ctx context.Context, wf args, err := dwdparse.BuildArgsMap(directive) if err != nil { - return nnfv1alpha1.NewWorkflowError("Invalid DW directive: " + directive).WithFatal() + return dwsv1alpha2.NewResourceError("").WithUserMessage("invalid DW directive: '%s'", directive).WithFatal() } if err := validateStagingArgument(args["source"]); err != nil { - return err + return dwsv1alpha2.NewResourceError("Invalid source argument: '%s'", args["source"]).WithError(err) } if err := validateStagingArgument(args["destination"]); err != nil { - return err + return dwsv1alpha2.NewResourceError("Invalid destination argument: '%s'", args["destination"]).WithError(err) } return nil @@ -221,13 +225,13 @@ func (r *NnfWorkflowReconciler) validateStagingDirective(ctx context.Context, wf func (r *NnfWorkflowReconciler) validateContainerDirective(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) error { args, err := dwdparse.BuildArgsMap(workflow.Spec.DWDirectives[index]) if err != nil { - return nnfv1alpha1.NewWorkflowError("invalid DW directive: " + workflow.Spec.DWDirectives[index]).WithFatal() + return dwsv1alpha2.NewResourceError("").WithUserMessage("invalid DW directive: '%s'", workflow.Spec.DWDirectives[index]).WithFatal() } // Ensure the supplied profile exists profile, err := findContainerProfile(ctx, r.Client, workflow, index) if err != nil { - return nnfv1alpha1.NewWorkflowError(err.Error()).WithFatal() + return dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("no valid container profile found").WithError(err).WithFatal() } // Check to see if the container storage argument is in the list of storages in the container profile @@ -237,7 +241,7 @@ func (r *NnfWorkflowReconciler) validateContainerDirective(ctx context.Context, return nil } } - return fmt.Errorf("storage '%s' not found in container profile '%s'", storageName, profile.Name) + return dwsv1alpha2.NewResourceError("").WithUserMessage("storage '%s' 
not found in container profile '%s'", storageName, profile.Name).WithFatal().WithUser() } checkContainerFs := func(idx int) error { @@ -248,7 +252,7 @@ func (r *NnfWorkflowReconciler) validateContainerDirective(ctx context.Context, if args["command"] == "persistentdw" { psi, err := r.getPersistentStorageInstance(ctx, args["name"], workflow.Namespace) if err != nil { - return "", fmt.Errorf("could not retrieve persistent instance '%s' for container directive: %s", args["name"], err) + return "", fmt.Errorf("could not retrieve persistent instance %s for container directive: %v", args["name"], err) } return psi.Spec.FsType, nil @@ -263,7 +267,7 @@ func (r *NnfWorkflowReconciler) validateContainerDirective(ctx context.Context, } if strings.ToLower(t) != "lustre" && strings.ToLower(t) != "gfs2" { - return fmt.Errorf("unsupported container filesystem: %s", t) + return dwsv1alpha2.NewResourceError("").WithUserMessage("unsupported container filesystem: %s", t).WithFatal().WithUser() } return nil @@ -280,41 +284,41 @@ func (r *NnfWorkflowReconciler) validateContainerDirective(ctx context.Context, if strings.HasPrefix(arg, "DW_JOB_") { idx := findDirectiveIndexByName(workflow, storageName, "jobdw") if idx == -1 { - return nnfv1alpha1.NewWorkflowError(fmt.Sprintf("jobdw directive mentioning '%s' not found", storageName)).WithFatal() + return dwsv1alpha2.NewResourceError("").WithUserMessage("jobdw directive mentioning '%s' not found", storageName).WithFatal().WithUser() } if err := checkContainerFs(idx); err != nil { - return nnfv1alpha1.NewWorkflowError(err.Error()).WithFatal() + return err } if err := checkStorageIsInProfile(arg); err != nil { - return nnfv1alpha1.NewWorkflowError(err.Error()).WithFatal() + return err } suppliedStorageArguments = append(suppliedStorageArguments, arg) } else if strings.HasPrefix(arg, "DW_PERSISTENT_") { - if err := r.validatePersistentInstanceForStaging(ctx, storageName, workflow.Namespace); err != nil { - return nnfv1alpha1.NewWorkflowError(fmt.Sprintf("persistent storage instance '%s' not found: %v", storageName, err)).WithFatal() + if err := r.validatePersistentInstance(ctx, storageName, workflow.Namespace); err != nil { + return dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("persistent storage instance '%s' not found", storageName).WithFatal() } idx := findDirectiveIndexByName(workflow, storageName, "persistentdw") if idx == -1 { - return nnfv1alpha1.NewWorkflowError(fmt.Sprintf("persistentdw directive mentioning '%s' not found", storageName)).WithFatal() + return dwsv1alpha2.NewResourceError("").WithUserMessage("persistentdw directive mentioning '%s' not found", storageName).WithFatal().WithUser() } if err := checkContainerFs(idx); err != nil { - return nnfv1alpha1.NewWorkflowError(err.Error()).WithFatal() + return err } if err := checkStorageIsInProfile(arg); err != nil { - return nnfv1alpha1.NewWorkflowError(err.Error()).WithFatal() + return err } suppliedStorageArguments = append(suppliedStorageArguments, arg) } else if strings.HasPrefix(arg, "DW_GLOBAL_") { // Look up the global lustre fs by path rather than LustreFilesystem name if globalLustre := r.findLustreFileSystemForPath(ctx, storageName, r.Log); globalLustre == nil { - return nnfv1alpha1.NewWorkflowError(fmt.Sprintf("global Lustre file system containing '%s' not found", storageName)).WithFatal() + return dwsv1alpha2.NewResourceError("").WithUserMessage("global Lustre file system containing '%s' not found", storageName).WithFatal().WithUser() } if err := checkStorageIsInProfile(arg); 
err != nil { - return nnfv1alpha1.NewWorkflowError(err.Error()).WithFatal() + return dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("storage '%s' is not present in the container profile", arg).WithUser().WithFatal() } suppliedStorageArguments = append(suppliedStorageArguments, arg) } else { - return nnfv1alpha1.NewWorkflowError(fmt.Sprintf("unrecognized container argument: %s", arg)).WithFatal() + return dwsv1alpha2.NewResourceError("").WithUserMessage("unrecognized container argument: %s", arg).WithFatal().WithUser() } } } @@ -333,8 +337,8 @@ func (r *NnfWorkflowReconciler) validateContainerDirective(ctx context.Context, for _, storage := range profile.Data.Storages { if !storage.Optional { if !findInStorageArguments(storage.Name) { - return fmt.Errorf("storage '%s' in container profile '%s' is not optional: storage argument not found in the supplied arguments", - storage.Name, profile.Name) + return dwsv1alpha2.NewResourceError("").WithUserMessage("storage '%s' in container profile '%s' is not optional: storage argument not found in the supplied arguments", + storage.Name, profile.Name).WithUser().WithFatal() } } } @@ -343,7 +347,7 @@ func (r *NnfWorkflowReconciler) validateContainerDirective(ctx context.Context, } if err := checkNonOptionalStorages(suppliedStorageArguments); err != nil { - return nnfv1alpha1.NewWorkflowError(err.Error()).WithFatal() + return err } return nil @@ -353,11 +357,29 @@ func (r *NnfWorkflowReconciler) validateContainerDirective(ctx context.Context, func (r *NnfWorkflowReconciler) validatePersistentInstanceForStaging(ctx context.Context, name string, namespace string) error { psi, err := r.getPersistentStorageInstance(ctx, name, namespace) if err != nil { - return err + return dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("could not get PersistentStorageInstance '%s'", name).WithFatal().WithUser() } if psi.Spec.FsType == "raw" { - return nnfv1alpha1.NewWorkflowError("Data movement can not be used with raw allocations").WithFatal() + return dwsv1alpha2.NewResourceError("").WithUserMessage("data movement can not be used with raw allocations").WithFatal().WithUser() + } + + if !psi.DeletionTimestamp.IsZero() { + return dwsv1alpha2.NewResourceError("").WithUserMessage("Persistent storage instance '%s' is deleting", name).WithUser().WithFatal() + } + + return nil +} + +// validatePersistentInstance validates the persistentdw directive. +func (r *NnfWorkflowReconciler) validatePersistentInstance(ctx context.Context, name string, namespace string) error { + psi, err := r.getPersistentStorageInstance(ctx, name, namespace) + if err != nil { + return dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("could not get PersistentStorageInstance %s", name).WithFatal().WithUser() + } + + if !psi.DeletionTimestamp.IsZero() { + return dwsv1alpha2.NewResourceError("").WithUserMessage("Persistent storage instance '%s' is deleting", name).WithUser().WithFatal() } return nil @@ -368,16 +390,16 @@ func (r *NnfWorkflowReconciler) validatePersistentInstanceDirective(ctx context. 
// Validate that the persistent instance is available and not in the process of being deleted args, err := dwdparse.BuildArgsMap(directive) if err != nil { - return nnfv1alpha1.NewWorkflowError("Invalid DW directive: " + directive).WithFatal() + return dwsv1alpha2.NewResourceError("invalid DW directive: %s", directive).WithFatal() } psi, err := r.getPersistentStorageInstance(ctx, args["name"], wf.Namespace) if err != nil { - return err + return dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("could not get PersistentStorageInstance '%s'", args["name"]).WithFatal().WithUser() } if !psi.DeletionTimestamp.IsZero() { - return nnfv1alpha1.NewWorkflowError("Persistent storage instance " + args["name"] + " is deleting").WithFatal() + return dwsv1alpha2.NewResourceError("").WithUserMessage("Persistent storage instance '%s' is deleting", args["name"]).WithUser().WithFatal() } return nil @@ -455,8 +477,7 @@ func (r *NnfWorkflowReconciler) generateDirectiveBreakdown(ctx context.Context, }) if err != nil { - log.Error(err, "failed to create or update DirectiveBreakdown", "name", directiveBreakdown.Name) - return nil, fmt.Errorf("CreateOrUpdate failed for DirectiveBreakdown %v: %w", client.ObjectKeyFromObject(directiveBreakdown), err) + return nil, dwsv1alpha2.NewResourceError("CreateOrUpdate failed for DirectiveBreakdown: %v", client.ObjectKeyFromObject(directiveBreakdown)).WithError(err) } if result == controllerutil.OperationResultCreated { @@ -477,8 +498,7 @@ func (r *NnfWorkflowReconciler) generateDirectiveBreakdown(ctx context.Context, func (r *NnfWorkflowReconciler) validateServerAllocations(ctx context.Context, dbd *dwsv1alpha2.DirectiveBreakdown, servers *dwsv1alpha2.Servers) error { if len(dbd.Status.Storage.AllocationSets) != 0 && len(dbd.Status.Storage.AllocationSets) != len(servers.Spec.AllocationSets) { - err := fmt.Errorf("Servers resource does not meet storage requirements for directive '%s'", dbd.Spec.Directive) - return nnfv1alpha1.NewWorkflowError("Allocation request does not meet directive requirements").WithFatal().WithError(err) + return dwsv1alpha2.NewResourceError("Servers resource does not meet storage requirements for directive '%s'", dbd.Spec.Directive).WithUserMessage("Allocation request does not meet directive requirements").WithWLM().WithFatal() } for _, breakdownAllocationSet := range dbd.Status.Storage.AllocationSets { @@ -492,8 +512,7 @@ func (r *NnfWorkflowReconciler) validateServerAllocations(ctx context.Context, d if breakdownAllocationSet.AllocationStrategy == dwsv1alpha2.AllocateSingleServer { if len(serverAllocationSet.Storage) != 1 || serverAllocationSet.Storage[0].AllocationCount != 1 { - err := fmt.Errorf("Allocation set %s expected single allocation", breakdownAllocationSet.Label) - return nnfv1alpha1.NewWorkflowError("Allocation request does not meet directive requirements").WithFatal().WithError(err) + return dwsv1alpha2.NewResourceError("allocation set %s expected single allocation", breakdownAllocationSet.Label).WithUserMessage("storage directive requirements were not satisfied").WithWLM().WithFatal() } } @@ -508,8 +527,7 @@ func (r *NnfWorkflowReconciler) validateServerAllocations(ctx context.Context, d } if totalCapacity < breakdownAllocationSet.MinimumCapacity { - err := fmt.Errorf("Allocation set %s specified insufficient capacity", breakdownAllocationSet.Label) - return nnfv1alpha1.NewWorkflowError("Allocation request does not meet directive requirements").WithFatal().WithError(err) + return dwsv1alpha2.NewResourceError("allocation set 
%s specified insufficient capacity", breakdownAllocationSet.Label).WithUserMessage("storage directive requirements were not satisfied").WithWLM().WithFatal() } // Look up each of the storages specified to make sure they exist @@ -522,18 +540,13 @@ func (r *NnfWorkflowReconciler) validateServerAllocations(ctx context.Context, d } if err := r.Get(ctx, client.ObjectKeyFromObject(storage), storage); err != nil { - if apierrors.IsNotFound(err) { - return nnfv1alpha1.NewWorkflowError("Allocation request did not specify valid storage").WithFatal().WithError(err) - } - - return nnfv1alpha1.NewWorkflowError("Could not validate allocation request").WithError(err) + return dwsv1alpha2.NewResourceError("could not get storage: %s", client.ObjectKeyFromObject(storage)).WithError(err).WithUserMessage("storage directive requirements were not satisfied").WithFatal() } } } if !found { - err := fmt.Errorf("Allocation set %s not found in Servers resource", breakdownAllocationSet.Label) - return nnfv1alpha1.NewWorkflowError("Allocation request does not meet directive requirements").WithFatal().WithError(err) + return dwsv1alpha2.NewResourceError("allocation set %s not found in Servers resource", breakdownAllocationSet.Label).WithUserMessage("storage directive requirements were not satisfied").WithWLM().WithFatal() } } @@ -551,21 +564,20 @@ func (r *NnfWorkflowReconciler) createNnfStorage(ctx context.Context, workflow * dwArgs, err := dwdparse.BuildArgsMap(workflow.Spec.DWDirectives[index]) if err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Invalid DW directive: " + workflow.Spec.DWDirectives[index]).WithFatal() + return nil, dwsv1alpha2.NewResourceError("").WithUserMessage("invalid DW directive: %s", workflow.Spec.DWDirectives[index]).WithFatal().WithUser() } pinnedName, pinnedNamespace := getStorageReferenceNameFromWorkflowActual(workflow, index) nnfStorageProfile, err := findPinnedProfile(ctx, r.Client, pinnedNamespace, pinnedName) if err != nil { - log.Error(err, "Unable to find pinned NnfStorageProfile", "name", pinnedName) - return nil, fmt.Errorf("Could not find pinned NnfStorageProfile %v: %w", types.NamespacedName{Name: pinnedName, Namespace: pinnedNamespace}, err) + return nil, dwsv1alpha2.NewResourceError("could not find pinned NnfStorageProfile: %v", types.NamespacedName{Name: pinnedName, Namespace: pinnedNamespace}).WithError(err).WithFatal() } var owner metav1.Object = workflow if dwArgs["command"] == "create_persistent" { psi, err := r.findPersistentInstance(ctx, workflow, dwArgs["name"]) if err != nil { - return nil, fmt.Errorf("Could not find PersistentStorageInstance %v for 'create_persistent' directive: %w", dwArgs["name"], err) + return nil, dwsv1alpha2.NewResourceError("could not find PersistentStorageInstance: %v", dwArgs["name"]).WithError(err).WithFatal() } owner = psi @@ -625,8 +637,7 @@ func (r *NnfWorkflowReconciler) createNnfStorage(ctx context.Context, workflow * }) if err != nil { - log.Error(err, "Failed to create or update NnfStorage", "name", nnfStorage.Name) - return nnfStorage, fmt.Errorf("CreateOrUpdate failed for NnfStorage %v: %w", client.ObjectKeyFromObject(nnfStorage), err) + return nil, dwsv1alpha2.NewResourceError("CreateOrUpdate failed for NnfStorage: %v", client.ObjectKeyFromObject(nnfStorage)).WithError(err) } if result == controllerutil.OperationResultCreated { @@ -692,7 +703,7 @@ func (r *NnfWorkflowReconciler) setupNnfAccessForServers(ctx context.Context, st }) if err != nil { - return nil, fmt.Errorf("CreateOrUpdate failed for NnfAccess %v: %w", 
client.ObjectKeyFromObject(access), err) + return nil, dwsv1alpha2.NewResourceError("CreateOrUpdate failed for NnfAccess: %v", client.ObjectKeyFromObject(access)).WithError(err) } if result == controllerutil.OperationResultCreated { @@ -719,12 +730,12 @@ func (r *NnfWorkflowReconciler) getDirectiveFileSystemType(ctx context.Context, } if err := r.Get(ctx, client.ObjectKeyFromObject(nnfStorage), nnfStorage); err != nil { - return "", fmt.Errorf("Could not get persistent NnfStorage %v to determine file system type: %w", client.ObjectKeyFromObject(nnfStorage), err) + return "", dwsv1alpha2.NewResourceError("could not get persistent NnfStorage %v to determine file system type", client.ObjectKeyFromObject(nnfStorage)).WithError(err) } return nnfStorage.Spec.FileSystemType, nil default: - return "", fmt.Errorf("Invalid directive '%s' to get file system type", workflow.Spec.DWDirectives[index]) + return "", dwsv1alpha2.NewResourceError("invalid directive '%s' to get file system type", workflow.Spec.DWDirectives[index]).WithFatal() } } @@ -746,13 +757,24 @@ func (r *NnfWorkflowReconciler) findPersistentInstance(ctx context.Context, wf * return nil, err } - return psi, err + return psi, nil } func handleWorkflowError(err error, driverStatus *dwsv1alpha2.WorkflowDriverStatus) { - e, ok := err.(*nnfv1alpha1.WorkflowError) + e, ok := err.(*dwsv1alpha2.ResourceErrorInfo) if ok { - e.Inject(driverStatus) + switch e.Severity { + case dwsv1alpha2.SeverityMinor: + driverStatus.Status = dwsv1alpha2.StatusRunning + case dwsv1alpha2.SeverityMajor: + driverStatus.Status = dwsv1alpha2.StatusTransientCondition + case dwsv1alpha2.SeverityFatal: + driverStatus.Status = dwsv1alpha2.StatusError + } + + driverStatus.Message = e.UserMessage + driverStatus.Error = e.Error() + } else { driverStatus.Status = dwsv1alpha2.StatusError driverStatus.Message = "Internal error: " + err.Error() @@ -760,6 +782,41 @@ func handleWorkflowError(err error, driverStatus *dwsv1alpha2.WorkflowDriverStat } } +func handleWorkflowErrorByIndex(err error, workflow *dwsv1alpha2.Workflow, index int) { + // Create a list of the driverStatus array elements that correspond to the current state + // of the workflow and are targeted for the Rabbit driver + driverList := []*dwsv1alpha2.WorkflowDriverStatus{} + driverID := os.Getenv("DWS_DRIVER_ID") + + for i := range workflow.Status.Drivers { + driverStatus := &workflow.Status.Drivers[i] + + if driverStatus.DriverID != driverID { + continue + } + if workflow.Status.State != driverStatus.WatchState { + continue + } + if driverStatus.Completed { + continue + } + + driverList = append(driverList, driverStatus) + } + + for _, driverStatus := range driverList { + if driverStatus.DWDIndex != index { + continue + } + + handleWorkflowError(err, driverStatus) + + return + } + + panic(index) +} + // Returns the directive index with the 'name' argument matching name, or -1 if not found func findDirectiveIndexByName(workflow *dwsv1alpha2.Workflow, name string, command string) int { for idx, directive := range workflow.Spec.DWDirectives { @@ -905,8 +962,7 @@ func (r *NnfWorkflowReconciler) unmountNnfAccessIfNecessary(ctx context.Context, if err := r.Update(ctx, access); err != nil { if !apierrors.IsConflict(err) { - err = fmt.Errorf("Could not update NnfAccess %v: %w", client.ObjectKeyFromObject(access), err) - return nil, nnfv1alpha1.NewWorkflowError("Unable to request compute node unmount").WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not update NnfAccess: %v", 
client.ObjectKeyFromObject(access)).WithError(err) } return Requeue("conflict").withObject(access), nil @@ -929,7 +985,7 @@ func (r *NnfWorkflowReconciler) waitForNnfAccessStateAndReady(ctx context.Contex // Check if we should also wait on the NnfAccess for the servers fsType, err := r.getDirectiveFileSystemType(ctx, workflow, index) if err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Unable to determine directive file system type").WithError(err) + return nil, dwsv1alpha2.NewResourceError("unable to determine directive file system type").WithError(err).WithFatal() } if fsType == "gfs2" || fsType == "lustre" { @@ -946,13 +1002,13 @@ func (r *NnfWorkflowReconciler) waitForNnfAccessStateAndReady(ctx context.Contex } if err := r.Get(ctx, client.ObjectKeyFromObject(access), access); err != nil { - err = fmt.Errorf("Could not get NnfAccess %s: %w", client.ObjectKeyFromObject(access).String(), err) - return nil, nnfv1alpha1.NewWorkflowError("Could not access file system on nodes").WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not get NnfAccess: %v", client.ObjectKeyFromObject(access)).WithError(err) } if access.Status.Error != nil { - err = fmt.Errorf("Error on NnfAccess %s: %w", client.ObjectKeyFromObject(access).String(), access.Status.Error) - return nil, nnfv1alpha1.NewWorkflowError("Could not access file system on nodes").WithError(err) + handleWorkflowErrorByIndex(access.Status.Error, workflow, index) + + return Requeue("mount/unmount error").withObject(access), nil } if state == "mounted" { @@ -981,11 +1037,11 @@ func (r *NnfWorkflowReconciler) addPersistentStorageReference(ctx context.Contex persistentStorage, err := r.findPersistentInstance(ctx, workflow, dwArgs["name"]) if err != nil { - return err + return dwsv1alpha2.NewResourceError("").WithUserMessage("PersistentStorage '%v' not found", dwArgs["name"]).WithMajor().WithUser() } if persistentStorage.Status.State != dwsv1alpha2.PSIStateActive { - return fmt.Errorf("PersistentStorage is not active") + return dwsv1alpha2.NewResourceError("").WithUserMessage("PersistentStorage is not active").WithFatal().WithUser() } // Add a consumer reference to the persistent storage for this directive @@ -1058,13 +1114,13 @@ func (r *NnfWorkflowReconciler) userContainerHandler(ctx context.Context, workfl // Get the targeted NNF nodes for the container jobs nnfNodes, err := r.getNnfNodesFromComputes(ctx, workflow) if err != nil || len(nnfNodes) <= 0 { - return nil, nnfv1alpha1.NewWorkflowError("error obtaining the target NNF nodes for containers:").WithError(err).WithFatal() + return nil, dwsv1alpha2.NewResourceError("error obtaining the target NNF nodes for containers").WithError(err).WithMajor() } // Get the NNF volumes to mount into the containers volumes, result, err := r.getContainerVolumes(ctx, workflow, dwArgs, profile) if err != nil { - return nil, nnfv1alpha1.NewWorkflowErrorf("could not determine the list of volumes need to create container job for workflow: %s", workflow.Name).WithError(err).WithFatal() + return nil, dwsv1alpha2.NewResourceError("could not determine the list of volumes needed to create container job for workflow: %s", workflow.Name).WithError(err).WithFatal() } if result != nil { return result, nil @@ -1087,16 +1143,16 @@ func (r *NnfWorkflowReconciler) userContainerHandler(ctx context.Context, workfl if mpiJob { if err := c.createMPIJob(); err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Unable to create/update MPIJob").WithFatal().WithError(err) + return nil, 
dwsv1alpha2.NewResourceError("unable to create/update MPIJob").WithMajor().WithError(err) } } else { // For non-MPI jobs, we need to create a service ourselves if err := r.createContainerService(ctx, workflow); err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Unable to create/update Container Service").WithFatal().WithError(err) + return nil, dwsv1alpha2.NewResourceError("unable to create/update Container Service").WithMajor().WithError(err) } if err := c.createNonMPIJob(); err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Unable to create/update Container Jobs").WithFatal().WithError(err) + return nil, dwsv1alpha2.NewResourceError("unable to create/update Container Jobs").WithMajor().WithError(err) } } @@ -1146,7 +1202,7 @@ func (r *NnfWorkflowReconciler) getNnfNodesFromComputes(ctx context.Context, wor }, } if err := r.Get(ctx, client.ObjectKeyFromObject(&computes), &computes); err != nil { - return ret, nnfv1alpha1.NewWorkflowError("could not find Computes resource for workflow") + return ret, dwsv1alpha2.NewResourceError("could not find Computes resource for workflow") } // Build the list of computes @@ -1154,12 +1210,12 @@ func (r *NnfWorkflowReconciler) getNnfNodesFromComputes(ctx context.Context, wor computeNodes = append(computeNodes, c.Name) } if len(computeNodes) == 0 { - return computeNodes, nnfv1alpha1.NewWorkflowError("the Computes resources does not specify any compute nodes") + return computeNodes, dwsv1alpha2.NewResourceError("the Computes resources does not specify any compute nodes").WithWLM().WithFatal() } systemConfig := &dwsv1alpha2.SystemConfiguration{} if err := r.Get(ctx, types.NamespacedName{Name: "default", Namespace: corev1.NamespaceDefault}, systemConfig); err != nil { - return ret, nnfv1alpha1.NewWorkflowError("could not get system configuration") + return ret, dwsv1alpha2.NewResourceError("could not get system configuration") } // The SystemConfiguration is organized by rabbit. Make a map of computes:rabbit for easy lookup. @@ -1177,7 +1233,7 @@ func (r *NnfWorkflowReconciler) getNnfNodesFromComputes(ctx context.Context, wor for _, c := range computeNodes { nnfNode, found := computeMap[c] if !found { - return ret, nnfv1alpha1.NewWorkflowErrorf("supplied compute node '%s' not found in SystemConfiguration", c) + return ret, dwsv1alpha2.NewResourceError("supplied compute node '%s' not found in SystemConfiguration", c).WithFatal() } // Add the node to the map @@ -1294,7 +1350,7 @@ func (r *NnfWorkflowReconciler) waitForContainersToFinish(ctx context.Context, w }) if err != nil { - return nnfv1alpha1.NewWorkflowErrorf("error updating job '%s' activeDeadlineSeconds:", job.Name) + return dwsv1alpha2.NewResourceError("error updating job '%s' activeDeadlineSeconds:", job.Name) } } @@ -1307,11 +1363,11 @@ func (r *NnfWorkflowReconciler) waitForContainersToFinish(ctx context.Context, w // by the MPIJob. 
jobList, err := r.getMPIJobList(ctx, workflow, mpiJob) if err != nil { - return nnfv1alpha1.NewWorkflowErrorf("waitForContainersToFinish: no MPIJob JobList found for workflow '%s', index: %d", workflow.Name, index) + return dwsv1alpha2.NewResourceError("waitForContainersToFinish: no MPIJob JobList found for workflow '%s', index: %d", workflow.Name, index).WithMajor() } if len(jobList.Items) < 1 { - return nnfv1alpha1.NewWorkflowErrorf("waitForContainersToFinish: no MPIJob jobs found for workflow '%s', index: %d", workflow.Name, index) + return dwsv1alpha2.NewResourceError("waitForContainersToFinish: no MPIJob jobs found for workflow '%s', index: %d", workflow.Name, index).WithMajor() } for _, job := range jobList.Items { @@ -1353,7 +1409,7 @@ func (r *NnfWorkflowReconciler) waitForContainersToFinish(ctx context.Context, w } if len(jobList.Items) < 1 { - return nil, nnfv1alpha1.NewWorkflowErrorf("waitForContainersToFinish: no container jobs found for workflow '%s', index: %d", workflow.Name, index) + return nil, dwsv1alpha2.NewResourceError("waitForContainersToFinish: no container jobs found for workflow '%s', index: %d", workflow.Name, index).WithMajor() } // Ensure all the jobs are done running before we check the conditions. @@ -1386,7 +1442,7 @@ func (r *NnfWorkflowReconciler) checkContainersResults(ctx context.Context, work for _, c := range mpiJob.Status.Conditions { if c.Type == mpiv2beta1.JobFailed { - return nil, nnfv1alpha1.NewWorkflowErrorf("container MPIJob %s (%s): %s", c.Type, c.Reason, c.Message) + return nil, dwsv1alpha2.NewResourceError("container MPIJob %s (%s): %s", c.Type, c.Reason, c.Message).WithFatal() } } } else { @@ -1396,13 +1452,13 @@ func (r *NnfWorkflowReconciler) checkContainersResults(ctx context.Context, work } if len(jobList.Items) < 1 { - return nil, nnfv1alpha1.NewWorkflowErrorf("checkContainersResults: no container jobs found for workflow '%s', index: %d", workflow.Name, index) + return nil, dwsv1alpha2.NewResourceError("checkContainersResults: no container jobs found for workflow '%s', index: %d", workflow.Name, index).WithMajor() } for _, job := range jobList.Items { for _, condition := range job.Status.Conditions { if condition.Type != batchv1.JobComplete { - return nil, nnfv1alpha1.NewWorkflowErrorf("container job %s (%s): %s", condition.Type, condition.Reason, condition.Message) + return nil, dwsv1alpha2.NewResourceError("container job %s (%s): %s", condition.Type, condition.Reason, condition.Message).WithFatal() } } } @@ -1421,7 +1477,7 @@ func (r *NnfWorkflowReconciler) getMPIJobList(ctx context.Context, workflow *dws jobList := &batchv1.JobList{} if err := r.List(ctx, jobList, matchLabels); err != nil { - return nil, nnfv1alpha1.NewWorkflowErrorf("could not retrieve Jobs for MPIJob %s", mpiJob.Name).WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not retrieve Jobs for MPIJob %s", mpiJob.Name).WithError(err) } // Create a new list so we don't alter the loop iterator @@ -1449,7 +1505,7 @@ func (r *NnfWorkflowReconciler) getContainerJobs(ctx context.Context, workflow * jobList := &batchv1.JobList{} if err := r.List(ctx, jobList, matchLabels); err != nil { - return nil, nnfv1alpha1.NewWorkflowErrorf("could not retrieve Jobs for index %d", index).WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not retrieve Jobs for index %d", index).WithError(err).WithMajor() } return jobList, nil @@ -1496,14 +1552,12 @@ func (r *NnfWorkflowReconciler) getContainerVolumes(ctx context.Context, workflo if cmd == "globaldw" { globalLustre 
:= r.findLustreFileSystemForPath(ctx, val, r.Log) if globalLustre == nil { - return nil, nil, nnfv1alpha1.NewWorkflowError(fmt.Sprintf( - "global Lustre file system containing '%s' not found", val)).WithFatal() + return nil, nil, dwsv1alpha2.NewResourceError("").WithUserMessage("global Lustre file system containing '%s' not found", val).WithUser().WithFatal() } ns, nsFound := globalLustre.Spec.Namespaces[workflow.Namespace] if !nsFound || len(ns.Modes) < 1 { - return nil, nil, nnfv1alpha1.NewWorkflowError(fmt.Sprintf( - "global Lustre file system containing '%s' is not configured for the '%s' namespace", val, workflow.Namespace)).WithFatal() + return nil, nil, dwsv1alpha2.NewResourceError("").WithUserMessage("global Lustre file system containing '%s' is not configured for the '%s' namespace", val, workflow.Namespace).WithUser().WithFatal() } // Retrieve the desired PVC mode from the container profile. Default to readwritemany. @@ -1523,7 +1577,7 @@ func (r *NnfWorkflowReconciler) getContainerVolumes(ctx context.Context, workflo // Find the directive index for the given name so we can retrieve its NnfAccess vol.directiveIndex = findDirectiveIndexByName(workflow, vol.directiveName, vol.command) if vol.directiveIndex < 0 { - return nil, nil, nnfv1alpha1.NewWorkflowErrorf("could not retrieve the directive breakdown for '%s'", vol.directiveName) + return nil, nil, dwsv1alpha2.NewResourceError("could not retrieve the directive breakdown for '%s'", vol.directiveName).WithMajor() } nnfAccess := &nnfv1alpha1.NnfAccess{ @@ -1533,7 +1587,7 @@ func (r *NnfWorkflowReconciler) getContainerVolumes(ctx context.Context, workflo }, } if err := r.Get(ctx, client.ObjectKeyFromObject(nnfAccess), nnfAccess); err != nil { - return nil, nil, nnfv1alpha1.NewWorkflowErrorf("could not retrieve the NnfAccess '%s'", nnfAccess.Name) + return nil, nil, dwsv1alpha2.NewResourceError("could not retrieve the NnfAccess '%s'", nnfAccess.Name).WithMajor() } if !nnfAccess.Status.Ready { diff --git a/controllers/nnf_workflow_controller_test.go b/controllers/nnf_workflow_controller_test.go index 02401854d..5a79eeeab 100644 --- a/controllers/nnf_workflow_controller_test.go +++ b/controllers/nnf_workflow_controller_test.go @@ -1368,12 +1368,6 @@ var _ = Describe("NNF Workflow Unit Tests", func() { Eventually(func(g Gomega) bool { g.Expect(k8sClient.Get(context.TODO(), key, workflow)).To(Succeed()) if shouldError { - // Raw isn't supported for persistent storage, make sure that error gets - // reported properly - if fsType == "raw" { - return workflow.Status.Status == dwsv1alpha2.StatusError && - strings.Contains(workflow.Status.Message, "can not be used with raw allocations") - } return workflow.Status.Status == dwsv1alpha2.StatusError && strings.Contains(workflow.Status.Message, "unsupported container filesystem: "+fsType) } else { diff --git a/controllers/nnfcontainerprofile_helpers.go b/controllers/nnfcontainerprofile_helpers.go index 6a0a9c6a5..fcbb2a502 100644 --- a/controllers/nnfcontainerprofile_helpers.go +++ b/controllers/nnfcontainerprofile_helpers.go @@ -43,7 +43,7 @@ func getContainerProfile(ctx context.Context, clnt client.Client, workflow *dwsv } if profile == nil { - return nil, nnfv1alpha1.NewWorkflowErrorf("container profile '%s' not found", indexedResourceName(workflow, index)).WithFatal() + return nil, dwsv1alpha2.NewResourceError("container profile '%s' not found", indexedResourceName(workflow, index)).WithFatal() } return profile, nil @@ -62,7 +62,7 @@ func findPinnedContainerProfile(ctx context.Context, clnt 
client.Client, workflo } if !profile.Data.Pinned { - return nil, nnfv1alpha1.NewWorkflowErrorf("expected a pinned container profile '%s', but found one that is not pinned", indexedResourceName(workflow, index)).WithFatal() + return nil, dwsv1alpha2.NewResourceError("expected a pinned container profile '%s', but found one that is not pinned", indexedResourceName(workflow, index)).WithFatal() } return profile, nil @@ -91,16 +91,16 @@ func findContainerProfile(ctx context.Context, clnt client.Client, workflow *dws } if profile.Data.Pinned { - return nil, nnfv1alpha1.NewWorkflowErrorf("expected container profile that is not pinned '%s', but found one that is pinned", indexedResourceName(workflow, index)).WithFatal() + return nil, dwsv1alpha2.NewResourceError("expected container profile that is not pinned '%s', but found one that is pinned", indexedResourceName(workflow, index)).WithFatal() } // Determine whether the profile is restricted to a UserID/GroupID. restrictedMsg := "container profile '%s' is restricted to %s %d" if profile.Data.UserID != nil && *profile.Data.UserID != workflow.Spec.UserID { - return nil, fmt.Errorf(restrictedMsg, profile.Name, "UserID", *profile.Data.UserID) + return nil, dwsv1alpha2.NewResourceError("").WithUserMessage(restrictedMsg, profile.Name, "UserID", *profile.Data.UserID).WithUser().WithFatal() } if profile.Data.GroupID != nil && *profile.Data.GroupID != workflow.Spec.GroupID { - return nil, fmt.Errorf(restrictedMsg, profile.Name, "GroupID", *profile.Data.GroupID) + return nil, dwsv1alpha2.NewResourceError("").WithUserMessage(restrictedMsg, profile.Name, "GroupID", *profile.Data.GroupID).WithUser().WithFatal() } diff --git a/controllers/nnfstorageprofile_helpers.go b/controllers/nnfstorageprofile_helpers.go index 859fc5134..5a9aea87e 100644 --- a/controllers/nnfstorageprofile_helpers.go +++ b/controllers/nnfstorageprofile_helpers.go @@ -21,7 +21,6 @@ package controllers import ( "context" - "fmt" "os" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -61,14 +60,14 @@ func findProfileToUse(ctx context.Context, clnt client.Client, args map[string]s } // Require that there be one and only one default. 
if len(profilesFound) == 0 { - return nil, fmt.Errorf("Unable to find a default NnfStorageProfile to use") + return nil, dwsv1alpha2.NewResourceError("").WithUserMessage("Unable to find a default NnfStorageProfile to use").WithFatal() } else if len(profilesFound) > 1 { - return nil, fmt.Errorf("More than one default NnfStorageProfile found; unable to pick one: %v", profilesFound) + return nil, dwsv1alpha2.NewResourceError("").WithUserMessage("More than one default NnfStorageProfile found; unable to pick one: %v", profilesFound).WithFatal() } profileName = profilesFound[0] } if len(profileName) == 0 { - return nil, fmt.Errorf("Unable to find an NnfStorageProfile name") + return nil, dwsv1alpha2.NewResourceError("").WithUserMessage("Unable to find an NnfStorageProfile name").WithUser().WithFatal() } err := clnt.Get(ctx, types.NamespacedName{Namespace: profileNamespace, Name: profileName}, nnfStorageProfile) if err != nil { @@ -86,7 +85,7 @@ func findPinnedProfile(ctx context.Context, clnt client.Client, namespace string return nil, err } if !nnfStorageProfile.Data.Pinned { - return nil, fmt.Errorf("Expected pinned NnfStorageProfile, but it was not pinned: %s", pinnedName) + return nil, dwsv1alpha2.NewResourceError("Expected pinned NnfStorageProfile, but it was not pinned: %s", pinnedName).WithFatal() } return nnfStorageProfile, nil } @@ -150,16 +149,16 @@ func addPinnedStorageProfileLabel(object metav1.Object, nnfStorageProfile *nnfv1 func getPinnedStorageProfileFromLabel(ctx context.Context, clnt client.Client, object metav1.Object) (*nnfv1alpha1.NnfStorageProfile, error) { labels := object.GetLabels() if labels == nil { - return nil, fmt.Errorf("unable to find labels") + return nil, dwsv1alpha2.NewResourceError("unable to find labels").WithFatal() } pinnedName, okName := labels[nnfv1alpha1.PinnedStorageProfileLabelName] if !okName { - return nil, fmt.Errorf("unable to find %s label", nnfv1alpha1.PinnedStorageProfileLabelName) + return nil, dwsv1alpha2.NewResourceError("unable to find %s label", nnfv1alpha1.PinnedStorageProfileLabelName).WithFatal() } pinnedNamespace, okNamespace := labels[nnfv1alpha1.PinnedStorageProfileLabelNameSpace] if !okNamespace { - return nil, fmt.Errorf("unable to find %s label", nnfv1alpha1.PinnedStorageProfileLabelNameSpace) + return nil, dwsv1alpha2.NewResourceError("unable to find %s label", nnfv1alpha1.PinnedStorageProfileLabelNameSpace).WithFatal() } return findPinnedProfile(ctx, clnt, pinnedNamespace, pinnedName) diff --git a/go.mod b/go.mod index 3c79b7ebb..0a4891513 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/NearNodeFlash/nnf-sos go 1.19 require ( - github.com/HewlettPackard/dws v0.0.1-0.20230613201835-73abc41bd83c + github.com/HewlettPackard/dws v0.0.1-0.20230802152955-11a333f31153 github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20230613180840-6178f2b04900 github.com/NearNodeFlash/nnf-ec v0.0.0-20230526161255-cfb2d89b35d7 github.com/ghodss/yaml v1.0.0 diff --git a/go.sum b/go.sum index 6ae120757..27346b40e 100644 --- a/go.sum +++ b/go.sum @@ -1,8 +1,8 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/HewlettPackard/dws v0.0.1-0.20230613201835-73abc41bd83c h1:atwVAI9Gslf501a4ADo/nkJol141DgF8YR4AiMtj4E8= -github.com/HewlettPackard/dws v0.0.1-0.20230613201835-73abc41bd83c/go.mod 
h1:YvNzcgAPmwhl/YQj6dMwsB9OpwbI5bp/41kINfFiXX8= +github.com/HewlettPackard/dws v0.0.1-0.20230802152955-11a333f31153 h1:9vMjataXTnCwXEGwxu0dQrOLUW5ujoJiTWAUTb8k50w= +github.com/HewlettPackard/dws v0.0.1-0.20230802152955-11a333f31153/go.mod h1:YvNzcgAPmwhl/YQj6dMwsB9OpwbI5bp/41kINfFiXX8= github.com/HewlettPackard/structex v1.0.4 h1:RVTdN5FWhDWr1IkjllU8wxuLjISo4gr6u5ryZpzyHcA= github.com/HewlettPackard/structex v1.0.4/go.mod h1:3frC4RY/cPsP/4+N8rkxsNAGlQwHV+zDC7qvrN+N+rE= github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20230613180840-6178f2b04900 h1:jOrP2H+D5amgHIONcucYS3/kJm6QfmqAG23Ke7elunI= diff --git a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/clientmount_types.go b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/clientmount_types.go index 6380e23d8..e4f375df3 100644 --- a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/clientmount_types.go +++ b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/clientmount_types.go @@ -189,6 +189,8 @@ type ClientMountStatus struct { //+kubebuilder:object:root=true //+kubebuilder:storageversion //+kubebuilder:subresource:status +//+kubebuilder:printcolumn:name="ERROR",type="string",JSONPath=".status.error.severity" +//+kubebuilder:printcolumn:name="AGE",type="date",JSONPath=".metadata.creationTimestamp" // ClientMount is the Schema for the clientmounts API type ClientMount struct { diff --git a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/directivebreakdown_types.go b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/directivebreakdown_types.go index 19301de04..df5c95bcb 100644 --- a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/directivebreakdown_types.go +++ b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/directivebreakdown_types.go @@ -190,6 +190,7 @@ type DirectiveBreakdownStatus struct { //+kubebuilder:storageversion //+kubebuilder:subresource:status //+kubebuilder:printcolumn:name="READY",type="boolean",JSONPath=".status.ready",description="True if allocation sets have been generated" +//+kubebuilder:printcolumn:name="ERROR",type="string",JSONPath=".status.error.severity" //+kubebuilder:printcolumn:name="AGE",type="date",JSONPath=".metadata.creationTimestamp" // DirectiveBreakdown is the Schema for the directivebreakdown API diff --git a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/persistentstorageinstance_types.go b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/persistentstorageinstance_types.go index 34e26f202..90e196b5e 100644 --- a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/persistentstorageinstance_types.go +++ b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/persistentstorageinstance_types.go @@ -91,6 +91,8 @@ type PersistentStorageInstanceStatus struct { //+kubebuilder:object:root=true //+kubebuilder:storageversion //+kubebuilder:subresource:status +//+kubebuilder:printcolumn:name="ERROR",type="string",JSONPath=".status.error.severity" +//+kubebuilder:printcolumn:name="AGE",type="date",JSONPath=".metadata.creationTimestamp" // PersistentStorageInstance is the Schema for the Persistentstorageinstances API type PersistentStorageInstance struct { diff --git a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/resource_error.go b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/resource_error.go index 29dd3084b..49ba6aa8a 100644 --- a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/resource_error.go +++ b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/resource_error.go @@ -1,5 +1,5 @@ /* - * Copyright 2022 Hewlett Packard Enterprise Development LP + * Copyright 2022-2023 Hewlett Packard 
Enterprise Development LP * Other additional copyright holders may be indicated within. * * The entirety of this work is licensed under the Apache License, @@ -19,6 +19,42 @@ package v1alpha2 +import ( + "fmt" + "strings" + + "github.com/go-logr/logr" +) + +type ResourceErrorSeverity string +type ResourceErrorType string + +const ( + // Minor errors are very likely to eventually succeed (e.g., errors caused by a stale cache) + // The WLM doesn't see these errors directly. The workflow stays in the DriverWait state, and + // the error string is put in workflow.Status.Message. + SeverityMinor ResourceErrorSeverity = "Minor" + + // Major errors may or may not succeed. These are transient errors that could be persistent + // due to an underlying problem (e.g., errors from OS calls) + SeverityMajor ResourceErrorSeverity = "Major" + + // Fatal errors will never succeed. This is for situations where we can guarantee that retrying + // will not fix the error (e.g., a DW directive that is not valid) + SeverityFatal ResourceErrorSeverity = "Fatal" +) + +const ( + // Internal errors are due to an error in the DWS/driver code + TypeInternal ResourceErrorType = "Internal" + + // WLM errors are due to an error with the input from the WLM + TypeWLM ResourceErrorType = "WLM" + + // User errors are due to an error with the input from a user + TypeUser ResourceErrorType = "User" +) + type ResourceErrorInfo struct { // Optional user facing message if the error is relevant to an end user UserMessage string `json:"userMessage,omitempty"` @@ -26,8 +62,14 @@ type ResourceErrorInfo struct { // Internal debug message for the error DebugMessage string `json:"debugMessage"` - // Indication if the error is likely recoverable or not - Recoverable bool `json:"recoverable"` + // Internal or user error + // +kubebuilder:validation:Enum=Internal;User + Type ResourceErrorType `json:"type"` + + // Indication of how severe the error is. Minor will likely succeed, Major may + // succeed, and Fatal will never succeed. + // +kubebuilder:validation:Enum=Minor;Major;Fatal + Severity ResourceErrorSeverity `json:"severity"` } type ResourceError struct { @@ -35,54 +77,127 @@ type ResourceError struct { Error *ResourceErrorInfo `json:"error,omitempty"` } -func NewResourceError(message string, err error) *ResourceErrorInfo { - resourceError := &ResourceErrorInfo{ - Recoverable: true, +func NewResourceError(format string, a ...any) *ResourceErrorInfo { + return &ResourceErrorInfo{ + Type: TypeInternal, + Severity: SeverityMinor, + DebugMessage: fmt.Sprintf(format, a...), } +} - if err != nil { - // If the error provided is already a ResourceError, use it and concatenate - // the debug messages - _, ok := err.(*ResourceErrorInfo) - if ok { - resourceError = err.(*ResourceErrorInfo) - } +// A resource error can have an optional user message that is displayed in the workflow.Status.Message +// field. The user message of the lowest level error is all that's displayed. +func (e *ResourceErrorInfo) WithUserMessage(format string, a ...any) *ResourceErrorInfo { + // Only set the user message if it's empty. This prevents upper layers + // from overriding a user message set by a lower layer + if e.UserMessage == "" { + e.UserMessage = fmt.Sprintf(format, a...) 
+ } - if message == "" { - message = err.Error() + return e +} + +func (e *ResourceErrorInfo) WithError(err error) *ResourceErrorInfo { + if err == nil { + return e + } + + // Concatenate the parent and child debug messages + debugMessageList := []string{} + if e.DebugMessage != "" { + debugMessageList = append(debugMessageList, e.DebugMessage) + } + + childError, ok := err.(*ResourceErrorInfo) + if ok { + // Inherit the severity and the user message if the child error is a ResourceError + e.Severity = childError.Severity + e.UserMessage = childError.UserMessage + e.Type = childError.Type + + // If the child resource error doesn't have a debug message, use the user message instead + if childError.DebugMessage == "" { + debugMessageList = append(debugMessageList, childError.UserMessage) } else { - message = message + ": " + err.Error() + debugMessageList = append(debugMessageList, childError.DebugMessage) } + } else { + debugMessageList = append(debugMessageList, err.Error()) } - resourceError.DebugMessage = message + e.DebugMessage = strings.Join(debugMessageList, ": ") - return resourceError + return e } func (e *ResourceErrorInfo) WithFatal() *ResourceErrorInfo { - e.Recoverable = false + e.Severity = SeverityFatal return e } -func (e *ResourceErrorInfo) WithUserMessage(message string) *ResourceErrorInfo { - // Only set the user message if it's empty. This prevents upper layers - // from overriding a user message set by a lower layer - if e.UserMessage == "" { - e.UserMessage = message +func (e *ResourceErrorInfo) WithMajor() *ResourceErrorInfo { + if e.Severity != SeverityFatal { + e.Severity = SeverityMajor + } + return e +} + +func (e *ResourceErrorInfo) WithMinor() *ResourceErrorInfo { + if e.Severity != SeverityFatal && e.Severity != SeverityMajor { + e.Severity = SeverityMinor } + return e +} + +func (e *ResourceErrorInfo) WithInternal() *ResourceErrorInfo { + e.Type = TypeInternal + return e +} + +func (e *ResourceErrorInfo) WithWLM() *ResourceErrorInfo { + e.Type = TypeWLM + return e +} +func (e *ResourceErrorInfo) WithUser() *ResourceErrorInfo { + e.Type = TypeUser return e } func (e *ResourceErrorInfo) Error() string { - return e.DebugMessage + message := "" + if e.DebugMessage == "" { + message = e.UserMessage + } else { + message = e.DebugMessage + } + return fmt.Sprintf("%s error: %s", strings.ToLower(string(e.Type)), message) +} + +func (e *ResourceError) SetResourceErrorAndLog(err error, log logr.Logger) { + e.SetResourceError(err) + if err == nil { + return + } + + childError, ok := err.(*ResourceErrorInfo) + if ok { + if childError.Severity == SeverityFatal { + log.Error(err, "Fatal error") + return + } + + log.Info("Recoverable Error", "Severity", childError.Severity, "Message", err.Error()) + return + } + + log.Info("Recoverable Error", "Message", err.Error()) } func (e *ResourceError) SetResourceError(err error) { if err == nil { e.Error = nil } else { - e.Error = NewResourceError("", err) + e.Error = NewResourceError("").WithError(err) } } diff --git a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/servers_types.go b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/servers_types.go index f3f2cc922..3ce6bc6f7 100644 --- a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/servers_types.go +++ b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/servers_types.go @@ -20,6 +20,8 @@ package v1alpha2 import ( + "github.com/HewlettPackard/dws/utils/updater" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -79,12 +81,16 @@ 
type ServersStatus struct { Ready bool `json:"ready"` LastUpdate *metav1.MicroTime `json:"lastUpdate,omitempty"` AllocationSets []ServersStatusAllocationSet `json:"allocationSets,omitempty"` + + // Error information + ResourceError `json:",inline"` } //+kubebuilder:object:root=true //+kubebuilder:storageversion //+kubebuilder:subresource:status //+kubebuilder:printcolumn:name="READY",type="boolean",JSONPath=".status.ready",description="True if allocation sets have been generated" +//+kubebuilder:printcolumn:name="ERROR",type="string",JSONPath=".status.error.severity" //+kubebuilder:printcolumn:name="AGE",type="date",JSONPath=".metadata.creationTimestamp" // Servers is the Schema for the servers API @@ -96,6 +102,10 @@ type Servers struct { Status ServersStatus `json:"status,omitempty"` } +func (s *Servers) GetStatus() updater.Status[*ServersStatus] { + return &s.Status +} + //+kubebuilder:object:root=true // ServersList contains a list of Servers diff --git a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/workflow_types.go b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/workflow_types.go index 25a5b3386..3d189f18e 100644 --- a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/workflow_types.go +++ b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/workflow_types.go @@ -92,12 +92,13 @@ func (s WorkflowState) after(t WorkflowState) bool { // Strings associated with workflow statuses const ( - StatusPending = "Pending" - StatusQueued = "Queued" - StatusRunning = "Running" - StatusCompleted = "Completed" - StatusError = "Error" - StatusDriverWait = "DriverWait" + StatusPending = "Pending" + StatusQueued = "Queued" + StatusRunning = "Running" + StatusCompleted = "Completed" + StatusTransientCondition = "TransientCondition" + StatusError = "Error" + StatusDriverWait = "DriverWait" ) // WorkflowSpec defines the desired state of Workflow @@ -147,8 +148,8 @@ type WorkflowDriverStatus struct { // User readable reason. // For the CDS driver, this could be the state of the underlying - // data movement request: Pending, Queued, Running, Completed or Error - // +kubebuilder:validation:Enum=Pending;Queued;Running;Completed;Error;DriverWait + // data movement request + // +kubebuilder:validation:Enum=Pending;Queued;Running;Completed;TransientCondition;Error;DriverWait Status string `json:"status,omitempty"` // Message provides additional details on the current status of the resource @@ -172,8 +173,12 @@ type WorkflowStatus struct { // Indicates whether State has been reached. Ready bool `json:"ready"` - // User readable reason and status message - // +kubebuilder:validation:Enum=Completed;DriverWait;Error + // User readable reason and status message. + // - Completed: The workflow has reached the state in workflow.Status.State. + // - DriverWait: The underlying drivers are currently running. + // - TransientCondition: A driver has encountered an error that might be recoverable. + // - Error: A driver has encountered an error that will not recover. 
+ // +kubebuilder:validation:Enum=Completed;DriverWait;TransientCondition;Error Status string `json:"status,omitempty"` // Message provides additional details on the current status of the resource diff --git a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/zz_generated.deepcopy.go b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/zz_generated.deepcopy.go index 2a7278155..96f72a977 100644 --- a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/zz_generated.deepcopy.go +++ b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/zz_generated.deepcopy.go @@ -891,6 +891,7 @@ func (in *ServersStatus) DeepCopyInto(out *ServersStatus) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + in.ResourceError.DeepCopyInto(&out.ResourceError) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServersStatus. diff --git a/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_clientmounts.yaml b/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_clientmounts.yaml index 698b6ea28..716467db0 100644 --- a/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_clientmounts.yaml +++ b/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_clientmounts.yaml @@ -269,7 +269,14 @@ spec: storage: false subresources: status: {} - - name: v1alpha2 + - additionalPrinterColumns: + - jsonPath: .status.error.severity + name: ERROR + type: string + - jsonPath: .metadata.creationTimestamp + name: AGE + type: date + name: v1alpha2 schema: openAPIV3Schema: description: ClientMount is the Schema for the clientmounts API @@ -484,17 +491,28 @@ spec: debugMessage: description: Internal debug message for the error type: string - recoverable: - description: Indication if the error is likely recoverable or - not - type: boolean + severity: + description: Indication of how severe the error is. Minor will + likely succeed, Major may succeed, and Fatal will never succeed. + enum: + - Minor + - Major + - Fatal + type: string + type: + description: Internal or user error + enum: + - Internal + - User + type: string userMessage: description: Optional user facing message if the error is relevant to an end user type: string required: - debugMessage - - recoverable + - severity + - type type: object mounts: description: List of mount statuses diff --git a/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_directivebreakdowns.yaml b/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_directivebreakdowns.yaml index d13dd663d..ede580e6e 100644 --- a/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_directivebreakdowns.yaml +++ b/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_directivebreakdowns.yaml @@ -319,6 +319,9 @@ spec: jsonPath: .status.ready name: READY type: boolean + - jsonPath: .status.error.severity + name: ERROR + type: string - jsonPath: .metadata.creationTimestamp name: AGE type: date @@ -448,17 +451,28 @@ spec: debugMessage: description: Internal debug message for the error type: string - recoverable: - description: Indication if the error is likely recoverable or - not - type: boolean + severity: + description: Indication of how severe the error is. Minor will + likely succeed, Major may succeed, and Fatal will never succeed. 
+ enum: + - Minor + - Major + - Fatal + type: string + type: + description: Internal or user error + enum: + - Internal + - User + type: string userMessage: description: Optional user facing message if the error is relevant to an end user type: string required: - debugMessage - - recoverable + - severity + - type type: object ready: description: Ready indicates whether AllocationSets have been generated diff --git a/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_persistentstorageinstances.yaml b/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_persistentstorageinstances.yaml index 4253ebee6..274055ad4 100644 --- a/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_persistentstorageinstances.yaml +++ b/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_persistentstorageinstances.yaml @@ -207,7 +207,14 @@ spec: storage: false subresources: status: {} - - name: v1alpha2 + - additionalPrinterColumns: + - jsonPath: .status.error.severity + name: ERROR + type: string + - jsonPath: .metadata.creationTimestamp + name: AGE + type: date + name: v1alpha2 schema: openAPIV3Schema: description: PersistentStorageInstance is the Schema for the Persistentstorageinstances @@ -335,17 +342,28 @@ spec: debugMessage: description: Internal debug message for the error type: string - recoverable: - description: Indication if the error is likely recoverable or - not - type: boolean + severity: + description: Indication of how severe the error is. Minor will + likely succeed, Major may succeed, and Fatal will never succeed. + enum: + - Minor + - Major + - Fatal + type: string + type: + description: Internal or user error + enum: + - Internal + - User + type: string userMessage: description: Optional user facing message if the error is relevant to an end user type: string required: - debugMessage - - recoverable + - severity + - type type: object servers: description: Servers refers to the Servers resource that provides diff --git a/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_servers.yaml b/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_servers.yaml index bb9c2dd4b..74649af1b 100644 --- a/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_servers.yaml +++ b/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_servers.yaml @@ -135,6 +135,9 @@ spec: jsonPath: .status.ready name: READY type: boolean + - jsonPath: .status.error.severity + name: ERROR + type: string - jsonPath: .metadata.creationTimestamp name: AGE type: date @@ -233,6 +236,35 @@ spec: - storage type: object type: array + error: + description: Error information + properties: + debugMessage: + description: Internal debug message for the error + type: string + severity: + description: Indication of how severe the error is. Minor will + likely succeed, Major may succeed, and Fatal will never succeed. 
+ enum: + - Minor + - Major + - Fatal + type: string + type: + description: Internal or user error + enum: + - Internal + - User + type: string + userMessage: + description: Optional user facing message if the error is relevant + to an end user + type: string + required: + - debugMessage + - severity + - type + type: object lastUpdate: format: date-time type: string diff --git a/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_workflows.yaml b/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_workflows.yaml index 62735bb69..7cf158bcb 100644 --- a/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_workflows.yaml +++ b/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_workflows.yaml @@ -603,14 +603,14 @@ spec: status of the resource type: string status: - description: 'User readable reason. For the CDS driver, this - could be the state of the underlying data movement request: Pending, - Queued, Running, Completed or Error' + description: User readable reason. For the CDS driver, this + could be the state of the underlying data movement request enum: - Pending - Queued - Running - Completed + - TransientCondition - Error - DriverWait type: string @@ -673,10 +673,15 @@ spec: - Teardown type: string status: - description: User readable reason and status message + description: 'User readable reason and status message. - Completed: + The workflow has reached the state in workflow.Status.State. - DriverWait: + The underlying drivers are currently running. - TransientCondition: + A driver has encountered an error that might be recoverable. - Error: + A driver has encountered an error that will not recover.' enum: - Completed - DriverWait + - TransientCondition - Error type: string required: diff --git a/vendor/github.com/HewlettPackard/dws/controllers/clientmount_controller.go b/vendor/github.com/HewlettPackard/dws/controllers/clientmount_controller.go index 7e99b6754..87bbc2107 100644 --- a/vendor/github.com/HewlettPackard/dws/controllers/clientmount_controller.go +++ b/vendor/github.com/HewlettPackard/dws/controllers/clientmount_controller.go @@ -66,6 +66,7 @@ func (r *ClientMountReconciler) Reconcile(ctx context.Context, req ctrl.Request) // in clientMount.Status{} change statusUpdater := updater.NewStatusUpdater[*dwsv1alpha2.ClientMountStatus](clientMount) defer func() { err = statusUpdater.CloseWithStatusUpdate(ctx, r.Client.Status(), err) }() + defer func() { clientMount.Status.SetResourceError(err) }() // Handle cleanup if the resource is being deleted if !clientMount.GetDeletionTimestamp().IsZero() { diff --git a/vendor/github.com/HewlettPackard/dws/controllers/workflow_controller.go b/vendor/github.com/HewlettPackard/dws/controllers/workflow_controller.go index 78df0f8c2..fb1576a60 100644 --- a/vendor/github.com/HewlettPackard/dws/controllers/workflow_controller.go +++ b/vendor/github.com/HewlettPackard/dws/controllers/workflow_controller.go @@ -24,6 +24,7 @@ import ( "fmt" "reflect" "runtime" + "sort" "time" "github.com/go-logr/logr" @@ -162,6 +163,9 @@ func (r *WorkflowReconciler) Reconcile(ctx context.Context, req ctrl.Request) (r err = r.Update(ctx, workflow) if err != nil { + if apierrors.IsConflict(err) { + return ctrl.Result{}, nil + } log.Error(err, "Failed to add computes reference") } return ctrl.Result{}, err @@ -179,25 +183,47 @@ func (r *WorkflowReconciler) Reconcile(ctx context.Context, req ctrl.Request) (r workflow.Status.Status = dwsv1alpha2.StatusCompleted workflow.Status.Message = "" - // 
Loop through the driver status array and update the workflow
-	// status as necessary
+	// Loop through the driver status array and find the entries that are for the current state
+	drivers := []dwsv1alpha2.WorkflowDriverStatus{}
+
 	for _, driver := range workflow.Status.Drivers {
 		if driver.WatchState != workflow.Status.State {
 			continue
 		}
 
-		if driver.Completed == false {
-			workflow.Status.Ready = false
-			workflow.Status.Status = dwsv1alpha2.StatusDriverWait
-		}
+		drivers = append(drivers, driver)
+	}
 
-		if driver.Message != "" {
-			workflow.Status.Message = fmt.Sprintf("DW Directive %d: %s", driver.DWDIndex, driver.Message)
-		}
+	if len(drivers) > 0 {
+		// Sort the driver entries by the priority of their status
+		sort.Slice(drivers, func(i, j int) bool {
+			return statusPriority(drivers[i].Status) > statusPriority(drivers[j].Status)
+		})
+
+		// Pull info from the driver entries with the highest priority. This means
+		// we'll only report status info in the workflow status section based on the
+		// most important driver status. Error > TransientCondition > Running > Completed. This
+		// keeps us from overwriting the workflow.Status.Message with a message from
+		// a less interesting driver entry.
+		priority := statusPriority(drivers[0].Status)
+		for _, driver := range drivers {
+			if driver.Completed == false {
+				workflow.Status.Ready = false
+			}
+
+			if statusPriority(driver.Status) < priority {
+				break
+			}
+
+			if driver.Message != "" {
+				workflow.Status.Message = fmt.Sprintf("DW Directive %d: %s", driver.DWDIndex, driver.Message)
+			}
 
-		if driver.Status == dwsv1alpha2.StatusError {
-			workflow.Status.Status = dwsv1alpha2.StatusError
-			break
+			if driver.Status == dwsv1alpha2.StatusTransientCondition || driver.Status == dwsv1alpha2.StatusError || driver.Status == dwsv1alpha2.StatusCompleted {
+				workflow.Status.Status = driver.Status
+			} else {
+				workflow.Status.Status = dwsv1alpha2.StatusDriverWait
+			}
 		}
 	}
 
@@ -244,6 +270,29 @@ func (r *WorkflowReconciler) createComputes(ctx context.Context, wf *dwsv1alpha2
 	return computes, nil
 }
 
+// statusPriority returns the priority of a driver's status. Errors have
+// the highest priority and completed entries have the lowest priority.
+func statusPriority(status string) int {
+	switch status {
+	case dwsv1alpha2.StatusCompleted:
+		return 1
+	case dwsv1alpha2.StatusDriverWait:
+		fallthrough
+	case dwsv1alpha2.StatusPending:
+		fallthrough
+	case dwsv1alpha2.StatusQueued:
+		fallthrough
+	case dwsv1alpha2.StatusRunning:
+		return 2
+	case dwsv1alpha2.StatusTransientCondition:
+		return 3
+	case dwsv1alpha2.StatusError:
+		return 4
+	}
+
+	panic(status)
+}
+
 type workflowStatusUpdater struct {
 	workflow       *dwsv1alpha2.Workflow
 	existingStatus dwsv1alpha2.WorkflowStatus
diff --git a/vendor/modules.txt b/vendor/modules.txt
index 74e03a88b..571b60d22 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -1,4 +1,4 @@
-# github.com/HewlettPackard/dws v0.0.1-0.20230613201835-73abc41bd83c
+# github.com/HewlettPackard/dws v0.0.1-0.20230802152955-11a333f31153
 ## explicit; go 1.19
 github.com/HewlettPackard/dws/api/v1alpha2
 github.com/HewlettPackard/dws/config/crd/bases

From b01d954896cc06b153c9838aa153b1327dd0dc36 Mon Sep 17 00:00:00 2001
From: Blake Devcich <89158881+bdevcich-hpe@users.noreply.github.com>
Date: Thu, 3 Aug 2023 10:05:03 -0500
Subject: [PATCH 11/19] =?UTF-8?q?Containers:=20Allow=20containers=20to=20o?=
 =?UTF-8?q?pen=20ports=20for=20communication=20with=20com=E2=80=A6=20(#214?=
 =?UTF-8?q?)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This change allows container workflows to open ports. These ports are
opened on the host nodes (i.e. NNF nodes) where the containers are
running. This enables traffic from outside of the network through the
IP address of the NNF node and the port. An application on the compute
node can contact the container with `<NNF node IP address>:<port>`. The
port number(s) can be retrieved via the NNF_CONTAINER_PORTS environment
variable. This environment variable is available inside of the
containers. It is also provided to the Workflow so that Flux can inform
the application on the compute node of which port(s) to use. If
multiple ports are desired, the environment variable will provide a
comma separated list of port numbers.

Ports are requested via the NnfContainerProfile's `numPorts`. **A
system admin must enable the `Ports` port range in the
`SystemConfiguration` before ports can be requested**. If not, the
NnfPortManager will not allocate any ports.
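For illustration only (not part of this change), a user application
inside the container could consume the variable with a small Go program
like the sketch below; the handler and port selection are hypothetical:

    // Sketch: parse the comma-separated NNF_CONTAINER_PORTS value and
    // serve HTTP on the first allocated port.
    package main

    import (
    	"fmt"
    	"net/http"
    	"os"
    	"strings"
    )

    func main() {
    	ports := strings.Split(os.Getenv("NNF_CONTAINER_PORTS"), ",")
    	if len(ports) == 0 || ports[0] == "" {
    		panic("NNF_CONTAINER_PORTS is not set")
    	}

    	// Compute-side clients reach this endpoint at <NNF node IP>:<port>.
    	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
    		fmt.Fprintln(w, "hello from the user container")
    	})
    	panic(http.ListenAndServe(":"+ports[0], nil))
    }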
More details:
- Enabled default NnfPortManager to manage port allocation
- Port allocation occurs in the Setup State
- Port de-allocation occurs in the Teardown State
- User Container Pods are now destroyed in the Teardown State prior to
  Port de-allocation
- Added `example-mpi-webserver` NnfContainerProfile to show use of the
  environment variable with a simple webserver
- Added container teardown + port de-allocation to workflow deletion

Signed-off-by: Blake Devcich
---
 api/v1alpha1/nnfcontainerprofile_types.go     |   6 +
 ...nnf.cray.hpe.com_nnfcontainerprofiles.yaml |   8 +
 config/dws/nnf-ruleset.yaml                   |   2 +-
 .../nnf_v1alpha1_nnfcontainerprofiles.yaml    |  40 +++
 config/manager/manager.yaml                   |   4 +
 config/ports/kustomization.yaml               |  12 +
 config/{manager => ports}/port_manager.yaml   |   0
 .../nnf_v1alpha1_nnfcontainerprofile.yaml     |   5 +
 controllers/nnf_port_manager_controller.go    |  26 +-
 .../nnf_port_manager_controller_test.go       |  65 ++++-
 controllers/nnf_workflow_controller.go        | 110 +++++---
 ...f_workflow_controller_container_helpers.go | 100 +++++++-
 .../nnf_workflow_controller_helpers.go        | 236 +++++++++++++++++-
 controllers/suite_test.go                     |   2 +
 deploy.sh                                     |   9 +
 15 files changed, 568 insertions(+), 57 deletions(-)
 create mode 100644 config/ports/kustomization.yaml
 rename config/{manager => ports}/port_manager.yaml (100%)

diff --git a/api/v1alpha1/nnfcontainerprofile_types.go b/api/v1alpha1/nnfcontainerprofile_types.go
index b193e2aaf..969d66fc5 100644
--- a/api/v1alpha1/nnfcontainerprofile_types.go
+++ b/api/v1alpha1/nnfcontainerprofile_types.go
@@ -62,6 +62,12 @@ type NnfContainerProfileData struct {
 	// this profile.
 	GroupID *uint32 `json:"groupID,omitempty"`
 
+	// Number of ports to open for communication with the user container. These ports are opened on
+	// the targeted NNF nodes and can be accessed outside of the k8s cluster (e.g. compute nodes).
+	// The requested ports are made available as environment variables inside the container and in
+	// the DWS workflow (NNF_CONTAINER_PORTS).
+	NumPorts int32 `json:"numPorts,omitempty"`
+
 	// Spec to define the containers created from container profile. This is used for non-MPI
 	// containers.
 	// Either this or MPISpec must be provided, but not both.
diff --git a/config/crd/bases/nnf.cray.hpe.com_nnfcontainerprofiles.yaml b/config/crd/bases/nnf.cray.hpe.com_nnfcontainerprofiles.yaml
index 1ab85f2dc..65ea0c77e 100644
--- a/config/crd/bases/nnf.cray.hpe.com_nnfcontainerprofiles.yaml
+++ b/config/crd/bases/nnf.cray.hpe.com_nnfcontainerprofiles.yaml
@@ -8616,6 +8616,14 @@ spec:
         required:
         - mpiReplicaSpecs
         type: object
+      numPorts:
+        description: Number of ports to open for communication with the user
+          container. These ports are opened on the targeted NNF nodes and
+          can be accessed outside of the k8s cluster (e.g. compute nodes).
+          The requested ports are made available as environment variables
+          inside the container and in the DWS workflow (NNF_CONTAINER_PORTS).
+ format: int32 + type: integer pinned: default: false description: Pinned is true if this instance is an immutable copy diff --git a/config/dws/nnf-ruleset.yaml b/config/dws/nnf-ruleset.yaml index 359453589..0bec8cec5 100644 --- a/config/dws/nnf-ruleset.yaml +++ b/config/dws/nnf-ruleset.yaml @@ -93,7 +93,7 @@ spec: isRequired: true isValueRequired: true - command: "container" - watchStates: Proposal,PreRun,PostRun,Teardown + watchStates: Proposal,Setup,PreRun,PostRun,Teardown ruleDefs: - key: "^name$" type: "string" diff --git a/config/examples/nnf_v1alpha1_nnfcontainerprofiles.yaml b/config/examples/nnf_v1alpha1_nnfcontainerprofiles.yaml index 01527afdd..6b0840896 100644 --- a/config/examples/nnf_v1alpha1_nnfcontainerprofiles.yaml +++ b/config/examples/nnf_v1alpha1_nnfcontainerprofiles.yaml @@ -69,6 +69,7 @@ metadata: name: example-mpi data: retryLimit: 6 + numPorts: 1 storages: - name: DW_JOB_foo_local_storage optional: false @@ -98,3 +99,42 @@ data: containers: - name: example-mpi image: nnf-mfu:latest + +--- +apiVersion: nnf.cray.hpe.com/v1alpha1 +kind: NnfContainerProfile +metadata: + name: example-mpi-webserver +data: + retryLimit: 6 + numPorts: 1 + storages: + - name: DW_JOB_foo_local_storage + optional: false + - name: DW_PERSISTENT_foo_persistent_storage + optional: true + - name: DW_GLOBAL_foo_global_lustre + optional: true + pvcMode: ReadWriteMany + mpiSpec: + runPolicy: + cleanPodPolicy: Running + mpiReplicaSpecs: + Launcher: + template: + spec: + containers: + - name: example-mpi-webserver + image: ghcr.io/nearnodeflash/nnf-container-example:latest + command: + - mpirun + - python3 + - -m + - http.server + - $(NNF_CONTAINER_PORTS) + Worker: + template: + spec: + containers: + - name: example-mpi-webserver + image: ghcr.io/nearnodeflash/nnf-container-example:latest diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 36e25caed..a446694ce 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -111,6 +111,10 @@ spec: value: nnf-system - name: NNF_CONTAINER_PROFILE_NAMESPACE value: nnf-system + - name: NNF_PORT_MANAGER_NAME + value: nnf-port-manager + - name: NNF_PORT_MANAGER_NAMESPACE + value: nnf-system ports: - containerPort: 50057 name: nnf-ec diff --git a/config/ports/kustomization.yaml b/config/ports/kustomization.yaml new file mode 100644 index 000000000..6b22c995f --- /dev/null +++ b/config/ports/kustomization.yaml @@ -0,0 +1,12 @@ +# Adds namespace to all resources. +namespace: nnf-system + +# Value of this field is prepended to the +# names of all resources, e.g. a deployment named +# "wordpress" becomes "alices-wordpress". +# Note that it should also match with the prefix (text before '-') of the namespace +# field above. +namePrefix: nnf- + +resources: +- port_manager.yaml diff --git a/config/manager/port_manager.yaml b/config/ports/port_manager.yaml similarity index 100% rename from config/manager/port_manager.yaml rename to config/ports/port_manager.yaml diff --git a/config/samples/nnf_v1alpha1_nnfcontainerprofile.yaml b/config/samples/nnf_v1alpha1_nnfcontainerprofile.yaml index e64e3d143..5d63d7769 100644 --- a/config/samples/nnf_v1alpha1_nnfcontainerprofile.yaml +++ b/config/samples/nnf_v1alpha1_nnfcontainerprofile.yaml @@ -13,6 +13,11 @@ data: # PostRun. Defaults to 0. A value of 0 disables this behavior. postRunTimeoutSeconds: 0 + # Request the number of ports to open on the targeted rabbits. These ports are accessible outside + # of the k8s cluster. 
The requested ports are made available as environment variables inside the + # container and in the DWS workflow (NNF_CONTAINER_PORTS). + numPorts: 0 + # List of possible filesystems supported by this container profile. These # storages are mounted inside of the container. Any non-optional storage must # be supplied with the container directive as an argument and must reference diff --git a/controllers/nnf_port_manager_controller.go b/controllers/nnf_port_manager_controller.go index fb61d0879..6944fbe6f 100644 --- a/controllers/nnf_port_manager_controller.go +++ b/controllers/nnf_port_manager_controller.go @@ -159,23 +159,31 @@ func (r *NnfPortManagerReconciler) cleanupUnusedAllocations(log logr.Logger, mgr // Free unused allocations. This will check if the Status.Allocations exist in // the list of desired allocations in the Spec field and mark any unused allocations // as freed. - failedIndices := make([]int, 0) + allocsToRemove := make([]int, 0) for idx := range mgr.Status.Allocations { status := &mgr.Status.Allocations[idx] if !r.isAllocationNeeded(mgr, status) { log.Info("Allocation unused", "requester", status.Requester, "status", status.Status) - if status.Status == nnfv1alpha1.NnfPortManagerAllocationStatusInUse { - status.Requester = nil - status.Status = nnfv1alpha1.NnfPortManagerAllocationStatusFree - } else if status.Status != nnfv1alpha1.NnfPortManagerAllocationStatusFree { - failedIndices = append(failedIndices, idx) - } + + // TODO: allow for cooldown + // if status.Status == nnfv1alpha1.NnfPortManagerAllocationStatusInUse { + // status.Requester = nil + // status.Status = nnfv1alpha1.NnfPortManagerAllocationStatusCooldown + // } else if status.Status == nnfv1alpha1.NnfPortManagerAllocationStatusCooldown { + // if now() - status.timeFreed > cooldownPeriod { + // allocsToRemove = append(allocsToRemove, idx) + // } + // } else if status.Status != nnfv1alpha1.NnfPortManagerAllocationStatusFree { + // allocsToRemove = append(allocsToRemove, idx) + // } + + allocsToRemove = append(allocsToRemove, idx) } } - for idx := range failedIndices { - failedIdx := failedIndices[len(failedIndices)-1-idx] // remove in reverse order + for idx := range allocsToRemove { + failedIdx := allocsToRemove[len(allocsToRemove)-1-idx] // remove in reverse order mgr.Status.Allocations = append(mgr.Status.Allocations[:failedIdx], mgr.Status.Allocations[failedIdx+1:]...) 
} } diff --git a/controllers/nnf_port_manager_controller_test.go b/controllers/nnf_port_manager_controller_test.go index dae0f0701..b5ca50272 100644 --- a/controllers/nnf_port_manager_controller_test.go +++ b/controllers/nnf_port_manager_controller_test.go @@ -85,7 +85,7 @@ var _ = Context("NNF Port Manager Controller Setup", Ordered, func() { DeferCleanup(func() { Expect(k8sClient.Delete(ctx, mgr)).To(Succeed()) }) }) - reservePorts := func(mgr *nnfv1alpha1.NnfPortManager, name string, count int) { + reservePorts := func(mgr *nnfv1alpha1.NnfPortManager, name string, count int) []uint16 { By(fmt.Sprintf("Reserving %d ports for '%s'", count, name)) allocation := nnfv1alpha1.NnfPortManagerAllocationSpec{ @@ -110,6 +110,8 @@ var _ = Context("NNF Port Manager Controller Setup", Ordered, func() { Expect(status).ToNot(BeNil()) Expect(status.Ports).To(HaveLen(allocation.Count)) Expect(status.Status).To(Equal(nnfv1alpha1.NnfPortManagerAllocationStatusInUse)) + + return status.Ports } releasePorts := func(mgr *nnfv1alpha1.NnfPortManager, name string) { @@ -130,16 +132,69 @@ var _ = Context("NNF Port Manager Controller Setup", Ordered, func() { }).Should(Succeed()) } + // Verify the number of allocations in the status allocation list + verifyNumAllocations := func(mgr *nnfv1alpha1.NnfPortManager, count int) { + By(fmt.Sprintf("Verifying there are %d allocations in the status allocation list", count)) + + Eventually(func() int { + Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(mgr), mgr)).To(Succeed()) + return len(mgr.Status.Allocations) + }).Should(Equal(count)) + } + It("Reserves & removes a single port", func() { const name = "single" - reservePorts(mgr, name, 1) + ports := reservePorts(mgr, name, 1) + Expect(ports[0]).To(BeEquivalentTo(portStart)) + verifyNumAllocations(mgr, 1) releasePorts(mgr, name) + verifyNumAllocations(mgr, 0) + }) + + It("Reserves & removes a multiple ports, one after another", func() { + first := "first" + ports := reservePorts(mgr, first, 1) + Expect(ports[0]).To(BeEquivalentTo(portStart)) + verifyNumAllocations(mgr, 1) + + second := "second" + ports = reservePorts(mgr, second, 1) + Expect(ports[0]).To(BeEquivalentTo(portStart + 1)) + verifyNumAllocations(mgr, 2) + + releasePorts(mgr, first) + verifyNumAllocations(mgr, 1) + + releasePorts(mgr, second) + verifyNumAllocations(mgr, 0) + }) + + It("Reserves & removes a multiple ports, one at a time", func() { + first := "first" + ports := reservePorts(mgr, first, 1) + firstPort := ports[0] + Expect(ports[0]).To(BeEquivalentTo(portStart)) + verifyNumAllocations(mgr, 1) + releasePorts(mgr, first) + verifyNumAllocations(mgr, 0) + + // Port should be reused since it was freed already + // This will fail once cooldowns are introduced + second := "second" + ports = reservePorts(mgr, second, 1) + Expect(ports[0]).To(BeEquivalentTo(firstPort)) + verifyNumAllocations(mgr, 1) + + releasePorts(mgr, second) + verifyNumAllocations(mgr, 0) }) It("Reserves & removes all ports", func() { const name = "all" reservePorts(mgr, name, portEnd-portStart+1) + verifyNumAllocations(mgr, 1) releasePorts(mgr, name) + verifyNumAllocations(mgr, 0) }) It("Reserves from free list", func() { @@ -147,11 +202,15 @@ var _ = Context("NNF Port Manager Controller Setup", Ordered, func() { reservePorts(mgr, single, 1) const remaining = "remaining" - reservePorts(mgr, remaining, portEnd-portStart) + count := portEnd - portStart + reservePorts(mgr, remaining, count) releasePorts(mgr, single) + verifyNumAllocations(mgr, 1) reservePorts(mgr, "free", 1) + + 
verifyNumAllocations(mgr, 2) }) It("Fails with insufficient resources", func() { diff --git a/controllers/nnf_workflow_controller.go b/controllers/nnf_workflow_controller.go index 821660587..5b2aee1c2 100644 --- a/controllers/nnf_workflow_controller.go +++ b/controllers/nnf_workflow_controller.go @@ -123,6 +123,20 @@ func (r *NnfWorkflowReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, err } + // Delete containers and unallocate port + containerRes, err := r.deleteContainers(ctx, workflow, -1) + if err != nil { + return ctrl.Result{}, err + } else if containerRes != nil { + return containerRes.Result, nil + } + containerRes, err = r.releaseContainerPorts(ctx, workflow) + if err != nil { + return ctrl.Result{}, err + } else if containerRes != nil { + return containerRes.Result, nil + } + deleteStatus, err := dwsv1alpha2.DeleteChildren(ctx, r.Client, r.ChildObjects, workflow) if err != nil { return ctrl.Result{}, err @@ -406,39 +420,48 @@ func (r *NnfWorkflowReconciler) startSetupState(ctx context.Context, workflow *d return nil, dwsv1alpha2.NewResourceError("could not create NnfStorage").WithError(err).WithUserMessage("could not create allocation") } + case "container": + return r.getContainerPorts(ctx, workflow, index) } return nil, nil } func (r *NnfWorkflowReconciler) finishSetupState(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) (*result, error) { - name, namespace := getStorageReferenceNameFromWorkflowActual(workflow, index) + dwArgs, _ := dwdparse.BuildArgsMap(workflow.Spec.DWDirectives[index]) - // Check whether the NnfStorage has finished creating the storage. - nnfStorage := &nnfv1alpha1.NnfStorage{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: namespace, - }, - } - if err := r.Get(ctx, client.ObjectKeyFromObject(nnfStorage), nnfStorage); err != nil { - return nil, dwsv1alpha2.NewResourceError("could not get NnfStorage: %v", client.ObjectKeyFromObject(nnfStorage)).WithError(err).WithUserMessage("could not allocate storage") - } + switch dwArgs["command"] { + case "container": + return r.checkContainerPorts(ctx, workflow, index) + default: + name, namespace := getStorageReferenceNameFromWorkflowActual(workflow, index) - // If the Status section has not been filled in yet, exit and wait. - if len(nnfStorage.Status.AllocationSets) != len(nnfStorage.Spec.AllocationSets) { - // RequeueAfter is necessary for persistent storage that isn't owned by this workflow - return Requeue("allocation").after(2 * time.Second).withObject(nnfStorage), nil - } + // Check whether the NnfStorage has finished creating the storage. + nnfStorage := &nnfv1alpha1.NnfStorage{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + } + if err := r.Get(ctx, client.ObjectKeyFromObject(nnfStorage), nnfStorage); err != nil { + return nil, dwsv1alpha2.NewResourceError("could not get NnfStorage: %v", client.ObjectKeyFromObject(nnfStorage)).WithError(err).WithUserMessage("could not allocate storage") + } - if nnfStorage.Status.Error != nil { - handleWorkflowErrorByIndex(dwsv1alpha2.NewResourceError("storage resource error: %v", client.ObjectKeyFromObject(nnfStorage)).WithError(nnfStorage.Status.Error).WithUserMessage("could not allocate storage"), workflow, index) - return Requeue("error").withObject(nnfStorage), nil - } + // If the Status section has not been filled in yet, exit and wait. 
+ if len(nnfStorage.Status.AllocationSets) != len(nnfStorage.Spec.AllocationSets) { + // RequeueAfter is necessary for persistent storage that isn't owned by this workflow + return Requeue("allocation").after(2 * time.Second).withObject(nnfStorage), nil + } + + if nnfStorage.Status.Error != nil { + handleWorkflowErrorByIndex(dwsv1alpha2.NewResourceError("storage resource error: %v", client.ObjectKeyFromObject(nnfStorage)).WithError(nnfStorage.Status.Error).WithUserMessage("could not allocate storage"), workflow, index) + return Requeue("error").withObject(nnfStorage), nil + } - if nnfStorage.Status.Status != nnfv1alpha1.ResourceReady { - // RequeueAfter is necessary for persistent storage that isn't owned by this workflow - return Requeue("allocation set not ready").after(2 * time.Second).withObject(nnfStorage), nil + if nnfStorage.Status.Status != nnfv1alpha1.ResourceReady { + // RequeueAfter is necessary for persistent storage that isn't owned by this workflow + return Requeue("allocation set not ready").after(2 * time.Second).withObject(nnfStorage), nil + } } return nil, nil @@ -986,22 +1009,31 @@ func (r *NnfWorkflowReconciler) finishPostRunState(ctx context.Context, workflow } func (r *NnfWorkflowReconciler) startTeardownState(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) (*result, error) { + dwArgs, _ := dwdparse.BuildArgsMap(workflow.Spec.DWDirectives[index]) - // Delete the NnfDataMovement and NnfAccess for this directive before removing the NnfStorage. - // copy_in/out directives can reference NnfStorage from a different directive, so all the NnfAccesses - // need to be removed first. - childObjects := []dwsv1alpha2.ObjectList{ - &nnfv1alpha1.NnfDataMovementList{}, - &nnfv1alpha1.NnfAccessList{}, - } + switch dwArgs["command"] { + case "container": + res, err := r.deleteContainers(ctx, workflow, index) + if res != nil || err != nil { + return res, err + } + default: + // Delete the NnfDataMovement and NnfAccess for this directive before removing the NnfStorage. + // copy_in/out directives can reference NnfStorage from a different directive, so all the NnfAccesses + // need to be removed first. 
+ childObjects := []dwsv1alpha2.ObjectList{ + &nnfv1alpha1.NnfDataMovementList{}, + &nnfv1alpha1.NnfAccessList{}, + } - deleteStatus, err := dwsv1alpha2.DeleteChildrenWithLabels(ctx, r.Client, childObjects, workflow, client.MatchingLabels{nnfv1alpha1.DirectiveIndexLabel: strconv.Itoa(index)}) - if err != nil { - return nil, dwsv1alpha2.NewResourceError("could not delete NnfDataMovement and NnfAccess children").WithError(err).WithUserMessage("could not stop data movement and unmount file systems") - } + deleteStatus, err := dwsv1alpha2.DeleteChildrenWithLabels(ctx, r.Client, childObjects, workflow, client.MatchingLabels{nnfv1alpha1.DirectiveIndexLabel: strconv.Itoa(index)}) + if err != nil { + return nil, dwsv1alpha2.NewResourceError("could not delete NnfDataMovement and NnfAccess children").WithError(err).WithUserMessage("could not stop data movement and unmount file systems") + } - if !deleteStatus.Complete() { - return Requeue("delete").withDeleteStatus(deleteStatus), nil + if !deleteStatus.Complete() { + return Requeue("delete").withDeleteStatus(deleteStatus), nil + } } return nil, nil @@ -1078,6 +1110,12 @@ func (r *NnfWorkflowReconciler) finishTeardownState(ctx context.Context, workflo if err != nil { return nil, dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("Could not remove persistent storage reference") } + case "container": + // Release container ports + res, err := r.releaseContainerPorts(ctx, workflow) + if res != nil || err != nil { + return res, err + } default: } diff --git a/controllers/nnf_workflow_controller_container_helpers.go b/controllers/nnf_workflow_controller_container_helpers.go index f9631c5a0..b23ddace7 100644 --- a/controllers/nnf_workflow_controller_container_helpers.go +++ b/controllers/nnf_workflow_controller_container_helpers.go @@ -152,12 +152,22 @@ func (c *nnfUserContainer) createMPIJob() error { c.applyPermissions(launcherSpec, &mpiJob.Spec, false) c.applyPermissions(workerSpec, &mpiJob.Spec, true) + // Get the ports from the port manager + ports, err := c.getHostPorts() + if err != nil { + return err + } + // Add the ports to the worker spec and add environment variable for both launcher/worker + addHostPorts(workerSpec, ports) + addPortsEnvVars(launcherSpec, ports) + addPortsEnvVars(workerSpec, ports) + c.addNnfVolumes(launcherSpec) c.addNnfVolumes(workerSpec) c.addEnvVars(launcherSpec, true) c.addEnvVars(workerSpec, true) - err := c.client.Create(c.ctx, mpiJob) + err = c.client.Create(c.ctx, mpiJob) if err != nil { if !apierrors.IsAlreadyExists(err) { return err @@ -194,6 +204,14 @@ func (c *nnfUserContainer) createNonMPIJob() error { podSpec.RestartPolicy = corev1.RestartPolicyNever podSpec.Subdomain = c.workflow.Name // service name == workflow name + // Get the ports from the port manager + ports, err := c.getHostPorts() + if err != nil { + return err + } + addHostPorts(podSpec, ports) + addPortsEnvVars(podSpec, ports) + c.applyTolerations(podSpec) c.applyPermissions(podSpec, nil, false) c.addNnfVolumes(podSpec) @@ -391,6 +409,86 @@ func (c *nnfUserContainer) applyPermissions(spec *corev1.PodSpec, mpiJobSpec *mp } } +func (c *nnfUserContainer) getHostPorts() ([]uint16, error) { + ports := []uint16{} + expectedPorts := int(c.profile.Data.NumPorts) + + if expectedPorts < 1 { + return ports, nil + } + + pm, err := getContainerPortManager(c.ctx, c.client) + if err != nil { + return nil, err + } + + // Get the ports from the port manager for this workflow + for _, alloc := range pm.Status.Allocations { + if alloc.Requester != nil && 
alloc.Requester.UID == c.workflow.UID && alloc.Status == nnfv1alpha1.NnfPortManagerAllocationStatusInUse { + ports = append(ports, alloc.Ports...) + } + } + + // Make sure we found the number of ports in the port manager that we expect + if len(ports) != expectedPorts { + return nil, dwsv1alpha2.NewResourceError( + "number of ports found in NnfPortManager's allocation (%d) does not equal the profile's requested ports (%d)", + len(ports), expectedPorts). + WithUserMessage("requested ports do not meet the number of allocated ports").WithFatal() + } + + return ports, nil +} + +// Given a list of ports, add HostPort entries for all containers in a PodSpec +func addHostPorts(spec *corev1.PodSpec, ports []uint16) { + + // Nothing to add + if len(ports) < 1 { + return + } + + // Add the ports to the containers + for idx := range spec.Containers { + container := &spec.Containers[idx] + + for _, port := range ports { + container.Ports = append(container.Ports, corev1.ContainerPort{ + ContainerPort: int32(port), + HostPort: int32(port), + }) + } + } +} + +// Given a list of ports, convert it into an environment variable name and comma separated value +func getContainerPortsEnvVar(ports []uint16) (string, string) { + portStr := []string{} + for _, port := range ports { + portStr = append(portStr, strconv.Itoa(int(port))) + } + + return "NNF_CONTAINER_PORTS", strings.Join(portStr, ",") +} + +// Add a environment variable for the container ports to all containers in a PodSpec +func addPortsEnvVars(spec *corev1.PodSpec, ports []uint16) { + if len(ports) < 1 { + return + } + + // Add port environment variable to containers + for idx := range spec.Containers { + container := &spec.Containers[idx] + + name, val := getContainerPortsEnvVar(ports) + container.Env = append(container.Env, corev1.EnvVar{ + Name: name, + Value: val, + }) + } +} + func (c *nnfUserContainer) addNnfVolumes(spec *corev1.PodSpec) { for _, vol := range c.volumes { diff --git a/controllers/nnf_workflow_controller_helpers.go b/controllers/nnf_workflow_controller_helpers.go index 32ced98d2..150a0fa6b 100644 --- a/controllers/nnf_workflow_controller_helpers.go +++ b/controllers/nnf_workflow_controller_helpers.go @@ -1302,6 +1302,66 @@ func (r *NnfWorkflowReconciler) waitForContainersToStart(ctx context.Context, wo return nil, nil } +func (r *NnfWorkflowReconciler) deleteContainers(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) (*result, error) { + doneMpi := false + doneNonMpi := false + + // Set the delete propagation + policy := metav1.DeletePropagationBackground + deleteAllOptions := &client.DeleteAllOfOptions{ + DeleteOptions: client.DeleteOptions{ + PropagationPolicy: &policy, + }, + } + // Add workflow matchLabels + directive index (if desired) + matchLabels := dwsv1alpha2.MatchingWorkflow(workflow) + if index >= 0 { + matchLabels[nnfv1alpha1.DirectiveIndexLabel] = strconv.Itoa(index) + } + + // Delete MPIJobs + mpiJobList, err := r.getMPIJobs(ctx, workflow, index) + if err != nil { + if strings.Contains(err.Error(), "no kind is registered for the type") || apierrors.IsNotFound(err) { + doneMpi = true + } else { + return nil, dwsv1alpha2.NewResourceError("could not delete container MPIJob(s)").WithError(err).WithMajor().WithInternal() + } + } else if len(mpiJobList.Items) > 0 { + if err := r.DeleteAllOf(ctx, &mpiJobList.Items[0], client.InNamespace(workflow.Namespace), matchLabels, deleteAllOptions); err != nil { + if !apierrors.IsNotFound(err) { + return nil, dwsv1alpha2.NewResourceError("could not delete container 
MPIJob(s)").WithError(err).WithMajor().WithInternal() + } + } + } else { + doneMpi = true + } + + // Delete non-MPI Jobs + jobList, err := r.getContainerJobs(ctx, workflow, index) + if err != nil { + if apierrors.IsNotFound(err) { + doneNonMpi = true + } else { + return nil, dwsv1alpha2.NewResourceError("could not delete container Job(s)").WithError(err).WithMajor().WithInternal() + } + } else if len(jobList.Items) > 0 { + if err := r.DeleteAllOf(ctx, &jobList.Items[0], client.InNamespace(workflow.Namespace), matchLabels, deleteAllOptions); err != nil { + if !apierrors.IsNotFound(err) { + return nil, dwsv1alpha2.NewResourceError("could not delete container Job(s)").WithError(err).WithMajor().WithInternal() + } + } + } else { + doneNonMpi = true + } + + if doneMpi && doneNonMpi { + return nil, nil + } + + return Requeue("pending container deletion"), nil +} + func (r *NnfWorkflowReconciler) getMPIJobConditions(ctx context.Context, workflow *dwsv1alpha2.Workflow, index, expected int) (*mpiv2beta1.MPIJob, *result) { mpiJob := &mpiv2beta1.MPIJob{ ObjectMeta: metav1.ObjectMeta{ @@ -1361,7 +1421,7 @@ func (r *NnfWorkflowReconciler) waitForContainersToFinish(ctx context.Context, w // Set the ActiveDeadLineSeconds on each of the k8s jobs created by MPIJob/mpi-operator. We // need to retrieve the jobs in a different way than non-MPI jobs since the jobs are created // by the MPIJob. - jobList, err := r.getMPIJobList(ctx, workflow, mpiJob) + jobList, err := r.getMPIJobChildrenJobs(ctx, workflow, mpiJob) if err != nil { return dwsv1alpha2.NewResourceError("waitForContainersToFinish: no MPIJob JobList found for workflow '%s', index: %d", workflow.Name, index).WithMajor() } @@ -1467,7 +1527,8 @@ func (r *NnfWorkflowReconciler) checkContainersResults(ctx context.Context, work return nil, nil } -func (r *NnfWorkflowReconciler) getMPIJobList(ctx context.Context, workflow *dwsv1alpha2.Workflow, mpiJob *mpiv2beta1.MPIJob) (*batchv1.JobList, error) { +// Given an MPIJob, return a list of all the k8s Jobs owned by the MPIJob +func (r *NnfWorkflowReconciler) getMPIJobChildrenJobs(ctx context.Context, workflow *dwsv1alpha2.Workflow, mpiJob *mpiv2beta1.MPIJob) (*batchv1.JobList, error) { // The k8s jobs that are spawned off by MPIJob do not have labels tied to the workflow. // Therefore, we need to get the k8s jobs manually. To do this, we can query the jobs by the // name of the MPIJob. However, this doesn't account for the namespace. We need another way. 
@@ -1477,7 +1538,7 @@ func (r *NnfWorkflowReconciler) getMPIJobList(ctx context.Context, workflow *dws jobList := &batchv1.JobList{} if err := r.List(ctx, jobList, matchLabels); err != nil { - return nil, dwsv1alpha2.NewResourceError("could not retrieve Jobs for MPIJob %s", mpiJob.Name).WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not retrieve Jobs for MPIJob %s", mpiJob.Name).WithError(err).WithMajor() } // Create a new list so we don't alter the loop iterator @@ -1498,14 +1559,31 @@ func (r *NnfWorkflowReconciler) getMPIJobList(ctx context.Context, workflow *dws return jobList, nil } +func (r *NnfWorkflowReconciler) getMPIJobs(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) (*mpiv2beta1.MPIJobList, error) { + // Get the MPIJobs for this workflow and directive index + matchLabels := dwsv1alpha2.MatchingWorkflow(workflow) + if index >= 0 { + matchLabels[nnfv1alpha1.DirectiveIndexLabel] = strconv.Itoa(index) + } + + jobList := &mpiv2beta1.MPIJobList{} + if err := r.List(ctx, jobList, matchLabels); err != nil { + return nil, dwsv1alpha2.NewResourceError("could not retrieve MPIJobs").WithError(err).WithMajor() + } + + return jobList, nil +} + func (r *NnfWorkflowReconciler) getContainerJobs(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) (*batchv1.JobList, error) { // Get the jobs for this workflow and directive index matchLabels := dwsv1alpha2.MatchingWorkflow(workflow) - matchLabels[nnfv1alpha1.DirectiveIndexLabel] = strconv.Itoa(index) + if index >= 0 { + matchLabels[nnfv1alpha1.DirectiveIndexLabel] = strconv.Itoa(index) + } jobList := &batchv1.JobList{} if err := r.List(ctx, jobList, matchLabels); err != nil { - return nil, dwsv1alpha2.NewResourceError("could not retrieve Jobs for index %d", index).WithError(err).WithMajor() + return nil, dwsv1alpha2.NewResourceError("could not retrieve Jobs").WithError(err).WithMajor() } return jobList, nil @@ -1515,8 +1593,6 @@ func (r *NnfWorkflowReconciler) getContainerJobs(ctx context.Context, workflow * func (r *NnfWorkflowReconciler) getContainerVolumes(ctx context.Context, workflow *dwsv1alpha2.Workflow, dwArgs map[string]string, profile *nnfv1alpha1.NnfContainerProfile) ([]nnfContainerVolume, *result, error) { volumes := []nnfContainerVolume{} - // TODO: ssh is necessary for mpi see setupSSHAuthVolumes(manager, podSpec) in nnf-dm - for arg, val := range dwArgs { volName, cmd := "", "" @@ -1601,3 +1677,149 @@ func (r *NnfWorkflowReconciler) getContainerVolumes(ctx context.Context, workflo return volumes, nil, nil } + +// Use the container profile to determine how many ports are needed and request them from the default NnfPortManager +func (r *NnfWorkflowReconciler) getContainerPorts(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) (*result, error) { + profile, err := getContainerProfile(ctx, r.Client, workflow, index) + if err != nil { + return nil, err + } + + // Nothing to do here if ports are not requested + if profile.Data.NumPorts > 0 { + pm, err := getContainerPortManager(ctx, r.Client) + if err != nil { + return nil, err + } + + // Check to see if we've already made an allocation + for _, alloc := range pm.Spec.Allocations { + if alloc.Requester.UID == workflow.UID { + return nil, nil + } + } + + // Add a port allocation request to the manager for the number of ports specified by the + // container profile + pm.Spec.Allocations = append(pm.Spec.Allocations, nnfv1alpha1.NnfPortManagerAllocationSpec{ + Requester: corev1.ObjectReference{ + Name: workflow.Name, + Namespace: 
workflow.Namespace, + Kind: reflect.TypeOf(dwsv1alpha2.Workflow{}).Name(), + UID: workflow.UID, + }, + Count: int(profile.Data.NumPorts), + }) + + if err := r.Update(ctx, pm); err != nil { + if !apierrors.IsConflict(err) { + return nil, err + } + return Requeue("update port manager allocation"), nil + } + + r.Log.Info("Ports Requested", "numPorts", profile.Data.NumPorts) + } + + return nil, nil +} + +// Ensure that the default NnfPortManager has assigned the appropriate number of requested ports +func (r *NnfWorkflowReconciler) checkContainerPorts(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) (*result, error) { + + profile, err := getContainerProfile(ctx, r.Client, workflow, index) + if err != nil { + return nil, err + } + + // Nothing to do here if ports are not requested + r.Log.Info("Checking for requested ports", "numPorts", profile.Data.NumPorts) + if profile.Data.NumPorts > 0 { + pm, err := getContainerPortManager(ctx, r.Client) + if err != nil { + return nil, err + } + + for _, alloc := range pm.Status.Allocations { + if alloc.Requester != nil && alloc.Requester.UID == workflow.UID { + if alloc.Status == nnfv1alpha1.NnfPortManagerAllocationStatusInUse && len(alloc.Ports) == int(profile.Data.NumPorts) { + // Add workflow env var for the ports + name, val := getContainerPortsEnvVar(alloc.Ports) + workflow.Status.Env[name] = val + return nil, nil // done + } else if alloc.Status == nnfv1alpha1.NnfPortManagerAllocationStatusInvalidConfiguration { + return nil, dwsv1alpha2.NewResourceError("").WithUserMessage("could not request ports for container workflow: Invalid NnfPortManager configuration").WithFatal().WithUser() + } else if alloc.Status == nnfv1alpha1.NnfPortManagerAllocationStatusInsufficientResources { + return nil, dwsv1alpha2.NewResourceError("").WithUserMessage("could not request ports for container workflow: InsufficientResources").WithFatal() + } + } + } + + return Requeue("NnfPortManager allocation not ready").after(2 * time.Second).withObject(pm), nil + } + + return nil, nil +} + +// Retrieve the default NnfPortManager for user containers. Allow a client to be passed in as this +// is meant to be used by reconcilers or container helpers. 
+func getContainerPortManager(ctx context.Context, cl client.Client) (*nnfv1alpha1.NnfPortManager, error) { + portManagerName := os.Getenv("NNF_PORT_MANAGER_NAME") + portManagerNamespace := os.Getenv("NNF_PORT_MANAGER_NAMESPACE") + + pm := &nnfv1alpha1.NnfPortManager{ + ObjectMeta: metav1.ObjectMeta{ + Name: portManagerName, + Namespace: portManagerNamespace, + }, + } + if err := cl.Get(ctx, client.ObjectKeyFromObject(pm), pm); err != nil { + return nil, err + } + + return pm, nil +} + +// Tell the NnfPortManager that the ports are no longer needed +// func (r *NnfWorkflowReconciler) releaseContainerPorts(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) (*result, error) { +func (r *NnfWorkflowReconciler) releaseContainerPorts(ctx context.Context, workflow *dwsv1alpha2.Workflow) (*result, error) { + found := false + + pm, err := getContainerPortManager(ctx, r.Client) + if err != nil { + if apierrors.IsNotFound(err) { + return nil, nil + } else { + return nil, err + } + } + + // Find the allocation in the Status + for _, alloc := range pm.Status.Allocations { + if alloc.Requester.UID == workflow.UID && alloc.Status == nnfv1alpha1.NnfPortManagerAllocationStatusInUse { + found = true + break + } + } + + if found { + // Remove the allocation request from the Spec + // TODO: For cooldowns, change the status to cooldown/time_wait rather than delete. Can we + // even do that from here? + for idx, alloc := range pm.Spec.Allocations { + if alloc.Requester.UID == workflow.UID { + pm.Spec.Allocations = append(pm.Spec.Allocations[:idx], pm.Spec.Allocations[idx+1:]...) + } + } + + if err := r.Update(ctx, pm); err != nil { + if !apierrors.IsConflict(err) { + return nil, err + } + } + + return Requeue("pending port de-allocation"), nil + } else { + return nil, nil + } +} diff --git a/controllers/suite_test.go b/controllers/suite_test.go index d042e2677..9cd1b1bfa 100644 --- a/controllers/suite_test.go +++ b/controllers/suite_test.go @@ -75,6 +75,8 @@ var envVars = []envSetting{ {"POD_NAMESPACE", "default"}, {"NNF_STORAGE_PROFILE_NAMESPACE", "default"}, {"NNF_CONTAINER_PROFILE_NAMESPACE", "default"}, + {"NNF_PORT_MANAGER_NAME", "nnf-port-manager"}, + {"NNF_PORT_MANAGER_NAMESPACE", "default"}, {"NNF_POD_IP", "172.0.0.1"}, {"NNF_NODE_NAME", "nnf-test-node"}, {"ACK_GINKGO_DEPRECATIONS", "1.16.4"}, diff --git a/deploy.sh b/deploy.sh index 94e924215..2366d78ef 100755 --- a/deploy.sh +++ b/deploy.sh @@ -52,9 +52,18 @@ if [[ $CMD == 'deploy' ]]; then # MPIJobSpec (with large annotations). (cd config/examples && $KUSTOMIZE edit set image nnf-mfu=$NNFMFU_IMG) $KUSTOMIZE build config/examples | kubectl apply --server-side=true --force-conflicts -f - + + # Deploy the nnfportmanager after everything else + echo "Waiting for the nnfportmamanger CRD to become ready..." 
+ while :; do + sleep 1 + kubectl get crds nnfportmanagers.nnf.cray.hpe.com && break + done + $KUSTOMIZE build config/ports| kubectl apply --server-side=true --force-conflicts -f - fi if [[ $CMD == 'undeploy' ]]; then + $KUSTOMIZE build config/ports | kubectl delete --ignore-not-found -f - $KUSTOMIZE build config/examples | kubectl delete --ignore-not-found -f - $KUSTOMIZE build config/$OVERLAY | kubectl delete --ignore-not-found -f - fi From 75367ef6fd8714ae66e5999ed0be292b73802889 Mon Sep 17 00:00:00 2001 From: matthew-richerson <82597529+matthew-richerson@users.noreply.github.com> Date: Fri, 4 Aug 2023 13:19:43 -0500 Subject: [PATCH 12/19] github-83: Add standaloneMgt option to StorageProfile (#216) Create an option in the Lustre section of the storage profile to only create an MGT. This option will be used by admins to create a pool of MGTs that can be used as external MGTs for other Lustre file systems. Signed-off-by: Matt Richerson --- api/v1alpha1/nnfstorageprofile_types.go | 4 + api/v1alpha1/nnfstorageprofile_webhook.go | 8 ++ .../nnfstorageprofile_webhook_test.go | 21 +++++ .../nnf.cray.hpe.com_nnfstorageprofiles.yaml | 5 ++ controllers/directivebreakdown_controller.go | 6 ++ .../directivebreakdown_controller_test.go | 81 +++++++++++++++++++ controllers/nnf_access_controller_test.go | 7 +- 7 files changed, 130 insertions(+), 2 deletions(-) diff --git a/api/v1alpha1/nnfstorageprofile_types.go b/api/v1alpha1/nnfstorageprofile_types.go index 5a0bf7917..7e13967a7 100644 --- a/api/v1alpha1/nnfstorageprofile_types.go +++ b/api/v1alpha1/nnfstorageprofile_types.go @@ -83,6 +83,10 @@ type NnfStorageProfileLustreData struct { // +kubebuilder:default:=false ExclusiveMDT bool `json:"exclusiveMdt,omitempty"` + // StandAloneMGT creates only a Lustre MGT without an MDT or OST + // +kubebuilder:default:=false + StandaloneMGT bool `json:"standaloneMgt,omitempty"` + // MgtCmdLines contains commands to create an MGT target. 
MgtCmdLines NnfStorageProfileLustreCmdLines `json:"mgtCommandlines,omitempty"` diff --git a/api/v1alpha1/nnfstorageprofile_webhook.go b/api/v1alpha1/nnfstorageprofile_webhook.go index 1ecec5978..d4e41e963 100644 --- a/api/v1alpha1/nnfstorageprofile_webhook.go +++ b/api/v1alpha1/nnfstorageprofile_webhook.go @@ -116,6 +116,14 @@ func (r *NnfStorageProfile) validateContentLustre() error { return fmt.Errorf("cannot set both combinedMgtMdt and externalMgs") } + if r.Data.LustreStorage.StandaloneMGT && len(r.Data.LustreStorage.ExternalMGS) > 0 { + return fmt.Errorf("cannot set both standaloneMgt and externalMgs") + } + + if r.Data.LustreStorage.StandaloneMGT && r.Data.LustreStorage.CombinedMGTMDT { + return fmt.Errorf("cannot set standaloneMgt and combinedMgtMdt") + } + for _, target := range []string{"mgt", "mdt", "mgtmdt", "ost"} { targetMiscOptions := r.GetLustreMiscOptions(target) err := r.validateLustreTargetMiscOptions(targetMiscOptions) diff --git a/api/v1alpha1/nnfstorageprofile_webhook_test.go b/api/v1alpha1/nnfstorageprofile_webhook_test.go index 04be1ec1a..77c2234a4 100644 --- a/api/v1alpha1/nnfstorageprofile_webhook_test.go +++ b/api/v1alpha1/nnfstorageprofile_webhook_test.go @@ -114,6 +114,13 @@ var _ = Describe("NnfStorageProfile Webhook", func() { Expect(newProfile.Data.Default).ToNot(BeTrue()) }) + It("should accept standaloneMgt", func() { + nnfProfile.Data.LustreStorage.StandaloneMGT = true + Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) + Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(nnfProfile), newProfile)).To(Succeed()) + Expect(newProfile.Data.Default).ToNot(BeTrue()) + }) + It("should accept combinedMgtMdt", func() { nnfProfile.Data.LustreStorage.CombinedMGTMDT = true Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) @@ -143,6 +150,20 @@ var _ = Describe("NnfStorageProfile Webhook", func() { nnfProfile = nil }) + It("should not accept standaloneMgt with externalMgs", func() { + nnfProfile.Data.LustreStorage.StandaloneMGT = true + nnfProfile.Data.LustreStorage.ExternalMGS = "10.0.0.1@tcp" + Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed()) + nnfProfile = nil + }) + + It("should not accept standaloneMgt with combinedMgtMdt", func() { + nnfProfile.Data.LustreStorage.StandaloneMGT = true + nnfProfile.Data.LustreStorage.CombinedMGTMDT = true + Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed()) + nnfProfile = nil + }) + It("Should not allow a default resource to be pinned", func() { nnfProfile.Data.Default = true nnfProfile.Data.Pinned = true diff --git a/config/crd/bases/nnf.cray.hpe.com_nnfstorageprofiles.yaml b/config/crd/bases/nnf.cray.hpe.com_nnfstorageprofiles.yaml index ca7281a49..23b2bfa86 100644 --- a/config/crd/bases/nnf.cray.hpe.com_nnfstorageprofiles.yaml +++ b/config/crd/bases/nnf.cray.hpe.com_nnfstorageprofiles.yaml @@ -337,6 +337,11 @@ spec: required: - colocateComputes type: object + standaloneMgt: + default: false + description: StandAloneMGT creates only a Lustre MGT without an + MDT or OST + type: boolean type: object pinned: default: false diff --git a/controllers/directivebreakdown_controller.go b/controllers/directivebreakdown_controller.go index 43e58ad00..dc0b3b365 100644 --- a/controllers/directivebreakdown_controller.go +++ b/controllers/directivebreakdown_controller.go @@ -504,6 +504,12 @@ func (r *DirectiveBreakdownReconciler) populateStorageBreakdown(ctx context.Cont lustreComponents = append(lustreComponents, 
lustreComponentType{dwsv1alpha2.AllocateAcrossServers, mdtCapacity, "mgtmdt", useKey}) } else if len(nnfStorageProfile.Data.LustreStorage.ExternalMGS) > 0 { lustreComponents = append(lustreComponents, lustreComponentType{dwsv1alpha2.AllocateAcrossServers, mdtCapacity, "mdt", mdtKey}) + } else if nnfStorageProfile.Data.LustreStorage.StandaloneMGT { + if argsMap["command"] != "create_persistent" { + return dwsv1alpha2.NewResourceError("").WithUserMessage("standaloneMgt option can only be used with 'create_persistent' directive").WithFatal().WithUser() + } + + lustreComponents = []lustreComponentType{lustreComponentType{dwsv1alpha2.AllocateSingleServer, mgtCapacity, "mgt", mgtKey}} } else { lustreComponents = append(lustreComponents, lustreComponentType{dwsv1alpha2.AllocateAcrossServers, mdtCapacity, "mdt", mdtKey}) lustreComponents = append(lustreComponents, lustreComponentType{dwsv1alpha2.AllocateSingleServer, mgtCapacity, "mgt", mgtKey}) diff --git a/controllers/directivebreakdown_controller_test.go b/controllers/directivebreakdown_controller_test.go index 634e60fc9..5c3845615 100644 --- a/controllers/directivebreakdown_controller_test.go +++ b/controllers/directivebreakdown_controller_test.go @@ -138,4 +138,85 @@ var _ = Describe("DirectiveBreakdown test", func() { return k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(persistentStorage), persistentStorage) }).ShouldNot(Succeed()) }) + + It("Creates a DirectiveBreakdown with a lustre jobdw and standaloneMgt", func() { + By("Setting standaloneMgt in the storage profile") + Eventually(func(g Gomega) error { + g.Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(storageProfile), storageProfile)).To(Succeed()) + storageProfile.Data.LustreStorage.StandaloneMGT = true + return k8sClient.Update(context.TODO(), storageProfile) + }).Should(Succeed()) + + By("Creating a DirectiveBreakdown") + directiveBreakdown := &dwsv1alpha2.DirectiveBreakdown{ + ObjectMeta: metav1.ObjectMeta{ + Name: "standalone-lustre-jobdw-test", + Namespace: corev1.NamespaceDefault, + }, + Spec: dwsv1alpha2.DirectiveBreakdownSpec{ + Directive: "#DW jobdw name=jobdw-lustre type=lustre capacity=1GiB", + }, + } + + Expect(k8sClient.Create(context.TODO(), directiveBreakdown)).To(Succeed()) + + Eventually(func(g Gomega) error { + g.Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(directiveBreakdown), directiveBreakdown)).To(Succeed()) + return directiveBreakdown.Status.Error + }).ShouldNot(BeNil()) + }) + + It("Creates a DirectiveBreakdown with an xfs jobdw and standaloneMgt", func() { + By("Setting standaloneMgt in the storage profile") + Eventually(func(g Gomega) error { + g.Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(storageProfile), storageProfile)).To(Succeed()) + storageProfile.Data.LustreStorage.StandaloneMGT = true + return k8sClient.Update(context.TODO(), storageProfile) + }).Should(Succeed()) + + By("Creating a DirectiveBreakdown") + directiveBreakdown := &dwsv1alpha2.DirectiveBreakdown{ + ObjectMeta: metav1.ObjectMeta{ + Name: "standalone-xfs-jobdw-test", + Namespace: corev1.NamespaceDefault, + }, + Spec: dwsv1alpha2.DirectiveBreakdownSpec{ + Directive: "#DW jobdw name=jobdw-xfs type=xfs capacity=1GiB", + }, + } + + Expect(k8sClient.Create(context.TODO(), directiveBreakdown)).To(Succeed()) + + Eventually(func(g Gomega) bool { + g.Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(directiveBreakdown), directiveBreakdown)).To(Succeed()) + return directiveBreakdown.Status.Ready + }).Should(BeTrue()) + }) 
+
+	It("Creates a DirectiveBreakdown with a create_persistent and standaloneMgt", func() {
+		By("Setting standaloneMgt in the storage profile")
+		Eventually(func(g Gomega) error {
+			g.Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(storageProfile), storageProfile)).To(Succeed())
+			storageProfile.Data.LustreStorage.StandaloneMGT = true
+			return k8sClient.Update(context.TODO(), storageProfile)
+		}).Should(Succeed())
+
+		By("Creating a DirectiveBreakdown")
+		directiveBreakdown := &dwsv1alpha2.DirectiveBreakdown{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      "standalone-lustre-persistent-test",
+				Namespace: corev1.NamespaceDefault,
+			},
+			Spec: dwsv1alpha2.DirectiveBreakdownSpec{
+				Directive: "#DW create_persistent name=persistent-lustre type=lustre capacity=1GiB",
+			},
+		}
+
+		Expect(k8sClient.Create(context.TODO(), directiveBreakdown)).To(Succeed())
+
+		Eventually(func(g Gomega) bool {
+			g.Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(directiveBreakdown), directiveBreakdown)).To(Succeed())
+			return directiveBreakdown.Status.Ready
+		}).Should(BeTrue())
+	})
 })
diff --git a/controllers/nnf_access_controller_test.go b/controllers/nnf_access_controller_test.go
index 9101beb54..7fd698bd2 100644
--- a/controllers/nnf_access_controller_test.go
+++ b/controllers/nnf_access_controller_test.go
@@ -165,8 +165,11 @@ var _ = Describe("Access Controller Test", func() {
 			}
 
 			By("Set NNF Access Desired State to unmounted")
-			access.Spec.DesiredState = "unmounted"
-			Expect(k8sClient.Update(context.TODO(), access)).To(Succeed())
+			Eventually(func(g Gomega) error {
+				g.Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(access), access)).To(Succeed())
+				access.Spec.DesiredState = "unmounted"
+				return k8sClient.Update(context.TODO(), access)
+			}).Should(Succeed())
 
 			By("Verify NNF Access goes Ready in unmounted state")
 			Eventually(func(g Gomega) bool {

From 56de8aaa068bea7741d3f660fe19344ad471bc1b Mon Sep 17 00:00:00 2001
From: Blake Devcich
Date: Tue, 8 Aug 2023 14:53:47 -0500
Subject: [PATCH 13/19] Added Port Cooldown to NnfPortManager

Once ports have been released, they now go into a Cooldown period. The
cooldown period can be set via the SystemConfiguration. This defaults
to 60s and corresponds with the kernel's default TIME_WAIT.

Once the cooldown period expires, the port is then removed from the
allocation list - freeing it up for reuse.

The cooldown period is checked in the following situations:

1. When a new user requests a port
2. When a user has requested a port but there are none left (forcing a
   requeue)
3. When a user has released a port

For #2, the reconciler will requeue for the cooldown period in hopes
that a port has been freed in that time. If not, the requeue will
continue until that request has been satisfied.

If these situations do not occur, the port will remain in Cooldown even
though the time has expired. Since there is no reason to reconcile, the
cooldown period will not be checked until a new port is
requested/released (or the reconcile fires for another reason).
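As a rough sketch of the expiry check described above (assuming each
freed allocation records the time it was released; this is not the
exact controller code):

    package main

    import "time"

    // cooldownExpired reports whether a released port may be reclaimed.
    // A cooldown of zero means ports are reclaimed immediately.
    func cooldownExpired(released time.Time, cooldown time.Duration, now time.Time) bool {
    	if cooldown == 0 {
    		return true
    	}
    	return now.Sub(released) >= cooldown
    }

With the default SystemConfiguration value this amounts to
cooldownExpired(released, 60*time.Second, time.Now()).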
Signed-off-by: Blake Devcich --- api/v1alpha1/nnf_port_manager_types.go | 7 +- api/v1alpha1/zz_generated.deepcopy.go | 4 + .../nnf.cray.hpe.com_nnfportmanagers.yaml | 6 + controllers/nnf_port_manager_controller.go | 99 ++-- .../nnf_port_manager_controller_test.go | 436 +++++++++++++----- go.mod | 2 +- go.sum | 22 + .../api/v1alpha2/systemconfiguration_types.go | 6 + ...dws.cray.hpe.com_systemconfigurations.yaml | 9 + vendor/modules.txt | 2 +- 10 files changed, 437 insertions(+), 156 deletions(-) diff --git a/api/v1alpha1/nnf_port_manager_types.go b/api/v1alpha1/nnf_port_manager_types.go index 2b987799c..fc447316d 100644 --- a/api/v1alpha1/nnf_port_manager_types.go +++ b/api/v1alpha1/nnf_port_manager_types.go @@ -60,12 +60,13 @@ type NnfPortManagerSpec struct { // AllocationStatus is the current status of a port requestor. A port that is in use by the respective owner // will have a status of "InUse". A port that is freed by the owner but not yet reclaimed by the port manager // will have a status of "Free". Any other status value indicates a failure of the port allocation. -// +kubebuilder:validation:Enum:=InUse;Free;InvalidConfiguration;InsufficientResources +// +kubebuilder:validation:Enum:=InUse;Free;Cooldown;InvalidConfiguration;InsufficientResources type NnfPortManagerAllocationStatusStatus string const ( NnfPortManagerAllocationStatusInUse NnfPortManagerAllocationStatusStatus = "InUse" NnfPortManagerAllocationStatusFree NnfPortManagerAllocationStatusStatus = "Free" + NnfPortManagerAllocationStatusCooldown NnfPortManagerAllocationStatusStatus = "Cooldown" NnfPortManagerAllocationStatusInvalidConfiguration NnfPortManagerAllocationStatusStatus = "InvalidConfiguration" NnfPortManagerAllocationStatusInsufficientResources NnfPortManagerAllocationStatusStatus = "InsufficientResources" // NOTE: You must ensure any new value is added to the above kubebuilder validation enum @@ -82,6 +83,10 @@ type NnfPortManagerAllocationStatus struct { // Status is the ownership status of the port. Status NnfPortManagerAllocationStatusStatus `json:"status"` + + // TimeUnallocated is when the port was unallocated. This is to ensure the proper cooldown + // duration. + TimeUnallocated *metav1.Time `json:"timeUnallocated,omitempty"` } // PortManagerStatus is the current status of the port manager. diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 9321abb9b..6ac55b96a 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -907,6 +907,10 @@ func (in *NnfPortManagerAllocationStatus) DeepCopyInto(out *NnfPortManagerAlloca *out = make([]uint16, len(*in)) copy(*out, *in) } + if in.TimeUnallocated != nil { + in, out := &in.TimeUnallocated, &out.TimeUnallocated + *out = (*in).DeepCopy() + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NnfPortManagerAllocationStatus. diff --git a/config/crd/bases/nnf.cray.hpe.com_nnfportmanagers.yaml b/config/crd/bases/nnf.cray.hpe.com_nnfportmanagers.yaml index aab8d03ec..dee321fae 100644 --- a/config/crd/bases/nnf.cray.hpe.com_nnfportmanagers.yaml +++ b/config/crd/bases/nnf.cray.hpe.com_nnfportmanagers.yaml @@ -198,9 +198,15 @@ spec: enum: - InUse - Free + - Cooldown - InvalidConfiguration - InsufficientResources type: string + timeUnallocated: + description: TimeUnallocated is when the port was unallocated. + This is to ensure the proper cooldown duration. 
+ format: date-time + type: string required: - status type: object diff --git a/controllers/nnf_port_manager_controller.go b/controllers/nnf_port_manager_controller.go index 6944fbe6f..1ca669090 100644 --- a/controllers/nnf_port_manager_controller.go +++ b/controllers/nnf_port_manager_controller.go @@ -1,5 +1,5 @@ /* - * Copyright 2022 Hewlett Packard Enterprise Development LP + * Copyright 2023 Hewlett Packard Enterprise Development LP * Other additional copyright holders may be indicated within. * * The entirety of this work is licensed under the Apache License, @@ -21,6 +21,7 @@ package controllers import ( "context" + "time" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" @@ -57,15 +58,12 @@ type AllocationStatus = nnfv1alpha1.NnfPortManagerAllocationStatus // Reconcile is part of the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. -// TODO(user): Modify the Reconcile function to compare the state specified by -// the NnfPortManager object against the actual cluster state, and then -// perform operations to make the cluster state reflect the state specified by -// the user. // // For more details, check Reconcile and its Result here: // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.14.1/pkg/reconcile func (r *NnfPortManagerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (res ctrl.Result, err error) { log := log.FromContext(ctx) + unsatisfiedRequests := 0 mgr := &nnfv1alpha1.NnfPortManager{} if err := r.Get(ctx, req.NamespacedName, mgr); err != nil { @@ -96,19 +94,22 @@ func (r *NnfPortManagerReconciler) Reconcile(ctx context.Context, req ctrl.Reque } // Free any unused allocations - r.cleanupUnusedAllocations(log, mgr) + r.cleanupUnusedAllocations(log, mgr, config.Spec.PortsCooldownInSeconds) // For each "requester" in the mgr.Spec.Allocations, try to satisfy the request by // allocating the desired ports. 
for _, spec := range mgr.Spec.Allocations { + var ports []uint16 + var status nnfv1alpha1.NnfPortManagerAllocationStatusStatus + var allocationStatus *nnfv1alpha1.NnfPortManagerAllocationStatus - // If the specification is already included in the allocations, continue - if r.isAllocated(mgr, spec) { + // If the specification is already included in the allocations and InUse, continue + allocationStatus = r.findAllocationStatus(mgr, spec) + if allocationStatus != nil && allocationStatus.Status == nnfv1alpha1.NnfPortManagerAllocationStatusInUse { continue } - var ports []uint16 - var status nnfv1alpha1.NnfPortManagerAllocationStatusStatus + // Determine if the port manager is ready and find a free port if mgr.Status.Status != nnfv1alpha1.NnfPortManagerStatusReady { ports, status = nil, nnfv1alpha1.NnfPortManagerAllocationStatusInvalidConfiguration } else { @@ -116,19 +117,40 @@ func (r *NnfPortManagerReconciler) Reconcile(ctx context.Context, req ctrl.Reque } log.Info("Allocation", "requester", spec.Requester, "count", spec.Count, "ports", ports, "status", status) - allocationStatus := AllocationStatus{ - Requester: &corev1.ObjectReference{}, - Ports: ports, - Status: status, + + // Port could not be allocated - try again next time + if status != nnfv1alpha1.NnfPortManagerAllocationStatusInUse { + unsatisfiedRequests++ + log.Info("Allocation unsatisfied", "requester", spec.Requester, "count", spec.Count, "ports", ports, "status", status) } - spec.Requester.DeepCopyInto(allocationStatus.Requester) + // Create a new entry if not already present, otherwise update + if allocationStatus == nil { + allocationStatus := AllocationStatus{ + Requester: &corev1.ObjectReference{}, + Ports: ports, + Status: status, + } + + spec.Requester.DeepCopyInto(allocationStatus.Requester) - if mgr.Status.Allocations == nil { - mgr.Status.Allocations = make([]nnfv1alpha1.NnfPortManagerAllocationStatus, 0) + if mgr.Status.Allocations == nil { + mgr.Status.Allocations = make([]nnfv1alpha1.NnfPortManagerAllocationStatus, 0) + } + + mgr.Status.Allocations = append(mgr.Status.Allocations, allocationStatus) + } else { + allocationStatus.Status = status + allocationStatus.Ports = ports } + } - mgr.Status.Allocations = append(mgr.Status.Allocations, allocationStatus) + // If there aren't enough free ports, then requeue so that something eventually frees up + if unsatisfiedRequests > 0 { + log.Info("Unsatisfied requests are pending -- requeuing") + return ctrl.Result{ + RequeueAfter: time.Duration(config.Spec.PortsCooldownInSeconds+1) * time.Second, + }, nil } return res, nil @@ -137,7 +159,7 @@ func (r *NnfPortManagerReconciler) Reconcile(ctx context.Context, req ctrl.Reque // isAllocationNeeded returns true if the provided Port Allocation Status has a matching value // requester in the specification, and false otherwise. 
func (r *NnfPortManagerReconciler) isAllocationNeeded(mgr *nnfv1alpha1.NnfPortManager, status *AllocationStatus) bool { - if status.Status != nnfv1alpha1.NnfPortManagerAllocationStatusInUse { + if status.Status != nnfv1alpha1.NnfPortManagerAllocationStatusInUse && status.Status != nnfv1alpha1.NnfPortManagerAllocationStatusInsufficientResources { return false } @@ -154,7 +176,7 @@ func (r *NnfPortManagerReconciler) isAllocationNeeded(mgr *nnfv1alpha1.NnfPortMa return false } -func (r *NnfPortManagerReconciler) cleanupUnusedAllocations(log logr.Logger, mgr *nnfv1alpha1.NnfPortManager) { +func (r *NnfPortManagerReconciler) cleanupUnusedAllocations(log logr.Logger, mgr *nnfv1alpha1.NnfPortManager, cooldown int) { // Free unused allocations. This will check if the Status.Allocations exist in // the list of desired allocations in the Spec field and mark any unused allocations @@ -164,21 +186,25 @@ func (r *NnfPortManagerReconciler) cleanupUnusedAllocations(log logr.Logger, mgr status := &mgr.Status.Allocations[idx] if !r.isAllocationNeeded(mgr, status) { - log.Info("Allocation unused", "requester", status.Requester, "status", status.Status) - - // TODO: allow for cooldown - // if status.Status == nnfv1alpha1.NnfPortManagerAllocationStatusInUse { - // status.Requester = nil - // status.Status = nnfv1alpha1.NnfPortManagerAllocationStatusCooldown - // } else if status.Status == nnfv1alpha1.NnfPortManagerAllocationStatusCooldown { - // if now() - status.timeFreed > cooldownPeriod { - // allocsToRemove = append(allocsToRemove, idx) - // } - // } else if status.Status != nnfv1alpha1.NnfPortManagerAllocationStatusFree { - // allocsToRemove = append(allocsToRemove, idx) - // } - - allocsToRemove = append(allocsToRemove, idx) + + // If there's no cooldown or the cooldown period has expired, remove it + // If no longer needed, set the allocation status to cooldown and record the unallocated time + now := metav1.Now() + if cooldown == 0 { + allocsToRemove = append(allocsToRemove, idx) + log.Info("Allocation unused - removing", "requester", status.Requester, "status", status.Status) + } else if status.Status == nnfv1alpha1.NnfPortManagerAllocationStatusCooldown { + period := now.Sub(status.TimeUnallocated.Time) + log.Info("Allocation unused - checking cooldown", "requester", status.Requester, "status", status.Status, "period", period, "time", status.TimeUnallocated.String()) + if period >= time.Duration(cooldown)*time.Second { + allocsToRemove = append(allocsToRemove, idx) + log.Info("Allocation unused - removing after cooldown", "requester", status.Requester, "status", status.Status) + } + } else if status.TimeUnallocated == nil { + status.TimeUnallocated = &now + status.Status = nnfv1alpha1.NnfPortManagerAllocationStatusCooldown + log.Info("Allocation unused -- cooldown set", "requester", status.Requester, "status", status.Status) + } } } @@ -214,7 +240,8 @@ func (r *NnfPortManagerReconciler) findFreePorts(log logr.Logger, mgr *nnfv1alph portsInUse := make([]uint16, 0) for _, status := range mgr.Status.Allocations { - if status.Status == nnfv1alpha1.NnfPortManagerAllocationStatusInUse { + if status.Status == nnfv1alpha1.NnfPortManagerAllocationStatusInUse || + status.Status == nnfv1alpha1.NnfPortManagerAllocationStatusCooldown { portsInUse = append(portsInUse, status.Ports...) 
} } diff --git a/controllers/nnf_port_manager_controller_test.go b/controllers/nnf_port_manager_controller_test.go index b5ca50272..3c613baac 100644 --- a/controllers/nnf_port_manager_controller_test.go +++ b/controllers/nnf_port_manager_controller_test.go @@ -22,7 +22,9 @@ package controllers import ( "fmt" "reflect" + "time" + "github.com/google/uuid" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -37,33 +39,40 @@ import ( var _ = Context("NNF Port Manager Controller Setup", Ordered, func() { - var cfg *dwsv1alpha2.SystemConfiguration + var r = &NnfPortManagerReconciler{} // use this to access private reconciler methods + const portStart = 20 const portEnd = 29 - - BeforeAll(func() { - cfg = &dwsv1alpha2.SystemConfiguration{ - ObjectMeta: metav1.ObjectMeta{ - Name: "port-manager-system-config", - Namespace: corev1.NamespaceDefault, - }, - Spec: dwsv1alpha2.SystemConfigurationSpec{ - Ports: []intstr.IntOrString{ - intstr.FromString(fmt.Sprintf("%d-%d", portStart, portEnd)), - }, - }, - } - - Expect(k8sClient.Create(ctx, cfg)).To(Succeed()) - DeferCleanup(func() { Expect(k8sClient.Delete(ctx, cfg)).To(Succeed()) }) - }) + portTotal := portEnd - portStart + 1 Describe("NNF Port Manager Controller Test", func() { - + var cfg *dwsv1alpha2.SystemConfiguration var mgr *nnfv1alpha1.NnfPortManager - var r = &NnfPortManagerReconciler{} // use this to access private reconciler methods + portCooldown := 1 + + JustBeforeEach(func() { + cfg = &dwsv1alpha2.SystemConfiguration{ + ObjectMeta: metav1.ObjectMeta{ + Name: "port-manager-system-config", + Namespace: corev1.NamespaceDefault, + }, + Spec: dwsv1alpha2.SystemConfigurationSpec{ + Ports: []intstr.IntOrString{ + intstr.FromString(fmt.Sprintf("%d-%d", portStart, portEnd)), + }, + PortsCooldownInSeconds: portCooldown, + }, + } + Expect(k8sClient.Create(ctx, cfg)).To(Succeed()) + DeferCleanup(func() { + if cfg != nil { + Expect(k8sClient.Delete(ctx, cfg)).To(Succeed()) + Eventually(func() error { + return k8sClient.Get(ctx, client.ObjectKeyFromObject(cfg), cfg) + }).ShouldNot(Succeed()) + } + }) - BeforeEach(func() { mgr = &nnfv1alpha1.NnfPortManager{ ObjectMeta: metav1.ObjectMeta{ Name: "nnf-port-manager", @@ -78,14 +87,23 @@ var _ = Context("NNF Port Manager Controller Setup", Ordered, func() { Allocations: make([]nnfv1alpha1.NnfPortManagerAllocationSpec, 0), }, } - }) - - JustBeforeEach(func() { Expect(k8sClient.Create(ctx, mgr)).To(Succeed()) - DeferCleanup(func() { Expect(k8sClient.Delete(ctx, mgr)).To(Succeed()) }) + DeferCleanup(func() { + if mgr != nil { + Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(mgr), mgr)) + mgr.SetFinalizers([]string{}) + Expect(k8sClient.Update(ctx, mgr)).To(Succeed()) + Expect(k8sClient.Delete(ctx, mgr)).To(Succeed()) + Eventually(func() error { + return k8sClient.Get(ctx, client.ObjectKeyFromObject(mgr), mgr) + }).ShouldNot(Succeed()) + } + }) }) - reservePorts := func(mgr *nnfv1alpha1.NnfPortManager, name string, count int) []uint16 { + // Submit an allocation and verify it has been accounted for - this doesn't mean the ports + // were successfully allocated, however. 
+ allocatePorts := func(mgr *nnfv1alpha1.NnfPortManager, name string, count int) []uint16 { By(fmt.Sprintf("Reserving %d ports for '%s'", count, name)) allocation := nnfv1alpha1.NnfPortManagerAllocationSpec{ @@ -106,12 +124,29 @@ var _ = Context("NNF Port Manager Controller Setup", Ordered, func() { return r.isAllocated(mgr, allocation) }).Should(BeTrue()) + status := r.findAllocationStatus(mgr, allocation) + return status.Ports + } + + // Submit an allocation and expect it to be successfully allocated (i.e. ports InUse) + reservePorts := func(mgr *nnfv1alpha1.NnfPortManager, name string, count int) []uint16 { + ports := allocatePorts(mgr, name, count) + + allocation := nnfv1alpha1.NnfPortManagerAllocationSpec{ + Requester: corev1.ObjectReference{Name: name}, + Count: count, + } + status := r.findAllocationStatus(mgr, allocation) Expect(status).ToNot(BeNil()) Expect(status.Ports).To(HaveLen(allocation.Count)) Expect(status.Status).To(Equal(nnfv1alpha1.NnfPortManagerAllocationStatusInUse)) - return status.Ports + return ports + } + + reservePortsAllowFail := func(mgr *nnfv1alpha1.NnfPortManager, name string, count int) []uint16 { + return allocatePorts(mgr, name, count) } releasePorts := func(mgr *nnfv1alpha1.NnfPortManager, name string) { @@ -132,112 +167,279 @@ var _ = Context("NNF Port Manager Controller Setup", Ordered, func() { }).Should(Succeed()) } - // Verify the number of allocations in the status allocation list - verifyNumAllocations := func(mgr *nnfv1alpha1.NnfPortManager, count int) { - By(fmt.Sprintf("Verifying there are %d allocations in the status allocation list", count)) + // Simple way to fire the reconciler to test the cooldown handling + // without having to reserve new ports. This is just to limit the scope + // of the test. + kickPortManager := func(mgr *nnfv1alpha1.NnfPortManager) { + By("Kicking port manager to force reconcile") + + Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(mgr), mgr)).To(Succeed()) + finalizers := mgr.GetFinalizers() + finalizers = append(finalizers, "test-"+uuid.NewString()) + mgr.SetFinalizers(finalizers) + Eventually(func() error { + return k8sClient.Update(ctx, mgr) + }).Should(Succeed()) + } + + // Verify the number of allocations in the status allocation list that are InUse + verifyNumAllocations := func(mgr *nnfv1alpha1.NnfPortManager, status nnfv1alpha1.NnfPortManagerAllocationStatusStatus, count int) { + By(fmt.Sprintf("Verifying there are %d allocations with Status %s in the status allocation list", count, status)) Eventually(func() int { + statusCount := 0 Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(mgr), mgr)).To(Succeed()) - return len(mgr.Status.Allocations) + for _, a := range mgr.Status.Allocations { + if a.Status == status { + statusCount++ + } + } + return statusCount }).Should(Equal(count)) } - It("Reserves & removes a single port", func() { - const name = "single" - ports := reservePorts(mgr, name, 1) - Expect(ports[0]).To(BeEquivalentTo(portStart)) - verifyNumAllocations(mgr, 1) - releasePorts(mgr, name) - verifyNumAllocations(mgr, 0) - }) - - It("Reserves & removes a multiple ports, one after another", func() { - first := "first" - ports := reservePorts(mgr, first, 1) - Expect(ports[0]).To(BeEquivalentTo(portStart)) - verifyNumAllocations(mgr, 1) + verifyNumAllocationsInUse := func(mgr *nnfv1alpha1.NnfPortManager, count int) { + verifyNumAllocations(mgr, nnfv1alpha1.NnfPortManagerAllocationStatusInUse, count) + } - second := "second" - ports = reservePorts(mgr, second, 1) - 
Expect(ports[0]).To(BeEquivalentTo(portStart + 1)) - verifyNumAllocations(mgr, 2) + verifyNumAllocationsCooldown := func(mgr *nnfv1alpha1.NnfPortManager, count int) { + verifyNumAllocations(mgr, nnfv1alpha1.NnfPortManagerAllocationStatusCooldown, count) + } - releasePorts(mgr, first) - verifyNumAllocations(mgr, 1) + verifyNumAllocationsInsuffientResources := func(mgr *nnfv1alpha1.NnfPortManager, count int) { + verifyNumAllocations(mgr, nnfv1alpha1.NnfPortManagerAllocationStatusInsufficientResources, count) + } - releasePorts(mgr, second) - verifyNumAllocations(mgr, 0) - }) + waitForCooldown := func(extra int) { + By(fmt.Sprintf("Waiting for cooldown (%ds)to expire", portCooldown)) + time.Sleep(time.Duration(portCooldown+extra) * time.Second) - It("Reserves & removes a multiple ports, one at a time", func() { - first := "first" - ports := reservePorts(mgr, first, 1) - firstPort := ports[0] - Expect(ports[0]).To(BeEquivalentTo(portStart)) - verifyNumAllocations(mgr, 1) - releasePorts(mgr, first) - verifyNumAllocations(mgr, 0) - - // Port should be reused since it was freed already - // This will fail once cooldowns are introduced - second := "second" - ports = reservePorts(mgr, second, 1) - Expect(ports[0]).To(BeEquivalentTo(firstPort)) - verifyNumAllocations(mgr, 1) - - releasePorts(mgr, second) - verifyNumAllocations(mgr, 0) - }) + } - It("Reserves & removes all ports", func() { - const name = "all" - reservePorts(mgr, name, portEnd-portStart+1) - verifyNumAllocations(mgr, 1) - releasePorts(mgr, name) - verifyNumAllocations(mgr, 0) + When("the system configuration is missing", func() { + It("should have a status that indicates system configuration is not found", func() { + Expect(k8sClient.Delete(ctx, cfg)).To(Succeed()) + Eventually(func() error { + return k8sClient.Get(ctx, client.ObjectKeyFromObject(cfg), cfg) + }).ShouldNot(Succeed()) + cfg = nil + + kickPortManager(mgr) + + Eventually(func() nnfv1alpha1.NnfPortManagerStatusStatus { + k8sClient.Get(ctx, client.ObjectKeyFromObject(mgr), mgr) + return mgr.Status.Status + }).Should(Equal(nnfv1alpha1.NnfPortManagerStatusSystemConfigurationNotFound)) + }) }) - It("Reserves from free list", func() { - const single = "single" - reservePorts(mgr, single, 1) - - const remaining = "remaining" - count := portEnd - portStart - reservePorts(mgr, remaining, count) - - releasePorts(mgr, single) - verifyNumAllocations(mgr, 1) - - reservePorts(mgr, "free", 1) + When("reserving ports with portCooldown", func() { + + BeforeEach(func() { + portCooldown = 2 + }) + + When("a single port is reserved and removed", func() { + It("should cooldown and then free up", func() { + const name = "single" + ports := reservePorts(mgr, name, 1) + Expect(ports[0]).To(BeEquivalentTo(portStart)) + verifyNumAllocationsInUse(mgr, 1) + releasePorts(mgr, name) + verifyNumAllocationsInUse(mgr, 0) + verifyNumAllocationsCooldown(mgr, 1) + + waitForCooldown(0) + kickPortManager(mgr) + verifyNumAllocationsInUse(mgr, 0) + verifyNumAllocationsCooldown(mgr, 0) + }) + }) + + When("reserving and releasing multiple ports, one after another", func() { + It("should use the next port since the first is still in cooldown", func() { + first := "first" + ports := reservePorts(mgr, first, 1) + Expect(ports[0]).To(BeEquivalentTo(portStart)) + verifyNumAllocationsInUse(mgr, 1) + + second := "second" + ports = reservePorts(mgr, second, 1) + Expect(ports[0]).To(BeEquivalentTo(portStart + 1)) + verifyNumAllocationsInUse(mgr, 2) + + releasePorts(mgr, first) + verifyNumAllocationsInUse(mgr, 1) + 
verifyNumAllocationsCooldown(mgr, 1) + + releasePorts(mgr, second) + verifyNumAllocationsInUse(mgr, 0) + verifyNumAllocationsCooldown(mgr, 2) + }) + }) + + When("reserving and releasing multiple ports, one at a time", func() { + It("should use the next port since the first is still in cooldown", func() { + first := "first" + ports := reservePorts(mgr, first, 1) + firstPort := ports[0] + Expect(ports[0]).To(BeEquivalentTo(portStart)) + verifyNumAllocationsInUse(mgr, 1) + releasePorts(mgr, first) + verifyNumAllocationsInUse(mgr, 0) + verifyNumAllocationsCooldown(mgr, 1) + + second := "second" + ports = reservePorts(mgr, second, 1) + Expect(ports[0]).To(BeEquivalentTo(firstPort + 1)) + verifyNumAllocationsInUse(mgr, 1) + verifyNumAllocationsCooldown(mgr, 1) + + releasePorts(mgr, second) + verifyNumAllocationsInUse(mgr, 0) + verifyNumAllocationsCooldown(mgr, 2) + }) + }) + + When("reserving all the ports in 1 allocation", func() { + It("should reserve and cooldown successfully", func() { + const name = "all" + reservePorts(mgr, name, portEnd-portStart+1) + verifyNumAllocationsInUse(mgr, 1) + verifyNumAllocationsCooldown(mgr, 0) + releasePorts(mgr, name) + verifyNumAllocationsInUse(mgr, 0) + verifyNumAllocationsCooldown(mgr, 1) + }) + }) + + XIt("Reserves from free list", func() { + const single = "single" + reservePorts(mgr, single, 1) + + const remaining = "remaining" + count := portEnd - portStart + reservePorts(mgr, remaining, count) + + releasePorts(mgr, single) + verifyNumAllocationsInUse(mgr, 1) + + reservePorts(mgr, "free", 1) + + verifyNumAllocationsInUse(mgr, 2) + }) + + When("all ports are already reserved", func() { + It("fails with insufficient resources", func() { + const name = "all" + reservePorts(mgr, name, portEnd-portStart+1) + + allocation := nnfv1alpha1.NnfPortManagerAllocationSpec{ + Requester: corev1.ObjectReference{Name: "insufficient-resources"}, + Count: 1, + } - verifyNumAllocations(mgr, 2) + Eventually(func() error { + Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(mgr), mgr)).To(Succeed()) + mgr.Spec.Allocations = append(mgr.Spec.Allocations, allocation) + return k8sClient.Update(ctx, mgr) + }).Should(Succeed()) + + Eventually(func() bool { + Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(mgr), mgr)).To(Succeed()) + return r.isAllocated(mgr, allocation) + }).Should(BeTrue()) + + status := r.findAllocationStatus(mgr, allocation) + Expect(status).ToNot(BeNil()) + Expect(status.Ports).To(BeEmpty()) + Expect(status.Status).To(Equal(nnfv1alpha1.NnfPortManagerAllocationStatusInsufficientResources)) + }) + }) + + When("a single port is reserved and released", func() { + It("expires and is removed from allocations after the cooldown period", func() { + const name = "single" + ports := reservePorts(mgr, name, 1) + Expect(ports[0]).To(BeEquivalentTo(portStart)) + verifyNumAllocationsInUse(mgr, 1) + verifyNumAllocationsCooldown(mgr, 0) + + releasePorts(mgr, name) + verifyNumAllocationsInUse(mgr, 0) + verifyNumAllocationsCooldown(mgr, 1) + + waitForCooldown(0) + kickPortManager(mgr) + verifyNumAllocationsInUse(mgr, 0) + verifyNumAllocationsCooldown(mgr, 0) + }) + }) + + When("all ports are already reserved and another allocation is requested", func() { + It("should eventually free up the cooldown ports and successfully reserve", func() { + By("Reserving all available ports") + for i := 0; i < portTotal; i++ { + ports := reservePorts(mgr, fmt.Sprintf("test-%d", i), 1) + verifyNumAllocationsInUse(mgr, i+1) + Expect(ports[0]).To(BeEquivalentTo(portStart + i)) + } + 
verifyNumAllocationsInUse(mgr, portTotal) + + By("Attempting to reserve an additional port and failing") + ports := reservePortsAllowFail(mgr, "waiting", 1) + allocation := nnfv1alpha1.NnfPortManagerAllocationSpec{Requester: corev1.ObjectReference{Name: "waiting"}, Count: 1} + status := r.findAllocationStatus(mgr, allocation) + + Expect(ports).To(HaveLen(0)) + Expect(status).ToNot(BeNil()) + Expect(status.Status).To(Equal(nnfv1alpha1.NnfPortManagerAllocationStatusInsufficientResources)) + verifyNumAllocationsInUse(mgr, portTotal) + verifyNumAllocationsInsuffientResources(mgr, 1) + + By("Releasing one of the original ports to make room for previous request") + releasePorts(mgr, "test-0") + verifyNumAllocationsInUse(mgr, portTotal-1) + verifyNumAllocationsCooldown(mgr, 1) + verifyNumAllocationsInsuffientResources(mgr, 1) + + By("Verifying that the cooldown expired and the new reservation is now InUse") + waitForCooldown(0) + verifyNumAllocationsCooldown(mgr, 0) + verifyNumAllocationsInsuffientResources(mgr, 0) + verifyNumAllocationsInUse(mgr, portTotal) + }) + }) }) - It("Fails with insufficient resources", func() { - const name = "all" - reservePorts(mgr, name, portEnd-portStart+1) - - allocation := nnfv1alpha1.NnfPortManagerAllocationSpec{ - Requester: corev1.ObjectReference{Name: "insufficient-resources"}, - Count: 1, - } - - Eventually(func() error { - Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(mgr), mgr)).To(Succeed()) - mgr.Spec.Allocations = append(mgr.Spec.Allocations, allocation) - return k8sClient.Update(ctx, mgr) - }).Should(Succeed()) - - Eventually(func() bool { - Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(mgr), mgr)).To(Succeed()) - return r.isAllocated(mgr, allocation) - }).Should(BeTrue()) - - status := r.findAllocationStatus(mgr, allocation) - Expect(status).ToNot(BeNil()) - Expect(status.Ports).To(BeEmpty()) - Expect(status.Status).To(Equal(nnfv1alpha1.NnfPortManagerAllocationStatusInsufficientResources)) + When("reserving ports with portCooldown", func() { + + BeforeEach(func() { + portCooldown = 0 + }) + + When("reserving and releasing multiple ports, one at a time", func() { + It("should use the same port since the first has no cooldown", func() { + first := "first" + ports := reservePorts(mgr, first, 1) + firstPort := ports[0] + Expect(ports[0]).To(BeEquivalentTo(portStart)) + verifyNumAllocationsInUse(mgr, 1) + releasePorts(mgr, first) + verifyNumAllocationsInUse(mgr, 0) + verifyNumAllocationsCooldown(mgr, 0) + + second := "second" + ports = reservePorts(mgr, second, 1) + Expect(ports[0]).To(BeEquivalentTo(firstPort)) + verifyNumAllocationsInUse(mgr, 1) + verifyNumAllocationsCooldown(mgr, 0) + + releasePorts(mgr, second) + verifyNumAllocationsInUse(mgr, 0) + verifyNumAllocationsCooldown(mgr, 0) + }) + }) }) }) - }) diff --git a/go.mod b/go.mod index 0a4891513..d2ec9b523 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/NearNodeFlash/nnf-sos go 1.19 require ( - github.com/HewlettPackard/dws v0.0.1-0.20230802152955-11a333f31153 + github.com/HewlettPackard/dws v0.0.1-0.20230808200300-d01ceeeda5b9 github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20230613180840-6178f2b04900 github.com/NearNodeFlash/nnf-ec v0.0.0-20230526161255-cfb2d89b35d7 github.com/ghodss/yaml v1.0.0 diff --git a/go.sum b/go.sum index 27346b40e..47b027c6a 100644 --- a/go.sum +++ b/go.sum @@ -1,8 +1,18 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod 
h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/HewlettPackard/dws v0.0.1-0.20230801191319-c7c89c130874 h1:Dqri3g3RJLTmj3BJdGnTjoC90YCvrhSA8v7DAT7sswE= +github.com/HewlettPackard/dws v0.0.1-0.20230801191319-c7c89c130874/go.mod h1:YvNzcgAPmwhl/YQj6dMwsB9OpwbI5bp/41kINfFiXX8= github.com/HewlettPackard/dws v0.0.1-0.20230802152955-11a333f31153 h1:9vMjataXTnCwXEGwxu0dQrOLUW5ujoJiTWAUTb8k50w= github.com/HewlettPackard/dws v0.0.1-0.20230802152955-11a333f31153/go.mod h1:YvNzcgAPmwhl/YQj6dMwsB9OpwbI5bp/41kINfFiXX8= +github.com/HewlettPackard/dws v0.0.1-0.20230803194754-9de80c958e62 h1:bmw4JcOf+Sf+LCfem9KANTS/tz2PNyLfzFHBVBlL0jk= +github.com/HewlettPackard/dws v0.0.1-0.20230803194754-9de80c958e62/go.mod h1:YvNzcgAPmwhl/YQj6dMwsB9OpwbI5bp/41kINfFiXX8= +github.com/HewlettPackard/dws v0.0.1-0.20230807145500-18069013c7a6 h1:2aOdP7BUDe4oOjKwrIG0uZ6xrf9rIfOTdzhp0L1kf24= +github.com/HewlettPackard/dws v0.0.1-0.20230807145500-18069013c7a6/go.mod h1:YvNzcgAPmwhl/YQj6dMwsB9OpwbI5bp/41kINfFiXX8= +github.com/HewlettPackard/dws v0.0.1-0.20230808173419-44432232680c h1:Z4Lq2iFRTuWMYW17uWzgS6m/mOT2KgeC0Sgl/D8VO8g= +github.com/HewlettPackard/dws v0.0.1-0.20230808173419-44432232680c/go.mod h1:YvNzcgAPmwhl/YQj6dMwsB9OpwbI5bp/41kINfFiXX8= +github.com/HewlettPackard/dws v0.0.1-0.20230808200300-d01ceeeda5b9 h1:TWkQQjiYIbQFPePEx87L62P/1QujiVeRTpx/O9ClFAI= +github.com/HewlettPackard/dws v0.0.1-0.20230808200300-d01ceeeda5b9/go.mod h1:YvNzcgAPmwhl/YQj6dMwsB9OpwbI5bp/41kINfFiXX8= github.com/HewlettPackard/structex v1.0.4 h1:RVTdN5FWhDWr1IkjllU8wxuLjISo4gr6u5ryZpzyHcA= github.com/HewlettPackard/structex v1.0.4/go.mod h1:3frC4RY/cPsP/4+N8rkxsNAGlQwHV+zDC7qvrN+N+rE= github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20230613180840-6178f2b04900 h1:jOrP2H+D5amgHIONcucYS3/kJm6QfmqAG23Ke7elunI= @@ -11,6 +21,10 @@ github.com/NearNodeFlash/nnf-ec v0.0.0-20230526161255-cfb2d89b35d7 h1:y4E3b/Ta6s github.com/NearNodeFlash/nnf-ec v0.0.0-20230526161255-cfb2d89b35d7/go.mod h1:11Ol46sAWdqlj3WmIFTzKO+UxQX3lvWBqpe6yaiMEIg= github.com/OneOfOne/xxhash v1.2.2 h1:KMrpdQIwFcEqXDklaen+P1axHaj9BSKzvpUUfnHldSE= github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= +github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= +github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= +github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= +github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= @@ -65,6 +79,7 @@ github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4 github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw= github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= +github.com/go-logfmt/logfmt v0.5.1/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= github.com/go-logr/logr v1.2.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= 
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ= @@ -142,8 +157,10 @@ github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANyt github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= +github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.12.3/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= @@ -179,6 +196,9 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= +github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= +github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= github.com/onsi/ginkgo/v2 v2.9.1 h1:zie5Ly042PD3bsCvsSOPvRnFwyo3rKe64TJlD6nu0mk= github.com/onsi/ginkgo/v2 v2.9.1/go.mod h1:FEcmzVcCHl+4o9bQZVab+4dC9+j+91t2FHSzmGAPfuo= github.com/onsi/gomega v1.27.3 h1:5VwIwnBY3vbBDOJrNtA4rVdiTZCsq9B5F12pvy1Drmk= @@ -379,6 +399,7 @@ google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQ google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng= google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= @@ -386,6 +407,7 @@ gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntN gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.3/go.mod 
h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= diff --git a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/systemconfiguration_types.go b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/systemconfiguration_types.go index 8217718d2..c89ad18d7 100644 --- a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/systemconfiguration_types.go +++ b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/systemconfiguration_types.go @@ -68,6 +68,12 @@ type SystemConfigurationSpec struct { // START is an integer value that represents the start of a port range and END is an // integer value that represents the end of the port range (inclusive). Ports []intstr.IntOrString `json:"ports,omitempty"` + + // PortsCooldownInSeconds is the number of seconds to wait before a port can be reused. Defaults + // to 60 seconds (to match the typical value for the kernel's TIME_WAIT). A value of 0 means the + // ports can be reused immediately. + // +kubebuilder:default:=60 + PortsCooldownInSeconds int `json:"portsCooldownInSeconds"` } // SystemConfigurationStatus defines the status of SystemConfiguration diff --git a/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_systemconfigurations.yaml b/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_systemconfigurations.yaml index 85f65967a..acfad5282 100644 --- a/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_systemconfigurations.yaml +++ b/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_systemconfigurations.yaml @@ -177,6 +177,13 @@ spec: - type: string x-kubernetes-int-or-string: true type: array + portsCooldownInSeconds: + default: 60 + description: PortsCooldownInSeconds is the number of seconds to wait + before a port can be reused. Defaults to 60 seconds (to match the + typical value for the kernel's TIME_WAIT). A value of 0 means the + ports can be reused immediately. + type: integer storageNodes: description: StorageNodes is the list of storage nodes on the system items: @@ -212,6 +219,8 @@ spec: - type type: object type: array + required: + - portsCooldownInSeconds type: object status: description: SystemConfigurationStatus defines the status of SystemConfiguration diff --git a/vendor/modules.txt b/vendor/modules.txt index 571b60d22..2fddad2cb 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1,4 +1,4 @@ -# github.com/HewlettPackard/dws v0.0.1-0.20230802152955-11a333f31153 +# github.com/HewlettPackard/dws v0.0.1-0.20230808200300-d01ceeeda5b9 ## explicit; go 1.19 github.com/HewlettPackard/dws/api/v1alpha2 github.com/HewlettPackard/dws/config/crd/bases From 31c55db88e75321924b72c65400c7b86c2a54743 Mon Sep 17 00:00:00 2001 From: Dean Roehrich Date: Tue, 15 Aug 2023 13:25:58 -0500 Subject: [PATCH 14/19] Translate a ResourceError severity to a Workflow status (#218) Use the routines provided by DWS. 
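The severity-to-status mapping that this change delegates to DWS is small enough to restate. The standalone Go sketch below is illustration only: the ResourceErrorSeverity and Status* declarations are local stand-ins with assumed string values, not the real DWS v1alpha2 definitions; only the switch logic mirrors the ToStatus() routine added in the vendored diff further down, and the fallback comment reflects how handleWorkflowError() treats an unknown severity.

    package main

    import "fmt"

    // Stand-in type with assumed values; the real definition lives in
    // github.com/HewlettPackard/dws/api/v1alpha2.
    type ResourceErrorSeverity string

    const (
            SeverityMinor ResourceErrorSeverity = "Minor"
            SeverityMajor ResourceErrorSeverity = "Major"
            SeverityFatal ResourceErrorSeverity = "Fatal"
    )

    // Assumed status strings; stand-ins for the dwsv1alpha2 Status* constants.
    const (
            StatusRunning            = "Running"
            StatusTransientCondition = "TransientCondition"
            StatusError              = "Error"
    )

    // toStatus mirrors the mapping performed by ResourceErrorSeverity.ToStatus():
    // minor -> Running, major -> TransientCondition, fatal -> Error.
    func toStatus(severity ResourceErrorSeverity) (string, error) {
            switch severity {
            case SeverityMinor:
                    return StatusRunning, nil
            case SeverityMajor:
                    return StatusTransientCondition, nil
            case SeverityFatal:
                    return StatusError, nil
            default:
                    return "", fmt.Errorf("unknown severity: %s", string(severity))
            }
    }

    func main() {
            for _, s := range []ResourceErrorSeverity{SeverityMinor, SeverityMajor, SeverityFatal, "Bogus"} {
                    if status, err := toStatus(s); err != nil {
                            // handleWorkflowError() falls back to StatusError and reports an internal error.
                            fmt.Printf("%s -> %s (%v)\n", s, StatusError, err)
                    } else {
                            fmt.Printf("%s -> %s\n", s, status)
                    }
            }
    }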
Signed-off-by: Dean Roehrich --- .../nnf_workflow_controller_helpers.go | 18 +++++----- go.mod | 2 +- go.sum | 26 ++------------ .../dws/api/v1alpha2/workflow_types.go | 36 ++++++++++++++++++- vendor/modules.txt | 2 +- 5 files changed, 47 insertions(+), 37 deletions(-) diff --git a/controllers/nnf_workflow_controller_helpers.go b/controllers/nnf_workflow_controller_helpers.go index 150a0fa6b..a0c8b92f0 100644 --- a/controllers/nnf_workflow_controller_helpers.go +++ b/controllers/nnf_workflow_controller_helpers.go @@ -763,18 +763,16 @@ func (r *NnfWorkflowReconciler) findPersistentInstance(ctx context.Context, wf * func handleWorkflowError(err error, driverStatus *dwsv1alpha2.WorkflowDriverStatus) { e, ok := err.(*dwsv1alpha2.ResourceErrorInfo) if ok { - switch e.Severity { - case dwsv1alpha2.SeverityMinor: - driverStatus.Status = dwsv1alpha2.StatusRunning - case dwsv1alpha2.SeverityMajor: - driverStatus.Status = dwsv1alpha2.StatusTransientCondition - case dwsv1alpha2.SeverityFatal: + status, err := e.Severity.ToStatus() + if err != nil { driverStatus.Status = dwsv1alpha2.StatusError + driverStatus.Message = "Internal error: " + err.Error() + driverStatus.Error = err.Error() + } else { + driverStatus.Status = status + driverStatus.Message = e.UserMessage + driverStatus.Error = e.Error() } - - driverStatus.Message = e.UserMessage - driverStatus.Error = e.Error() - } else { driverStatus.Status = dwsv1alpha2.StatusError driverStatus.Message = "Internal error: " + err.Error() diff --git a/go.mod b/go.mod index d2ec9b523..ea158d25d 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/NearNodeFlash/nnf-sos go 1.19 require ( - github.com/HewlettPackard/dws v0.0.1-0.20230808200300-d01ceeeda5b9 + github.com/HewlettPackard/dws v0.0.1-0.20230815174614-998c6ad6bd1d github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20230613180840-6178f2b04900 github.com/NearNodeFlash/nnf-ec v0.0.0-20230526161255-cfb2d89b35d7 github.com/ghodss/yaml v1.0.0 diff --git a/go.sum b/go.sum index 47b027c6a..0d7b65fa6 100644 --- a/go.sum +++ b/go.sum @@ -1,18 +1,8 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/HewlettPackard/dws v0.0.1-0.20230801191319-c7c89c130874 h1:Dqri3g3RJLTmj3BJdGnTjoC90YCvrhSA8v7DAT7sswE= -github.com/HewlettPackard/dws v0.0.1-0.20230801191319-c7c89c130874/go.mod h1:YvNzcgAPmwhl/YQj6dMwsB9OpwbI5bp/41kINfFiXX8= -github.com/HewlettPackard/dws v0.0.1-0.20230802152955-11a333f31153 h1:9vMjataXTnCwXEGwxu0dQrOLUW5ujoJiTWAUTb8k50w= -github.com/HewlettPackard/dws v0.0.1-0.20230802152955-11a333f31153/go.mod h1:YvNzcgAPmwhl/YQj6dMwsB9OpwbI5bp/41kINfFiXX8= -github.com/HewlettPackard/dws v0.0.1-0.20230803194754-9de80c958e62 h1:bmw4JcOf+Sf+LCfem9KANTS/tz2PNyLfzFHBVBlL0jk= -github.com/HewlettPackard/dws v0.0.1-0.20230803194754-9de80c958e62/go.mod h1:YvNzcgAPmwhl/YQj6dMwsB9OpwbI5bp/41kINfFiXX8= -github.com/HewlettPackard/dws v0.0.1-0.20230807145500-18069013c7a6 h1:2aOdP7BUDe4oOjKwrIG0uZ6xrf9rIfOTdzhp0L1kf24= -github.com/HewlettPackard/dws v0.0.1-0.20230807145500-18069013c7a6/go.mod h1:YvNzcgAPmwhl/YQj6dMwsB9OpwbI5bp/41kINfFiXX8= -github.com/HewlettPackard/dws v0.0.1-0.20230808173419-44432232680c h1:Z4Lq2iFRTuWMYW17uWzgS6m/mOT2KgeC0Sgl/D8VO8g= -github.com/HewlettPackard/dws v0.0.1-0.20230808173419-44432232680c/go.mod h1:YvNzcgAPmwhl/YQj6dMwsB9OpwbI5bp/41kINfFiXX8= 
-github.com/HewlettPackard/dws v0.0.1-0.20230808200300-d01ceeeda5b9 h1:TWkQQjiYIbQFPePEx87L62P/1QujiVeRTpx/O9ClFAI= -github.com/HewlettPackard/dws v0.0.1-0.20230808200300-d01ceeeda5b9/go.mod h1:YvNzcgAPmwhl/YQj6dMwsB9OpwbI5bp/41kINfFiXX8= +github.com/HewlettPackard/dws v0.0.1-0.20230815174614-998c6ad6bd1d h1:QZKgq7r+4ZUOGV5IPT/HUYWxVMT7vLrYmOV5yvwB6IA= +github.com/HewlettPackard/dws v0.0.1-0.20230815174614-998c6ad6bd1d/go.mod h1:YvNzcgAPmwhl/YQj6dMwsB9OpwbI5bp/41kINfFiXX8= github.com/HewlettPackard/structex v1.0.4 h1:RVTdN5FWhDWr1IkjllU8wxuLjISo4gr6u5ryZpzyHcA= github.com/HewlettPackard/structex v1.0.4/go.mod h1:3frC4RY/cPsP/4+N8rkxsNAGlQwHV+zDC7qvrN+N+rE= github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20230613180840-6178f2b04900 h1:jOrP2H+D5amgHIONcucYS3/kJm6QfmqAG23Ke7elunI= @@ -21,10 +11,6 @@ github.com/NearNodeFlash/nnf-ec v0.0.0-20230526161255-cfb2d89b35d7 h1:y4E3b/Ta6s github.com/NearNodeFlash/nnf-ec v0.0.0-20230526161255-cfb2d89b35d7/go.mod h1:11Ol46sAWdqlj3WmIFTzKO+UxQX3lvWBqpe6yaiMEIg= github.com/OneOfOne/xxhash v1.2.2 h1:KMrpdQIwFcEqXDklaen+P1axHaj9BSKzvpUUfnHldSE= github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= -github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= -github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= -github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= -github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8= @@ -79,7 +65,6 @@ github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4 github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw= github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= -github.com/go-logfmt/logfmt v0.5.1/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= github.com/go-logr/logr v1.2.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ= @@ -157,10 +142,8 @@ github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANyt github.com/jessevdk/go-flags v1.4.0/go.mod h1:4FA24M0QyGHXBuZZK/XkWh8h0e1EYbRYJSGM75WSRxI= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= -github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= -github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= 
github.com/klauspost/compress v1.12.3/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= @@ -196,9 +179,6 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= -github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= -github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= -github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= github.com/onsi/ginkgo/v2 v2.9.1 h1:zie5Ly042PD3bsCvsSOPvRnFwyo3rKe64TJlD6nu0mk= github.com/onsi/ginkgo/v2 v2.9.1/go.mod h1:FEcmzVcCHl+4o9bQZVab+4dC9+j+91t2FHSzmGAPfuo= github.com/onsi/gomega v1.27.3 h1:5VwIwnBY3vbBDOJrNtA4rVdiTZCsq9B5F12pvy1Drmk= @@ -399,7 +379,6 @@ google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQ google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng= google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= -gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= @@ -407,7 +386,6 @@ gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntN gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= -gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= diff --git a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/workflow_types.go b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/workflow_types.go index 3d189f18e..388dfa523 100644 --- a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/workflow_types.go +++ b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/workflow_types.go @@ -1,5 +1,5 @@ /* - * Copyright 2021, 2022 Hewlett Packard Enterprise Development LP + * Copyright 2021-2023 Hewlett Packard Enterprise Development LP * Other additional copyright holders may be indicated within. * * The entirety of this work is licensed under the Apache License, @@ -20,6 +20,9 @@ package v1alpha2 import ( + "fmt" + "strings" + "github.com/HewlettPackard/dws/utils/updater" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -101,6 +104,37 @@ const ( StatusDriverWait = "DriverWait" ) +// ToStatus will return a Status* string that goes with +// the given severity. 
+func (severity ResourceErrorSeverity) ToStatus() (string, error) { + switch severity { + case SeverityMinor: + return StatusRunning, nil + case SeverityMajor: + return StatusTransientCondition, nil + case SeverityFatal: + return StatusError, nil + default: + return "", fmt.Errorf("unknown severity: %s", string(severity)) + } +} + +// SeverityStringToStatus will return a Status* string that goes with +// the given severity. +// An empty severity string will be considered a minor severity. +func SeverityStringToStatus(severity string) (string, error) { + switch strings.ToLower(severity) { + case "", "minor": + return SeverityMinor.ToStatus() + case "major": + return SeverityMajor.ToStatus() + case "fatal": + return SeverityFatal.ToStatus() + default: + return "", fmt.Errorf("unknown severity: %s", severity) + } +} + // WorkflowSpec defines the desired state of Workflow type WorkflowSpec struct { // Desired state for the workflow to be in. Unless progressing to the teardown state, diff --git a/vendor/modules.txt b/vendor/modules.txt index 2fddad2cb..b5f76cbaa 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1,4 +1,4 @@ -# github.com/HewlettPackard/dws v0.0.1-0.20230808200300-d01ceeeda5b9 +# github.com/HewlettPackard/dws v0.0.1-0.20230815174614-998c6ad6bd1d ## explicit; go 1.19 github.com/HewlettPackard/dws/api/v1alpha2 github.com/HewlettPackard/dws/config/crd/bases From f3eb87a996e31b227b49009613da3ce7a1460de5 Mon Sep 17 00:00:00 2001 From: matthew-richerson <82597529+matthew-richerson@users.noreply.github.com> Date: Wed, 30 Aug 2023 11:11:36 -0500 Subject: [PATCH 15/19] Use external MGS from a pool of PersistentStorageInstance MGSs (#219) * Use external MGS from a pool of PersistentStorageInstance MGSs - Add an NnfStorageProfile option "standaloneMgtPoolName" to create a Lustre file system that only has an MGT. This option only works with the "create_persistent" directive. - Apply a label to the PersistentStorageInstance with the value of the "standaloneMgtPoolName" option. This adds the PersistentStorageInstance to a pool of ExternalMGSs. - Change the externalMgs option in the NnfStorageProfile to also accept "pool:[poolname]" where "poolname" is the name of an MGS pool. - Modify the Setup phase to pick an MGS from the pool and add a reference to the PersistentStorageInstance. 
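To make the pool mechanics above concrete, here is a rough sketch of how a reconciler could resolve "pool:[poolname]" to one MGS-providing PersistentStorageInstance by matching the StandaloneMGTLabel added in this patch. This is not the code from nnf_workflow_controller_helpers.go; the helper name pickMgsFromPool and the first-match policy are illustrative, and the sketch assumes the DWS PersistentStorageInstance types and the controller-runtime label-selector client already used in this repository.

    package controllers

    import (
            "context"
            "fmt"
            "strings"

            dwsv1alpha2 "github.com/HewlettPackard/dws/api/v1alpha2"
            nnfv1alpha1 "github.com/NearNodeFlash/nnf-sos/api/v1alpha1"
            "sigs.k8s.io/controller-runtime/pkg/client"
    )

    // pickMgsFromPool resolves an ExternalMGS value of the form "pool:<poolName>"
    // to one PersistentStorageInstance in that pool by listing on the
    // StandaloneMGTLabel. Error handling and the selection policy are
    // simplified for illustration.
    func pickMgsFromPool(ctx context.Context, c client.Client, externalMgs string) (*dwsv1alpha2.PersistentStorageInstance, error) {
            poolName := strings.TrimPrefix(externalMgs, "pool:")

            psiList := &dwsv1alpha2.PersistentStorageInstanceList{}
            if err := c.List(ctx, psiList, client.MatchingLabels{nnfv1alpha1.StandaloneMGTLabel: poolName}); err != nil {
                    return nil, err
            }

            if len(psiList.Items) == 0 {
                    return nil, fmt.Errorf("no standalone MGT found in pool '%s'", poolName)
            }

            // Naive policy: take the first member of the pool. The real Setup phase
            // also records a reference to the chosen PersistentStorageInstance.
            return &psiList.Items[0], nil
    }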
Signed-off-by: Matt Richerson * review comments Signed-off-by: Matt Richerson --------- Signed-off-by: Matt Richerson --- api/v1alpha1/nnf_storage_types.go | 5 ++ api/v1alpha1/nnfstorageprofile_types.go | 13 ++- api/v1alpha1/nnfstorageprofile_webhook.go | 8 +- .../nnfstorageprofile_webhook_test.go | 12 +-- api/v1alpha1/workflow_helpers.go | 4 + api/v1alpha1/zz_generated.deepcopy.go | 1 + .../nnf.cray.hpe.com_nnfstorageprofiles.yaml | 20 +++-- .../bases/nnf.cray.hpe.com_nnfstorages.yaml | 39 +++++++++ controllers/directivebreakdown_controller.go | 4 +- .../directivebreakdown_controller_test.go | 18 ++-- ...nf_persistentstorageinstance_controller.go | 16 ++++ controllers/nnf_storage_controller.go | 82 ++++++++++++++++++- .../nnf_workflow_controller_helpers.go | 75 ++++++++++++++++- 13 files changed, 262 insertions(+), 35 deletions(-) diff --git a/api/v1alpha1/nnf_storage_types.go b/api/v1alpha1/nnf_storage_types.go index e3f579177..de508c8e7 100644 --- a/api/v1alpha1/nnf_storage_types.go +++ b/api/v1alpha1/nnf_storage_types.go @@ -22,6 +22,7 @@ package v1alpha1 import ( dwsv1alpha2 "github.com/HewlettPackard/dws/api/v1alpha2" "github.com/HewlettPackard/dws/utils/updater" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -56,6 +57,10 @@ type NnfStorageLustreSpec struct { // ExternalMgsNid is the NID of the MGS when a pre-existing MGS is // provided by the DataWarp directive (#DW). ExternalMgsNid string `json:"externalMgsNid,omitempty"` + + // PersistentMgsReference is a reference to a persistent storage that is providing + // the external MGS. + PersistentMgsReference corev1.ObjectReference `json:"persistentMgsReference,omitempty"` } // NnfStorageAllocationSetSpec defines the details for an allocation set diff --git a/api/v1alpha1/nnfstorageprofile_types.go b/api/v1alpha1/nnfstorageprofile_types.go index 7e13967a7..d247e5b6f 100644 --- a/api/v1alpha1/nnfstorageprofile_types.go +++ b/api/v1alpha1/nnfstorageprofile_types.go @@ -65,7 +65,11 @@ type NnfStorageProfileLustreData struct { // +kubebuilder:default:=false CombinedMGTMDT bool `json:"combinedMgtMdt,omitempty"` - // ExternalMGS contains the NIDs of a pre-existing MGS that should be used + // ExternalMGS specifies the use of an existing MGS rather than creating one. This can + // be either the NID(s) of a pre-existing MGS that should be used, or it can be an NNF Persistent + // Instance that was created with the "StandaloneMGTPoolName" option. In the latter case, the format + // is "pool:poolName" where "poolName" is the argument from "StandaloneMGTPoolName". A single MGS will + // be picked from the pool. ExternalMGS string `json:"externalMgs,omitempty"` // CapacityMGT specifies the size of the MGT device. @@ -83,9 +87,10 @@ type NnfStorageProfileLustreData struct { // +kubebuilder:default:=false ExclusiveMDT bool `json:"exclusiveMdt,omitempty"` - // StandAloneMGT creates only a Lustre MGT without an MDT or OST - // +kubebuilder:default:=false - StandaloneMGT bool `json:"standaloneMgt,omitempty"` + // StandaloneMGTPoolName creates a Lustre MGT without a MDT or OST. This option can only be used when creating + // a persistent Lustre instance. The MGS is placed into a named pool that can be used by the "ExternalMGS" option. + // Multiple pools can be created. + StandaloneMGTPoolName string `json:"standaloneMgtPoolName,omitempty"` // MgtCmdLines contains commands to create an MGT target. 
MgtCmdLines NnfStorageProfileLustreCmdLines `json:"mgtCommandlines,omitempty"` diff --git a/api/v1alpha1/nnfstorageprofile_webhook.go b/api/v1alpha1/nnfstorageprofile_webhook.go index d4e41e963..84f168c9c 100644 --- a/api/v1alpha1/nnfstorageprofile_webhook.go +++ b/api/v1alpha1/nnfstorageprofile_webhook.go @@ -116,12 +116,12 @@ func (r *NnfStorageProfile) validateContentLustre() error { return fmt.Errorf("cannot set both combinedMgtMdt and externalMgs") } - if r.Data.LustreStorage.StandaloneMGT && len(r.Data.LustreStorage.ExternalMGS) > 0 { - return fmt.Errorf("cannot set both standaloneMgt and externalMgs") + if len(r.Data.LustreStorage.StandaloneMGTPoolName) > 0 && len(r.Data.LustreStorage.ExternalMGS) > 0 { + return fmt.Errorf("cannot set both standaloneMgtPoolName and externalMgs") } - if r.Data.LustreStorage.StandaloneMGT && r.Data.LustreStorage.CombinedMGTMDT { - return fmt.Errorf("cannot set standaloneMgt and combinedMgtMdt") + if len(r.Data.LustreStorage.StandaloneMGTPoolName) > 0 && r.Data.LustreStorage.CombinedMGTMDT { + return fmt.Errorf("cannot set standaloneMgtPoolName and combinedMgtMdt") } for _, target := range []string{"mgt", "mdt", "mgtmdt", "ost"} { diff --git a/api/v1alpha1/nnfstorageprofile_webhook_test.go b/api/v1alpha1/nnfstorageprofile_webhook_test.go index 77c2234a4..2ee5d7c5c 100644 --- a/api/v1alpha1/nnfstorageprofile_webhook_test.go +++ b/api/v1alpha1/nnfstorageprofile_webhook_test.go @@ -114,8 +114,8 @@ var _ = Describe("NnfStorageProfile Webhook", func() { Expect(newProfile.Data.Default).ToNot(BeTrue()) }) - It("should accept standaloneMgt", func() { - nnfProfile.Data.LustreStorage.StandaloneMGT = true + It("should accept standaloneMgtPoolName", func() { + nnfProfile.Data.LustreStorage.StandaloneMGTPoolName = "FakePool" Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(nnfProfile), newProfile)).To(Succeed()) Expect(newProfile.Data.Default).ToNot(BeTrue()) @@ -150,15 +150,15 @@ var _ = Describe("NnfStorageProfile Webhook", func() { nnfProfile = nil }) - It("should not accept standaloneMgt with externalMgs", func() { - nnfProfile.Data.LustreStorage.StandaloneMGT = true + It("should not accept standaloneMgtPoolName with externalMgs", func() { + nnfProfile.Data.LustreStorage.StandaloneMGTPoolName = "FakePool" nnfProfile.Data.LustreStorage.ExternalMGS = "10.0.0.1@tcp" Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed()) nnfProfile = nil }) - It("should not accept standaloneMgt with combinedMgtMdt", func() { - nnfProfile.Data.LustreStorage.StandaloneMGT = true + It("should not accept standaloneMgtPoolName with combinedMgtMdt", func() { + nnfProfile.Data.LustreStorage.StandaloneMGTPoolName = "FakePool" nnfProfile.Data.LustreStorage.CombinedMGTMDT = true Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed()) nnfProfile = nil diff --git a/api/v1alpha1/workflow_helpers.go b/api/v1alpha1/workflow_helpers.go index 0ea6b11ed..69ae9d086 100644 --- a/api/v1alpha1/workflow_helpers.go +++ b/api/v1alpha1/workflow_helpers.go @@ -41,4 +41,8 @@ const ( // PinnedContainerProfileLabelNameSpace is a label applied to NnfStorage objects to show // which pinned container profile is being used. PinnedContainerProfileLabelNameSpace = "nnf.cray.hpe.com/pinned_container_profile_namespace" + + // StandaloneMGTLabel is a label applied to the PersistentStorageInstance to show that + // it is for a Lustre MGT only. The value for the label is the pool name. 
+ StandaloneMGTLabel = "nnf.cray.hpe.com/standalone_mgt" ) diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 6ac55b96a..95df249a0 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -1142,6 +1142,7 @@ func (in *NnfStorageList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *NnfStorageLustreSpec) DeepCopyInto(out *NnfStorageLustreSpec) { *out = *in + out.PersistentMgsReference = in.PersistentMgsReference } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NnfStorageLustreSpec. diff --git a/config/crd/bases/nnf.cray.hpe.com_nnfstorageprofiles.yaml b/config/crd/bases/nnf.cray.hpe.com_nnfstorageprofiles.yaml index 23b2bfa86..cd752d162 100644 --- a/config/crd/bases/nnf.cray.hpe.com_nnfstorageprofiles.yaml +++ b/config/crd/bases/nnf.cray.hpe.com_nnfstorageprofiles.yaml @@ -133,8 +133,13 @@ spec: colocated with any other target on the chosen server. type: boolean externalMgs: - description: ExternalMGS contains the NIDs of a pre-existing MGS - that should be used + description: ExternalMGS specifies the use of an existing MGS + rather than creating one. This can be either the NID(s) of a + pre-existing MGS that should be used, or it can be an NNF Persistent + Instance that was created with the "StandaloneMGTPoolName" option. + In the latter case, the format is "pool:poolName" where "poolName" + is the argument from "StandaloneMGTPoolName". A single MGS will + be picked from the pool. type: string mdtCommandlines: description: MdtCmdLines contains commands to create an MDT target. @@ -337,11 +342,12 @@ spec: required: - colocateComputes type: object - standaloneMgt: - default: false - description: StandAloneMGT creates only a Lustre MGT without an - MDT or OST - type: boolean + standaloneMgtPoolName: + description: StandaloneMGTPoolName creates a Lustre MGT without + a MDT or OST. This option can only be used when creating a persistent + Lustre instance. The MGS is placed into a named pool that can + be used by the "ExternalMGS" option. Multiple pools can be created. + type: string type: object pinned: default: false diff --git a/config/crd/bases/nnf.cray.hpe.com_nnfstorages.yaml b/config/crd/bases/nnf.cray.hpe.com_nnfstorages.yaml index 07dd1b985..1262f751e 100644 --- a/config/crd/bases/nnf.cray.hpe.com_nnfstorages.yaml +++ b/config/crd/bases/nnf.cray.hpe.com_nnfstorages.yaml @@ -97,6 +97,45 @@ spec: - name type: object type: array + persistentMgsReference: + description: PersistentMgsReference is a reference to a persistent + storage that is providing the external MGS. + properties: + apiVersion: + description: API version of the referent. + type: string + fieldPath: + description: 'If referring to a piece of an object instead + of an entire object, this string should contain a valid + JSON/Go field access statement, such as desiredState.manifest.containers[2]. + For example, if the object reference is to a container + within a pod, this would take on a value like: "spec.containers{name}" + (where "name" refers to the name of the container that + triggered the event) or if no container name is specified + "spec.containers[2]" (container with index 2 in this pod). + This syntax is chosen only to have some well-defined way + of referencing a part of an object. TODO: this design + is not final and this field is subject to change in the + future.' 
+ type: string + kind: + description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + namespace: + description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/' + type: string + resourceVersion: + description: 'Specific resourceVersion to which this reference + is made, if any. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency' + type: string + uid: + description: 'UID of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids' + type: string + type: object + x-kubernetes-map-type: atomic targetType: description: TargetType is the type of Lustre target to be created. enum: diff --git a/controllers/directivebreakdown_controller.go b/controllers/directivebreakdown_controller.go index dc0b3b365..50ba92c2c 100644 --- a/controllers/directivebreakdown_controller.go +++ b/controllers/directivebreakdown_controller.go @@ -504,9 +504,9 @@ func (r *DirectiveBreakdownReconciler) populateStorageBreakdown(ctx context.Cont lustreComponents = append(lustreComponents, lustreComponentType{dwsv1alpha2.AllocateAcrossServers, mdtCapacity, "mgtmdt", useKey}) } else if len(nnfStorageProfile.Data.LustreStorage.ExternalMGS) > 0 { lustreComponents = append(lustreComponents, lustreComponentType{dwsv1alpha2.AllocateAcrossServers, mdtCapacity, "mdt", mdtKey}) - } else if nnfStorageProfile.Data.LustreStorage.StandaloneMGT { + } else if len(nnfStorageProfile.Data.LustreStorage.StandaloneMGTPoolName) > 0 { if argsMap["command"] != "create_persistent" { - return dwsv1alpha2.NewResourceError("").WithUserMessage("standaloneMgt option can only be used with 'create_persistent' directive").WithFatal().WithUser() + return dwsv1alpha2.NewResourceError("").WithUserMessage("standaloneMgtPoolName option can only be used with 'create_persistent' directive").WithFatal().WithUser() } lustreComponents = []lustreComponentType{lustreComponentType{dwsv1alpha2.AllocateSingleServer, mgtCapacity, "mgt", mgtKey}} diff --git a/controllers/directivebreakdown_controller_test.go b/controllers/directivebreakdown_controller_test.go index 5c3845615..4584bc107 100644 --- a/controllers/directivebreakdown_controller_test.go +++ b/controllers/directivebreakdown_controller_test.go @@ -139,11 +139,11 @@ var _ = Describe("DirectiveBreakdown test", func() { }).ShouldNot(Succeed()) }) - It("Creates a DirectiveBreakdown with a lustre jobdw and standaloneMgt", func() { - By("Setting standaloneMgt in the storage profile") + It("Creates a DirectiveBreakdown with a lustre jobdw and standaloneMgtPoolName", func() { + By("Setting standaloneMgtPoolName in the storage profile") Eventually(func(g Gomega) error { g.Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(storageProfile), storageProfile)).To(Succeed()) - storageProfile.Data.LustreStorage.StandaloneMGT = true + storageProfile.Data.LustreStorage.StandaloneMGTPoolName = "FakePool" return k8sClient.Update(context.TODO(), storageProfile) }).Should(Succeed()) @@ -166,11 +166,11 @@ var _ = Describe("DirectiveBreakdown test", func() { }).ShouldNot(BeNil()) }) - It("Creates a DirectiveBreakdown with an xfs jobdw and standaloneMgt", func() { - By("Setting 
standaloneMgt in the storage profile") + It("Creates a DirectiveBreakdown with an xfs jobdw and standaloneMgtPoolName", func() { + By("Setting standaloneMgtPoolName in the storage profile") Eventually(func(g Gomega) error { g.Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(storageProfile), storageProfile)).To(Succeed()) - storageProfile.Data.LustreStorage.StandaloneMGT = true + storageProfile.Data.LustreStorage.StandaloneMGTPoolName = "FakePool" return k8sClient.Update(context.TODO(), storageProfile) }).Should(Succeed()) @@ -193,11 +193,11 @@ var _ = Describe("DirectiveBreakdown test", func() { }).Should(BeTrue()) }) - It("Creates a DirectiveBreakdown with a create_persistent and standaloneMgt", func() { - By("Setting standaloneMgt in the storage profile") + It("Creates a DirectiveBreakdown with a create_persistent and standaloneMgtPoolName", func() { + By("Setting standaloneMgtPoolName in the storage profile") Eventually(func(g Gomega) error { g.Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(storageProfile), storageProfile)).To(Succeed()) - storageProfile.Data.LustreStorage.StandaloneMGT = true + storageProfile.Data.LustreStorage.StandaloneMGTPoolName = "FakePool" return k8sClient.Update(context.TODO(), storageProfile) }).Should(Succeed()) diff --git a/controllers/nnf_persistentstorageinstance_controller.go b/controllers/nnf_persistentstorageinstance_controller.go index 3ece24302..83f9307ec 100644 --- a/controllers/nnf_persistentstorageinstance_controller.go +++ b/controllers/nnf_persistentstorageinstance_controller.go @@ -147,6 +147,22 @@ func (r *PersistentStorageReconciler) Reconcile(ctx context.Context, req ctrl.Re return ctrl.Result{Requeue: true}, nil } + // If this PersistentStorageInstance is for a standalone MGT, add a label so it can be easily found + if argsMap["type"] == "lustre" && len(pinnedProfile.Data.LustreStorage.StandaloneMGTPoolName) > 0 { + labels := persistentStorage.GetLabels() + if _, ok := labels[nnfv1alpha1.StandaloneMGTLabel]; !ok { + labels[nnfv1alpha1.StandaloneMGTLabel] = pinnedProfile.Data.LustreStorage.StandaloneMGTPoolName + persistentStorage.SetLabels(labels) + if err := r.Update(ctx, persistentStorage); err != nil { + if !apierrors.IsConflict(err) { + return ctrl.Result{}, err + } + + return ctrl.Result{Requeue: true}, nil + } + } + } + // Create the Servers resource servers, err := r.createServers(ctx, persistentStorage) if err != nil { diff --git a/controllers/nnf_storage_controller.go b/controllers/nnf_storage_controller.go index 86b2eb9ed..e407394eb 100644 --- a/controllers/nnf_storage_controller.go +++ b/controllers/nnf_storage_controller.go @@ -21,12 +21,14 @@ package controllers import ( "context" + "reflect" "runtime" "strconv" "time" "github.com/go-logr/logr" + corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" kruntime "k8s.io/apimachinery/pkg/runtime" @@ -166,7 +168,14 @@ func (r *NnfStorageReconciler) Reconcile(ctx context.Context, req ctrl.Request) storage.Status.Error = nil // For each allocation, create the NnfNodeStorage resources to fan out to the Rabbit nodes - for i := range storage.Spec.AllocationSets { + for i, allocationSet := range storage.Spec.AllocationSets { + // Add a reference to the external MGS PersistentStorageInstance if necessary + if allocationSet.NnfStorageLustreSpec.PersistentMgsReference != (corev1.ObjectReference{}) { + if err := r.addPersistentStorageReference(ctx, storage, 
allocationSet.NnfStorageLustreSpec.PersistentMgsReference); err != nil { + return ctrl.Result{}, err + } + } + res, err := r.createNodeStorage(ctx, storage, i) if err != nil { return ctrl.Result{}, err @@ -220,6 +229,69 @@ func (r *NnfStorageReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, nil } +func (r *NnfStorageReconciler) addPersistentStorageReference(ctx context.Context, nnfStorage *nnfv1alpha1.NnfStorage, persistentMgsReference corev1.ObjectReference) error { + persistentStorage := &dwsv1alpha2.PersistentStorageInstance{ + ObjectMeta: metav1.ObjectMeta{ + Name: persistentMgsReference.Name, + Namespace: persistentMgsReference.Namespace, + }, + } + + if err := r.Get(ctx, client.ObjectKeyFromObject(persistentStorage), persistentStorage); err != nil { + return dwsv1alpha2.NewResourceError("").WithUserMessage("PersistentStorage '%v' not found", client.ObjectKeyFromObject(persistentStorage)).WithMajor() + } + + if persistentStorage.Status.State != dwsv1alpha2.PSIStateActive { + return dwsv1alpha2.NewResourceError("").WithUserMessage("PersistentStorage is not active").WithFatal() + } + + // Add a consumer reference to the persistent storage for this directive + reference := corev1.ObjectReference{ + Name: nnfStorage.Name, + Namespace: nnfStorage.Namespace, + Kind: reflect.TypeOf(nnfv1alpha1.NnfStorage{}).Name(), + } + + for _, existingReference := range persistentStorage.Spec.ConsumerReferences { + if existingReference == reference { + return nil + } + } + + persistentStorage.Spec.ConsumerReferences = append(persistentStorage.Spec.ConsumerReferences, reference) + + return r.Update(ctx, persistentStorage) +} + +func (r *NnfStorageReconciler) removePersistentStorageReference(ctx context.Context, nnfStorage *nnfv1alpha1.NnfStorage, persistentMgsReference corev1.ObjectReference) error { + persistentStorage := &dwsv1alpha2.PersistentStorageInstance{ + ObjectMeta: metav1.ObjectMeta{ + Name: persistentMgsReference.Name, + Namespace: persistentMgsReference.Namespace, + }, + } + + if err := r.Get(ctx, client.ObjectKeyFromObject(persistentStorage), persistentStorage); err != nil { + return client.IgnoreNotFound(err) + } + + // remove the consumer reference on the persistent storage for this directive + reference := corev1.ObjectReference{ + Name: nnfStorage.Name, + Namespace: nnfStorage.Namespace, + Kind: reflect.TypeOf(nnfv1alpha1.NnfStorage{}).Name(), + } + + for i, existingReference := range persistentStorage.Spec.ConsumerReferences { + if existingReference == reference { + persistentStorage.Spec.ConsumerReferences = append(persistentStorage.Spec.ConsumerReferences[:i], persistentStorage.Spec.ConsumerReferences[i+1:]...) + return r.Update(ctx, persistentStorage) + } + } + + return nil +} + // Create an NnfNodeStorage if it doesn't exist, or update it if it requires updating. Each // Rabbit node gets an NnfNodeStorage, and there may be multiple allocations requested in it. // This limits the number of resources that have to be broadcast to the Rabbits. 
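
The two helpers above keep the PersistentStorageInstance's ConsumerReferences list in sync with the NnfStorage resources that borrow its MGS: the reconcile loop adds a reference before fanning out the NnfNodeStorage resources, and teardownStorage (next hunk) removes it. A minimal, self-contained sketch of that idempotent bookkeeping, using a plain slice as a stand-in for the real PersistentStorageInstance spec:

```go
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
)

// addConsumer appends ref only if it is not already present, mirroring the
// duplicate check in addPersistentStorageReference above.
func addConsumer(refs []corev1.ObjectReference, ref corev1.ObjectReference) []corev1.ObjectReference {
	for _, existing := range refs {
		if existing == ref {
			return refs
		}
	}
	return append(refs, ref)
}

// removeConsumer drops a matching ref if one exists, mirroring
// removePersistentStorageReference above.
func removeConsumer(refs []corev1.ObjectReference, ref corev1.ObjectReference) []corev1.ObjectReference {
	for i, existing := range refs {
		if existing == ref {
			return append(refs[:i], refs[i+1:]...)
		}
	}
	return refs
}

func main() {
	ref := corev1.ObjectReference{Kind: "NnfStorage", Name: "my-storage", Namespace: "default"}

	refs := addConsumer(nil, ref)
	refs = addConsumer(refs, ref) // repeated reconciles are a no-op
	fmt.Println(len(refs))        // 1

	refs = removeConsumer(refs, ref)
	fmt.Println(len(refs)) // 0
}
```

Because both operations are idempotent, repeated reconciles of the same NnfStorage neither grow the consumer list nor fail when the reference has already been cleaned up.
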
@@ -433,6 +505,14 @@ func (r *NnfStorageReconciler) teardownStorage(ctx context.Context, storage *nnf return nodeStoragesExist, nil } + for _, allocationSet := range storage.Spec.AllocationSets { + if allocationSet.NnfStorageLustreSpec.PersistentMgsReference != (corev1.ObjectReference{}) { + if err := r.removePersistentStorageReference(ctx, storage, allocationSet.NnfStorageLustreSpec.PersistentMgsReference); err != nil { + return nodeStoragesExist, err + } + } + } + return nodeStoragesDeleted, nil } diff --git a/controllers/nnf_workflow_controller_helpers.go b/controllers/nnf_workflow_controller_helpers.go index a0c8b92f0..090c016d8 100644 --- a/controllers/nnf_workflow_controller_helpers.go +++ b/controllers/nnf_workflow_controller_helpers.go @@ -22,6 +22,7 @@ package controllers import ( "context" "fmt" + "math/rand" "os" "reflect" "strconv" @@ -594,6 +595,33 @@ func (r *NnfWorkflowReconciler) createNnfStorage(ctx context.Context, workflow * nnfStorage.Spec.UserID = workflow.Spec.UserID nnfStorage.Spec.GroupID = workflow.Spec.GroupID + // determine the NID of the external MGS if necessary + mgsNid := "" + persistentMgsReference := corev1.ObjectReference{} + + if dwArgs["type"] == "lustre" && len(nnfStorageProfile.Data.LustreStorage.ExternalMGS) > 0 { + // If the prefix on the ExternalMGS field is "pool:", then this is pool name instead of a NID. + if strings.HasPrefix(nnfStorageProfile.Data.LustreStorage.ExternalMGS, "pool:") { + // Copy the existing PersistentStorageInstance data if present to prevent picking a different + // MGS + for _, allocationSet := range nnfStorage.Spec.AllocationSets { + mgsNid = allocationSet.NnfStorageLustreSpec.ExternalMgsNid + persistentMgsReference = allocationSet.NnfStorageLustreSpec.PersistentMgsReference + break + } + + // If no MGS was picked yet, pick one randomly from the pool of PersistentStorageInstances with the right label + if mgsNid == "" { + persistentMgsReference, mgsNid, err = r.getLustreMgsFromPool(ctx, strings.TrimPrefix(nnfStorageProfile.Data.LustreStorage.ExternalMGS, "pool:")) + if err != nil { + return err + } + } + + } else { + mgsNid = nnfStorageProfile.Data.LustreStorage.ExternalMGS + } + } // Need to remove all of the AllocationSets in the NnfStorage object before we begin nnfStorage.Spec.AllocationSets = []nnfv1alpha1.NnfStorageAllocationSetSpec{} @@ -607,8 +635,9 @@ func (r *NnfWorkflowReconciler) createNnfStorage(ctx context.Context, workflow * nnfAllocSet.NnfStorageLustreSpec.TargetType = strings.ToUpper(s.Spec.AllocationSets[i].Label) nnfAllocSet.NnfStorageLustreSpec.BackFs = "zfs" nnfAllocSet.NnfStorageLustreSpec.FileSystemName = "z" + string(s.GetUID())[:7] - if len(nnfStorageProfile.Data.LustreStorage.ExternalMGS) > 0 { - nnfAllocSet.NnfStorageLustreSpec.ExternalMgsNid = nnfStorageProfile.Data.LustreStorage.ExternalMGS + if len(mgsNid) > 0 { + nnfAllocSet.NnfStorageLustreSpec.ExternalMgsNid = mgsNid + nnfAllocSet.NnfStorageLustreSpec.PersistentMgsReference = persistentMgsReference } } @@ -651,6 +680,46 @@ func (r *NnfWorkflowReconciler) createNnfStorage(ctx context.Context, workflow * return nnfStorage, nil } +func (r *NnfWorkflowReconciler) getLustreMgsFromPool(ctx context.Context, pool string) (corev1.ObjectReference, string, error) { + persistentStorageList := &dwsv1alpha2.PersistentStorageInstanceList{} + if err := r.List(ctx, persistentStorageList, client.MatchingLabels(map[string]string{nnfv1alpha1.StandaloneMGTLabel: pool})); err != nil { + return corev1.ObjectReference{}, "", err + } + + // Choose an MGS at random 
from the list of persistent storages + persistentStorage := persistentStorageList.Items[rand.Intn(len(persistentStorageList.Items))] + + // Find the NnfStorage for the PersistentStorage so we can get the LNid + nnfStorage := &nnfv1alpha1.NnfStorage{ + ObjectMeta: metav1.ObjectMeta{ + Name: persistentStorage.Name, + Namespace: persistentStorage.Namespace, + }, + } + + if err := r.Get(ctx, client.ObjectKeyFromObject(nnfStorage), nnfStorage); err != nil { + return corev1.ObjectReference{}, "", dwsv1alpha2.NewResourceError("could not get persistent NnfStorage %v for MGS", client.ObjectKeyFromObject(nnfStorage)).WithError(err) + } + + if nnfStorage.Spec.FileSystemType != "lustre" { + return corev1.ObjectReference{}, "", dwsv1alpha2.NewResourceError("invalid file systems type '%s' for persistent MGS", nnfStorage.Spec.FileSystemType).WithFatal() + } + + if len(nnfStorage.Spec.AllocationSets) != 1 { + return corev1.ObjectReference{}, "", dwsv1alpha2.NewResourceError("unexpected number of allocation sets '%d' for persistent MGS", len(nnfStorage.Spec.AllocationSets)).WithFatal() + } + + if len(nnfStorage.Status.MgsNode) == 0 { + return corev1.ObjectReference{}, "", dwsv1alpha2.NewResourceError("no LNid listed for persistent MGS").WithFatal() + } + + return corev1.ObjectReference{ + Kind: reflect.TypeOf(dwsv1alpha2.PersistentStorageInstance{}).Name(), + Name: persistentStorage.Name, + Namespace: persistentStorage.Namespace, + }, nnfStorage.Status.MgsNode, nil +} + func (r *NnfWorkflowReconciler) findLustreFileSystemForPath(ctx context.Context, path string, log logr.Logger) *lusv1beta1.LustreFileSystem { lustres := &lusv1beta1.LustreFileSystemList{} if err := r.List(ctx, lustres); err != nil { @@ -1046,6 +1115,7 @@ func (r *NnfWorkflowReconciler) addPersistentStorageReference(ctx context.Contex reference := corev1.ObjectReference{ Name: indexedResourceName(workflow, index), Namespace: workflow.Namespace, + Kind: reflect.TypeOf(dwsv1alpha2.Workflow{}).Name(), } found := false @@ -1076,6 +1146,7 @@ func (r *NnfWorkflowReconciler) removePersistentStorageReference(ctx context.Con reference := corev1.ObjectReference{ Name: indexedResourceName(workflow, index), Namespace: workflow.Namespace, + Kind: reflect.TypeOf(dwsv1alpha2.Workflow{}).Name(), } for i, existingReference := range persistentStorage.Spec.ConsumerReferences { From e62e7fe59fcf1ed43575307967656a8ab00bcf90 Mon Sep 17 00:00:00 2001 From: Blake Devcich <89158881+bdevcich-hpe@users.noreply.github.com> Date: Tue, 5 Sep 2023 08:08:05 -0500 Subject: [PATCH 16/19] Added more detail to NnfContainerProfile comments (#220) Signed-off-by: Blake Devcich --- api/v1alpha1/nnfcontainerprofile_types.go | 30 ++++-- ...nnf.cray.hpe.com_nnfcontainerprofiles.yaml | 21 ++-- .../nnf_v1alpha1_nnfcontainerprofile.yaml | 99 ++++++++++++++----- 3 files changed, 108 insertions(+), 42 deletions(-) diff --git a/api/v1alpha1/nnfcontainerprofile_types.go b/api/v1alpha1/nnfcontainerprofile_types.go index 969d66fc5..c9247d63f 100644 --- a/api/v1alpha1/nnfcontainerprofile_types.go +++ b/api/v1alpha1/nnfcontainerprofile_types.go @@ -52,14 +52,12 @@ type NnfContainerProfileData struct { // +kubebuilder:default:=6 RetryLimit int32 `json:"retryLimit"` - // UserID specifies the user ID that is allowed to use this profile. If this - // is specified, only Workflows that have a matching user ID can select - // this profile. + // UserID specifies the user ID that is allowed to use this profile. 
If this is specified, only + // Workflows that have a matching user ID can select this profile. UserID *uint32 `json:"userID,omitempty"` - // GroupID specifies the group ID that is allowed to use this profile. If this - // is specified, only Workflows that have a matching group ID can select - // this profile. + // GroupID specifies the group ID that is allowed to use this profile. If this is specified, + // only Workflows that have a matching group ID can select this profile. GroupID *uint32 `json:"groupID,omitempty"` // Number of ports to open for communication with the user container. These ports are opened on @@ -68,14 +66,26 @@ type NnfContainerProfileData struct { // the DWS workflow (NNF_CONTAINER_PORTS). NumPorts int32 `json:"numPorts,omitempty"` - // Spec to define the containers created from container profile. This is used for non-MPI - // containers. + // Spec to define the containers created from this profile. This is used for non-MPI containers. + // Refer to the K8s documentation for `PodSpec` for more definition: + // https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#PodSpec // Either this or MPISpec must be provided, but not both. Spec *corev1.PodSpec `json:"spec,omitempty"` - // MPIJobSpec to define the containers created from container profile. This is used for MPI - // containers via MPIJobs. See mpi-operator for more details. + // MPIJobSpec to define the MPI containers created from this profile. This functionality is + // provided via mpi-operator, a 3rd party tool to assist in running MPI applications across + // worker containers. // Either this or Spec must be provided, but not both. + // + // All the fields defined drive mpi-operator behavior. See the type definition of MPISpec for + // more detail: + // https://github.com/kubeflow/mpi-operator/blob/v0.4.0/pkg/apis/kubeflow/v2beta1/types.go#L137 + // + // Note: most of these fields are fully customizable with a few exceptions. These fields are + // overridden by NNF software to ensure proper behavior to interface with the DWS workflow + // - Replicas + // - RunPolicy.BackoffLimit (this is set above by `RetryLimit`) + // - Worker/Launcher.RestartPolicy MPISpec *mpiv2beta1.MPIJobSpec `json:"mpiSpec,omitempty"` } diff --git a/config/crd/bases/nnf.cray.hpe.com_nnfcontainerprofiles.yaml b/config/crd/bases/nnf.cray.hpe.com_nnfcontainerprofiles.yaml index 65ea0c77e..dee7c0578 100644 --- a/config/crd/bases/nnf.cray.hpe.com_nnfcontainerprofiles.yaml +++ b/config/crd/bases/nnf.cray.hpe.com_nnfcontainerprofiles.yaml @@ -35,10 +35,16 @@ spec: format: int32 type: integer mpiSpec: - description: MPIJobSpec to define the containers created from container - profile. This is used for MPI containers via MPIJobs. See mpi-operator - for more details. Either this or Spec must be provided, but not - both. + description: "MPIJobSpec to define the MPI containers created from + this profile. This functionality is provided via mpi-operator, a + 3rd party tool to assist in running MPI applications across worker + containers. Either this or Spec must be provided, but not both. + \n All the fields defined drive mpi-operator behavior. See the type + definition of MPISpec for more detail: https://github.com/kubeflow/mpi-operator/blob/v0.4.0/pkg/apis/kubeflow/v2beta1/types.go#L137 + \n Note: most of these fields are fully customizable with a few + exceptions. 
These fields are overridden by NNF software to ensure + proper behavior to interface with the DWS workflow - Replicas - + RunPolicy.BackoffLimit (this is set above by `RetryLimit`) - Worker/Launcher.RestartPolicy" properties: mpiImplementation: default: OpenMPI @@ -8644,9 +8650,10 @@ spec: minimum: 0 type: integer spec: - description: Spec to define the containers created from container - profile. This is used for non-MPI containers. Either this or MPISpec - must be provided, but not both. + description: 'Spec to define the containers created from this profile. + This is used for non-MPI containers. Refer to the K8s documentation + for `PodSpec` for more definition: https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#PodSpec + Either this or MPISpec must be provided, but not both.' properties: activeDeadlineSeconds: description: Optional duration in seconds the pod may be active diff --git a/config/samples/nnf_v1alpha1_nnfcontainerprofile.yaml b/config/samples/nnf_v1alpha1_nnfcontainerprofile.yaml index 5d63d7769..b2635e0f2 100644 --- a/config/samples/nnf_v1alpha1_nnfcontainerprofile.yaml +++ b/config/samples/nnf_v1alpha1_nnfcontainerprofile.yaml @@ -4,13 +4,13 @@ metadata: name: sample-nnfcontainerprofile namespace: nnf-system data: - # Specifies the number of times a container will be retried upon a failure. A - # new pod is deployed on each retry. Defaults to 6 by kubernetes itself and - # must be set. A value of 0 disables retries. + # Specifies the number of times a container will be retried upon a failure. A new pod is deployed + # on each retry. Defaults to 6 by kubernetes itself and must be set. A value of 0 disables + # retries. retryLimit: 6 - # Stop any containers after X seconds once a workflow has transitioned to - # PostRun. Defaults to 0. A value of 0 disables this behavior. + # Stop any containers after X seconds once a workflow has transitioned to PostRun. Defaults to 0. + # A value of 0 disables this behavior. postRunTimeoutSeconds: 0 # Request the number of ports to open on the targeted rabbits. These ports are accessible outside @@ -18,33 +18,82 @@ data: # container and in the DWS workflow (NNF_CONTAINER_PORTS). numPorts: 0 - # List of possible filesystems supported by this container profile. These - # storages are mounted inside of the container. Any non-optional storage must - # be supplied with the container directive as an argument and must reference - # a valid jobdw/persistentdw directive's name or refer to a LustreFilesystem path. + # UserID specifies the user ID that is allowed to use this profile. If this is specified, only + # Workflows that have a matching user ID can select this profile. + userID: 1050 + + # GroupID specifies the group ID that is allowed to use this profile. If this is specified, + # only Workflows that have a matching group ID can select this profile. + groupID: 1050 + + # List of possible filesystems supported by this container profile. These storages are mounted + # inside of the container. Any non-optional storage must be supplied with the container directive + # as an argument and must reference a valid jobdw/persistentdw directive's name or refer to a + # LustreFilesystem path. 
# # Example: # DW jobdw name=my-gfs2 type=gfs2 capacity=50GB # DW container name=my-container profile=nnfcontainerprofile-sample DW_JOB_foo_local_storage=my-gfs2 storages: - - name: DW_JOB_foo_local_storage - optional: false - - name: DW_PERSISTENT_foo_persistent_storage - optional: true - # For Global lustre pvcMode is supported and must match the mode configured in the - # LustreFilesystem Resource - - name: DW_GLOBAL_foo_global_lustre - optional: true - pvcMode: ReadWriteMany + - name: DW_JOB_foo_local_storage + optional: false + - name: DW_PERSISTENT_foo_persistent_storage + optional: true + # For Global lustre, pvcMode must match the mode configured in the LustreFilesystem Resource + - name: DW_GLOBAL_foo_global_lustre + optional: true + pvcMode: ReadWriteMany # Template defines the containers that will be created from container profile. + # Note: Only 1 of `spec` or `MPISpec` can be defined, not both. template: - # TODO: Update for mpiSpec + + # Spec to define the containers created from this profile. This is used for non-MPI containers. + # Refer to the K8s documentation for `PodSpec` for more definition: + # https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#PodSpec + # Either this or MPISpec must be provided, but not both. spec: containers: - - name: sample-nnfcontainerprofile - image: alpine:latest - command: - - /bin/sh - - -c - - "sleep 15 && exit 0" + - name: sample-nnfcontainerprofile + image: alpine:latest + command: + - /bin/sh + - -c + - "sleep 15 && exit 0" + + # MPIJobSpec to define the MPI containers created from this profile. This functionality is + # provided via mpi-operator, a 3rd party tool to assist in running MPI applications across + # worker containers. + # Either this or Spec must be provided, but not both. + # + # All the fields defined drive mpi-operator behavior. See the type definition of MPISpec for + # more detail: + # https://github.com/kubeflow/mpi-operator/blob/v0.4.0/pkg/apis/kubeflow/v2beta1/types.go#L137 + # + # Note: most of these fields are fully customizable with a few exceptions. 
These fields are + # overridden by NNF software to ensure proper behavior to interface with the DWS workflow + # - Replicas + # - RunPolicy.BackoffLimit (this is set above by `RetryLimit`) + # - Worker/Launcher.RestartPolicy + # - SSHAuthMountPath + mpiSpec: + runPolicy: + cleanPodPolicy: Running + mpiReplicaSpecs: + Launcher: + template: + spec: + containers: + - name: example-mpi + image: nnf-mfu:latest + command: + - mpirun + - dcmp + - "$(DW_JOB_foo_local_storage)/0" + - "$(DW_JOB_foo_local_storage)/1" + Worker: + template: + spec: + containers: + - name: example-mpi + image: nnf-mfu:latest From dc60097309c0980fd506efe0290f4017aa874168 Mon Sep 17 00:00:00 2001 From: Blake Devcich <89158881+bdevcich-hpe@users.noreply.github.com> Date: Thu, 7 Sep 2023 09:57:39 -0500 Subject: [PATCH 17/19] Containers: Added PreRunTimeoutSeconds + Error Exit Strategy (#221) - Adds a timeout to Prerun and errors out if the containers do not start - Moved timeout anonymous functions to be normal functions - Fixed some issues with the example profiles for testing + added example-mpi-fail - Improved error handing in Postrun - Made Pre/PostRunTimeSeconds a pointer to allow for disabling and defaulting Signed-off-by: Blake Devcich --- .vscode/launch.json | 4 +- api/v1alpha1/nnfcontainerprofile_types.go | 17 +- api/v1alpha1/nnfcontainerprofile_webhook.go | 17 +- .../nnfcontainerprofile_webhook_test.go | 78 +++++- api/v1alpha1/zz_generated.deepcopy.go | 10 + ...nnf.cray.hpe.com_nnfcontainerprofiles.yaml | 19 +- .../nnf_v1alpha1_nnfcontainerprofiles.yaml | 180 ++++++++----- .../nnf_workflow_controller_helpers.go | 186 +++++++++---- go.mod | 1 + go.sum | 4 +- vendor/go.openly.dev/pointy/.gitignore | 12 + vendor/go.openly.dev/pointy/LICENSE | 21 ++ vendor/go.openly.dev/pointy/README.md | 154 +++++++++++ vendor/go.openly.dev/pointy/comparison.go | 25 ++ vendor/go.openly.dev/pointy/pointy.go | 250 ++++++++++++++++++ vendor/modules.txt | 3 + 16 files changed, 841 insertions(+), 140 deletions(-) create mode 100644 vendor/go.openly.dev/pointy/.gitignore create mode 100644 vendor/go.openly.dev/pointy/LICENSE create mode 100644 vendor/go.openly.dev/pointy/README.md create mode 100644 vendor/go.openly.dev/pointy/comparison.go create mode 100644 vendor/go.openly.dev/pointy/pointy.go diff --git a/.vscode/launch.json b/.vscode/launch.json index e9754e1ac..0812f0d7a 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -23,12 +23,10 @@ "mode": "test", "program": "${relativeFileDirname}", "args": [ - "-v=4", "-ginkgo.v", - "-ginkgo.progress" ], "env": { - "KUBEBUILDER_ASSETS": "${workspaceFolder}/bin/k8s/1.25.0-darwin-amd64", + "KUBEBUILDER_ASSETS": "${workspaceFolder}/bin/k8s/1.26.0-darwin-amd64", "GOMEGA_DEFAULT_EVENTUALLY_TIMEOUT": "10m", "GOMEGA_DEFAULT_EVENTUALLY_POLLING_INTERVAL": "100ms" }, diff --git a/api/v1alpha1/nnfcontainerprofile_types.go b/api/v1alpha1/nnfcontainerprofile_types.go index c9247d63f..ad85f1176 100644 --- a/api/v1alpha1/nnfcontainerprofile_types.go +++ b/api/v1alpha1/nnfcontainerprofile_types.go @@ -40,13 +40,22 @@ type NnfContainerProfileData struct { // List of possible filesystems supported by this container profile Storages []NnfContainerProfileStorage `json:"storages,omitempty"` - // Stop any containers after X seconds once a workflow has transitioned to PostRun. Defaults to - // 0. A value of 0 disables this behavior. + // Containers are launched in the PreRun state. Allow this many seconds for the containers to + // start before declaring an error to the workflow. 
+ // Defaults to 60 if not set. A value of 0 disables this behavior. + // +kubebuilder:default:=60 // +kubebuilder:validation:Minimum:=0 - PostRunTimeoutSeconds int64 `json:"postRunTimeoutSeconds,omitempty"` + PreRunTimeoutSeconds *int64 `json:"preRunTimeoutSeconds,omitempty"` + + // Containers are expected to complete in the PostRun State. Allow this many seconds for the + // containers to exit before declaring an error the workflow. + // Defaults to 60 if not set. A value of 0 disables this behavior. + // +kubebuilder:default:=60 + // +kubebuilder:validation:Minimum:=0 + PostRunTimeoutSeconds *int64 `json:"postRunTimeoutSeconds,omitempty"` // Specifies the number of times a container will be retried upon a failure. A new pod is - // deployed on each retry. Defaults to 6 by kubernetes itself and must be set. A value of 0 + // deployed on each retry. Defaults to 6 by kubernetes itself and must be set. A value of 0 // disables retries. // +kubebuilder:validation:Minimum:=0 // +kubebuilder:default:=6 diff --git a/api/v1alpha1/nnfcontainerprofile_webhook.go b/api/v1alpha1/nnfcontainerprofile_webhook.go index 1e69b1509..e5d195cab 100644 --- a/api/v1alpha1/nnfcontainerprofile_webhook.go +++ b/api/v1alpha1/nnfcontainerprofile_webhook.go @@ -111,11 +111,14 @@ func (r *NnfContainerProfile) validateContent() error { } if mpiJob { - // PostRunTimeoutSeconds will update the Jobs' ActiveDeadlineSeconds once Postrun starts, so we can't set them both - if r.Data.MPISpec.RunPolicy.ActiveDeadlineSeconds != nil && r.Data.PostRunTimeoutSeconds > 0 { + // PreRunTimeoutSeconds will update the Jobs' ActiveDeadlineSeconds once PreRun timeout occurs, so we can't set them both + if r.Data.MPISpec.RunPolicy.ActiveDeadlineSeconds != nil && r.Data.PreRunTimeoutSeconds != nil && *r.Data.PreRunTimeoutSeconds > 0 { + return fmt.Errorf("both PreRunTimeoutSeconds and MPISpec.RunPolicy.ActiveDeadlineSeconds are provided - only 1 can be set") + } + // PostRunTimeoutSeconds will update the Jobs' ActiveDeadlineSeconds once PostRun starts, so we can't set them both + if r.Data.MPISpec.RunPolicy.ActiveDeadlineSeconds != nil && r.Data.PostRunTimeoutSeconds != nil && *r.Data.PostRunTimeoutSeconds > 0 { return fmt.Errorf("both PostRunTimeoutSeconds and MPISpec.RunPolicy.ActiveDeadlineSeconds are provided - only 1 can be set") } - // Don't allow users to set the backoff limit directly if r.Data.MPISpec.RunPolicy.BackoffLimit != nil && r.Data.RetryLimit > 0 { return fmt.Errorf("MPISpec.RunPolicy.BackoffLimit is set. 
Use RetryLimit instead") @@ -130,8 +133,12 @@ func (r *NnfContainerProfile) validateContent() error { return fmt.Errorf("MPISpec.MPIReplicaSpecs.Worker must be present with at least 1 container defined") } } else { - // PostRunTimeoutSeconds will update the Jobs' ActiveDeadlineSeconds once Postrun starts, so we can't set them both - if r.Data.Spec.ActiveDeadlineSeconds != nil && r.Data.PostRunTimeoutSeconds > 0 { + // PreRunTimeoutSeconds will update the Jobs' ActiveDeadlineSeconds once PreRun timeout occurs, so we can't set them both + if r.Data.Spec.ActiveDeadlineSeconds != nil && r.Data.PreRunTimeoutSeconds != nil && *r.Data.PreRunTimeoutSeconds > 0 { + return fmt.Errorf("both PreRunTimeoutSeconds and Spec.ActiveDeadlineSeconds are provided - only 1 can be set") + } + // PostRunTimeoutSeconds will update the Jobs' ActiveDeadlineSeconds once PostRun starts, so we can't set them both + if r.Data.Spec.ActiveDeadlineSeconds != nil && r.Data.PostRunTimeoutSeconds != nil && *r.Data.PostRunTimeoutSeconds > 0 { return fmt.Errorf("both PostRunTimeoutSeconds and Spec.ActiveDeadlineSeconds are provided - only 1 can be set") } diff --git a/api/v1alpha1/nnfcontainerprofile_webhook_test.go b/api/v1alpha1/nnfcontainerprofile_webhook_test.go index a4d990492..ab0b00638 100644 --- a/api/v1alpha1/nnfcontainerprofile_webhook_test.go +++ b/api/v1alpha1/nnfcontainerprofile_webhook_test.go @@ -28,6 +28,7 @@ import ( mpiv2beta1 "github.com/kubeflow/mpi-operator/pkg/apis/kubeflow/v2beta1" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + "go.openly.dev/pointy" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" @@ -120,7 +121,7 @@ var _ = Describe("NnfContainerProfile Webhook", func() { }) It("Should not allow a negative postRunTimeoutSeconds", func() { - nnfProfile.Data.PostRunTimeoutSeconds = -1 + nnfProfile.Data.PostRunTimeoutSeconds = pointy.Int64(-1) Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed()) nnfProfile = nil }) @@ -207,12 +208,60 @@ var _ = Describe("NnfContainerProfile Webhook", func() { nnfProfile = nil }) + DescribeTable("Should allow a user to set PreRunTimeoutSeconds", + + func(timeout, expected *int64, succeed bool) { + nnfProfile.Data.Spec = &corev1.PodSpec{Containers: []corev1.Container{ + {Name: "test", Image: "alpine:latest"}, + }} + nnfProfile.Data.MPISpec = nil + + nnfProfile.Data.PreRunTimeoutSeconds = timeout + if succeed { + Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) + Expect(nnfProfile.Data.PreRunTimeoutSeconds).To(Equal(expected)) + } else { + Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed()) + nnfProfile = nil + } + + }, + Entry("to 0", pointy.Int64(0), pointy.Int64(0), true), + Entry("to 45", pointy.Int64(45), pointy.Int64(45), true), + Entry("to nil and get the default(60)", nil, pointy.Int64(60), true), + Entry("to -1 and fail", pointy.Int64(-1), nil, false), + ) + + DescribeTable("Should allow a user to set PostRunTimeoutSeconds", + + func(timeout, expected *int64, succeed bool) { + nnfProfile.Data.Spec = &corev1.PodSpec{Containers: []corev1.Container{ + {Name: "test", Image: "alpine:latest"}, + }} + nnfProfile.Data.MPISpec = nil + + nnfProfile.Data.PostRunTimeoutSeconds = timeout + if succeed { + Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) + Expect(nnfProfile.Data.PostRunTimeoutSeconds).To(Equal(expected)) + } else { + Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed()) + nnfProfile = nil + } + + }, 
+ Entry("to 0", pointy.Int64(0), pointy.Int64(0), true), + Entry("to 45", pointy.Int64(45), pointy.Int64(45), true), + Entry("to nil and get the default(60)", nil, pointy.Int64(60), true), + Entry("to -1 and fail", pointy.Int64(-1), nil, false), + ) + It("Should not allow setting both PostRunTimeoutSeconds and MPISpec.RunPolicy.ActiveDeadlineSeconds", func() { nnfProfile.Data.Spec = nil nnfProfile.Data.MPISpec = &mpiv2beta1.MPIJobSpec{} timeout := int64(10) - nnfProfile.Data.PostRunTimeoutSeconds = timeout + nnfProfile.Data.PostRunTimeoutSeconds = &timeout nnfProfile.Data.MPISpec.RunPolicy.ActiveDeadlineSeconds = &timeout Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed()) @@ -221,7 +270,28 @@ var _ = Describe("NnfContainerProfile Webhook", func() { It("Should not allow setting both PostRunTimeoutSeconds and Spec.ActiveDeadlineSeconds", func() { timeout := int64(10) - nnfProfile.Data.PostRunTimeoutSeconds = timeout + nnfProfile.Data.PostRunTimeoutSeconds = &timeout + nnfProfile.Data.Spec.ActiveDeadlineSeconds = &timeout + + Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed()) + nnfProfile = nil + }) + + It("Should not allow setting both PreRunTimeoutSeconds and MPISpec.RunPolicy.ActiveDeadlineSeconds", func() { + nnfProfile.Data.Spec = nil + nnfProfile.Data.MPISpec = &mpiv2beta1.MPIJobSpec{} + + timeout := int64(10) + nnfProfile.Data.PreRunTimeoutSeconds = &timeout + nnfProfile.Data.MPISpec.RunPolicy.ActiveDeadlineSeconds = &timeout + + Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed()) + nnfProfile = nil + }) + + It("Should not allow setting both PreRunTimeoutSeconds and Spec.ActiveDeadlineSeconds", func() { + timeout := int64(10) + nnfProfile.Data.PreRunTimeoutSeconds = &timeout nnfProfile.Data.Spec.ActiveDeadlineSeconds = &timeout Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed()) @@ -240,7 +310,7 @@ var _ = Describe("NnfContainerProfile Webhook", func() { }) It("Should allow a zero postRunTimeoutSeconds", func() { - nnfProfile.Data.PostRunTimeoutSeconds = 0 + nnfProfile.Data.PostRunTimeoutSeconds = pointy.Int64(0) Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) }) diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 95df249a0..361596a16 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -206,6 +206,16 @@ func (in *NnfContainerProfileData) DeepCopyInto(out *NnfContainerProfileData) { *out = make([]NnfContainerProfileStorage, len(*in)) copy(*out, *in) } + if in.PreRunTimeoutSeconds != nil { + in, out := &in.PreRunTimeoutSeconds, &out.PreRunTimeoutSeconds + *out = new(int64) + **out = **in + } + if in.PostRunTimeoutSeconds != nil { + in, out := &in.PostRunTimeoutSeconds, &out.PostRunTimeoutSeconds + *out = new(int64) + **out = **in + } if in.UserID != nil { in, out := &in.UserID, &out.UserID *out = new(uint32) diff --git a/config/crd/bases/nnf.cray.hpe.com_nnfcontainerprofiles.yaml b/config/crd/bases/nnf.cray.hpe.com_nnfcontainerprofiles.yaml index dee7c0578..bda90920b 100644 --- a/config/crd/bases/nnf.cray.hpe.com_nnfcontainerprofiles.yaml +++ b/config/crd/bases/nnf.cray.hpe.com_nnfcontainerprofiles.yaml @@ -8635,16 +8635,27 @@ spec: description: Pinned is true if this instance is an immutable copy type: boolean postRunTimeoutSeconds: - description: Stop any containers after X seconds once a workflow has - transitioned to PostRun. Defaults to 0. A value of 0 disables this - behavior. 
+ default: 60 + description: Containers are expected to complete in the PostRun State. + Allow this many seconds for the containers to exit before declaring + an error the workflow. Defaults to 60 if not set. A value of 0 disables + this behavior. + format: int64 + minimum: 0 + type: integer + preRunTimeoutSeconds: + default: 60 + description: Containers are launched in the PreRun state. Allow this + many seconds for the containers to start before declaring an error + to the workflow. Defaults to 60 if not set. A value of 0 disables + this behavior. format: int64 minimum: 0 type: integer retryLimit: default: 6 description: Specifies the number of times a container will be retried - upon a failure. A new pod is deployed on each retry. Defaults to + upon a failure. A new pod is deployed on each retry. Defaults to 6 by kubernetes itself and must be set. A value of 0 disables retries. format: int32 minimum: 0 diff --git a/config/examples/nnf_v1alpha1_nnfcontainerprofiles.yaml b/config/examples/nnf_v1alpha1_nnfcontainerprofiles.yaml index 6b0840896..80d8c8035 100644 --- a/config/examples/nnf_v1alpha1_nnfcontainerprofiles.yaml +++ b/config/examples/nnf_v1alpha1_nnfcontainerprofiles.yaml @@ -5,18 +5,34 @@ metadata: data: retryLimit: 6 storages: - - name: DW_JOB_foo_local_storage - optional: false - - name: DW_PERSISTENT_foo_persistent_storage - optional: true + - name: DW_JOB_foo_local_storage + optional: false + - name: DW_PERSISTENT_foo_persistent_storage + optional: true + - name: DW_GLOBAL_foo_global_lustre + optional: true spec: containers: - - name: example-success - image: alpine:latest - command: - - /bin/sh - - -c - - "sleep 15 && exit 0" + - name: example-success + image: alpine:latest + command: + - /bin/sh + - -c + - "sleep 10 && exit 0" +--- +apiVersion: nnf.cray.hpe.com/v1alpha1 +kind: NnfContainerProfile +metadata: + name: example-fail +data: + spec: + containers: + - name: example-fail + image: alpine:latest + command: + - /bin/sh + - -c + - "sleep 10 && exit 1" --- apiVersion: nnf.cray.hpe.com/v1alpha1 kind: NnfContainerProfile @@ -25,23 +41,23 @@ metadata: data: retryLimit: 6 storages: - - name: DW_JOB_foo_local_storage - optional: false - - name: DW_PERSISTENT_foo_persistent_storage - optional: true + - name: DW_JOB_foo_local_storage + optional: false + - name: DW_PERSISTENT_foo_persistent_storage + optional: true spec: containers: - - name: example-randomly-fail - image: alpine:latest - command: - - /bin/sh - - -c - - | - echo "starting..." - sleep 30 - x=$(($RANDOM % 2)) - echo "exiting: $x" - exit $x + - name: example-randomly-fail + image: alpine:latest + command: + - /bin/sh + - -c + - | + echo "starting..." 
+ sleep 10 + x=$(($RANDOM % 2)) + echo "exiting: $x" + exit $x --- apiVersion: nnf.cray.hpe.com/v1alpha1 kind: NnfContainerProfile @@ -50,18 +66,18 @@ metadata: data: retryLimit: 6 storages: - - name: DW_JOB_foo_local_storage - optional: false - - name: DW_PERSISTENT_foo_persistent_storage - optional: true + - name: DW_JOB_foo_local_storage + optional: false + - name: DW_PERSISTENT_foo_persistent_storage + optional: true spec: containers: - - name: example-forever - image: alpine:latest - command: - - /bin/sh - - -c - - "while true; do date && sleep 5; done" + - name: example-forever + image: alpine:latest + command: + - /bin/sh + - -c + - "while true; do date && sleep 5; done" --- apiVersion: nnf.cray.hpe.com/v1alpha1 kind: NnfContainerProfile @@ -71,13 +87,13 @@ data: retryLimit: 6 numPorts: 1 storages: - - name: DW_JOB_foo_local_storage - optional: false - - name: DW_PERSISTENT_foo_persistent_storage - optional: true - - name: DW_GLOBAL_foo_global_lustre - optional: true - pvcMode: ReadWriteMany + - name: DW_JOB_foo_local_storage + optional: false + - name: DW_PERSISTENT_foo_persistent_storage + optional: true + - name: DW_GLOBAL_foo_global_lustre + optional: true + pvcMode: ReadWriteMany mpiSpec: runPolicy: cleanPodPolicy: Running @@ -86,36 +102,64 @@ data: template: spec: containers: - - name: example-mpi - image: nnf-mfu:latest - command: - - mpirun - - dcmp - - "$(DW_JOB_foo_local_storage)/0" - - "$(DW_JOB_foo_local_storage)/1" + - name: example-mpi + image: nnf-mfu:latest + command: + - mpirun + - dcmp + - "$(DW_JOB_foo_local_storage)/0" + - "$(DW_JOB_foo_local_storage)/1" Worker: template: spec: containers: - - name: example-mpi - image: nnf-mfu:latest + - name: example-mpi + image: nnf-mfu:latest --- apiVersion: nnf.cray.hpe.com/v1alpha1 kind: NnfContainerProfile +metadata: + name: example-mpi-fail +data: + numPorts: 1 + mpiSpec: + runPolicy: + cleanPodPolicy: Running + mpiReplicaSpecs: + Launcher: + template: + spec: + containers: + - name: example-mpi-fail + image: nnf-mfu:latest + command: + - mpirun + - /bin/sh + - -c + - "sleep 10 && exit 1" + Worker: + template: + spec: + containers: + - name: example-mpi-fail + image: nnf-mfu:latest +--- +apiVersion: nnf.cray.hpe.com/v1alpha1 +kind: NnfContainerProfile metadata: name: example-mpi-webserver data: retryLimit: 6 numPorts: 1 storages: - - name: DW_JOB_foo_local_storage - optional: false - - name: DW_PERSISTENT_foo_persistent_storage - optional: true - - name: DW_GLOBAL_foo_global_lustre - optional: true - pvcMode: ReadWriteMany + - name: DW_JOB_foo_local_storage + optional: false + - name: DW_PERSISTENT_foo_persistent_storage + optional: true + - name: DW_GLOBAL_foo_global_lustre + optional: true + pvcMode: ReadWriteMany mpiSpec: runPolicy: cleanPodPolicy: Running @@ -124,17 +168,17 @@ data: template: spec: containers: - - name: example-mpi-webserver - image: ghcr.io/nearnodeflash/nnf-container-example:latest - command: - - mpirun - - python3 - - -m - - http.server - - $(NNF_CONTAINER_PORTS) + - name: example-mpi-webserver + image: ghcr.io/nearnodeflash/nnf-container-example:latest + command: + - mpirun + - python3 + - -m + - http.server + - $(NNF_CONTAINER_PORTS) Worker: template: spec: containers: - - name: example-mpi-webserver - image: ghcr.io/nearnodeflash/nnf-container-example:latest + - name: example-mpi-webserver + image: ghcr.io/nearnodeflash/nnf-container-example:latest diff --git a/controllers/nnf_workflow_controller_helpers.go b/controllers/nnf_workflow_controller_helpers.go index 090c016d8..91380f706 100644 --- 
a/controllers/nnf_workflow_controller_helpers.go +++ b/controllers/nnf_workflow_controller_helpers.go @@ -1325,10 +1325,36 @@ func (r *NnfWorkflowReconciler) waitForContainersToStart(ctx context.Context, wo if err != nil { return nil, err } + isMPIJob := profile.Data.MPISpec != nil - if profile.Data.MPISpec != nil { + // Timeouts - If the containers don't start after PreRunTimeoutSeconds, we need to send an error + // up to the workflow in every one of our return cases. Each return path will check for + // timeoutElapsed and bubble up a fatal error. + // We must also set the Jobs' activeDeadline timeout so that the containers are stopped once the + // timeout is hit. This needs to be handled slightly differently depending on if the job is MPI + // or not. Once set, k8s will take care of stopping the pods for us. + timeoutElapsed := false + timeout := time.Duration(0) + if profile.Data.PreRunTimeoutSeconds != nil { + timeout = time.Duration(*profile.Data.PreRunTimeoutSeconds) * time.Second + } + timeoutMessage := fmt.Sprintf("user container(s) failed to start after %d seconds", int(timeout.Seconds())) + + // Check if PreRunTimeoutSeconds has elapsed and set the flag. The logic will check once more to + // see if it started or not. If not, then the job(s) activeDeadline will be set to stop the + // jobs/pods. + if timeout > 0 && metav1.Now().Sub(workflow.Status.DesiredStateChange.Time) >= timeout { + timeoutElapsed = true + } + + if isMPIJob { mpiJob, result := r.getMPIJobConditions(ctx, workflow, index, 1) if result != nil { + // If timeout, don't allow requeue and return an error + if timeoutElapsed { + return nil, dwsv1alpha2.NewResourceError("could not retrieve MPIJobs to set timeout"). + WithUserMessage(timeoutMessage).WithFatal() + } return result, nil } @@ -1341,21 +1367,53 @@ func (r *NnfWorkflowReconciler) waitForContainersToStart(ctx context.Context, wo } } + // Jobs are not running. Check to see if timeout elapsed and have k8s stop the jobs for us. + // If no timeout, then just requeue. if !running { + if timeoutElapsed { + r.Log.Info("container prerun timeout occurred, attempting to set MPIJob activeDeadlineSeconds") + if err := r.setMPIJobTimeout(ctx, workflow, mpiJob, time.Duration(1*time.Millisecond)); err != nil { + return nil, dwsv1alpha2.NewResourceError("could not set timeout on MPIJobs"). + WithUserMessage(timeoutMessage).WithError(err).WithFatal() + } else { + return nil, dwsv1alpha2.NewResourceError("MPIJob timeout set").WithUserMessage(timeoutMessage).WithFatal() + } + } return Requeue(fmt.Sprintf("pending MPIJob start for workflow '%s', index: %d", workflow.Name, index)).after(2 * time.Second), nil } } else { jobList, err := r.getContainerJobs(ctx, workflow, index) if err != nil { + if timeoutElapsed { + return nil, dwsv1alpha2.NewResourceError("could not retrieve Jobs to set timeout"). + WithUserMessage(timeoutMessage).WithFatal().WithError(err) + } return nil, err } // Jobs may not be queryable yet, so requeue if len(jobList.Items) < 1 { + // If timeout, don't allow a requeue and return an error + if timeoutElapsed { + return nil, dwsv1alpha2.NewResourceError("no Jobs found in JobList to set timeout"). 
+ WithUserMessage(timeoutMessage).WithFatal() + } return Requeue(fmt.Sprintf("pending job creation for workflow '%s', index: %d", workflow.Name, index)).after(2 * time.Second), nil } for _, job := range jobList.Items { + + // Attempt to set the timeout on all the Jobs in the list + if timeoutElapsed { + r.Log.Info("container prerun timeout occurred, attempting to set Job activeDeadlineSeconds") + if err := r.setJobTimeout(ctx, job, time.Duration(1*time.Millisecond)); err != nil { + return nil, dwsv1alpha2.NewResourceError("could not set timeout on MPIJobs"). + WithUserMessage(timeoutMessage).WithError(err).WithFatal() + } else { + continue + } + } + // If we have any conditions, the job already finished if len(job.Status.Conditions) > 0 { continue @@ -1366,6 +1424,11 @@ func (r *NnfWorkflowReconciler) waitForContainersToStart(ctx context.Context, wo return Requeue(fmt.Sprintf("pending container start for job '%s'", job.Name)).after(2 * time.Second), nil } } + + // Report the timeout error + if timeoutElapsed { + return nil, dwsv1alpha2.NewResourceError("job(s) timeout set").WithUserMessage(timeoutMessage).WithFatal() + } } return nil, nil @@ -1451,64 +1514,70 @@ func (r *NnfWorkflowReconciler) getMPIJobConditions(ctx context.Context, workflo return mpiJob, nil } -func (r *NnfWorkflowReconciler) waitForContainersToFinish(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) (*result, error) { - // Get profile to determine container job type (MPI or not) - profile, err := getContainerProfile(ctx, r.Client, workflow, index) - if err != nil { - return nil, err - } - timeout := time.Duration(profile.Data.PostRunTimeoutSeconds) * time.Second - - setTimeout := func(job batchv1.Job) error { - // If desired, set the ActiveDeadline on the job to kill pods. Use the job's creation - // timestamp to determine how long the job/pod has been running at this point. Then, add - // the desired timeout to that value. k8s Job's ActiveDeadLineSeconds will then - // terminate the pods once the deadline is hit. - if timeout > 0 && job.Spec.ActiveDeadlineSeconds == nil { - deadline := int64((metav1.Now().Sub(job.CreationTimestamp.Time) + timeout).Seconds()) - - // Update the job with the deadline - err := retry.RetryOnConflict(retry.DefaultRetry, func() error { - j := &batchv1.Job{ObjectMeta: metav1.ObjectMeta{Name: job.Name, Namespace: job.Namespace}} - if err := r.Get(ctx, client.ObjectKeyFromObject(j), j); err != nil { - return client.IgnoreNotFound(err) - } +func (r *NnfWorkflowReconciler) setJobTimeout(ctx context.Context, job batchv1.Job, timeout time.Duration) error { + // If desired, set the ActiveDeadline on the job to kill pods. Use the job's creation + // timestamp to determine how long the job/pod has been running at this point. Then, add + // the desired timeout to that value. k8s Job's ActiveDeadLineSeconds will then + // terminate the pods once the deadline is hit. 
+ if timeout > 0 && job.Spec.ActiveDeadlineSeconds == nil { + var deadline int64 + deadline = int64((metav1.Now().Sub(job.CreationTimestamp.Time) + timeout).Seconds()) + + // Update the job with the deadline + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + j := &batchv1.Job{ObjectMeta: metav1.ObjectMeta{Name: job.Name, Namespace: job.Namespace}} + if err := r.Get(ctx, client.ObjectKeyFromObject(j), j); err != nil { + return client.IgnoreNotFound(err) + } - j.Spec.ActiveDeadlineSeconds = &deadline - return r.Update(ctx, j) - }) + j.Spec.ActiveDeadlineSeconds = &deadline + return r.Update(ctx, j) + }) - if err != nil { - return dwsv1alpha2.NewResourceError("error updating job '%s' activeDeadlineSeconds:", job.Name) - } + if err != nil { + return dwsv1alpha2.NewResourceError("error updating job '%s' activeDeadlineSeconds:", job.Name) } + } - return nil + return nil +} + +func (r *NnfWorkflowReconciler) setMPIJobTimeout(ctx context.Context, workflow *dwsv1alpha2.Workflow, mpiJob *mpiv2beta1.MPIJob, timeout time.Duration) error { + // Set the ActiveDeadLineSeconds on each of the k8s jobs created by MPIJob/mpi-operator. We + // need to retrieve the jobs in a different way than non-MPI jobs since the jobs are created + // by the MPIJob. + jobList, err := r.getMPIJobChildrenJobs(ctx, workflow, mpiJob) + if err != nil { + return dwsv1alpha2.NewResourceError("setMPIJobTimeout: no MPIJob JobList found for workflow '%s'", workflow.Name).WithMajor() } - setMPITimeout := func(mpiJob *mpiv2beta1.MPIJob) error { - // Set the ActiveDeadLineSeconds on each of the k8s jobs created by MPIJob/mpi-operator. We - // need to retrieve the jobs in a different way than non-MPI jobs since the jobs are created - // by the MPIJob. - jobList, err := r.getMPIJobChildrenJobs(ctx, workflow, mpiJob) - if err != nil { - return dwsv1alpha2.NewResourceError("waitForContainersToFinish: no MPIJob JobList found for workflow '%s', index: %d", workflow.Name, index).WithMajor() - } + if len(jobList.Items) < 1 { + return dwsv1alpha2.NewResourceError("setMPIJobTimeout: no MPIJob jobs found for workflow '%s'", workflow.Name).WithMajor() + } - if len(jobList.Items) < 1 { - return dwsv1alpha2.NewResourceError("waitForContainersToFinish: no MPIJob jobs found for workflow '%s', index: %d", workflow.Name, index).WithMajor() + for _, job := range jobList.Items { + if err := r.setJobTimeout(ctx, job, timeout); err != nil { + return err } + } - for _, job := range jobList.Items { - if err := setTimeout(job); err != nil { - return err - } - } + return nil +} - return nil +func (r *NnfWorkflowReconciler) waitForContainersToFinish(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) (*result, error) { + // Get profile to determine container job type (MPI or not) + profile, err := getContainerProfile(ctx, r.Client, workflow, index) + if err != nil { + return nil, err + } + isMPIJob := profile.Data.MPISpec != nil + + timeout := time.Duration(0) + if profile.Data.PostRunTimeoutSeconds != nil { + timeout = time.Duration(*profile.Data.PostRunTimeoutSeconds) * time.Second } - if profile.Data.MPISpec != nil { + if isMPIJob { // We should expect at least 2 conditions: created and running mpiJob, result := r.getMPIJobConditions(ctx, workflow, index, 2) if result != nil { @@ -1525,7 +1594,7 @@ func (r *NnfWorkflowReconciler) waitForContainersToFinish(ctx context.Context, w } if !finished { - if err := setMPITimeout(mpiJob); err != nil { + if err := r.setMPIJobTimeout(ctx, workflow, mpiJob, timeout); err != nil { return nil, 
err } return Requeue(fmt.Sprintf("pending MPIJob completion for workflow '%s', index: %d", workflow.Name, index)).after(2 * time.Second), nil @@ -1545,7 +1614,7 @@ func (r *NnfWorkflowReconciler) waitForContainersToFinish(ctx context.Context, w for _, job := range jobList.Items { // Jobs will have conditions when finished if len(job.Status.Conditions) <= 0 { - if err := setTimeout(job); err != nil { + if err := r.setJobTimeout(ctx, job, timeout); err != nil { return nil, err } return Requeue("pending container finish").after(2 * time.Second).withObject(&job), nil @@ -1562,8 +1631,15 @@ func (r *NnfWorkflowReconciler) checkContainersResults(ctx context.Context, work if err != nil { return nil, err } + isMPIJob := profile.Data.MPISpec != nil - if profile.Data.MPISpec != nil { + timeout := time.Duration(0) + if profile.Data.PostRunTimeoutSeconds != nil { + timeout = time.Duration(*profile.Data.PostRunTimeoutSeconds) * time.Second + } + timeoutMessage := fmt.Sprintf("user container(s) failed to complete after %d seconds", int(timeout.Seconds())) + + if isMPIJob { mpiJob, result := r.getMPIJobConditions(ctx, workflow, index, 2) if result != nil { return result, nil @@ -1571,7 +1647,12 @@ func (r *NnfWorkflowReconciler) checkContainersResults(ctx context.Context, work for _, c := range mpiJob.Status.Conditions { if c.Type == mpiv2beta1.JobFailed { - return nil, dwsv1alpha2.NewResourceError("container MPIJob %s (%s): %s", c.Type, c.Reason, c.Message).WithFatal() + if c.Reason == "DeadlineExceeded" { + return nil, dwsv1alpha2.NewResourceError("container MPIJob %s (%s): %s", c.Type, c.Reason, c.Message).WithFatal(). + WithUserMessage(timeoutMessage) + } + return nil, dwsv1alpha2.NewResourceError("container MPIJob %s (%s): %s", c.Type, c.Reason, c.Message).WithFatal(). 
+ WithUserMessage("user container(s) failed to run successfully after %d attempts", profile.Data.RetryLimit+1) } } } else { @@ -1587,6 +1668,9 @@ func (r *NnfWorkflowReconciler) checkContainersResults(ctx context.Context, work for _, job := range jobList.Items { for _, condition := range job.Status.Conditions { if condition.Type != batchv1.JobComplete { + if condition.Reason == "DeadlineExceeded" { + return nil, dwsv1alpha2.NewResourceError("container job %s (%s): %s", condition.Type, condition.Reason, condition.Message).WithFatal().WithUserMessage(timeoutMessage) + } return nil, dwsv1alpha2.NewResourceError("container job %s (%s): %s", condition.Type, condition.Reason, condition.Message).WithFatal() } } diff --git a/go.mod b/go.mod index ea158d25d..714c3d127 100644 --- a/go.mod +++ b/go.mod @@ -14,6 +14,7 @@ require ( github.com/onsi/ginkgo/v2 v2.9.1 github.com/onsi/gomega v1.27.3 github.com/prometheus/client_golang v1.14.0 + go.openly.dev/pointy v1.3.0 go.uber.org/zap v1.24.0 golang.org/x/sync v0.1.0 k8s.io/api v0.26.1 diff --git a/go.sum b/go.sum index 0d7b65fa6..80886c8a1 100644 --- a/go.sum +++ b/go.sum @@ -233,8 +233,8 @@ github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5 github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ= @@ -247,6 +247,8 @@ go.chromium.org/luci v0.0.0-20230227223707-c4460eb434d8/go.mod h1:vTpW7gzqLQ9mhM go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk= go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= +go.openly.dev/pointy v1.3.0 h1:keht3ObkbDNdY8PWPwB7Kcqk+MAlNStk5kXZTxukE68= +go.openly.dev/pointy v1.3.0/go.mod h1:rccSKiQDQ2QkNfSVT2KG8Budnfhf3At8IWxy/3ElYes= go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= diff --git a/vendor/go.openly.dev/pointy/.gitignore b/vendor/go.openly.dev/pointy/.gitignore new file mode 100644 index 000000000..f1c181ec9 --- /dev/null +++ b/vendor/go.openly.dev/pointy/.gitignore @@ -0,0 +1,12 @@ +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib + +# Test binary, build with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out diff --git a/vendor/go.openly.dev/pointy/LICENSE b/vendor/go.openly.dev/pointy/LICENSE new file mode 100644 index 000000000..4f639d4b8 --- /dev/null +++ b/vendor/go.openly.dev/pointy/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2018 Mateusz Wielbut + 
+Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/vendor/go.openly.dev/pointy/README.md b/vendor/go.openly.dev/pointy/README.md new file mode 100644 index 000000000..1426a5a70 --- /dev/null +++ b/vendor/go.openly.dev/pointy/README.md @@ -0,0 +1,154 @@ +# pointy + +Simple helper functions to provide a shorthand to get a pointer to a variable holding a constant...because it's annoying when you have to do it hundreds of times in unit tests: + +```golang + +val := 42 +pointerToVal := &val +// vs. +pointerToVal := pointy.Int(42) // if using Go 1.17 or earlier w/o generics +pointerToVal := pointy.Pointer(42) // if using Go 1.18+ w/ generics +``` + +### New in release 2.0.0 + +🚨 Breaking change + +Package has changed to `go.openly.dev`. Please use +``` +import "go.openly.dev/pointy" +``` + +### New in release 1.2.0 + +Generic implementation of the pointer-to-value and value-to-pointer functions. *Requires Go 1.18+.* +The type-specific functions are still available for backwards-compatibility. + +```golang +pointerToInt := pointy.Pointer(42) +pointerToString := pointy.Pointer("foo") +// then later in your code.. +intValue := pointy.PointerValue(pointerToInt, 99) +stringValue := pointy.PointerValue(pointerToString, "bar") +``` + +Convenience functions to safely compare pointers by their dereferenced values: + +```golang +// when both values are pointers +a := pointy.Int(1) +b := pointy.Int(1) +if pointy.PointersValueEqual(a, b) { + fmt.Println("a and b contain equal dereferenced values") +} + +// or if just one is a pointer +a := pointy.Int(1) +b := 1 +if pointy.PointerValueEqual(a, b) { + fmt.Println("a and b contain equal dereferenced values") +} +``` + +### New in release 1.1.0 + +Additional helper functions have been added to safely dereference pointers +or return a fallback value: + +```golang +val := 42 +pointerToVal := &val +// then later in your code.. 
+myVal := pointy.IntValue(pointerToVal, 99) // returns 42 (or 99 if pointerToVal was nil) +``` + +## GoDoc + +[https://godoc.org/github.com/openly-engineering/pointy](https://pkg.go.dev/github.com/openly-engineering/pointy) + +## Installation + +`go get go.openly.dev/pointy` + +## Example + +```golang +package main + +import ( + "fmt" + + "go.openly.dev/pointy" +) + +func main() { + foo := pointy.Pointer(2018) + fmt.Println("foo is a pointer to:", *foo) + + bar := pointy.Pointer("point to me") + fmt.Println("bar is a pointer to:", *bar) + + // get the value back out (new in v1.1.0) + barVal := pointy.PointerValue(bar, "empty!") + fmt.Println("bar's value is:", barVal) +} +``` + +## Available Functions + +`Pointer[T any](x T) *T` +`PointerValue[T any](p *T, fallback T) T` +`Bool(x bool) *bool` +`BoolValue(p *bool, fallback bool) bool` +`Byte(x byte) *byte` +`ByteValue(p *byte, fallback byte) byte` +`Complex128(x complex128) *complex128` +`Complex128Value(p *complex128, fallback complex128) complex128` +`Complex64(x complex64) *complex64` +`Complex64Value(p *complex64, fallback complex64) complex64` +`Float32(x float32) *float32` +`Float32Value(p *float32, fallback float32) float32` +`Float64(x float64) *float64` +`Float64Value(p *float64, fallback float64) float64` +`Int(x int) *int` +`IntValue(p *int, fallback int) int` +`Int8(x int8) *int8` +`Int8Value(p *int8, fallback int8) int8` +`Int16(x int16) *int16` +`Int16Value(p *int16, fallback int16) int16` +`Int32(x int32) *int32` +`Int32Value(p *int32, fallback int32) int32` +`Int64(x int64) *int64` +`Int64Value(p *int64, fallback int64) int64` +`Uint(x uint) *uint` +`UintValue(p *uint, fallback uint) uint` +`Uint8(x uint8) *uint8` +`Uint8Value(p *uint8, fallback uint8) uint8` +`Uint16(x uint16) *uint16` +`Uint16Value(p *uint16, fallback uint16) uint16` +`Uint32(x uint32) *uint32` +`Uint32Value(p *uint32, fallback uint32) uint32` +`Uint64(x uint64) *uint64` +`Uint64Value(p *uint64, fallback uint64) uint64` +`String(x string) *string` +`StringValue(p *string, fallback string) string` +`Rune(x rune) *rune` +`RuneValue(p *rune, fallback rune) rune` +`PointersValueEqual[T comparable](a *T, b *T) bool` +`PointerValueEqual[T comparable](a *T, b T) bool` +## Motivation + +Creating pointers to literal constant values is useful, especially in unit tests. Go doesn't support simply using the address operator (&) to reference the location of e.g. `value := &int64(42)` so we're forced to [create](https://stackoverflow.com/questions/35146286/find-address-of-constant-in-go/35146856#35146856) [little](https://stackoverflow.com/questions/34197248/how-can-i-store-reference-to-the-result-of-an-operation-in-go/34197367#34197367) [workarounds](https://stackoverflow.com/questions/30716354/how-do-i-do-a-literal-int64-in-go/30716481#30716481). A common solution is to create a helper function: + +```golang +func createInt64Pointer(x int64) *int64 { + return &x +} +// now you can create a pointer to 42 inline +value := createInt64Pointer(42) +``` + +This package provides a library of these simple little helper functions for every native Go primitive. + +Made @ Openly. [Join us](https://careers.openly.com/) and use Go to build cool stuff. 
diff --git a/vendor/go.openly.dev/pointy/comparison.go b/vendor/go.openly.dev/pointy/comparison.go new file mode 100644 index 000000000..4541ab1ff --- /dev/null +++ b/vendor/go.openly.dev/pointy/comparison.go @@ -0,0 +1,25 @@ +package pointy + +// PointersValueEqual returns true if both pointer parameters are nil or contain the same dereferenced value. +func PointersValueEqual[T comparable](a *T, b *T) bool { + if a == nil && b == nil { + return true + } + if a != nil && b != nil && *a == *b { + return true + } + + return false +} + +// PointerValueEqual returns true if the pointer parameter is not nil and contains the same dereferenced value as the value parameter. +func PointerValueEqual[T comparable](a *T, b T) bool { + if a == nil { + return false + } + if *a == b { + return true + } + + return false +} diff --git a/vendor/go.openly.dev/pointy/pointy.go b/vendor/go.openly.dev/pointy/pointy.go new file mode 100644 index 000000000..0bbe4988c --- /dev/null +++ b/vendor/go.openly.dev/pointy/pointy.go @@ -0,0 +1,250 @@ +// Package pointy is a set of simple helper functions to provide a shorthand to +// get a pointer to a variable holding a constant. +package pointy + +// Bool returns a pointer to a variable holding the supplied bool constant +func Bool(x bool) *bool { + return &x +} + +// BoolValue returns the bool value pointed to by p or fallback if p is nil +func BoolValue(p *bool, fallback bool) bool { + if p == nil { + return fallback + } + return *p +} + +// Byte returns a pointer to a variable holding the supplied byte constant +func Byte(x byte) *byte { + return &x +} + +// ByteValue returns the byte value pointed to by p or fallback if p is nil +func ByteValue(p *byte, fallback byte) byte { + if p == nil { + return fallback + } + return *p +} + +// Complex128 returns a pointer to a variable holding the supplied complex128 constant +func Complex128(x complex128) *complex128 { + return &x +} + +// Complex128Value returns the complex128 value pointed to by p or fallback if p is nil +func Complex128Value(p *complex128, fallback complex128) complex128 { + if p == nil { + return fallback + } + return *p +} + +// Complex64 returns a pointer to a variable holding the supplied complex64 constant +func Complex64(x complex64) *complex64 { + return &x +} + +// Complex64Value returns the complex64 value pointed to by p or fallback if p is nil +func Complex64Value(p *complex64, fallback complex64) complex64 { + if p == nil { + return fallback + } + return *p +} + +// Float32 returns a pointer to a variable holding the supplied float32 constant +func Float32(x float32) *float32 { + return &x +} + +// Float32Value returns the float32 value pointed to by p or fallback if p is nil +func Float32Value(p *float32, fallback float32) float32 { + if p == nil { + return fallback + } + return *p +} + +// Float64 returns a pointer to a variable holding the supplied float64 constant +func Float64(x float64) *float64 { + return &x +} + +// Float64Value returns the float64 value pointed to by p or fallback if p is nil +func Float64Value(p *float64, fallback float64) float64 { + if p == nil { + return fallback + } + return *p +} + +// Int returns a pointer to a variable holding the supplied int constant +func Int(x int) *int { + return &x +} + +// IntValue returns the int value pointed to by p or fallback if p is nil +func IntValue(p *int, fallback int) int { + if p == nil { + return fallback + } + return *p +} + +// Int8 returns a pointer to a variable holding the supplied int8 constant +func Int8(x int8) *int8 
{ + return &x +} + +// Int8Value returns the int8 value pointed to by p or fallback if p is nil +func Int8Value(p *int8, fallback int8) int8 { + if p == nil { + return fallback + } + return *p +} + +// Int16 returns a pointer to a variable holding the supplied int16 constant +func Int16(x int16) *int16 { + return &x +} + +// Int16Value returns the int16 value pointed to by p or fallback if p is nil +func Int16Value(p *int16, fallback int16) int16 { + if p == nil { + return fallback + } + return *p +} + +// Int32 returns a pointer to a variable holding the supplied int32 constant +func Int32(x int32) *int32 { + return &x +} + +// Int32Value returns the int32 value pointed to by p or fallback if p is nil +func Int32Value(p *int32, fallback int32) int32 { + if p == nil { + return fallback + } + return *p +} + +// Int64 returns a pointer to a variable holding the supplied int64 constant +func Int64(x int64) *int64 { + return &x +} + +// Int64Value returns the int64 value pointed to by p or fallback if p is nil +func Int64Value(p *int64, fallback int64) int64 { + if p == nil { + return fallback + } + return *p +} + +// Uint returns a pointer to a variable holding the supplied uint constant +func Uint(x uint) *uint { + return &x +} + +// UintValue returns the uint value pointed to by p or fallback if p is nil +func UintValue(p *uint, fallback uint) uint { + if p == nil { + return fallback + } + return *p +} + +// Uint8 returns a pointer to a variable holding the supplied uint8 constant +func Uint8(x uint8) *uint8 { + return &x +} + +// Uint8Value returns the uint8 value pointed to by p or fallback if p is nil +func Uint8Value(p *uint8, fallback uint8) uint8 { + if p == nil { + return fallback + } + return *p +} + +// Uint16 returns a pointer to a variable holding the supplied uint16 constant +func Uint16(x uint16) *uint16 { + return &x +} + +// Uint16Value returns the uint16 value pointed to by p or fallback if p is nil +func Uint16Value(p *uint16, fallback uint16) uint16 { + if p == nil { + return fallback + } + return *p +} + +// Uint32 returns a pointer to a variable holding the supplied uint32 constant +func Uint32(x uint32) *uint32 { + return &x +} + +// Uint32Value returns the uint32 value pointed to by p or fallback if p is nil +func Uint32Value(p *uint32, fallback uint32) uint32 { + if p == nil { + return fallback + } + return *p +} + +// Uint64 returns a pointer to a variable holding the supplied uint64 constant +func Uint64(x uint64) *uint64 { + return &x +} + +// Uint64Value returns the uint64 value pointed to by p or fallback if p is nil +func Uint64Value(p *uint64, fallback uint64) uint64 { + if p == nil { + return fallback + } + return *p +} + +// String returns a pointer to a variable holding the supplied string constant +func String(x string) *string { + return &x +} + +// StringValue returns the string value pointed to by p or fallback if p is nil +func StringValue(p *string, fallback string) string { + if p == nil { + return fallback + } + return *p +} + +// Rune returns a pointer to a variable holding the supplied rune constant +func Rune(x rune) *rune { + return &x +} + +// RuneValue returns the rune value pointed to by p or fallback if p is nil +func RuneValue(p *rune, fallback rune) rune { + if p == nil { + return fallback + } + return *p +} + +// Pointer returns a pointer to a variable holding the supplied T constant +func Pointer[T any](x T) *T { + return &x +} + +// PointerValue returns the T value pointed to by p or fallback if p is nil +func PointerValue[T any](p *T, 
fallback T) T { + if p == nil { + return fallback + } + return *p +} diff --git a/vendor/modules.txt b/vendor/modules.txt index b5f76cbaa..5c84c5aa7 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -287,6 +287,9 @@ go.opencensus.io/internal go.opencensus.io/trace go.opencensus.io/trace/internal go.opencensus.io/trace/tracestate +# go.openly.dev/pointy v1.3.0 +## explicit; go 1.18 +go.openly.dev/pointy # go.uber.org/atomic v1.11.0 ## explicit; go 1.18 go.uber.org/atomic From 2ab465a12c1a07fc1a8f10c7606a8613e5cec947 Mon Sep 17 00:00:00 2001 From: Blake Devcich Date: Wed, 6 Sep 2023 14:09:33 -0500 Subject: [PATCH 18/19] Containers: Fix mpirun issue where it cannot contact the workers There is an intermittent issue where mpirun cannot contact the workers even though nslookup can successfully resolve their DNS hostnames in the Init Container. This is seen somewhat infrequently, but it has happened often enough to be a problem. The end result causes the user containers to restart (if restartLimit > 0), and it always seems to work on the second try. This appears to solve the issue by having the Init Container use mpirun to contact the workers and simply get their hostnames. This replaces the use of nslookup and ensures that mpirun can be successful on the launcher. To support this, the Init Container must run as the given UID/GID rather than root. It also speeds up container start times as we only need to run 1 Init Container for all of the workers rather than an Init Container for each worker. I have not been able to reproduce the original error using int-test, which would occasionally catch this. Signed-off-by: Blake Devcich --- ...f_workflow_controller_container_helpers.go | 66 ++++++++++++------- 1 file changed, 41 insertions(+), 25 deletions(-) diff --git a/controllers/nnf_workflow_controller_container_helpers.go b/controllers/nnf_workflow_controller_container_helpers.go index b23ddace7..db0d499ee 100644 --- a/controllers/nnf_workflow_controller_container_helpers.go +++ b/controllers/nnf_workflow_controller_container_helpers.go @@ -30,6 +30,7 @@ import ( "github.com/go-logr/logr" mpicommonv1 "github.com/kubeflow/common/pkg/apis/common/v1" mpiv2beta1 "github.com/kubeflow/mpi-operator/pkg/apis/kubeflow/v2beta1" + "go.openly.dev/pointy" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -105,11 +106,6 @@ func (c *nnfUserContainer) createMPIJob() error { // Run the launcher on the first NNF node launcherSpec.NodeSelector = map[string]string{"kubernetes.io/hostname": c.nnfNodes[0]} - // Use initContainers to ensure the workers are up and discoverable before running the launcher command - for i := range c.nnfNodes { - c.addInitContainerWorkerWait(launcherSpec, i) - } - // Target all the NNF nodes for the workers replicas := int32(len(c.nnfNodes)) worker.Replicas = &replicas @@ -152,6 +148,11 @@ func (c *nnfUserContainer) createMPIJob() error { c.applyPermissions(launcherSpec, &mpiJob.Spec, false) c.applyPermissions(workerSpec, &mpiJob.Spec, true) + // Use an Init Container to test the waters for mpi - ensure it can contact the workers before + // the launcher tries it. Since this runs as the UID/GID, this needs to happen after the + // passwd Init Container.
+ c.addInitContainerWorkerWait(launcherSpec, len(c.nnfNodes)) + // Get the ports from the port manager ports, err := c.getHostPorts() if err != nil { @@ -303,37 +304,55 @@ exit 0 }) } -func (c *nnfUserContainer) addInitContainerWorkerWait(spec *corev1.PodSpec, worker int) { - // Add an initContainer to ensure that a worker pod is up and discoverable via dns. This - // assumes nslookup is available in the container. The nnf-mfu image provides this. - script := `# use nslookup to contact workers -echo "contacting $HOST..." +func (c *nnfUserContainer) addInitContainerWorkerWait(spec *corev1.PodSpec, numWorkers int) { + // Add an initContainer to ensure that the worker pods are up and discoverable via mpirun. + script := `# use mpirun to contact workers +echo "contacting $HOSTS..." for i in $(seq 1 100); do sleep 1 echo "attempt $i of 100..." - nslookup $HOST + echo "mpirun -H $HOSTS hostname" + mpirun -H $HOSTS hostname if [ $? -eq 0 ]; then - echo "successfully contacted $HOST; done" + echo "successfully contacted $HOSTS; done" exit 0 fi done -echo "failed to contact $HOST" +echo "failed to contact $HOSTS" exit 1 ` - // Build the worker's hostname.domain (e.g. nnf-container-example-worker-0.nnf-container-example-worker.default.svc) - // This name comes from mpi-operator. - host := strings.ToLower(fmt.Sprintf( - "%s-worker-%d.%s-worker.%s.svc", c.workflow.Name, worker, c.workflow.Name, c.workflow.Namespace)) - script = strings.ReplaceAll(script, "$HOST", host) + // Build a slice of the workers' hostname.domain (e.g. nnf-container-example-worker-0.nnf-container-example-worker.default.svc) + // This hostname comes from mpi-operator. + workers := []string{} + for i := 0; i < numWorkers; i++ { + host := strings.ToLower(fmt.Sprintf( + "%s-worker-%d.%s-worker.%s.svc", c.workflow.Name, i, c.workflow.Name, c.workflow.Namespace)) + workers = append(workers, host) + } + // mpirun takes a comma separated list of hosts (-H) + script = strings.ReplaceAll(script, "$HOSTS", strings.Join(workers, ",")) spec.InitContainers = append(spec.InitContainers, corev1.Container{ - Name: fmt.Sprintf("mpi-wait-for-worker-%d", worker), + Name: fmt.Sprintf("mpi-wait-for-worker-%d", numWorkers), Image: spec.Containers[0].Image, Command: []string{ "/bin/sh", "-c", script, }, + // mpirun needs this environment variable to use DNS hostnames + Env: []corev1.EnvVar{{Name: "OMPI_MCA_orte_keep_fqdn_hostnames", Value: "true"}}, + // Run this initContainer as the same UID/GID as the launcher + SecurityContext: &corev1.SecurityContext{ + RunAsUser: &c.uid, + RunAsGroup: &c.gid, + RunAsNonRoot: pointy.Bool(true), + }, + // And use the necessary volumes to support the UID/GID + VolumeMounts: []corev1.VolumeMount{ + {MountPath: "/etc/passwd", Name: "passwd", SubPath: "passwd"}, + {MountPath: "/home/mpiuser/.ssh", Name: "ssh-auth"}, + }, }) } @@ -389,16 +408,13 @@ func (c *nnfUserContainer) applyPermissions(spec *corev1.PodSpec, mpiJobSpec *mp if !worker { container.SecurityContext.RunAsUser = &c.uid container.SecurityContext.RunAsGroup = &c.gid - nonRoot := true - container.SecurityContext.RunAsNonRoot = &nonRoot - su := false - container.SecurityContext.AllowPrivilegeEscalation = &su + container.SecurityContext.RunAsNonRoot = pointy.Bool(true) + container.SecurityContext.AllowPrivilegeEscalation = pointy.Bool(false) } else { // For the worker nodes, we need to ensure we have the appropriate linux capabilities to // allow for ssh access for mpirun. Drop all capabilities and only add what is // necessary. 
Only do this if the Capabilities have not been set by the user. - su := true - container.SecurityContext.AllowPrivilegeEscalation = &su + container.SecurityContext.AllowPrivilegeEscalation = pointy.Bool(true) if container.SecurityContext.Capabilities == nil { container.SecurityContext.Capabilities = &corev1.Capabilities{ Drop: []corev1.Capability{"ALL"}, From 58779235828fde93ef33129558660eda0329df61 Mon Sep 17 00:00:00 2001 From: matthew-richerson <82597529+matthew-richerson@users.noreply.github.com> Date: Thu, 7 Sep 2023 13:18:45 -0500 Subject: [PATCH 19/19] Use GetUserMessage() for resource errors (#223) * Use GetUserMessage() for resource errors Use the GetUserMessage() receiver function when getting the user message for a resource error. This will properly prefix the message with the error type. Signed-off-by: Matt Richerson * re-vendor Signed-off-by: Matt Richerson --------- Signed-off-by: Matt Richerson --- controllers/nnf_workflow_controller_helpers.go | 2 +- go.mod | 2 +- go.sum | 4 ++-- .../HewlettPackard/dws/api/v1alpha2/resource_error.go | 4 ++++ vendor/modules.txt | 2 +- 5 files changed, 9 insertions(+), 5 deletions(-) diff --git a/controllers/nnf_workflow_controller_helpers.go b/controllers/nnf_workflow_controller_helpers.go index 91380f706..87767188d 100644 --- a/controllers/nnf_workflow_controller_helpers.go +++ b/controllers/nnf_workflow_controller_helpers.go @@ -839,7 +839,7 @@ func handleWorkflowError(err error, driverStatus *dwsv1alpha2.WorkflowDriverStat driverStatus.Error = err.Error() } else { driverStatus.Status = status - driverStatus.Message = e.UserMessage + driverStatus.Message = e.GetUserMessage() driverStatus.Error = e.Error() } } else { diff --git a/go.mod b/go.mod index 714c3d127..918fc5066 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/NearNodeFlash/nnf-sos go 1.19 require ( - github.com/HewlettPackard/dws v0.0.1-0.20230815174614-998c6ad6bd1d + github.com/HewlettPackard/dws v0.0.1-0.20230907181649-2f6d9fca4249 github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20230613180840-6178f2b04900 github.com/NearNodeFlash/nnf-ec v0.0.0-20230526161255-cfb2d89b35d7 github.com/ghodss/yaml v1.0.0 diff --git a/go.sum b/go.sum index 80886c8a1..2089d52e3 100644 --- a/go.sum +++ b/go.sum @@ -1,8 +1,8 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/HewlettPackard/dws v0.0.1-0.20230815174614-998c6ad6bd1d h1:QZKgq7r+4ZUOGV5IPT/HUYWxVMT7vLrYmOV5yvwB6IA= -github.com/HewlettPackard/dws v0.0.1-0.20230815174614-998c6ad6bd1d/go.mod h1:YvNzcgAPmwhl/YQj6dMwsB9OpwbI5bp/41kINfFiXX8= +github.com/HewlettPackard/dws v0.0.1-0.20230907181649-2f6d9fca4249 h1:t5ibQcHcEL374lxAVVXtHqXOZbPvDVSDSrrAVl7yzBA= +github.com/HewlettPackard/dws v0.0.1-0.20230907181649-2f6d9fca4249/go.mod h1:YvNzcgAPmwhl/YQj6dMwsB9OpwbI5bp/41kINfFiXX8= github.com/HewlettPackard/structex v1.0.4 h1:RVTdN5FWhDWr1IkjllU8wxuLjISo4gr6u5ryZpzyHcA= github.com/HewlettPackard/structex v1.0.4/go.mod h1:3frC4RY/cPsP/4+N8rkxsNAGlQwHV+zDC7qvrN+N+rE= github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20230613180840-6178f2b04900 h1:jOrP2H+D5amgHIONcucYS3/kJm6QfmqAG23Ke7elunI= diff --git a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/resource_error.go b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/resource_error.go index 49ba6aa8a..fdcac3588 100644 --- 
a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/resource_error.go +++ b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/resource_error.go @@ -174,6 +174,10 @@ func (e *ResourceErrorInfo) Error() string { return fmt.Sprintf("%s error: %s", strings.ToLower(string(e.Type)), message) } +func (e *ResourceErrorInfo) GetUserMessage() string { + return fmt.Sprintf("%s error: %s", string(e.Type), e.UserMessage) +} + func (e *ResourceError) SetResourceErrorAndLog(err error, log logr.Logger) { e.SetResourceError(err) if err == nil { diff --git a/vendor/modules.txt b/vendor/modules.txt index 5c84c5aa7..e5a25c19d 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1,4 +1,4 @@ -# github.com/HewlettPackard/dws v0.0.1-0.20230815174614-998c6ad6bd1d +# github.com/HewlettPackard/dws v0.0.1-0.20230907181649-2f6d9fca4249 ## explicit; go 1.19 github.com/HewlettPackard/dws/api/v1alpha2 github.com/HewlettPackard/dws/config/crd/bases
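As a footnote to the final patch: the switch to GetUserMessage() means the workflow driver status message now carries the error type prefix rather than the bare user message. The sketch below is an illustrative, self-contained rendering of that pattern under simplified, assumed names (resourceErrorInfo and its fields are stand-ins, not the real dws API beyond what the diff above shows).

```golang
package main

import (
	"fmt"
	"strings"
)

// resourceErrorInfo is a simplified stand-in for the error type touched in the
// patch above: Type classifies the error, Message holds the detailed error
// text, and UserMessage holds the sanitized text meant for end users.
type resourceErrorInfo struct {
	Type        string
	Message     string
	UserMessage string
}

// Error follows the convention shown in the diff: a lowercased type prefix on
// the detailed message.
func (e *resourceErrorInfo) Error() string {
	return fmt.Sprintf("%s error: %s", strings.ToLower(e.Type), e.Message)
}

// GetUserMessage prefixes the user-facing message with the error type, which
// is what the workflow controller now copies into the driver status message.
func (e *resourceErrorInfo) GetUserMessage() string {
	return fmt.Sprintf("%s error: %s", e.Type, e.UserMessage)
}

func main() {
	err := &resourceErrorInfo{
		Type:        "User",
		Message:     "permission denied while creating directory",
		UserMessage: "could not create the requested directory",
	}

	// Before the patch the controller used err.UserMessage directly; with
	// GetUserMessage() the driver status message gains the type prefix.
	fmt.Println(err.GetUserMessage()) // User error: could not create the requested directory
	fmt.Println(err.Error())          // user error: permission denied while creating directory
}
```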