diff --git a/.vscode/launch.json b/.vscode/launch.json index e9754e1ac..0812f0d7a 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -23,12 +23,10 @@ "mode": "test", "program": "${relativeFileDirname}", "args": [ - "-v=4", "-ginkgo.v", - "-ginkgo.progress" ], "env": { - "KUBEBUILDER_ASSETS": "${workspaceFolder}/bin/k8s/1.25.0-darwin-amd64", + "KUBEBUILDER_ASSETS": "${workspaceFolder}/bin/k8s/1.26.0-darwin-amd64", "GOMEGA_DEFAULT_EVENTUALLY_TIMEOUT": "10m", "GOMEGA_DEFAULT_EVENTUALLY_POLLING_INTERVAL": "100ms" }, diff --git a/Makefile b/Makefile index a336c4bd7..c0f63899b 100644 --- a/Makefile +++ b/Makefile @@ -282,6 +282,12 @@ LOCALBIN ?= $(shell pwd)/bin $(LOCALBIN): mkdir -p $(LOCALBIN) +.PHONY: clean-bin +clean-bin: + if [[ -d $(LOCALBIN) ]]; then \ + chmod -R u+w $(LOCALBIN) && rm -rf $(LOCALBIN); \ + fi + ## Tool Binaries KUSTOMIZE ?= $(LOCALBIN)/kustomize CONTROLLER_GEN ?= $(LOCALBIN)/controller-gen @@ -293,14 +299,17 @@ CONTROLLER_TOOLS_VERSION ?= v0.12.0 KUSTOMIZE_INSTALL_SCRIPT ?= "https://raw.githubusercontent.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh" .PHONY: kustomize -kustomize: $(KUSTOMIZE) ## Download kustomize locally if necessary. -$(KUSTOMIZE): $(LOCALBIN) - test -s $(LOCALBIN)/kustomize || { curl -s $(KUSTOMIZE_INSTALL_SCRIPT) | bash -s -- $(subst v,,$(KUSTOMIZE_VERSION)) $(LOCALBIN); } +kustomize: $(LOCALBIN) ## Download kustomize locally if necessary. + if [[ ! -s $(LOCALBIN)/kustomize || $$($(LOCALBIN)/kustomize version | awk '{print $$1}' | awk -F/ '{print $$2}') != $(KUSTOMIZE_VERSION) ]]; then \ + rm -f $(LOCALBIN)/kustomize && \ + { curl -s $(KUSTOMIZE_INSTALL_SCRIPT) | bash -s -- $(subst v,,$(KUSTOMIZE_VERSION)) $(LOCALBIN); }; \ + fi .PHONY: controller-gen -controller-gen: $(CONTROLLER_GEN) ## Download controller-gen locally if necessary. -$(CONTROLLER_GEN): $(LOCALBIN) - test -s $(LOCALBIN)/controller-gen || GOBIN=$(LOCALBIN) go install sigs.k8s.io/controller-tools/cmd/controller-gen@$(CONTROLLER_TOOLS_VERSION) +controller-gen: $(LOCALBIN) ## Download controller-gen locally if necessary. + if [[ ! -s $(LOCALBIN)/controller-gen || $$($(LOCALBIN)/controller-gen --version | awk '{print $$2}') != $(CONTROLLER_TOOLS_VERSION) ]]; then \ + rm -f $(LOCALBIN)/controller-gen && GOBIN=$(LOCALBIN) go install sigs.k8s.io/controller-tools/cmd/controller-gen@$(CONTROLLER_TOOLS_VERSION); \ + fi .PHONY: envtest envtest: $(ENVTEST) ## Download envtest-setup locally if necessary. diff --git a/api/v1alpha1/nnf_access_types.go b/api/v1alpha1/nnf_access_types.go index fa63b98bb..103a22947 100644 --- a/api/v1alpha1/nnf_access_types.go +++ b/api/v1alpha1/nnf_access_types.go @@ -1,5 +1,5 @@ /* - * Copyright 2021, 2022 Hewlett Packard Enterprise Development LP + * Copyright 2021-2023 Hewlett Packard Enterprise Development LP * Other additional copyright holders may be indicated within. 
* * The entirety of this work is licensed under the Apache License, @@ -83,6 +83,7 @@ type NnfAccessStatus struct { //+kubebuilder:printcolumn:name="DESIREDSTATE",type="string",JSONPath=".spec.desiredState",description="The desired state" //+kubebuilder:printcolumn:name="STATE",type="string",JSONPath=".status.state",description="The current state" //+kubebuilder:printcolumn:name="READY",type="boolean",JSONPath=".status.ready",description="Whether the state has been achieved" +//+kubebuilder:printcolumn:name="ERROR",type="string",JSONPath=".status.error.severity" //+kubebuilder:printcolumn:name="AGE",type="date",JSONPath=".metadata.creationTimestamp" // NnfAccess is the Schema for the nnfaccesses API diff --git a/api/v1alpha1/nnf_datamovement_types.go b/api/v1alpha1/nnf_datamovement_types.go index 09cea91dc..e5fc744e6 100644 --- a/api/v1alpha1/nnf_datamovement_types.go +++ b/api/v1alpha1/nnf_datamovement_types.go @@ -1,5 +1,5 @@ /* - * Copyright 2021, 2022 Hewlett Packard Enterprise Development LP + * Copyright 2021-2023 Hewlett Packard Enterprise Development LP * Other additional copyright holders may be indicated within. * * The entirety of this work is licensed under the Apache License, @@ -153,6 +153,8 @@ type NnfDataMovementStatus struct { // as it executes. The command status is polled at a certain frequency to avoid excessive // updates to the Data Movement resource. CommandStatus *NnfDataMovementCommandStatus `json:"commandStatus,omitempty"` + + dwsv1alpha2.ResourceError `json:",inline"` } // Types describing the various data movement status conditions. @@ -175,6 +177,7 @@ const ( //+kubebuilder:subresource:status //+kubebuilder:printcolumn:name="STATE",type="string",JSONPath=".status.state",description="Current state" //+kubebuilder:printcolumn:name="STATUS",type="string",JSONPath=".status.status",description="Status of current state" +//+kubebuilder:printcolumn:name="ERROR",type="string",JSONPath=".status.error.severity" //+kubebuilder:printcolumn:name="AGE",type="date",JSONPath=".metadata.creationTimestamp" // NnfDataMovement is the Schema for the datamovements API diff --git a/api/v1alpha1/nnf_node_storage_types.go b/api/v1alpha1/nnf_node_storage_types.go index 13d305671..579d8dca5 100644 --- a/api/v1alpha1/nnf_node_storage_types.go +++ b/api/v1alpha1/nnf_node_storage_types.go @@ -1,5 +1,5 @@ /* - * Copyright 2021, 2022 Hewlett Packard Enterprise Development LP + * Copyright 2021-2023 Hewlett Packard Enterprise Development LP * Other additional copyright holders may be indicated within. * * The entirety of this work is licensed under the Apache License, @@ -169,8 +169,6 @@ type NnfNodeStorageAllocationStatus struct { StoragePool NnfResourceStatus `json:"storagePool,omitempty"` FileSystem NnfResourceStatus `json:"fileSystem,omitempty"` - - Conditions []metav1.Condition `json:"conditions,omitempty"` } // LustreStorageStatus describes the Lustre target created here. 
@@ -196,6 +194,8 @@ func (ns *NnfNodeStorage) GetStatus() updater.Status[*NnfNodeStorageStatus] { } //+kubebuilder:object:root=true +//+kubebuilder:printcolumn:name="ERROR",type="string",JSONPath=".status.error.severity" +//+kubebuilder:printcolumn:name="AGE",type="date",JSONPath=".metadata.creationTimestamp" // NnfNodeStorageList contains a list of NNF Nodes type NnfNodeStorageList struct { diff --git a/api/v1alpha1/nnf_port_manager_types.go b/api/v1alpha1/nnf_port_manager_types.go index 2b987799c..fc447316d 100644 --- a/api/v1alpha1/nnf_port_manager_types.go +++ b/api/v1alpha1/nnf_port_manager_types.go @@ -60,12 +60,13 @@ type NnfPortManagerSpec struct { // AllocationStatus is the current status of a port requestor. A port that is in use by the respective owner // will have a status of "InUse". A port that is freed by the owner but not yet reclaimed by the port manager // will have a status of "Free". Any other status value indicates a failure of the port allocation. -// +kubebuilder:validation:Enum:=InUse;Free;InvalidConfiguration;InsufficientResources +// +kubebuilder:validation:Enum:=InUse;Free;Cooldown;InvalidConfiguration;InsufficientResources type NnfPortManagerAllocationStatusStatus string const ( NnfPortManagerAllocationStatusInUse NnfPortManagerAllocationStatusStatus = "InUse" NnfPortManagerAllocationStatusFree NnfPortManagerAllocationStatusStatus = "Free" + NnfPortManagerAllocationStatusCooldown NnfPortManagerAllocationStatusStatus = "Cooldown" NnfPortManagerAllocationStatusInvalidConfiguration NnfPortManagerAllocationStatusStatus = "InvalidConfiguration" NnfPortManagerAllocationStatusInsufficientResources NnfPortManagerAllocationStatusStatus = "InsufficientResources" // NOTE: You must ensure any new value is added to the above kubebuilder validation enum @@ -82,6 +83,10 @@ type NnfPortManagerAllocationStatus struct { // Status is the ownership status of the port. Status NnfPortManagerAllocationStatusStatus `json:"status"` + + // TimeUnallocated is when the port was unallocated. This is to ensure the proper cooldown + // duration. + TimeUnallocated *metav1.Time `json:"timeUnallocated,omitempty"` } // PortManagerStatus is the current status of the port manager. diff --git a/api/v1alpha1/nnf_storage_types.go b/api/v1alpha1/nnf_storage_types.go index 0ce009a1e..de508c8e7 100644 --- a/api/v1alpha1/nnf_storage_types.go +++ b/api/v1alpha1/nnf_storage_types.go @@ -1,5 +1,5 @@ /* - * Copyright 2021, 2022 Hewlett Packard Enterprise Development LP + * Copyright 2021-2023 Hewlett Packard Enterprise Development LP * Other additional copyright holders may be indicated within. * * The entirety of this work is licensed under the Apache License, @@ -22,6 +22,7 @@ package v1alpha1 import ( dwsv1alpha2 "github.com/HewlettPackard/dws/api/v1alpha2" "github.com/HewlettPackard/dws/utils/updater" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -56,6 +57,10 @@ type NnfStorageLustreSpec struct { // ExternalMgsNid is the NID of the MGS when a pre-existing MGS is // provided by the DataWarp directive (#DW). ExternalMgsNid string `json:"externalMgsNid,omitempty"` + + // PersistentMgsReference is a reference to a persistent storage that is providing + // the external MGS. 
+ PersistentMgsReference corev1.ObjectReference `json:"persistentMgsReference,omitempty"` } // NnfStorageAllocationSetSpec defines the details for an allocation set @@ -106,9 +111,6 @@ type NnfStorageAllocationSetStatus struct { // Health reflects the health of this allocation set Health NnfResourceHealthType `json:"health,omitempty"` - // Error is the human readable error string - Error string `json:"error,omitempty"` - // AllocationCount is the total number of allocations that currently // exist AllocationCount int `json:"allocationCount"` @@ -135,6 +137,8 @@ type NnfStorageStatus struct { //+kubebuilder:object:root=true //+kubebuilder:subresource:status +//+kubebuilder:printcolumn:name="AGE",type="date",JSONPath=".metadata.creationTimestamp" +//+kubebuilder:printcolumn:name="ERROR",type="string",JSONPath=".status.error.severity" // NnfStorage is the Schema for the storages API type NnfStorage struct { diff --git a/api/v1alpha1/nnfcontainerprofile_types.go b/api/v1alpha1/nnfcontainerprofile_types.go index 7803f792b..ad85f1176 100644 --- a/api/v1alpha1/nnfcontainerprofile_types.go +++ b/api/v1alpha1/nnfcontainerprofile_types.go @@ -40,26 +40,61 @@ type NnfContainerProfileData struct { // List of possible filesystems supported by this container profile Storages []NnfContainerProfileStorage `json:"storages,omitempty"` - // Stop any containers after X seconds once a workflow has transitioned to PostRun. Defaults to - // 0. A value of 0 disables this behavior. + // Containers are launched in the PreRun state. Allow this many seconds for the containers to + // start before declaring an error to the workflow. + // Defaults to 60 if not set. A value of 0 disables this behavior. + // +kubebuilder:default:=60 // +kubebuilder:validation:Minimum:=0 - PostRunTimeoutSeconds int64 `json:"postRunTimeoutSeconds,omitempty"` + PreRunTimeoutSeconds *int64 `json:"preRunTimeoutSeconds,omitempty"` + + // Containers are expected to complete in the PostRun State. Allow this many seconds for the + // containers to exit before declaring an error to the workflow. + // Defaults to 60 if not set. A value of 0 disables this behavior. + // +kubebuilder:default:=60 + // +kubebuilder:validation:Minimum:=0 + PostRunTimeoutSeconds *int64 `json:"postRunTimeoutSeconds,omitempty"` // Specifies the number of times a container will be retried upon a failure. A new pod is - // deployed on each retry. Defaults to 6 by kubernetes itself and must be set. A value of 0 + // deployed on each retry. Defaults to 6 by kubernetes itself and must be set. A value of 0 // disables retries. // +kubebuilder:validation:Minimum:=0 // +kubebuilder:default:=6 RetryLimit int32 `json:"retryLimit"` - // Spec to define the containers created from container profile. This is used for non-MPI - // containers. + // UserID specifies the user ID that is allowed to use this profile. If this is specified, only + // Workflows that have a matching user ID can select this profile. + UserID *uint32 `json:"userID,omitempty"` + + // GroupID specifies the group ID that is allowed to use this profile. If this is specified, + // only Workflows that have a matching group ID can select this profile. + GroupID *uint32 `json:"groupID,omitempty"` + + // Number of ports to open for communication with the user container. These ports are opened on + // the targeted NNF nodes and can be accessed outside of the k8s cluster (e.g. compute nodes).
+ // The requested ports are made available as environment variables inside the container and in + // the DWS workflow (NNF_CONTAINER_PORTS). + NumPorts int32 `json:"numPorts,omitempty"` + + // Spec to define the containers created from this profile. This is used for non-MPI containers. + // Refer to the K8s documentation for `PodSpec` for more definition: + // https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#PodSpec // Either this or MPISpec must be provided, but not both. Spec *corev1.PodSpec `json:"spec,omitempty"` - // MPIJobSpec to define the containers created from container profile. This is used for MPI - // containers via MPIJobs. See mpi-operator for more details. + // MPIJobSpec to define the MPI containers created from this profile. This functionality is + // provided via mpi-operator, a 3rd party tool to assist in running MPI applications across + // worker containers. // Either this or Spec must be provided, but not both. + // + // All the fields defined drive mpi-operator behavior. See the type definition of MPISpec for + // more detail: + // https://github.com/kubeflow/mpi-operator/blob/v0.4.0/pkg/apis/kubeflow/v2beta1/types.go#L137 + // + // Note: most of these fields are fully customizable with a few exceptions. These fields are + // overridden by NNF software to ensure proper behavior to interface with the DWS workflow + // - Replicas + // - RunPolicy.BackoffLimit (this is set above by `RetryLimit`) + // - Worker/Launcher.RestartPolicy MPISpec *mpiv2beta1.MPIJobSpec `json:"mpiSpec,omitempty"` } @@ -73,6 +108,11 @@ type NnfContainerProfileStorage struct { // the user not supplying this filesystem in the #DW directives //+kubebuilder:default:=false Optional bool `json:"optional"` + + // For DW_GLOBAL_ (global lustre) storages, the access mode must match what is configured in + // the LustreFilesystem resource for the namespace. Defaults to `ReadWriteMany` for global + // lustre, otherwise empty. + PVCMode corev1.PersistentVolumeAccessMode `json:"pvcMode,omitempty"` } // +kubebuilder:object:root=true @@ -82,7 +122,7 @@ type NnfContainerProfile struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata,omitempty"` - Data NnfContainerProfileData `json:"data,omitempty"` + Data NnfContainerProfileData `json:"data"` } // +kubebuilder:object:root=true diff --git a/api/v1alpha1/nnfcontainerprofile_webhook.go b/api/v1alpha1/nnfcontainerprofile_webhook.go index 73e786a73..e5d195cab 100644 --- a/api/v1alpha1/nnfcontainerprofile_webhook.go +++ b/api/v1alpha1/nnfcontainerprofile_webhook.go @@ -21,7 +21,9 @@ package v1alpha1 import ( "fmt" + "os" "reflect" + "strings" "github.com/kubeflow/mpi-operator/pkg/apis/kubeflow/v2beta1" "k8s.io/apimachinery/pkg/runtime" @@ -48,6 +50,55 @@ var _ webhook.Validator = &NnfContainerProfile{} func (r *NnfContainerProfile) ValidateCreate() error { nnfcontainerprofilelog.Info("validate create", "name", r.Name) + // If it's not pinned, then it's being made available for users to select + // and it must be in the correct namespace. 
+ profileNamespace := os.Getenv("NNF_CONTAINER_PROFILE_NAMESPACE") + if !r.Data.Pinned && r.GetNamespace() != profileNamespace { + err := fmt.Errorf("incorrect namespace for profile that is intended to be selected by users; the namespace should be '%s'", profileNamespace) + nnfstorageprofilelog.Error(err, "invalid") + return err + } + + if err := r.validateContent(); err != nil { + nnfcontainerprofilelog.Error(err, "invalid NnfContainerProfile resource") + return err + } + + return nil +} + +// ValidateUpdate implements webhook.Validator so a webhook will be registered for the type +func (r *NnfContainerProfile) ValidateUpdate(old runtime.Object) error { + nnfcontainerprofilelog.Info("validate update", "name", r.Name) + + obj := old.(*NnfContainerProfile) + + if obj.Data.Pinned != r.Data.Pinned { + err := fmt.Errorf("the pinned flag is immutable") + nnfcontainerprofilelog.Error(err, "invalid") + return err + } + + if obj.Data.Pinned { + // Allow metadata to be updated, for things like finalizers, + // ownerReferences, and labels, but do not allow Data to be + // updated. + if !reflect.DeepEqual(r.Data, obj.Data) { + err := fmt.Errorf("update on pinned resource not allowed") + nnfcontainerprofilelog.Error(err, "invalid") + return err + } + } + + if err := r.validateContent(); err != nil { + nnfcontainerprofilelog.Error(err, "invalid NnfContainerProfile resource") + return err + } + + return nil +} + +func (r *NnfContainerProfile) validateContent() error { mpiJob := r.Data.MPISpec != nil nonmpiJob := r.Data.Spec != nil @@ -60,11 +111,14 @@ func (r *NnfContainerProfile) ValidateCreate() error { } if mpiJob { - // PostRunTimeoutSeconds will update the Jobs' ActiveDeadlineSeconds once Postrun starts, so we can't set them both - if r.Data.MPISpec.RunPolicy.ActiveDeadlineSeconds != nil && r.Data.PostRunTimeoutSeconds > 0 { + // PreRunTimeoutSeconds will update the Jobs' ActiveDeadlineSeconds once PreRun timeout occurs, so we can't set them both + if r.Data.MPISpec.RunPolicy.ActiveDeadlineSeconds != nil && r.Data.PreRunTimeoutSeconds != nil && *r.Data.PreRunTimeoutSeconds > 0 { + return fmt.Errorf("both PreRunTimeoutSeconds and MPISpec.RunPolicy.ActiveDeadlineSeconds are provided - only 1 can be set") + } + // PostRunTimeoutSeconds will update the Jobs' ActiveDeadlineSeconds once PostRun starts, so we can't set them both + if r.Data.MPISpec.RunPolicy.ActiveDeadlineSeconds != nil && r.Data.PostRunTimeoutSeconds != nil && *r.Data.PostRunTimeoutSeconds > 0 { return fmt.Errorf("both PostRunTimeoutSeconds and MPISpec.RunPolicy.ActiveDeadlineSeconds are provided - only 1 can be set") } - // Don't allow users to set the backoff limit directly if r.Data.MPISpec.RunPolicy.BackoffLimit != nil && r.Data.RetryLimit > 0 { return fmt.Errorf("MPISpec.RunPolicy.BackoffLimit is set. 
Use RetryLimit instead") @@ -79,8 +133,12 @@ func (r *NnfContainerProfile) ValidateCreate() error { return fmt.Errorf("MPISpec.MPIReplicaSpecs.Worker must be present with at least 1 container defined") } } else { - // PostRunTimeoutSeconds will update the Jobs' ActiveDeadlineSeconds once Postrun starts, so we can't set them both - if r.Data.Spec.ActiveDeadlineSeconds != nil && r.Data.PostRunTimeoutSeconds > 0 { + // PreRunTimeoutSeconds will update the Jobs' ActiveDeadlineSeconds once PreRun timeout occurs, so we can't set them both + if r.Data.Spec.ActiveDeadlineSeconds != nil && r.Data.PreRunTimeoutSeconds != nil && *r.Data.PreRunTimeoutSeconds > 0 { + return fmt.Errorf("both PreRunTimeoutSeconds and Spec.ActiveDeadlineSeconds are provided - only 1 can be set") + } + // PostRunTimeoutSeconds will update the Jobs' ActiveDeadlineSeconds once PostRun starts, so we can't set them both + if r.Data.Spec.ActiveDeadlineSeconds != nil && r.Data.PostRunTimeoutSeconds != nil && *r.Data.PostRunTimeoutSeconds > 0 { return fmt.Errorf("both PostRunTimeoutSeconds and Spec.ActiveDeadlineSeconds are provided - only 1 can be set") } @@ -89,22 +147,12 @@ func (r *NnfContainerProfile) ValidateCreate() error { } } - return nil -} - -// ValidateUpdate implements webhook.Validator so a webhook will be registered for the type -func (r *NnfContainerProfile) ValidateUpdate(old runtime.Object) error { - nnfcontainerprofilelog.Info("validate update", "name", r.Name) - - obj := old.(*NnfContainerProfile) - if obj.Data.Pinned { - // Allow metadata to be updated, for things like finalizers, - // ownerReferences, and labels, but do not allow Data to be - // updated. - if !reflect.DeepEqual(r.Data, obj.Data) { - err := fmt.Errorf("update on pinned resource not allowed") - nnfcontainerprofilelog.Error(err, "invalid") - return err + // Ensure only DW_GLOBAL_ storages have PVCMode + for _, storage := range r.Data.Storages { + if !strings.HasPrefix(storage.Name, "DW_GLOBAL_") { + if storage.PVCMode != "" { + return fmt.Errorf("PVCMode is only supported for global lustre storages (DW_GLOBAL_)") + } } } @@ -114,7 +162,5 @@ func (r *NnfContainerProfile) ValidateUpdate(old runtime.Object) error { // ValidateDelete implements webhook.Validator so a webhook will be registered for the type func (r *NnfContainerProfile) ValidateDelete() error { nnfcontainerprofilelog.Info("validate delete", "name", r.Name) - - // TODO(user): fill in your validation logic upon object deletion. return nil } diff --git a/api/v1alpha1/nnfcontainerprofile_webhook_test.go b/api/v1alpha1/nnfcontainerprofile_webhook_test.go index a4e1f7ecc..ab0b00638 100644 --- a/api/v1alpha1/nnfcontainerprofile_webhook_test.go +++ b/api/v1alpha1/nnfcontainerprofile_webhook_test.go @@ -23,10 +23,12 @@ import ( "context" "os" + "github.com/google/uuid" mpicommonv1 "github.com/kubeflow/common/pkg/apis/common/v1" mpiv2beta1 "github.com/kubeflow/mpi-operator/pkg/apis/kubeflow/v2beta1" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" + "go.openly.dev/pointy" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" @@ -37,16 +39,21 @@ import ( var _ = Describe("NnfContainerProfile Webhook", func() { var ( - namespaceName = os.Getenv("NNF_CONTAINER_PROFILE_NAMESPACE") - pinnedResourceName = "test-pinned" - nnfProfile *NnfContainerProfile = nil + namespaceName = os.Getenv("NNF_CONTAINER_PROFILE_NAMESPACE") + otherNamespaceName string + otherNamespace *corev1.Namespace + + pinnedResourceName string + nnfProfile *NnfContainerProfile newProfile *NnfContainerProfile ) BeforeEach(func() { + pinnedResourceName = "test-pinned-" + uuid.NewString()[:8] + nnfProfile = &NnfContainerProfile{ ObjectMeta: metav1.ObjectMeta{ - Name: "test", + Name: "test-" + uuid.NewString()[:8], Namespace: namespaceName, }, Data: NnfContainerProfileData{ @@ -55,12 +62,32 @@ var _ = Describe("NnfContainerProfile Webhook", func() { {Name: "test"}, }, }, + Storages: []NnfContainerProfileStorage{ + {Name: "DW_JOB_storage", Optional: true}, + {Name: "DW_PERSISTENT_storage", Optional: true}, + {Name: "DW_GLOBAL_storage", Optional: true}, + }, }, } newProfile = &NnfContainerProfile{} }) + BeforeEach(func() { + otherNamespaceName = "other-" + uuid.NewString()[:8] + + otherNamespace = &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: otherNamespaceName, + }, + } + Expect(k8sClient.Create(context.TODO(), otherNamespace)).To(Succeed()) + }) + + AfterEach(func() { + Expect(k8sClient.Delete(context.TODO(), otherNamespace)).To(Succeed()) + }) + AfterEach(func() { if nnfProfile != nil { Expect(k8sClient.Delete(context.TODO(), nnfProfile)).To(Succeed()) @@ -71,28 +98,35 @@ var _ = Describe("NnfContainerProfile Webhook", func() { } }) + It("should accept system profiles in the designated namespace", func() { + Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) + }) + + It("should not accept system profiles that are not in the designated namespace", func() { + nnfProfile.ObjectMeta.Namespace = otherNamespaceName + err := k8sClient.Create(context.TODO(), nnfProfile) + Expect(err.Error()).To(MatchRegexp("webhook .* denied the request: incorrect namespace")) + nnfProfile = nil + }) + It("Should not allow a negative retryLimit", func() { - nnfProfile.ObjectMeta.Name = pinnedResourceName nnfProfile.Data.RetryLimit = -1 Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed()) nnfProfile = nil }) It("Should allow a zero retryLimit", func() { - nnfProfile.ObjectMeta.Name = pinnedResourceName nnfProfile.Data.RetryLimit = 0 Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) }) It("Should not allow a negative postRunTimeoutSeconds", func() { - nnfProfile.ObjectMeta.Name = pinnedResourceName - nnfProfile.Data.PostRunTimeoutSeconds = -1 + nnfProfile.Data.PostRunTimeoutSeconds = pointy.Int64(-1) Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed()) nnfProfile = nil }) It("Should not allow setting both Spec and MPISpec", func() { - nnfProfile.ObjectMeta.Name = pinnedResourceName nnfProfile.Data.Spec = &corev1.PodSpec{} nnfProfile.Data.MPISpec = &mpiv2beta1.MPIJobSpec{} Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed()) @@ -100,7 +134,6 @@ var _ = Describe("NnfContainerProfile Webhook", func() { }) It("Should fail when both Spec and MPISpec are unset", func() { - nnfProfile.ObjectMeta.Name = pinnedResourceName nnfProfile.Data.Spec = nil nnfProfile.Data.MPISpec = nil Expect(k8sClient.Create(context.TODO(), 
nnfProfile)).ToNot(Succeed()) @@ -108,7 +141,6 @@ var _ = Describe("NnfContainerProfile Webhook", func() { }) It("Should not allow an empty MPIReplicaSpecs", func() { - nnfProfile.ObjectMeta.Name = pinnedResourceName nnfProfile.Data.MPISpec = &mpiv2beta1.MPIJobSpec{ MPIReplicaSpecs: map[mpiv2beta1.MPIReplicaType]*mpicommonv1.ReplicaSpec{}, } @@ -116,8 +148,7 @@ var _ = Describe("NnfContainerProfile Webhook", func() { nnfProfile = nil }) - It("Should not allow an empty Launcher and Worker ReplicaSpecs", func() { - nnfProfile.ObjectMeta.Name = pinnedResourceName + It("Should not allow both an empty Launcher and Worker ReplicaSpecs", func() { nnfProfile.Data.MPISpec = &mpiv2beta1.MPIJobSpec{ MPIReplicaSpecs: map[mpiv2beta1.MPIReplicaType]*mpicommonv1.ReplicaSpec{ mpiv2beta1.MPIReplicaTypeLauncher: nil, @@ -128,8 +159,37 @@ var _ = Describe("NnfContainerProfile Webhook", func() { nnfProfile = nil }) + It("Should not allow an empty Launcher ReplicaSpec", func() { + nnfProfile.Data.MPISpec = &mpiv2beta1.MPIJobSpec{ + MPIReplicaSpecs: map[mpiv2beta1.MPIReplicaType]*mpicommonv1.ReplicaSpec{ + mpiv2beta1.MPIReplicaTypeLauncher: nil, + mpiv2beta1.MPIReplicaTypeWorker: { + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{}, + }, + }, + }, + } + Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed()) + nnfProfile = nil + }) + + It("Should not allow an empty Worker ReplicaSpec", func() { + nnfProfile.Data.MPISpec = &mpiv2beta1.MPIJobSpec{ + MPIReplicaSpecs: map[mpiv2beta1.MPIReplicaType]*mpicommonv1.ReplicaSpec{ + mpiv2beta1.MPIReplicaTypeLauncher: { + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{}, + }, + }, + mpiv2beta1.MPIReplicaTypeWorker: nil, + }, + } + Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed()) + nnfProfile = nil + }) + It("Should not allow an empty Launcher and Worker PodSpecs", func() { - nnfProfile.ObjectMeta.Name = pinnedResourceName nnfProfile.Data.MPISpec = &mpiv2beta1.MPIJobSpec{ MPIReplicaSpecs: map[mpiv2beta1.MPIReplicaType]*mpicommonv1.ReplicaSpec{ mpiv2beta1.MPIReplicaTypeLauncher: { @@ -148,13 +208,60 @@ var _ = Describe("NnfContainerProfile Webhook", func() { nnfProfile = nil }) + DescribeTable("Should allow a user to set PreRunTimeoutSeconds", + + func(timeout, expected *int64, succeed bool) { + nnfProfile.Data.Spec = &corev1.PodSpec{Containers: []corev1.Container{ + {Name: "test", Image: "alpine:latest"}, + }} + nnfProfile.Data.MPISpec = nil + + nnfProfile.Data.PreRunTimeoutSeconds = timeout + if succeed { + Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) + Expect(nnfProfile.Data.PreRunTimeoutSeconds).To(Equal(expected)) + } else { + Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed()) + nnfProfile = nil + } + + }, + Entry("to 0", pointy.Int64(0), pointy.Int64(0), true), + Entry("to 45", pointy.Int64(45), pointy.Int64(45), true), + Entry("to nil and get the default(60)", nil, pointy.Int64(60), true), + Entry("to -1 and fail", pointy.Int64(-1), nil, false), + ) + + DescribeTable("Should allow a user to set PostRunTimeoutSeconds", + + func(timeout, expected *int64, succeed bool) { + nnfProfile.Data.Spec = &corev1.PodSpec{Containers: []corev1.Container{ + {Name: "test", Image: "alpine:latest"}, + }} + nnfProfile.Data.MPISpec = nil + + nnfProfile.Data.PostRunTimeoutSeconds = timeout + if succeed { + Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) + Expect(nnfProfile.Data.PostRunTimeoutSeconds).To(Equal(expected)) + } else { + Expect(k8sClient.Create(context.TODO(), 
nnfProfile)).ToNot(Succeed()) + nnfProfile = nil + } + + }, + Entry("to 0", pointy.Int64(0), pointy.Int64(0), true), + Entry("to 45", pointy.Int64(45), pointy.Int64(45), true), + Entry("to nil and get the default(60)", nil, pointy.Int64(60), true), + Entry("to -1 and fail", pointy.Int64(-1), nil, false), + ) + It("Should not allow setting both PostRunTimeoutSeconds and MPISpec.RunPolicy.ActiveDeadlineSeconds", func() { - nnfProfile.ObjectMeta.Name = pinnedResourceName nnfProfile.Data.Spec = nil nnfProfile.Data.MPISpec = &mpiv2beta1.MPIJobSpec{} timeout := int64(10) - nnfProfile.Data.PostRunTimeoutSeconds = timeout + nnfProfile.Data.PostRunTimeoutSeconds = &timeout nnfProfile.Data.MPISpec.RunPolicy.ActiveDeadlineSeconds = &timeout Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed()) @@ -162,10 +269,29 @@ var _ = Describe("NnfContainerProfile Webhook", func() { }) It("Should not allow setting both PostRunTimeoutSeconds and Spec.ActiveDeadlineSeconds", func() { - nnfProfile.ObjectMeta.Name = pinnedResourceName + timeout := int64(10) + nnfProfile.Data.PostRunTimeoutSeconds = &timeout + nnfProfile.Data.Spec.ActiveDeadlineSeconds = &timeout + + Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed()) + nnfProfile = nil + }) + + It("Should not allow setting both PreRunTimeoutSeconds and MPISpec.RunPolicy.ActiveDeadlineSeconds", func() { + nnfProfile.Data.Spec = nil + nnfProfile.Data.MPISpec = &mpiv2beta1.MPIJobSpec{} timeout := int64(10) - nnfProfile.Data.PostRunTimeoutSeconds = timeout + nnfProfile.Data.PreRunTimeoutSeconds = &timeout + nnfProfile.Data.MPISpec.RunPolicy.ActiveDeadlineSeconds = &timeout + + Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed()) + nnfProfile = nil + }) + + It("Should not allow setting both PreRunTimeoutSeconds and Spec.ActiveDeadlineSeconds", func() { + timeout := int64(10) + nnfProfile.Data.PreRunTimeoutSeconds = &timeout nnfProfile.Data.Spec.ActiveDeadlineSeconds = &timeout Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed()) @@ -173,7 +299,6 @@ var _ = Describe("NnfContainerProfile Webhook", func() { }) It("Should not allow setting MPISpec.RunPolicy.BackoffLimit directly", func() { - nnfProfile.ObjectMeta.Name = pinnedResourceName nnfProfile.Data.Spec = nil nnfProfile.Data.MPISpec = &mpiv2beta1.MPIJobSpec{} @@ -185,22 +310,19 @@ var _ = Describe("NnfContainerProfile Webhook", func() { }) It("Should allow a zero postRunTimeoutSeconds", func() { - nnfProfile.ObjectMeta.Name = pinnedResourceName - nnfProfile.Data.PostRunTimeoutSeconds = 0 + nnfProfile.Data.PostRunTimeoutSeconds = pointy.Int64(0) Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) }) It("Should not allow modification of Data in a pinned resource", func() { nnfProfile.ObjectMeta.Name = pinnedResourceName + nnfProfile.ObjectMeta.Namespace = otherNamespaceName + nnfProfile.Data.Pinned = true Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) Eventually(func() error { return k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(nnfProfile), nnfProfile) }).Should(Succeed()) - // Set it as pinned with an Update - nnfProfile.Data.Pinned = true - Expect(k8sClient.Update(context.TODO(), nnfProfile)).To(Succeed()) - // Verify pinned Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(nnfProfile), newProfile)).To(Succeed()) Expect(newProfile.Data.Pinned).To(BeTrue()) @@ -212,15 +334,13 @@ var _ = Describe("NnfContainerProfile Webhook", func() { It("Should allow modification of Meta in a pinned 
resource", func() { nnfProfile.ObjectMeta.Name = pinnedResourceName + nnfProfile.ObjectMeta.Namespace = otherNamespaceName + nnfProfile.Data.Pinned = true Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) Eventually(func() error { return k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(nnfProfile), nnfProfile) }).Should(Succeed()) - // Set it as pinned with an Update - nnfProfile.Data.Pinned = true - Expect(k8sClient.Update(context.TODO(), nnfProfile)).To(Succeed()) - // Verify pinned Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(nnfProfile), newProfile)).To(Succeed()) Expect(newProfile.Data.Pinned).To(BeTrue()) @@ -235,4 +355,52 @@ var _ = Describe("NnfContainerProfile Webhook", func() { newProfile.SetLabels(labels) Expect(k8sClient.Update(context.TODO(), newProfile)).To(Succeed()) }) + + It("Should not allow an unpinned profile to become pinned", func() { + Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) + Eventually(func() error { + return k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(nnfProfile), newProfile) + }).Should(Succeed()) + + newProfile.Data.Pinned = true + Expect(k8sClient.Update(context.TODO(), newProfile)).ToNot(Succeed()) + }) + + It("Should not allow a pinned profile to become unpinned", func() { + nnfProfile.ObjectMeta.Name = pinnedResourceName + nnfProfile.ObjectMeta.Namespace = otherNamespaceName + nnfProfile.Data.Pinned = true + + Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) + Eventually(func() error { + return k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(nnfProfile), newProfile) + }).Should(Succeed()) + + newProfile.Data.Pinned = false + Expect(k8sClient.Update(context.TODO(), newProfile)).ToNot(Succeed()) + }) + + DescribeTable("when modes are set for storages on creation", + func(storageName string, mode corev1.PersistentVolumeAccessMode, result bool) { + for i, storage := range nnfProfile.Data.Storages { + if storage.Name == storageName && mode != "" { + nnfProfile.Data.Storages[i].PVCMode = mode + } + } + if result { + Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) + } else { + Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed()) + nnfProfile = nil + } + }, + // Only nil modes should pass for JOB/PERSISTENT + Entry("should pass when DW_JOB has no mode", "DW_JOB_storage", corev1.PersistentVolumeAccessMode(""), true), + Entry("should fail when DW_JOB has a mode", "DW_JOB_storage", corev1.ReadWriteMany, false), + Entry("should pass when DW_PERSISTENT has no mode", "DW_PERSISTENT_storage", corev1.PersistentVolumeAccessMode(""), true), + Entry("should fail when DW_PERSISTENT has a mode", "DW_PERSISTENT_storage", corev1.ReadWriteMany, false), + // Both should pass + Entry("should pass when DW_GLOBAL has no mode (defaults)", "DW_GLOBAL_storage", corev1.PersistentVolumeAccessMode(""), true), + Entry("should pass when DW_GLOBAL has a mode", "DW_GLOBAL_storage", corev1.ReadWriteMany, true), + ) }) diff --git a/api/v1alpha1/nnfstorageprofile_types.go b/api/v1alpha1/nnfstorageprofile_types.go index 5a0bf7917..d247e5b6f 100644 --- a/api/v1alpha1/nnfstorageprofile_types.go +++ b/api/v1alpha1/nnfstorageprofile_types.go @@ -65,7 +65,11 @@ type NnfStorageProfileLustreData struct { // +kubebuilder:default:=false CombinedMGTMDT bool `json:"combinedMgtMdt,omitempty"` - // ExternalMGS contains the NIDs of a pre-existing MGS that should be used + // ExternalMGS specifies the use of an existing MGS rather than creating one. 
This can + // be either the NID(s) of a pre-existing MGS that should be used, or it can be an NNF Persistent + // Instance that was created with the "StandaloneMGTPoolName" option. In the latter case, the format + // is "pool:poolName" where "poolName" is the argument from "StandaloneMGTPoolName". A single MGS will + // be picked from the pool. ExternalMGS string `json:"externalMgs,omitempty"` // CapacityMGT specifies the size of the MGT device. @@ -83,6 +87,11 @@ type NnfStorageProfileLustreData struct { // +kubebuilder:default:=false ExclusiveMDT bool `json:"exclusiveMdt,omitempty"` + // StandaloneMGTPoolName creates a Lustre MGT without a MDT or OST. This option can only be used when creating + // a persistent Lustre instance. The MGS is placed into a named pool that can be used by the "ExternalMGS" option. + // Multiple pools can be created. + StandaloneMGTPoolName string `json:"standaloneMgtPoolName,omitempty"` + // MgtCmdLines contains commands to create an MGT target. MgtCmdLines NnfStorageProfileLustreCmdLines `json:"mgtCommandlines,omitempty"` diff --git a/api/v1alpha1/nnfstorageprofile_webhook.go b/api/v1alpha1/nnfstorageprofile_webhook.go index 23906e2c5..84f168c9c 100644 --- a/api/v1alpha1/nnfstorageprofile_webhook.go +++ b/api/v1alpha1/nnfstorageprofile_webhook.go @@ -1,5 +1,5 @@ /* - * Copyright 2022 Hewlett Packard Enterprise Development LP + * Copyright 2022-2023 Hewlett Packard Enterprise Development LP * Other additional copyright holders may be indicated within. * * The entirety of this work is licensed under the Apache License, @@ -21,6 +21,7 @@ package v1alpha1 import ( "fmt" + "os" "reflect" "k8s.io/apimachinery/pkg/runtime" @@ -47,6 +48,14 @@ var _ webhook.Validator = &NnfStorageProfile{} func (r *NnfStorageProfile) ValidateCreate() error { nnfstorageprofilelog.V(1).Info("validate create", "name", r.Name) + // If it's not pinned, then it's being made available for users to select + // and it must be in the correct namespace. 
+ profileNamespace := os.Getenv("NNF_STORAGE_PROFILE_NAMESPACE") + if !r.Data.Pinned && r.GetNamespace() != profileNamespace { + err := fmt.Errorf("incorrect namespace for profile that is intended to be selected by users; the namespace should be '%s'", profileNamespace) + nnfstorageprofilelog.Error(err, "invalid") + return err + } if err := r.validateContent(); err != nil { nnfstorageprofilelog.Error(err, "invalid NnfStorageProfile resource") return err @@ -59,6 +68,11 @@ func (r *NnfStorageProfile) ValidateUpdate(old runtime.Object) error { nnfstorageprofilelog.V(1).Info("validate update", "name", r.Name) obj := old.(*NnfStorageProfile) + if obj.Data.Pinned != r.Data.Pinned { + err := fmt.Errorf("the pinned flag is immutable") + nnfcontainerprofilelog.Error(err, "invalid") + return err + } if obj.Data.Pinned { // Allow metadata to be updated, for things like finalizers, // ownerReferences, and labels, but do not allow Data to be @@ -102,6 +116,14 @@ func (r *NnfStorageProfile) validateContentLustre() error { return fmt.Errorf("cannot set both combinedMgtMdt and externalMgs") } + if len(r.Data.LustreStorage.StandaloneMGTPoolName) > 0 && len(r.Data.LustreStorage.ExternalMGS) > 0 { + return fmt.Errorf("cannot set both standaloneMgtPoolName and externalMgs") + } + + if len(r.Data.LustreStorage.StandaloneMGTPoolName) > 0 && r.Data.LustreStorage.CombinedMGTMDT { + return fmt.Errorf("cannot set standaloneMgtPoolName and combinedMgtMdt") + } + for _, target := range []string{"mgt", "mdt", "mgtmdt", "ost"} { targetMiscOptions := r.GetLustreMiscOptions(target) err := r.validateLustreTargetMiscOptions(targetMiscOptions) diff --git a/api/v1alpha1/nnfstorageprofile_webhook_test.go b/api/v1alpha1/nnfstorageprofile_webhook_test.go index e09177c99..2ee5d7c5c 100644 --- a/api/v1alpha1/nnfstorageprofile_webhook_test.go +++ b/api/v1alpha1/nnfstorageprofile_webhook_test.go @@ -1,5 +1,5 @@ /* - * Copyright 2022 Hewlett Packard Enterprise Development LP + * Copyright 2022-2023 Hewlett Packard Enterprise Development LP * Other additional copyright holders may be indicated within. * * The entirety of this work is licensed under the Apache License, @@ -23,8 +23,10 @@ import ( "context" "os" + "github.com/google/uuid" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -34,16 +36,21 @@ import ( var _ = Describe("NnfStorageProfile Webhook", func() { var ( - namespaceName = os.Getenv("NNF_STORAGE_PROFILE_NAMESPACE") - pinnedResourceName = "test-pinned" - nnfProfile *NnfStorageProfile = nil + namespaceName = os.Getenv("NNF_STORAGE_PROFILE_NAMESPACE") + otherNamespaceName string + otherNamespace *corev1.Namespace + + pinnedResourceName string + nnfProfile *NnfStorageProfile newProfile *NnfStorageProfile ) BeforeEach(func() { + pinnedResourceName = "test-pinned-" + uuid.NewString()[:8] + nnfProfile = &NnfStorageProfile{ ObjectMeta: metav1.ObjectMeta{ - Name: "test", + Name: "test-" + uuid.NewString()[:8], Namespace: namespaceName, }, } @@ -51,6 +58,21 @@ var _ = Describe("NnfStorageProfile Webhook", func() { newProfile = &NnfStorageProfile{} }) + BeforeEach(func() { + otherNamespaceName = "other-" + uuid.NewString()[:8] + + otherNamespace = &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: otherNamespaceName, + }, + } + Expect(k8sClient.Create(context.TODO(), otherNamespace)).To(Succeed()) + }) + + AfterEach(func() { + Expect(k8sClient.Delete(context.TODO(), otherNamespace)).To(Succeed()) + }) + AfterEach(func() { if nnfProfile != nil { Expect(k8sClient.Delete(context.TODO(), nnfProfile)).To(Succeed()) @@ -61,6 +83,17 @@ var _ = Describe("NnfStorageProfile Webhook", func() { } }) + It("should accept system profiles in the designated namespace", func() { + Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) + }) + + It("should not accept system profiles that are not in the designated namespace", func() { + nnfProfile.ObjectMeta.Namespace = otherNamespaceName + err := k8sClient.Create(context.TODO(), nnfProfile) + Expect(err.Error()).To(MatchRegexp("webhook .* denied the request: incorrect namespace")) + nnfProfile = nil + }) + It("should accept default=true", func() { nnfProfile.Data.Default = true Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) @@ -81,6 +114,13 @@ var _ = Describe("NnfStorageProfile Webhook", func() { Expect(newProfile.Data.Default).ToNot(BeTrue()) }) + It("should accept standaloneMgtPoolName", func() { + nnfProfile.Data.LustreStorage.StandaloneMGTPoolName = "FakePool" + Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) + Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(nnfProfile), newProfile)).To(Succeed()) + Expect(newProfile.Data.Default).ToNot(BeTrue()) + }) + It("should accept combinedMgtMdt", func() { nnfProfile.Data.LustreStorage.CombinedMGTMDT = true Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) @@ -110,6 +150,20 @@ var _ = Describe("NnfStorageProfile Webhook", func() { nnfProfile = nil }) + It("should not accept standaloneMgtPoolName with externalMgs", func() { + nnfProfile.Data.LustreStorage.StandaloneMGTPoolName = "FakePool" + nnfProfile.Data.LustreStorage.ExternalMGS = "10.0.0.1@tcp" + Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed()) + nnfProfile = nil + }) + + It("should not accept standaloneMgtPoolName with combinedMgtMdt", func() { + nnfProfile.Data.LustreStorage.StandaloneMGTPoolName = "FakePool" + nnfProfile.Data.LustreStorage.CombinedMGTMDT = true + Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed()) + nnfProfile = nil + }) + It("Should not allow a default resource to be pinned", func() { nnfProfile.Data.Default = true nnfProfile.Data.Pinned = 
true @@ -120,6 +174,7 @@ var _ = Describe("NnfStorageProfile Webhook", func() { It("Should not allow modification of Data in a pinned resource", func() { nnfProfile.ObjectMeta.Name = pinnedResourceName + nnfProfile.ObjectMeta.Namespace = otherNamespaceName nnfProfile.Data.Pinned = true Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) @@ -135,6 +190,7 @@ var _ = Describe("NnfStorageProfile Webhook", func() { It("Should allow modification of Meta in a pinned resource", func() { nnfProfile.ObjectMeta.Name = pinnedResourceName + nnfProfile.ObjectMeta.Namespace = otherNamespaceName nnfProfile.Data.Pinned = true Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) @@ -199,4 +255,28 @@ var _ = Describe("NnfStorageProfile Webhook", func() { Expect(k8sClient.Create(context.TODO(), nnfProfile)).ToNot(Succeed()) nnfProfile = nil }) + + It("Should not allow an unpinned profile to become pinned", func() { + Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) + Eventually(func() error { + return k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(nnfProfile), newProfile) + }).Should(Succeed()) + + newProfile.Data.Pinned = true + Expect(k8sClient.Update(context.TODO(), newProfile)).ToNot(Succeed()) + }) + + It("Should not allow a pinned profile to become unpinned", func() { + nnfProfile.ObjectMeta.Name = pinnedResourceName + nnfProfile.ObjectMeta.Namespace = otherNamespaceName + nnfProfile.Data.Pinned = true + + Expect(k8sClient.Create(context.TODO(), nnfProfile)).To(Succeed()) + Eventually(func() error { + return k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(nnfProfile), newProfile) + }).Should(Succeed()) + + newProfile.Data.Pinned = false + Expect(k8sClient.Update(context.TODO(), newProfile)).ToNot(Succeed()) + }) }) diff --git a/api/v1alpha1/webhook_suite_test.go b/api/v1alpha1/webhook_suite_test.go index 98a25206f..2ee3d8164 100644 --- a/api/v1alpha1/webhook_suite_test.go +++ b/api/v1alpha1/webhook_suite_test.go @@ -34,6 +34,7 @@ import ( admissionv1beta1 "k8s.io/api/admission/v1beta1" //+kubebuilder:scaffold:imports + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/rest" ctrl "sigs.k8s.io/controller-runtime" @@ -98,6 +99,9 @@ var _ = BeforeSuite(func() { err = admissionv1beta1.AddToScheme(scheme) Expect(err).NotTo(HaveOccurred()) + err = corev1.AddToScheme(scheme) + Expect(err).NotTo(HaveOccurred()) + //+kubebuilder:scaffold:scheme k8sClient, err = client.New(cfg, client.Options{Scheme: scheme}) diff --git a/api/v1alpha1/workflow_error.go b/api/v1alpha1/workflow_error.go deleted file mode 100644 index e3602e194..000000000 --- a/api/v1alpha1/workflow_error.go +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright 2022 Hewlett Packard Enterprise Development LP - * Other additional copyright holders may be indicated within. - * - * The entirety of this work is licensed under the Apache License, - * Version 2.0 (the "License"); you may not use this file except - * in compliance with the License. - * - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package v1alpha1 - -import ( - "fmt" - - dwsv1alpha2 "github.com/HewlettPackard/dws/api/v1alpha2" -) - -// +kubebuilder:object:generate=false -type WorkflowError struct { - message string - recoverable bool - err error -} - -func NewWorkflowError(message string) *WorkflowError { - return &WorkflowError{ - message: message, - recoverable: true, - } -} - -func NewWorkflowErrorf(format string, a ...any) *WorkflowError { - return NewWorkflowError(fmt.Sprintf(format, a...)) -} - -func (e *WorkflowError) GetMessage() string { - return e.message -} - -func (e *WorkflowError) GetRecoverable() bool { - return e.recoverable -} - -func (e *WorkflowError) GetError() error { - return e.err -} - -func (e *WorkflowError) Error() string { - if e.err == nil { - return e.message - } - - return e.message + ": " + e.err.Error() -} - -func (e *WorkflowError) Unwrap() error { - return e.err -} - -func (e *WorkflowError) Inject(driverStatus *dwsv1alpha2.WorkflowDriverStatus) { - driverStatus.Message = e.GetMessage() - if e.GetRecoverable() { - driverStatus.Status = dwsv1alpha2.StatusRunning - } else { - driverStatus.Status = dwsv1alpha2.StatusError - } - - if e.Unwrap() != nil { - driverStatus.Error = e.Unwrap().Error() - } else { - driverStatus.Error = e.Error() - } -} - -func (e *WorkflowError) WithFatal() *WorkflowError { - e.recoverable = false - return e -} - -func (e *WorkflowError) WithError(err error) *WorkflowError { - // if the error is already a WorkflowError, then return it unmodified - workflowError, ok := err.(*WorkflowError) - if ok { - return workflowError - } - - resourceError, ok := err.(*dwsv1alpha2.ResourceErrorInfo) - if ok { - e.message = resourceError.UserMessage - e.recoverable = resourceError.Recoverable - } - - e.err = err - return e -} diff --git a/api/v1alpha1/workflow_helpers.go b/api/v1alpha1/workflow_helpers.go index 0ea6b11ed..69ae9d086 100644 --- a/api/v1alpha1/workflow_helpers.go +++ b/api/v1alpha1/workflow_helpers.go @@ -41,4 +41,8 @@ const ( // PinnedContainerProfileLabelNameSpace is a label applied to NnfStorage objects to show // which pinned container profile is being used. PinnedContainerProfileLabelNameSpace = "nnf.cray.hpe.com/pinned_container_profile_namespace" + + // StandaloneMGTLabel is a label applied to the PersistentStorageInstance to show that + // it is for a Lustre MGT only. The value for the label is the pool name. 
+ StandaloneMGTLabel = "nnf.cray.hpe.com/standalone_mgt" ) diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index bb75f7897..361596a16 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -26,8 +26,7 @@ package v1alpha1 import ( "github.com/kubeflow/mpi-operator/pkg/apis/kubeflow/v2beta1" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" ) @@ -207,9 +206,29 @@ func (in *NnfContainerProfileData) DeepCopyInto(out *NnfContainerProfileData) { *out = make([]NnfContainerProfileStorage, len(*in)) copy(*out, *in) } + if in.PreRunTimeoutSeconds != nil { + in, out := &in.PreRunTimeoutSeconds, &out.PreRunTimeoutSeconds + *out = new(int64) + **out = **in + } + if in.PostRunTimeoutSeconds != nil { + in, out := &in.PostRunTimeoutSeconds, &out.PostRunTimeoutSeconds + *out = new(int64) + **out = **in + } + if in.UserID != nil { + in, out := &in.UserID, &out.UserID + *out = new(uint32) + **out = **in + } + if in.GroupID != nil { + in, out := &in.GroupID, &out.GroupID + *out = new(uint32) + **out = **in + } if in.Spec != nil { in, out := &in.Spec, &out.Spec - *out = new(corev1.PodSpec) + *out = new(v1.PodSpec) (*in).DeepCopyInto(*out) } if in.MPISpec != nil { @@ -434,6 +453,7 @@ func (in *NnfDataMovementStatus) DeepCopyInto(out *NnfDataMovementStatus) { *out = new(NnfDataMovementCommandStatus) (*in).DeepCopyInto(*out) } + in.ResourceError.DeepCopyInto(&out.ResourceError) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NnfDataMovementStatus. @@ -735,13 +755,6 @@ func (in *NnfNodeStorageAllocationStatus) DeepCopyInto(out *NnfNodeStorageAlloca out.FileShare = in.FileShare out.StoragePool = in.StoragePool out.FileSystem = in.FileSystem - if in.Conditions != nil { - in, out := &in.Conditions, &out.Conditions - *out = make([]v1.Condition, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NnfNodeStorageAllocationStatus. @@ -896,7 +909,7 @@ func (in *NnfPortManagerAllocationStatus) DeepCopyInto(out *NnfPortManagerAlloca *out = *in if in.Requester != nil { in, out := &in.Requester, &out.Requester - *out = new(corev1.ObjectReference) + *out = new(v1.ObjectReference) **out = **in } if in.Ports != nil { @@ -904,6 +917,10 @@ func (in *NnfPortManagerAllocationStatus) DeepCopyInto(out *NnfPortManagerAlloca *out = make([]uint16, len(*in)) copy(*out, *in) } + if in.TimeUnallocated != nil { + in, out := &in.TimeUnallocated, &out.TimeUnallocated + *out = (*in).DeepCopy() + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NnfPortManagerAllocationStatus. @@ -1135,6 +1152,7 @@ func (in *NnfStorageList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *NnfStorageLustreSpec) DeepCopyInto(out *NnfStorageLustreSpec) { *out = *in + out.PersistentMgsReference = in.PersistentMgsReference } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NnfStorageLustreSpec. 
diff --git a/config/crd/bases/nnf.cray.hpe.com_nnfaccesses.yaml b/config/crd/bases/nnf.cray.hpe.com_nnfaccesses.yaml index ff2278cca..b3e32f2e6 100644 --- a/config/crd/bases/nnf.cray.hpe.com_nnfaccesses.yaml +++ b/config/crd/bases/nnf.cray.hpe.com_nnfaccesses.yaml @@ -27,6 +27,9 @@ spec: jsonPath: .status.ready name: READY type: boolean + - jsonPath: .status.error.severity + name: ERROR + type: string - jsonPath: .metadata.creationTimestamp name: AGE type: date @@ -189,17 +192,28 @@ spec: debugMessage: description: Internal debug message for the error type: string - recoverable: - description: Indication if the error is likely recoverable or - not - type: boolean + severity: + description: Indication of how severe the error is. Minor will + likely succeed, Major may succeed, and Fatal will never succeed. + enum: + - Minor + - Major + - Fatal + type: string + type: + description: Internal or user error + enum: + - Internal + - User + type: string userMessage: description: Optional user facing message if the error is relevant to an end user type: string required: - debugMessage - - recoverable + - severity + - type type: object ready: description: Ready signifies whether status.state has been achieved diff --git a/config/crd/bases/nnf.cray.hpe.com_nnfcontainerprofiles.yaml b/config/crd/bases/nnf.cray.hpe.com_nnfcontainerprofiles.yaml index 2182ba5dd..bda90920b 100644 --- a/config/crd/bases/nnf.cray.hpe.com_nnfcontainerprofiles.yaml +++ b/config/crd/bases/nnf.cray.hpe.com_nnfcontainerprofiles.yaml @@ -28,11 +28,23 @@ spec: data: description: NnfContainerProfileSpec defines the desired state of NnfContainerProfile properties: + groupID: + description: GroupID specifies the group ID that is allowed to use + this profile. If this is specified, only Workflows that have a matching + group ID can select this profile. + format: int32 + type: integer mpiSpec: - description: MPIJobSpec to define the containers created from container - profile. This is used for MPI containers via MPIJobs. See mpi-operator - for more details. Either this or Spec must be provided, but not - both. + description: "MPIJobSpec to define the MPI containers created from + this profile. This functionality is provided via mpi-operator, a + 3rd party tool to assist in running MPI applications across worker + containers. Either this or Spec must be provided, but not both. + \n All the fields defined drive mpi-operator behavior. See the type + definition of MPISpec for more detail: https://github.com/kubeflow/mpi-operator/blob/v0.4.0/pkg/apis/kubeflow/v2beta1/types.go#L137 + \n Note: most of these fields are fully customizable with a few + exceptions. These fields are overridden by NNF software to ensure + proper behavior to interface with the DWS workflow - Replicas - + RunPolicy.BackoffLimit (this is set above by `RetryLimit`) - Worker/Launcher.RestartPolicy" properties: mpiImplementation: default: OpenMPI @@ -8610,29 +8622,49 @@ spec: required: - mpiReplicaSpecs type: object + numPorts: + description: Number of ports to open for communication with the user + container. These ports are opened on the targeted NNF nodes and + can be accessed outside of the k8s cluster (e.g. compute nodes). + The requested ports are made available as environment variables + inside the container and in the DWS workflow (NNF_CONTAINER_PORTS). 
+ format: int32 + type: integer pinned: default: false description: Pinned is true if this instance is an immutable copy type: boolean postRunTimeoutSeconds: - description: Stop any containers after X seconds once a workflow has - transitioned to PostRun. Defaults to 0. A value of 0 disables this - behavior. + default: 60 + description: Containers are expected to complete in the PostRun State. + Allow this many seconds for the containers to exit before declaring + an error to the workflow. Defaults to 60 if not set. A value of 0 disables + this behavior. + format: int64 + minimum: 0 + type: integer + preRunTimeoutSeconds: + default: 60 + description: Containers are launched in the PreRun state. Allow this + many seconds for the containers to start before declaring an error + to the workflow. Defaults to 60 if not set. A value of 0 disables + this behavior. format: int64 minimum: 0 type: integer retryLimit: default: 6 description: Specifies the number of times a container will be retried - upon a failure. A new pod is deployed on each retry. Defaults to + upon a failure. A new pod is deployed on each retry. Defaults to 6 by kubernetes itself and must be set. A value of 0 disables retries. format: int32 minimum: 0 type: integer spec: - description: Spec to define the containers created from container - profile. This is used for non-MPI containers. Either this or MPISpec - must be provided, but not both. + description: 'Spec to define the containers created from this profile. + This is used for non-MPI containers. Refer to the K8s documentation + for `PodSpec` for more definition: https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#PodSpec + Either this or MPISpec must be provided, but not both.' properties: activeDeadlineSeconds: description: Optional duration in seconds the pod may be active @@ -15730,11 +15762,23 @@ spec: to be mounted, but can be ignored by the user not supplying this filesystem in the #DW directives' type: boolean + pvcMode: + description: For DW_GLOBAL_ (global lustre) storages, the access + mode must match what is configured in the LustreFilesystem + resource for the namespace. Defaults to `ReadWriteMany` for + global lustre, otherwise empty. + type: string required: - name - optional type: object type: array + userID: + description: UserID specifies the user ID that is allowed to use this + profile. If this is specified, only Workflows that have a matching + user ID can select this profile. + format: int32 + type: integer required: - retryLimit type: object @@ -15745,6 +15789,8 @@ spec: type: string metadata: type: object + required: + - data type: object served: true storage: true diff --git a/config/crd/bases/nnf.cray.hpe.com_nnfdatamovements.yaml b/config/crd/bases/nnf.cray.hpe.com_nnfdatamovements.yaml index 30ea8fa04..96661f84b 100644 --- a/config/crd/bases/nnf.cray.hpe.com_nnfdatamovements.yaml +++ b/config/crd/bases/nnf.cray.hpe.com_nnfdatamovements.yaml @@ -23,6 +23,9 @@ spec: jsonPath: .status.status name: STATUS type: string + - jsonPath: .status.error.severity + name: ERROR + type: string - jsonPath: .metadata.creationTimestamp name: AGE type: date @@ -232,6 +235,35 @@ spec: operation ended. format: date-time type: string + error: + description: Error information + properties: + debugMessage: + description: Internal debug message for the error + type: string + severity: + description: Indication of how severe the error is. Minor will + likely succeed, Major may succeed, and Fatal will never succeed.
+ enum: + - Minor + - Major + - Fatal + type: string + type: + description: Internal or user error + enum: + - Internal + - User + type: string + userMessage: + description: Optional user facing message if the error is relevant + to an end user + type: string + required: + - debugMessage + - severity + - type + type: object message: description: Message contains any text that explains the Status. If Data Movement failed or storeStdout is enabled, this will contain diff --git a/config/crd/bases/nnf.cray.hpe.com_nnfnodestorages.yaml b/config/crd/bases/nnf.cray.hpe.com_nnfnodestorages.yaml index b9807ce68..60365f77f 100644 --- a/config/crd/bases/nnf.cray.hpe.com_nnfnodestorages.yaml +++ b/config/crd/bases/nnf.cray.hpe.com_nnfnodestorages.yaml @@ -154,79 +154,6 @@ spec: storage requirements (i.e. block size / stripe size). format: int64 type: integer - conditions: - items: - description: "Condition contains details for one aspect of - the current state of this API Resource. --- This struct - is intended for direct use as an array at the field path - .status.conditions. For example, \n type FooStatus struct{ - // Represents the observations of a foo's current state. - // Known .status.conditions.type are: \"Available\", \"Progressing\", - and \"Degraded\" // +patchMergeKey=type // +patchStrategy=merge - // +listType=map // +listMapKey=type Conditions []metav1.Condition - `json:\"conditions,omitempty\" patchStrategy:\"merge\" patchMergeKey:\"type\" - protobuf:\"bytes,1,rep,name=conditions\"` \n // other fields - }" - properties: - lastTransitionTime: - description: lastTransitionTime is the last time the condition - transitioned from one status to another. This should - be when the underlying condition changed. If that is - not known, then using the time when the API field changed - is acceptable. - format: date-time - type: string - message: - description: message is a human readable message indicating - details about the transition. This may be an empty string. - maxLength: 32768 - type: string - observedGeneration: - description: observedGeneration represents the .metadata.generation - that the condition was set based upon. For instance, - if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration - is 9, the condition is out of date with respect to the - current state of the instance. - format: int64 - minimum: 0 - type: integer - reason: - description: reason contains a programmatic identifier - indicating the reason for the condition's last transition. - Producers of specific condition types may define expected - values and meanings for this field, and whether the - values are considered a guaranteed API. The value should - be a CamelCase string. This field may not be empty. - maxLength: 1024 - minLength: 1 - pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ - type: string - status: - description: status of the condition, one of True, False, - Unknown. - enum: - - "True" - - "False" - - Unknown - type: string - type: - description: type of condition in CamelCase or in foo.example.com/CamelCase. - --- Many .condition.type values are consistent across - resources like Available, but because arbitrary conditions - can be useful (see .node.status.conditions), the ability - to deconflict is important. 
The regex it matches is - (dns1123SubdomainFmt/)?(qualifiedNameFmt) - maxLength: 316 - pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ - type: string - required: - - lastTransitionTime - - message - - reason - - status - - type - type: object - type: array creationTime: description: Represents the time when the storage was created by the controller It is represented in RFC3339 form and is @@ -364,17 +291,28 @@ spec: debugMessage: description: Internal debug message for the error type: string - recoverable: - description: Indication if the error is likely recoverable or - not - type: boolean + severity: + description: Indication of how severe the error is. Minor will + likely succeed, Major may succeed, and Fatal will never succeed. + enum: + - Minor + - Major + - Fatal + type: string + type: + description: Internal or user error + enum: + - Internal + - User + type: string userMessage: description: Optional user facing message if the error is relevant to an end user type: string required: - debugMessage - - recoverable + - severity + - type type: object lustreStorage: description: LustreStorageStatus describes the Lustre targets created diff --git a/config/crd/bases/nnf.cray.hpe.com_nnfportmanagers.yaml b/config/crd/bases/nnf.cray.hpe.com_nnfportmanagers.yaml index aab8d03ec..dee321fae 100644 --- a/config/crd/bases/nnf.cray.hpe.com_nnfportmanagers.yaml +++ b/config/crd/bases/nnf.cray.hpe.com_nnfportmanagers.yaml @@ -198,9 +198,15 @@ spec: enum: - InUse - Free + - Cooldown - InvalidConfiguration - InsufficientResources type: string + timeUnallocated: + description: TimeUnallocated is when the port was unallocated. + This is to ensure the proper cooldown duration. + format: date-time + type: string required: - status type: object diff --git a/config/crd/bases/nnf.cray.hpe.com_nnfstorageprofiles.yaml b/config/crd/bases/nnf.cray.hpe.com_nnfstorageprofiles.yaml index ca7281a49..cd752d162 100644 --- a/config/crd/bases/nnf.cray.hpe.com_nnfstorageprofiles.yaml +++ b/config/crd/bases/nnf.cray.hpe.com_nnfstorageprofiles.yaml @@ -133,8 +133,13 @@ spec: colocated with any other target on the chosen server. type: boolean externalMgs: - description: ExternalMGS contains the NIDs of a pre-existing MGS - that should be used + description: ExternalMGS specifies the use of an existing MGS + rather than creating one. This can be either the NID(s) of a + pre-existing MGS that should be used, or it can be an NNF Persistent + Instance that was created with the "StandaloneMGTPoolName" option. + In the latter case, the format is "pool:poolName" where "poolName" + is the argument from "StandaloneMGTPoolName". A single MGS will + be picked from the pool. type: string mdtCommandlines: description: MdtCmdLines contains commands to create an MDT target. @@ -337,6 +342,12 @@ spec: required: - colocateComputes type: object + standaloneMgtPoolName: + description: StandaloneMGTPoolName creates a Lustre MGT without + a MDT or OST. This option can only be used when creating a persistent + Lustre instance. The MGS is placed into a named pool that can + be used by the "ExternalMGS" option. Multiple pools can be created. 
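StandaloneMGTPoolName and ExternalMGS are intended to be used as a pair: one profile creates an MGT-only persistent instance and publishes its MGS into a named pool, and other profiles then consume an MGS from that pool through the "pool:poolName" form of ExternalMGS. A short sketch under those assumptions, using the Go field paths exercised elsewhere in this diff (Data.LustreStorage.StandaloneMGTPoolName and Data.LustreStorage.ExternalMGS); the helper function, the base-profile argument, and the profile and pool names are hypothetical.

package controllers

import (
	nnfv1alpha1 "github.com/NearNodeFlash/nnf-sos/api/v1alpha1"
)

// buildMgsPoolProfiles returns two hypothetical profiles: one that a
// create_persistent directive would use to create a standalone MGT and
// publish its MGS into a named pool, and one that later directives would
// use to pick an MGS out of that pool via the "pool:" form of ExternalMGS.
func buildMgsPoolProfiles(base *nnfv1alpha1.NnfStorageProfile) (mgt, consumer *nnfv1alpha1.NnfStorageProfile) {
	mgt = base.DeepCopy()
	mgt.SetName("standalone-mgt")
	mgt.Data.LustreStorage.StandaloneMGTPoolName = "shared-mgs"

	consumer = base.DeepCopy()
	consumer.SetName("use-shared-mgs")
	// "pool:<poolName>" selects an MGS from the pool above; a plain NID
	// string would instead point at a pre-existing external MGS.
	consumer.Data.LustreStorage.ExternalMGS = "pool:shared-mgs"

	return mgt, consumer
}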
+ type: string type: object pinned: default: false diff --git a/config/crd/bases/nnf.cray.hpe.com_nnfstorages.yaml b/config/crd/bases/nnf.cray.hpe.com_nnfstorages.yaml index 1c584ac7d..1262f751e 100644 --- a/config/crd/bases/nnf.cray.hpe.com_nnfstorages.yaml +++ b/config/crd/bases/nnf.cray.hpe.com_nnfstorages.yaml @@ -14,7 +14,14 @@ spec: singular: nnfstorage scope: Namespaced versions: - - name: v1alpha1 + - additionalPrinterColumns: + - jsonPath: .metadata.creationTimestamp + name: AGE + type: date + - jsonPath: .status.error.severity + name: ERROR + type: string + name: v1alpha1 schema: openAPIV3Schema: description: NnfStorage is the Schema for the storages API @@ -90,6 +97,45 @@ spec: - name type: object type: array + persistentMgsReference: + description: PersistentMgsReference is a reference to a persistent + storage that is providing the external MGS. + properties: + apiVersion: + description: API version of the referent. + type: string + fieldPath: + description: 'If referring to a piece of an object instead + of an entire object, this string should contain a valid + JSON/Go field access statement, such as desiredState.manifest.containers[2]. + For example, if the object reference is to a container + within a pod, this would take on a value like: "spec.containers{name}" + (where "name" refers to the name of the container that + triggered the event) or if no container name is specified + "spec.containers[2]" (container with index 2 in this pod). + This syntax is chosen only to have some well-defined way + of referencing a part of an object. TODO: this design + is not final and this field is subject to change in the + future.' + type: string + kind: + description: 'Kind of the referent. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names' + type: string + namespace: + description: 'Namespace of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/' + type: string + resourceVersion: + description: 'Specific resourceVersion to which this reference + is made, if any. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency' + type: string + uid: + description: 'UID of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids' + type: string + type: object + x-kubernetes-map-type: atomic targetType: description: TargetType is the type of Lustre target to be created. enum: @@ -144,9 +190,6 @@ spec: description: AllocationCount is the total number of allocations that currently exist type: integer - error: - description: Error is the human readable error string - type: string health: description: Health reflects the health of this allocation set type: string @@ -163,17 +206,28 @@ spec: debugMessage: description: Internal debug message for the error type: string - recoverable: - description: Indication if the error is likely recoverable or - not - type: boolean + severity: + description: Indication of how severe the error is. Minor will + likely succeed, Major may succeed, and Fatal will never succeed. 
+ enum: + - Minor + - Major + - Fatal + type: string + type: + description: Internal or user error + enum: + - Internal + - User + type: string userMessage: description: Optional user facing message if the error is relevant to an end user type: string required: - debugMessage - - recoverable + - severity + - type type: object mgsNode: description: MgsNode is the NID of the MGS. diff --git a/config/dws/nnf-ruleset.yaml b/config/dws/nnf-ruleset.yaml index 6563e99e5..0bec8cec5 100644 --- a/config/dws/nnf-ruleset.yaml +++ b/config/dws/nnf-ruleset.yaml @@ -93,7 +93,7 @@ spec: isRequired: true isValueRequired: true - command: "container" - watchStates: Proposal,PreRun,PostRun,Teardown + watchStates: Proposal,Setup,PreRun,PostRun,Teardown ruleDefs: - key: "^name$" type: "string" @@ -105,7 +105,7 @@ spec: pattern: "^[a-z][a-z0-9-]+$" isRequired: true isValueRequired: true - - key: '^(DW_JOB_|DW_PERSISTENT_)[a-z][a-z0-9_]+$' + - key: '^(DW_JOB_|DW_PERSISTENT_|DW_GLOBAL_)[a-z][a-z0-9_]+$' type: "string" isRequired: false isValueRequired: true diff --git a/config/examples/nnf_v1alpha1_nnfcontainerprofiles.yaml b/config/examples/nnf_v1alpha1_nnfcontainerprofiles.yaml index 8a60bbd7b..80d8c8035 100644 --- a/config/examples/nnf_v1alpha1_nnfcontainerprofiles.yaml +++ b/config/examples/nnf_v1alpha1_nnfcontainerprofiles.yaml @@ -5,18 +5,34 @@ metadata: data: retryLimit: 6 storages: - - name: DW_JOB_foo_local_storage - optional: false - - name: DW_PERSISTENT_foo_persistent_storage - optional: true + - name: DW_JOB_foo_local_storage + optional: false + - name: DW_PERSISTENT_foo_persistent_storage + optional: true + - name: DW_GLOBAL_foo_global_lustre + optional: true spec: containers: - - name: example-success - image: alpine:latest - command: - - /bin/sh - - -c - - "sleep 15 && exit 0" + - name: example-success + image: alpine:latest + command: + - /bin/sh + - -c + - "sleep 10 && exit 0" +--- +apiVersion: nnf.cray.hpe.com/v1alpha1 +kind: NnfContainerProfile +metadata: + name: example-fail +data: + spec: + containers: + - name: example-fail + image: alpine:latest + command: + - /bin/sh + - -c + - "sleep 10 && exit 1" --- apiVersion: nnf.cray.hpe.com/v1alpha1 kind: NnfContainerProfile @@ -25,23 +41,23 @@ metadata: data: retryLimit: 6 storages: - - name: DW_JOB_foo_local_storage - optional: false - - name: DW_PERSISTENT_foo_persistent_storage - optional: true + - name: DW_JOB_foo_local_storage + optional: false + - name: DW_PERSISTENT_foo_persistent_storage + optional: true spec: containers: - - name: example-randomly-fail - image: alpine:latest - command: - - /bin/sh - - -c - - | - echo "starting..." - sleep 30 - x=$(($RANDOM % 2)) - echo "exiting: $x" - exit $x + - name: example-randomly-fail + image: alpine:latest + command: + - /bin/sh + - -c + - | + echo "starting..." 
+ sleep 10 + x=$(($RANDOM % 2)) + echo "exiting: $x" + exit $x --- apiVersion: nnf.cray.hpe.com/v1alpha1 kind: NnfContainerProfile @@ -50,18 +66,18 @@ metadata: data: retryLimit: 6 storages: - - name: DW_JOB_foo_local_storage - optional: false - - name: DW_PERSISTENT_foo_persistent_storage - optional: true + - name: DW_JOB_foo_local_storage + optional: false + - name: DW_PERSISTENT_foo_persistent_storage + optional: true spec: containers: - - name: example-forever - image: alpine:latest - command: - - /bin/sh - - -c - - "while true; do date && sleep 5; done" + - name: example-forever + image: alpine:latest + command: + - /bin/sh + - -c + - "while true; do date && sleep 5; done" --- apiVersion: nnf.cray.hpe.com/v1alpha1 kind: NnfContainerProfile @@ -69,11 +85,81 @@ metadata: name: example-mpi data: retryLimit: 6 + numPorts: 1 + storages: + - name: DW_JOB_foo_local_storage + optional: false + - name: DW_PERSISTENT_foo_persistent_storage + optional: true + - name: DW_GLOBAL_foo_global_lustre + optional: true + pvcMode: ReadWriteMany + mpiSpec: + runPolicy: + cleanPodPolicy: Running + mpiReplicaSpecs: + Launcher: + template: + spec: + containers: + - name: example-mpi + image: nnf-mfu:latest + command: + - mpirun + - dcmp + - "$(DW_JOB_foo_local_storage)/0" + - "$(DW_JOB_foo_local_storage)/1" + Worker: + template: + spec: + containers: + - name: example-mpi + image: nnf-mfu:latest + +--- +apiVersion: nnf.cray.hpe.com/v1alpha1 +kind: NnfContainerProfile +metadata: + name: example-mpi-fail +data: + numPorts: 1 + mpiSpec: + runPolicy: + cleanPodPolicy: Running + mpiReplicaSpecs: + Launcher: + template: + spec: + containers: + - name: example-mpi-fail + image: nnf-mfu:latest + command: + - mpirun + - /bin/sh + - -c + - "sleep 10 && exit 1" + Worker: + template: + spec: + containers: + - name: example-mpi-fail + image: nnf-mfu:latest +--- +apiVersion: nnf.cray.hpe.com/v1alpha1 +kind: NnfContainerProfile +metadata: + name: example-mpi-webserver +data: + retryLimit: 6 + numPorts: 1 storages: - - name: DW_JOB_foo_local_storage - optional: false - - name: DW_PERSISTENT_foo_persistent_storage - optional: true + - name: DW_JOB_foo_local_storage + optional: false + - name: DW_PERSISTENT_foo_persistent_storage + optional: true + - name: DW_GLOBAL_foo_global_lustre + optional: true + pvcMode: ReadWriteMany mpiSpec: runPolicy: cleanPodPolicy: Running @@ -82,16 +168,17 @@ data: template: spec: containers: - - name: example-mpi - image: nnf-mfu:latest - command: - - mpirun - - dcmp - - "$(DW_JOB_foo_local_storage)/0" - - "$(DW_JOB_foo_local_storage)/1" + - name: example-mpi-webserver + image: ghcr.io/nearnodeflash/nnf-container-example:latest + command: + - mpirun + - python3 + - -m + - http.server + - $(NNF_CONTAINER_PORTS) Worker: template: spec: containers: - - name: example-mpi - image: nnf-mfu:latest + - name: example-mpi-webserver + image: ghcr.io/nearnodeflash/nnf-container-example:latest diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 36e25caed..a446694ce 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -111,6 +111,10 @@ spec: value: nnf-system - name: NNF_CONTAINER_PROFILE_NAMESPACE value: nnf-system + - name: NNF_PORT_MANAGER_NAME + value: nnf-port-manager + - name: NNF_PORT_MANAGER_NAMESPACE + value: nnf-system ports: - containerPort: 50057 name: nnf-ec diff --git a/config/ports/kustomization.yaml b/config/ports/kustomization.yaml new file mode 100644 index 000000000..6b22c995f --- /dev/null +++ b/config/ports/kustomization.yaml @@ -0,0 +1,12 
@@ +# Adds namespace to all resources. +namespace: nnf-system + +# Value of this field is prepended to the +# names of all resources, e.g. a deployment named +# "wordpress" becomes "alices-wordpress". +# Note that it should also match with the prefix (text before '-') of the namespace +# field above. +namePrefix: nnf- + +resources: +- port_manager.yaml diff --git a/config/manager/port_manager.yaml b/config/ports/port_manager.yaml similarity index 100% rename from config/manager/port_manager.yaml rename to config/ports/port_manager.yaml diff --git a/config/samples/nnf_v1alpha1_nnfcontainerprofile.yaml b/config/samples/nnf_v1alpha1_nnfcontainerprofile.yaml index 2d95a99d8..b2635e0f2 100644 --- a/config/samples/nnf_v1alpha1_nnfcontainerprofile.yaml +++ b/config/samples/nnf_v1alpha1_nnfcontainerprofile.yaml @@ -4,36 +4,96 @@ metadata: name: sample-nnfcontainerprofile namespace: nnf-system data: - # Specifies the number of times a container will be retried upon a failure. A - # new pod is deployed on each retry. Defaults to 6 by kubernetes itself and - # must be set. A value of 0 disables retries. + # Specifies the number of times a container will be retried upon a failure. A new pod is deployed + # on each retry. Defaults to 6 by kubernetes itself and must be set. A value of 0 disables + # retries. retryLimit: 6 - # Stop any containers after X seconds once a workflow has transitioned to - # PostRun. Defaults to 0. A value of 0 disables this behavior. + # Stop any containers after X seconds once a workflow has transitioned to PostRun. Defaults to 0. + # A value of 0 disables this behavior. postRunTimeoutSeconds: 0 - # List of possible filesystems supported by this container profile. These - # storages are mounted inside of the container. Any non-optional storage must - # be supplied with the container directive as an argument and must reference - # a valid jobdw or persistentdw directive's name. + # Request the number of ports to open on the targeted rabbits. These ports are accessible outside + # of the k8s cluster. The requested ports are made available as environment variables inside the + # container and in the DWS workflow (NNF_CONTAINER_PORTS). + numPorts: 0 + + # UserID specifies the user ID that is allowed to use this profile. If this is specified, only + # Workflows that have a matching user ID can select this profile. + userID: 1050 + + # GroupID specifies the group ID that is allowed to use this profile. If this is specified, + # only Workflows that have a matching group ID can select this profile. + groupID: 1050 + + # List of possible filesystems supported by this container profile. These storages are mounted + # inside of the container. Any non-optional storage must be supplied with the container directive + # as an argument and must reference a valid jobdw/persistentdw directive's name or refer to a + # LustreFilesystem path. # # Example: # DW jobdw name=my-gfs2 type=gfs2 capacity=50GB # DW container name=my-container profile=nnfcontainerprofile-sample DW_JOB_foo_local_storage=my-gfs2 storages: - - name: DW_JOB_foo_local_storage - optional: false - - name: DW_PERSISTENT_foo_persistent_storage - optional: true + - name: DW_JOB_foo_local_storage + optional: false + - name: DW_PERSISTENT_foo_persistent_storage + optional: true + # For Global lustre, pvcMode must match the mode configured in the LustreFilesystem Resource + - name: DW_GLOBAL_foo_global_lustre + optional: true + pvcMode: ReadWriteMany # Template defines the containers that will be created from container profile. 
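The numPorts option and the NNF_CONTAINER_PORTS variable described above are easiest to see from inside a user container. Below is a minimal sketch of a container process that serves on the requested port, assuming a profile with numPorts: 1 so the variable holds a single port number (which matches how the example-mpi-webserver profile passes it straight to python3 -m http.server); the Go program itself is hypothetical.

// Sketch: a user container binding the port requested via numPorts.
package main

import (
	"fmt"
	"log"
	"net/http"
	"os"
)

func main() {
	port := os.Getenv("NNF_CONTAINER_PORTS")
	if port == "" {
		log.Fatal("NNF_CONTAINER_PORTS is not set; was numPorts requested in the profile?")
	}

	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
		fmt.Fprintln(w, "hello from an NNF user container")
	})

	// The port is opened on the targeted NNF nodes and is reachable from
	// outside the k8s cluster (e.g. compute nodes), so bind on all interfaces.
	log.Fatal(http.ListenAndServe(":"+port, nil))
}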
+ # Note: Only 1 of `spec` or `MPISpec` can be defined, not both. template: + + # Spec to define the containers created from this profile. This is used for non-MPI containers. + # Refer to the K8s documentation for `PodSpec` for more definition: + # https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#PodSpec + # Either this or MPISpec must be provided, but not both. spec: containers: - - name: sample-nnfcontainerprofile - image: alpine:latest - command: - - /bin/sh - - -c - - "sleep 15 && exit 0" + - name: sample-nnfcontainerprofile + image: alpine:latest + command: + - /bin/sh + - -c + - "sleep 15 && exit 0" + + # MPIJobSpec to define the MPI containers created from this profile. This functionality is + # provided via mpi-operator, a 3rd party tool to assist in running MPI applications across + # worker containers. + # Either this or Spec must be provided, but not both. + # + # All the fields defined drive mpi-operator behavior. See the type definition of MPISpec for + # more detail: + # https://github.com/kubeflow/mpi-operator/blob/v0.4.0/pkg/apis/kubeflow/v2beta1/types.go#L137 + # + # Note: most of these fields are fully customizable with a few exceptions. These fields are + # overridden by NNF software to ensure proper behavior to interface with the DWS workflow + # - Replicas + # - RunPolicy.BackoffLimit (this is set above by `RetryLimit`) + # - Worker/Launcher.RestartPolicy + # - SSHAuthMountPath + mpiSpec: + runPolicy: + cleanPodPolicy: Running + mpiReplicaSpecs: + Launcher: + template: + spec: + containers: + - name: example-mpi + image: nnf-mfu:latest + command: + - mpirun + - dcmp + - "$(DW_JOB_foo_local_storage)/0" + - "$(DW_JOB_foo_local_storage)/1" + Worker: + template: + spec: + containers: + - name: example-mpi + image: nnf-mfu:latest diff --git a/controllers/directivebreakdown_controller.go b/controllers/directivebreakdown_controller.go index a2d249992..50ba92c2c 100644 --- a/controllers/directivebreakdown_controller.go +++ b/controllers/directivebreakdown_controller.go @@ -107,7 +107,7 @@ func (r *DirectiveBreakdownReconciler) Reconcile(ctx context.Context, req ctrl.R statusUpdater := updater.NewStatusUpdater[*dwsv1alpha2.DirectiveBreakdownStatus](dbd) defer func() { err = statusUpdater.CloseWithStatusUpdate(ctx, r.Client.Status(), err) }() - defer func() { dbd.Status.SetResourceError(err) }() + defer func() { dbd.Status.SetResourceErrorAndLog(err, log) }() // Check if the object is being deleted if !dbd.GetDeletionTimestamp().IsZero() { @@ -117,7 +117,7 @@ func (r *DirectiveBreakdownReconciler) Reconcile(ctx context.Context, req ctrl.R return ctrl.Result{}, nil } - // Delete all children that are owned by this DirectiveBreakdown. + // Delete all children that are owned by this DirectiveBreakdown. 
deleteStatus, err := dwsv1alpha2.DeleteChildren(ctx, r.Client, r.ChildObjects, dbd) if err != nil { return ctrl.Result{}, err @@ -155,7 +155,7 @@ func (r *DirectiveBreakdownReconciler) Reconcile(ctx context.Context, req ctrl.R argsMap, err := dwdparse.BuildArgsMap(dbd.Spec.Directive) if err != nil { - return ctrl.Result{}, err + return ctrl.Result{}, dwsv1alpha2.NewResourceError("invalid DW directive: %s", dbd.Spec.Directive).WithError(err).WithUserMessage("invalid DW directive").WithFatal() } commonResourceName, commonResourceNamespace := getStorageReferenceNameFromDBD(dbd) @@ -376,7 +376,7 @@ func (r *DirectiveBreakdownReconciler) createOrUpdatePersistentStorageInstance(c } } else { if psi.Spec.UserID != dbd.Spec.UserID { - return dwsv1alpha2.NewResourceError(fmt.Sprintf("Existing persistent storage user ID %v does not match user ID %v", psi.Spec.UserID, dbd.Spec.UserID), nil).WithUserMessage("User ID does not match existing persistent storage").WithFatal() + return dwsv1alpha2.NewResourceError("existing persistent storage user ID %v does not match user ID %v", psi.Spec.UserID, dbd.Spec.UserID).WithUserMessage("User ID does not match existing persistent storage").WithFatal().WithUser() } } @@ -456,8 +456,7 @@ func (r *DirectiveBreakdownReconciler) populateStorageBreakdown(ctx context.Cont // The pinned profile will be named for the NnfStorage. nnfStorageProfile, err := findPinnedProfile(ctx, r.Client, dbd.GetNamespace(), commonResourceName) if err != nil { - log.Error(err, "Unable to find pinned NnfStorageProfile", "name", commonResourceName) - return err + return dwsv1alpha2.NewResourceError("unable to find pinned NnfStorageProfile: %s/%s", commonResourceName, dbd.GetNamespace()).WithError(err).WithUserMessage("Unable to find pinned NnfStorageProfile").WithFatal() } // The directive has been validated by the webhook, so we can assume the pieces we need are in the map. 
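The next hunk keys off argsMap["command"] to restrict standaloneMgtPoolName to create_persistent directives. Pulled out on its own, the check looks roughly like the sketch below; BuildArgsMap, the "command" key, and the error wording come from this diff, while the standalone helper function is hypothetical and assumes the imports already present in directivebreakdown_controller.go.

// Sketch: the standaloneMgtPoolName guard as a standalone helper.
func validateStandaloneMgt(directive string) error {
	argsMap, err := dwdparse.BuildArgsMap(directive)
	if err != nil {
		return dwsv1alpha2.NewResourceError("invalid DW directive: %s", directive).
			WithError(err).WithUserMessage("invalid DW directive").WithFatal()
	}

	// A standalone MGT only makes sense for a persistent Lustre instance,
	// so any other command is rejected as a fatal user error.
	if argsMap["command"] != "create_persistent" {
		return dwsv1alpha2.NewResourceError("").
			WithUserMessage("standaloneMgtPoolName option can only be used with 'create_persistent' directive").
			WithFatal().WithUser()
	}

	return nil
}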
@@ -505,6 +504,12 @@ func (r *DirectiveBreakdownReconciler) populateStorageBreakdown(ctx context.Cont lustreComponents = append(lustreComponents, lustreComponentType{dwsv1alpha2.AllocateAcrossServers, mdtCapacity, "mgtmdt", useKey}) } else if len(nnfStorageProfile.Data.LustreStorage.ExternalMGS) > 0 { lustreComponents = append(lustreComponents, lustreComponentType{dwsv1alpha2.AllocateAcrossServers, mdtCapacity, "mdt", mdtKey}) + } else if len(nnfStorageProfile.Data.LustreStorage.StandaloneMGTPoolName) > 0 { + if argsMap["command"] != "create_persistent" { + return dwsv1alpha2.NewResourceError("").WithUserMessage("standaloneMgtPoolName option can only be used with 'create_persistent' directive").WithFatal().WithUser() + } + + lustreComponents = []lustreComponentType{lustreComponentType{dwsv1alpha2.AllocateSingleServer, mgtCapacity, "mgt", mgtKey}} } else { lustreComponents = append(lustreComponents, lustreComponentType{dwsv1alpha2.AllocateAcrossServers, mdtCapacity, "mdt", mdtKey}) lustreComponents = append(lustreComponents, lustreComponentType{dwsv1alpha2.AllocateSingleServer, mgtCapacity, "mgt", mgtKey}) @@ -519,9 +524,7 @@ func (r *DirectiveBreakdownReconciler) populateStorageBreakdown(ctx context.Cont } default: - err := fmt.Errorf("failed to populate directiveBreakdown") - log.Error(err, "populate directiveBreakdown", "directiveBreakdown", dbd.Name, "filesystem", filesystem) - return err + return dwsv1alpha2.NewResourceError("invalid DW directive file system type: %s", filesystem).WithUserMessage("invalid DW directive").WithFatal() } if dbd.Status.Storage == nil { @@ -558,7 +561,7 @@ func getCapacityInBytes(capacity string) (int64, error) { // matches[0] is the entire string, we want the parts. val, err := strconv.ParseFloat(matches[1], 64) if err != nil { - return 0, fmt.Errorf("invalid capacity string, %s", capacity) + return 0, dwsv1alpha2.NewResourceError("invalid capacity string, %s", capacity) } return int64(math.Round(val * powers[matches[3]])), nil diff --git a/controllers/directivebreakdown_controller_test.go b/controllers/directivebreakdown_controller_test.go index 634e60fc9..4584bc107 100644 --- a/controllers/directivebreakdown_controller_test.go +++ b/controllers/directivebreakdown_controller_test.go @@ -138,4 +138,85 @@ var _ = Describe("DirectiveBreakdown test", func() { return k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(persistentStorage), persistentStorage) }).ShouldNot(Succeed()) }) + + It("Creates a DirectiveBreakdown with a lustre jobdw and standaloneMgtPoolName", func() { + By("Setting standaloneMgtPoolName in the storage profile") + Eventually(func(g Gomega) error { + g.Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(storageProfile), storageProfile)).To(Succeed()) + storageProfile.Data.LustreStorage.StandaloneMGTPoolName = "FakePool" + return k8sClient.Update(context.TODO(), storageProfile) + }).Should(Succeed()) + + By("Creating a DirectiveBreakdown") + directiveBreakdown := &dwsv1alpha2.DirectiveBreakdown{ + ObjectMeta: metav1.ObjectMeta{ + Name: "standalone-lustre-jobdw-test", + Namespace: corev1.NamespaceDefault, + }, + Spec: dwsv1alpha2.DirectiveBreakdownSpec{ + Directive: "#DW jobdw name=jobdw-lustre type=lustre capacity=1GiB", + }, + } + + Expect(k8sClient.Create(context.TODO(), directiveBreakdown)).To(Succeed()) + + Eventually(func(g Gomega) error { + g.Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(directiveBreakdown), directiveBreakdown)).To(Succeed()) + return directiveBreakdown.Status.Error + 
}).ShouldNot(BeNil()) + }) + + It("Creates a DirectiveBreakdown with an xfs jobdw and standaloneMgtPoolName", func() { + By("Setting standaloneMgtPoolName in the storage profile") + Eventually(func(g Gomega) error { + g.Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(storageProfile), storageProfile)).To(Succeed()) + storageProfile.Data.LustreStorage.StandaloneMGTPoolName = "FakePool" + return k8sClient.Update(context.TODO(), storageProfile) + }).Should(Succeed()) + + By("Creating a DirectiveBreakdown") + directiveBreakdown := &dwsv1alpha2.DirectiveBreakdown{ + ObjectMeta: metav1.ObjectMeta{ + Name: "standalone-xfs-jobdw-test", + Namespace: corev1.NamespaceDefault, + }, + Spec: dwsv1alpha2.DirectiveBreakdownSpec{ + Directive: "#DW jobdw name=jobdw-xfs type=xfs capacity=1GiB", + }, + } + + Expect(k8sClient.Create(context.TODO(), directiveBreakdown)).To(Succeed()) + + Eventually(func(g Gomega) bool { + g.Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(directiveBreakdown), directiveBreakdown)).To(Succeed()) + return directiveBreakdown.Status.Ready + }).Should(BeTrue()) + }) + + It("Creates a DirectiveBreakdown with a create_persistent and standaloneMgtPoolName", func() { + By("Setting standaloneMgtPoolName in the storage profile") + Eventually(func(g Gomega) error { + g.Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(storageProfile), storageProfile)).To(Succeed()) + storageProfile.Data.LustreStorage.StandaloneMGTPoolName = "FakePool" + return k8sClient.Update(context.TODO(), storageProfile) + }).Should(Succeed()) + + By("Creating a DirectiveBreakdown") + directiveBreakdown := &dwsv1alpha2.DirectiveBreakdown{ + ObjectMeta: metav1.ObjectMeta{ + Name: "standalone-lustre-persistent-test", + Namespace: corev1.NamespaceDefault, + }, + Spec: dwsv1alpha2.DirectiveBreakdownSpec{ + Directive: "#DW create_persistent name=persistent-lustre type=lustre capacity=1GiB", + }, + } + + Expect(k8sClient.Create(context.TODO(), directiveBreakdown)).To(Succeed()) + + Eventually(func(g Gomega) bool { + g.Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(directiveBreakdown), directiveBreakdown)).To(Succeed()) + return directiveBreakdown.Status.Ready + }).Should(BeTrue()) + }) }) diff --git a/controllers/dws_servers_controller.go b/controllers/dws_servers_controller.go index badb1b875..51c438ed9 100644 --- a/controllers/dws_servers_controller.go +++ b/controllers/dws_servers_controller.go @@ -1,5 +1,5 @@ /* - * Copyright 2021, 2022 Hewlett Packard Enterprise Development LP + * Copyright 2021-2023 Hewlett Packard Enterprise Development LP * Other additional copyright holders may be indicated within. 
* * The entirety of this work is licensed under the Apache License, @@ -21,7 +21,6 @@ package controllers import ( "context" - "fmt" "os" "reflect" "runtime" @@ -43,6 +42,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/source" dwsv1alpha2 "github.com/HewlettPackard/dws/api/v1alpha2" + "github.com/HewlettPackard/dws/utils/updater" nnfv1alpha1 "github.com/NearNodeFlash/nnf-sos/api/v1alpha1" "github.com/NearNodeFlash/nnf-sos/controllers/metrics" ) @@ -84,7 +84,7 @@ const ( // For more details, check Reconcile and its Result here: // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.9.2/pkg/reconcile func (r *DWSServersReconciler) Reconcile(ctx context.Context, req ctrl.Request) (res ctrl.Result, err error) { - + log := r.Log.WithValues("Servers", req.NamespacedName) metrics.NnfServersReconcilesTotal.Inc() servers := &dwsv1alpha2.Servers{} @@ -95,6 +95,10 @@ func (r *DWSServersReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, client.IgnoreNotFound(err) } + statusUpdater := updater.NewStatusUpdater[*dwsv1alpha2.ServersStatus](servers) + defer func() { err = statusUpdater.CloseWithStatusUpdate(ctx, r.Client.Status(), err) }() + defer func() { servers.Status.SetResourceErrorAndLog(err, log) }() + // Check if the object is being deleted if !servers.GetDeletionTimestamp().IsZero() { if !controllerutil.ContainsFinalizer(servers, finalizerNnfServers) { @@ -217,7 +221,7 @@ func (r *DWSServersReconciler) updateCapacityUsed(ctx context.Context, servers * // If the nnfStorage was created using information from the Servers resource, then // we should always find a match. if serversIndex == -1 { - return ctrl.Result{}, fmt.Errorf("Unable to find allocation label %s", label) + return ctrl.Result{}, dwsv1alpha2.NewResourceError("unable to find allocation label %s", label).WithFatal() } // Loop through the nnfNodeStorages corresponding to each of the Rabbit nodes and find diff --git a/controllers/integration_test.go b/controllers/integration_test.go index 301bcbef2..0414ab990 100644 --- a/controllers/integration_test.go +++ b/controllers/integration_test.go @@ -1263,6 +1263,7 @@ var _ = Describe("Integration Test", func() { }).Should(Succeed()) advanceStateAndCheckReady("Proposal", workflow) + Expect(verifyPinnedContainerProfile(context.TODO(), k8sClient, workflow, 0)).To(Succeed()) }) AfterEach(func() { @@ -1307,7 +1308,7 @@ var _ = Describe("Integration Test", func() { matchLabels[nnfv1alpha1.DirectiveIndexLabel] = "0" jobList := &batchv1.JobList{} - Eventually(func(g Gomega) int { + Eventually(func() int { Expect(k8sClient.List(context.TODO(), jobList, matchLabels)).To(Succeed()) return len(jobList.Items) }).Should(Equal(2)) diff --git a/controllers/nnf_access_controller.go b/controllers/nnf_access_controller.go index 3afe297a6..78321c56b 100644 --- a/controllers/nnf_access_controller.go +++ b/controllers/nnf_access_controller.go @@ -1,5 +1,5 @@ /* - * Copyright 2021, 2022 Hewlett Packard Enterprise Development LP + * Copyright 2021-2023 Hewlett Packard Enterprise Development LP * Other additional copyright holders may be indicated within. 
* * The entirety of this work is licensed under the Apache License, @@ -92,6 +92,11 @@ func (r *NnfAccessReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( statusUpdater := updater.NewStatusUpdater[*nnfv1alpha1.NnfAccessStatus](access) defer func() { err = statusUpdater.CloseWithStatusUpdate(ctx, r.Client.Status(), err) }() + defer func() { + if err != nil || (!res.Requeue && res.RequeueAfter == 0) { + access.Status.SetResourceErrorAndLog(err, log) + } + }() // Create a list of names of the client nodes. This is pulled from either // the Computes resource specified in the ClientReference or the NnfStorage @@ -174,13 +179,16 @@ func (r *NnfAccessReconciler) Reconcile(ctx context.Context, req ctrl.Request) ( if access.Status.State == "mounted" { result, err = r.mount(ctx, access, clientList, storageMapping) + if err != nil { + return ctrl.Result{}, dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("unable to mount file system on client nodes") + } } else { result, err = r.unmount(ctx, access, clientList, storageMapping) + if err != nil { + return ctrl.Result{}, dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("unable to unmount file system from client nodes") + } } - if err != nil { - return ctrl.Result{}, err - } if result != nil { return *result, nil } @@ -201,7 +209,7 @@ func (r *NnfAccessReconciler) mount(ctx context.Context, access *nnfv1alpha1.Nnf // from a single host. wait, err := r.lockStorage(ctx, access) if err != nil { - return nil, err + return nil, dwsv1alpha2.NewResourceError("unable to lock storage").WithError(err) } if wait { @@ -211,18 +219,26 @@ func (r *NnfAccessReconciler) mount(ctx context.Context, access *nnfv1alpha1.Nnf // Add compute node information to the storage map, if necessary. err = r.addNodeStorageEndpoints(ctx, access, storageMapping) if err != nil { - return nil, err + if apierrors.IsConflict(err) { + return &ctrl.Result{}, nil + } + + return nil, dwsv1alpha2.NewResourceError("unable to add endpoints to NnfNodeStorage").WithError(err) } // Create the ClientMount resources. One ClientMount resource is created per client - err = r.createClientMounts(ctx, access, storageMapping) + err = r.manageClientMounts(ctx, access, storageMapping) if err != nil { - return nil, err + if apierrors.IsConflict(err) { + return &ctrl.Result{}, nil + } + + return nil, dwsv1alpha2.NewResourceError("unable to create ClientMount resources").WithError(err) } ready, err := r.getNodeStorageEndpointStatus(ctx, access, storageMapping) if err != nil { - return nil, err + return nil, dwsv1alpha2.NewResourceError("unable to check endpoints for NnfNodeStorage").WithError(err) } if ready == false { @@ -232,7 +248,7 @@ func (r *NnfAccessReconciler) mount(ctx context.Context, access *nnfv1alpha1.Nnf // Aggregate the status from all the ClientMount resources ready, err = r.getClientMountStatus(ctx, access, clientList) if err != nil { - return nil, err + return nil, dwsv1alpha2.NewResourceError("unable to check ClientMount status").WithError(err) } // Wait for all of the ClientMounts to be ready @@ -245,15 +261,15 @@ func (r *NnfAccessReconciler) mount(ctx context.Context, access *nnfv1alpha1.Nnf func (r *NnfAccessReconciler) unmount(ctx context.Context, access *nnfv1alpha1.NnfAccess, clientList []string, storageMapping map[string][]dwsv1alpha2.ClientMountInfo) (*ctrl.Result, error) { // Create the ClientMount resources. 
One ClientMount resource is created per client - err := r.createClientMounts(ctx, access, storageMapping) + err := r.manageClientMounts(ctx, access, storageMapping) if err != nil { - return nil, err + return nil, dwsv1alpha2.NewResourceError("unable to update ClientMount resources").WithError(err) } // Aggregate the status from all the ClientMount resources ready, err := r.getClientMountStatus(ctx, access, clientList) if err != nil { - return nil, err + return nil, dwsv1alpha2.NewResourceError("unable to get ClientMount status").WithError(err) } // Wait for all of the ClientMounts to be ready @@ -263,12 +279,12 @@ func (r *NnfAccessReconciler) unmount(ctx context.Context, access *nnfv1alpha1.N err = r.removeNodeStorageEndpoints(ctx, access, storageMapping) if err != nil { - return nil, err + return nil, dwsv1alpha2.NewResourceError("unable to remove NnfNodeStorage endpoints").WithError(err) } // Unlock the NnfStorage so it can be used by another NnfAccess if err = r.unlockStorage(ctx, access); err != nil { - return nil, err + return nil, dwsv1alpha2.NewResourceError("unable to unlock storage").WithError(err) } return nil, nil @@ -280,7 +296,7 @@ func (r *NnfAccessReconciler) unmount(ctx context.Context, access *nnfv1alpha1.N func (r *NnfAccessReconciler) lockStorage(ctx context.Context, access *nnfv1alpha1.NnfAccess) (bool, error) { if access.Spec.StorageReference.Kind != reflect.TypeOf(nnfv1alpha1.NnfStorage{}).Name() { - return false, fmt.Errorf("Invalid StorageReference kind %s", access.Spec.StorageReference.Kind) + return false, fmt.Errorf("invalid StorageReference kind %s", access.Spec.StorageReference.Kind) } namespacedName := types.NamespacedName{ @@ -580,7 +596,7 @@ func (r *NnfAccessReconciler) mapClientLocalStorage(ctx context.Context, access // Check that the correct number of NnfNodeStorage resources were found for this // Rabbit. if len(nnfNodeStorageList.Items) != storageCount.instanceCount { - return nil, fmt.Errorf("Incorrect number of NnfNodeStorages. found %d. Needed %d.", len(nnfNodeStorageList.Items), storageCount.instanceCount) + return nil, dwsv1alpha2.NewResourceError("incorrect number of NnfNodeStorages. found %d. Needed %d.", len(nnfNodeStorageList.Items), storageCount.instanceCount).WithMajor() } for _, nnfNodeStorage := range nnfNodeStorageList.Items { @@ -684,7 +700,7 @@ func (r *NnfAccessReconciler) mapClientLocalStorage(ctx context.Context, access } if len(existingStorage[storageName]) == 0 { - return nil, fmt.Errorf("Invalid matching between clients and storage. Too many clients for storage %s", storageName) + return nil, dwsv1alpha2.NewResourceError("").WithUserMessage("invalid matching between clients and storage. Too many clients for storage").WithWLM().WithFatal() } // If target==all, then the client wants to access all the storage it can see @@ -897,8 +913,8 @@ func (r *NnfAccessReconciler) removeNodeStorageEndpoints(ctx context.Context, ac return nil } -// createClientMounts creates the ClientMount resources based on the information in the storageMapping map. -func (r *NnfAccessReconciler) createClientMounts(ctx context.Context, access *nnfv1alpha1.NnfAccess, storageMapping map[string][]dwsv1alpha2.ClientMountInfo) error { +// manageClientMounts creates or updates the ClientMount resources based on the information in the storageMapping map. 
+func (r *NnfAccessReconciler) manageClientMounts(ctx context.Context, access *nnfv1alpha1.NnfAccess, storageMapping map[string][]dwsv1alpha2.ClientMountInfo) error { log := r.Log.WithValues("NnfAccess", client.ObjectKeyFromObject(access)) g := new(errgroup.Group) @@ -928,7 +944,10 @@ func (r *NnfAccessReconciler) createClientMounts(ctx context.Context, access *nn namespacedName := client.ObjectKeyFromObject(clientMount).String() if err != nil { - log.Error(err, "failed to create or update ClientMount", "name", namespacedName) + if !apierrors.IsConflict(err) { + log.Error(err, "failed to create or update ClientMount", "name", namespacedName) + } + return err } if result == controllerutil.OperationResultCreated { diff --git a/controllers/nnf_access_controller_test.go b/controllers/nnf_access_controller_test.go index 9b33bfb02..7fd698bd2 100644 --- a/controllers/nnf_access_controller_test.go +++ b/controllers/nnf_access_controller_test.go @@ -1,5 +1,5 @@ /* - * Copyright 2021, 2022 Hewlett Packard Enterprise Development LP + * Copyright 2021-2023 Hewlett Packard Enterprise Development LP * Other additional copyright holders may be indicated within. * * The entirety of this work is licensed under the Apache License, @@ -69,7 +69,7 @@ var _ = Describe("Access Controller Test", func() { FileSystemType: "lustre", AllocationSets: []nnfv1alpha1.NnfStorageAllocationSetSpec{ { - Name: "MGTMDT", + Name: "mgtmdt", NnfStorageLustreSpec: nnfv1alpha1.NnfStorageLustreSpec{ FileSystemName: "MGTMDT", TargetType: "MGTMDT", @@ -82,7 +82,7 @@ var _ = Describe("Access Controller Test", func() { }, }, { - Name: "OST", + Name: "ost", NnfStorageLustreSpec: nnfv1alpha1.NnfStorageLustreSpec{ FileSystemName: "OST", TargetType: "OST", @@ -165,8 +165,11 @@ var _ = Describe("Access Controller Test", func() { } By("Set NNF Access Desired State to unmounted") - access.Spec.DesiredState = "unmounted" - Expect(k8sClient.Update(context.TODO(), access)).To(Succeed()) + Eventually(func(g Gomega) error { + g.Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(access), access)).To(Succeed()) + access.Spec.DesiredState = "unmounted" + return k8sClient.Update(context.TODO(), access) + }).Should(Succeed()) By("Verify NNF Access goes Ready in unmounted state") Eventually(func(g Gomega) bool { diff --git a/controllers/nnf_clientmount_controller.go b/controllers/nnf_clientmount_controller.go index d5557dac3..8a8b3198c 100644 --- a/controllers/nnf_clientmount_controller.go +++ b/controllers/nnf_clientmount_controller.go @@ -1,5 +1,5 @@ /* - * Copyright 2021, 2022 Hewlett Packard Enterprise Development LP + * Copyright 2021-2023 Hewlett Packard Enterprise Development LP * Other additional copyright holders may be indicated within. * * The entirety of this work is licensed under the Apache License, @@ -21,7 +21,6 @@ package controllers import ( "context" - "fmt" "os" "runtime" "strings" @@ -79,23 +78,23 @@ func (r *NnfClientMountReconciler) Reconcile(ctx context.Context, req ctrl.Reque // on deleted requests. return ctrl.Result{}, client.IgnoreNotFound(err) } + // Create a status updater that handles the call to status().Update() if any of the fields + // in clientMount.Status change + statusUpdater := updater.NewStatusUpdater[*dwsv1alpha2.ClientMountStatus](clientMount) + defer func() { err = statusUpdater.CloseWithStatusUpdate(ctx, r.Client.Status(), err) }() + defer func() { clientMount.Status.SetResourceErrorAndLog(err, log) }() // Ensure the NNF Storage Service is running prior to taking any action. 
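manageClientMounts above stops logging update conflicts and simply returns them, and its callers in mount() and unmount() treat apierrors.IsConflict as a normal requeue rather than a reconcile error. The same pattern is reduced to a sketch here; the helper name and the createOrUpdate argument are hypothetical, while the IsConflict handling mirrors the hunks above.

package controllers

import (
	"github.com/go-logr/logr"
	apierrors "k8s.io/apimachinery/pkg/api/errors"
	ctrl "sigs.k8s.io/controller-runtime"

	dwsv1alpha2 "github.com/HewlettPackard/dws/api/v1alpha2"
)

// reconcileWithConflictRetry runs a create-or-update style operation and
// treats optimistic-concurrency conflicts as a benign requeue rather than a
// reconcile error.
func reconcileWithConflictRetry(createOrUpdate func() error, log logr.Logger) (*ctrl.Result, error) {
	if err := createOrUpdate(); err != nil {
		// A conflict just means another writer updated the resource first;
		// requeue and retry against the refreshed resource version.
		if apierrors.IsConflict(err) {
			return &ctrl.Result{}, nil
		}

		log.Error(err, "failed to create or update resource")
		return nil, dwsv1alpha2.NewResourceError("unable to create or update resource").WithError(err)
	}

	return nil, nil
}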
ss := nnf.NewDefaultStorageService() storageService := &sf.StorageServiceV150StorageService{} if err := ss.StorageServiceIdGet(ss.Id(), storageService); err != nil { - return ctrl.Result{}, err + return ctrl.Result{}, dwsv1alpha2.NewResourceError("unable to get redfish storage service status").WithError(err).WithMajor() } if storageService.Status.State != sf.ENABLED_RST { return ctrl.Result{RequeueAfter: 1 * time.Second}, nil } - // Create a status updater that handles the call to status().Update() if any of the fields - // in clientMount.Status change - statusUpdater := updater.NewStatusUpdater[*dwsv1alpha2.ClientMountStatus](clientMount) - defer func() { err = statusUpdater.CloseWithStatusUpdate(ctx, r.Client.Status(), err) }() - // Handle cleanup if the resource is being deleted if !clientMount.GetDeletionTimestamp().IsZero() { if !controllerutil.ContainsFinalizer(clientMount, finalizerNnfClientMount) { @@ -152,7 +151,7 @@ func (r *NnfClientMountReconciler) Reconcile(ctx context.Context, req ctrl.Reque clientMount.Status.Error = nil if err := r.changeMountAll(ctx, clientMount, clientMount.Spec.DesiredState); err != nil { - resourceError := dwsv1alpha2.NewResourceError("Mount/Unmount failed", err) + resourceError := dwsv1alpha2.NewResourceError("mount/unmount failed").WithError(err) log.Info(resourceError.Error()) clientMount.Status.Error = resourceError @@ -176,7 +175,7 @@ func (r *NnfClientMountReconciler) changeMountAll(ctx context.Context, clientMou case dwsv1alpha2.ClientMountStateUnmounted: err = r.changeMount(ctx, mount, false, log) default: - return fmt.Errorf("Invalid desired state %s", state) + return dwsv1alpha2.NewResourceError("invalid desired state %s", state).WithFatal() } if err != nil { @@ -198,7 +197,7 @@ func (r *NnfClientMountReconciler) changeMount(ctx context.Context, clientMountI if os.Getenv("ENVIRONMENT") == "kind" { if shouldMount { if err := os.MkdirAll(clientMountInfo.MountPath, 0755); err != nil { - return dwsv1alpha2.NewResourceError(fmt.Sprintf("Make directory failed: %s", clientMountInfo.MountPath), err) + return dwsv1alpha2.NewResourceError("make directory failed: %s", clientMountInfo.MountPath).WithError(err).WithMajor() } log.Info("Fake mounted file system", "Mount path", clientMountInfo.MountPath) @@ -209,7 +208,7 @@ func (r *NnfClientMountReconciler) changeMount(ctx context.Context, clientMountI } if err := os.RemoveAll(clientMountInfo.MountPath); err != nil { - return dwsv1alpha2.NewResourceError(fmt.Sprintf("Remove directory failed: %s", clientMountInfo.MountPath), err) + return dwsv1alpha2.NewResourceError("remove directory failed: %s", clientMountInfo.MountPath).WithError(err).WithMajor() } log.Info("Fake unmounted file system", "Mount path", clientMountInfo.MountPath) @@ -217,7 +216,7 @@ func (r *NnfClientMountReconciler) changeMount(ctx context.Context, clientMountI if clientMountInfo.SetPermissions { if err := os.Chown(clientMountInfo.MountPath, int(clientMountInfo.UserID), int(clientMountInfo.GroupID)); err != nil { - return dwsv1alpha2.NewResourceError(fmt.Sprintf("Chown failed: %s", clientMountInfo.MountPath), err) + return dwsv1alpha2.NewResourceError("chown failed: %s", clientMountInfo.MountPath).WithError(err).WithMajor() } } @@ -248,18 +247,18 @@ func (r *NnfClientMountReconciler) changeMount(ctx context.Context, clientMountI if !testEnv { if err := os.MkdirAll(mountPath, 0755); err != nil { - return dwsv1alpha2.NewResourceError(fmt.Sprintf("Make directory failed: %s", mountPath), err) + return dwsv1alpha2.NewResourceError("make 
directory failed: %s", mountPath).WithError(err).WithMajor() } } if err := mounter.Mount(mountSource, mountPath, "lustre", nil); err != nil { - return err + return dwsv1alpha2.NewResourceError("unable to mount file system").WithError(err).WithMajor() } } } else { if !isNotMountPoint { if err := mounter.Unmount(mountPath); err != nil { - return err + return dwsv1alpha2.NewResourceError("unable to unmount file system").WithError(err).WithMajor() } } } @@ -279,7 +278,7 @@ func (r *NnfClientMountReconciler) changeMount(ctx context.Context, clientMountI allocationStatus := nodeStorage.Status.Allocations[clientMountInfo.Device.DeviceReference.Data] fileShare, err := r.getFileShare(allocationStatus.FileSystem.ID, allocationStatus.FileShare.ID) if err != nil { - return dwsv1alpha2.NewResourceError("Could not get file share", err).WithFatal() + return dwsv1alpha2.NewResourceError("could not get file share").WithError(err).WithMajor() } if shouldMount { @@ -290,11 +289,11 @@ func (r *NnfClientMountReconciler) changeMount(ctx context.Context, clientMountI fileShare, err = r.updateFileShare(allocationStatus.FileSystem.ID, fileShare) if err != nil { - return dwsv1alpha2.NewResourceError("Could not update file share", err) + return dwsv1alpha2.NewResourceError("could not update file share").WithError(err).WithMajor() } default: - return dwsv1alpha2.NewResourceError(fmt.Sprintf("Invalid device type %s", clientMountInfo.Device.Type), nil).WithFatal() + return dwsv1alpha2.NewResourceError("invalid device type %s", clientMountInfo.Device.Type).WithFatal() } if shouldMount { diff --git a/controllers/nnf_node_storage_controller.go b/controllers/nnf_node_storage_controller.go index a807b7473..f036436bf 100644 --- a/controllers/nnf_node_storage_controller.go +++ b/controllers/nnf_node_storage_controller.go @@ -1,5 +1,5 @@ /* - * Copyright 2021, 2022 Hewlett Packard Enterprise Development LP + * Copyright 2021-2023 Hewlett Packard Enterprise Development LP * Other additional copyright holders may be indicated within. * * The entirety of this work is licensed under the Apache License, @@ -22,7 +22,6 @@ package controllers import ( "context" "crypto/md5" - "errors" "fmt" "net/http" "os" @@ -82,7 +81,7 @@ type NnfNodeStorageReconciler struct { // For more details, check Reconcile and its Result here: // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.7.2/pkg/reconcile func (r *NnfNodeStorageReconciler) Reconcile(ctx context.Context, req ctrl.Request) (res ctrl.Result, err error) { - + log := r.Log.WithValues("NnfNodeStorage", req.NamespacedName) metrics.NnfNodeStorageReconcilesTotal.Inc() nodeStorage := &nnfv1alpha1.NnfNodeStorage{} @@ -114,6 +113,7 @@ func (r *NnfNodeStorageReconciler) Reconcile(ctx context.Context, req ctrl.Reque // the r.Update() statusUpdater := updater.NewStatusUpdater[*nnfv1alpha1.NnfNodeStorageStatus](nodeStorage) defer func() { err = statusUpdater.CloseWithUpdate(ctx, r, err) }() + defer func() { nodeStorage.Status.SetResourceErrorAndLog(err, log) }() // Check if the object is being deleted. Deletion is carefully coordinated around // the NNF resources being managed by this NNF Node Storage resource. For a @@ -151,7 +151,7 @@ func (r *NnfNodeStorageReconciler) Reconcile(ctx context.Context, req ctrl.Reque } // First time setup requires programming of the storage status such that the resource - // is labeled as "Starting" and all Conditions are initialized. After this is done, + // is labeled as "Starting". 
After this is done, // the resource obtains a finalizer to manage the resource lifetime. if !controllerutil.ContainsFinalizer(nodeStorage, finalizerNnfNodeStorage) { controllerutil.AddFinalizer(nodeStorage, finalizerNnfNodeStorage) @@ -173,7 +173,6 @@ func (r *NnfNodeStorageReconciler) Reconcile(ctx context.Context, req ctrl.Reque for i := range nodeStorage.Status.Allocations { allocation := &nodeStorage.Status.Allocations[i] - allocation.Conditions = nnfv1alpha1.NewConditions() allocation.StoragePool.Status = nnfv1alpha1.ResourceStarting allocation.StorageGroup.Status = nnfv1alpha1.ResourceStarting allocation.FileSystem.Status = nnfv1alpha1.ResourceStarting @@ -183,14 +182,12 @@ func (r *NnfNodeStorageReconciler) Reconcile(ctx context.Context, req ctrl.Reque return ctrl.Result{}, nil } - nodeStorage.Status.Error = nil - // Loop through each allocation and create the storage for i := 0; i < nodeStorage.Spec.Count; i++ { // Allocate physical storage result, err := r.allocateStorage(nodeStorage, i) if err != nil { - return ctrl.Result{}, err + return ctrl.Result{}, dwsv1alpha2.NewResourceError("unable to allocate NVMe namespaces for allocation %v", i).WithError(err).WithMajor() } if result != nil { return *result, nil @@ -199,7 +196,7 @@ func (r *NnfNodeStorageReconciler) Reconcile(ctx context.Context, req ctrl.Reque // Create a block device in /dev that is accessible on the Rabbit node result, err = r.createBlockDevice(ctx, nodeStorage, i) if err != nil { - return ctrl.Result{}, err + return ctrl.Result{}, dwsv1alpha2.NewResourceError("unable to attache NVMe namespace to Rabbit node for allocation %v", i).WithError(err).WithMajor() } if result != nil { return *result, nil @@ -208,7 +205,7 @@ func (r *NnfNodeStorageReconciler) Reconcile(ctx context.Context, req ctrl.Reque // Format the block device from the Rabbit with a file system (if needed) result, err = r.formatFileSystem(ctx, nodeStorage, i) if err != nil { - return ctrl.Result{}, err + return ctrl.Result{}, dwsv1alpha2.NewResourceError("unable to format file system for allocation %v", i).WithError(err).WithMajor() } if result != nil { return *result, nil @@ -223,7 +220,7 @@ func (r *NnfNodeStorageReconciler) Reconcile(ctx context.Context, req ctrl.Reque } if err := r.setLustreOwnerGroup(nodeStorage); err != nil { - return ctrl.Result{}, err + return ctrl.Result{}, dwsv1alpha2.NewResourceError("unable to set owner and group for file system").WithError(err).WithMajor() } nodeStorage.Status.OwnerGroupStatus = nnfv1alpha1.ResourceReady @@ -239,17 +236,12 @@ func (r *NnfNodeStorageReconciler) allocateStorage(nodeStorage *nnfv1alpha1.NnfN allocationStatus := &nodeStorage.Status.Allocations[index] - condition := &allocationStatus.Conditions[nnfv1alpha1.ConditionIndexCreateStoragePool] - if len(allocationStatus.StoragePool.ID) == 0 { - condition.LastTransitionTime = metav1.Now() - condition.Status = metav1.ConditionTrue - } - storagePoolID := fmt.Sprintf("%s-%d", nodeStorage.Name, index) sp, err := r.createStoragePool(ss, storagePoolID, nodeStorage.Spec.Capacity) if err != nil { - updateError(condition, &allocationStatus.StoragePool, err) - return r.handleCreateError(nodeStorage, "could not create storage pool", err) + allocationStatus.StoragePool.Status = nnfv1alpha1.ResourceFailed + return &ctrl.Result{}, dwsv1alpha2.NewResourceError("could not create storage pool").WithError(err).WithMajor() + } allocationStatus.StoragePool.Status = nnfv1alpha1.ResourceStatus(sp.Status) @@ -260,9 +252,6 @@ func (r *NnfNodeStorageReconciler) 
allocateStorage(nodeStorage *nnfv1alpha1.NnfN if len(allocationStatus.StoragePool.ID) == 0 { log.Info("Created storage pool", "Id", sp.Id) allocationStatus.StoragePool.ID = sp.Id - condition.Status = metav1.ConditionFalse - condition.Reason = nnfv1alpha1.ConditionSuccess - condition.Message = "" return &ctrl.Result{}, nil } @@ -275,24 +264,16 @@ func (r *NnfNodeStorageReconciler) createBlockDevice(ctx context.Context, nodeSt ss := nnf.NewDefaultStorageService() allocationStatus := &nodeStorage.Status.Allocations[index] - condition := &allocationStatus.Conditions[nnfv1alpha1.ConditionIndexCreateStorageGroup] // Create a Storage Group if none is currently present. Recall that a Storage Group // is a mapping from the Storage Pool to a Server Endpoint. Establishing a Storage // Group makes block storage available on the server, which itself is a prerequisite to // any file system built on top of the block storage. - if len(allocationStatus.StorageGroup.ID) == 0 { - condition.LastTransitionTime = metav1.Now() - condition.Status = metav1.ConditionTrue - } // Retrieve the collection of endpoints for us to map serverEndpointCollection := &sf.EndpointCollectionEndpointCollection{} if err := ss.StorageServiceIdEndpointsGet(ss.Id(), serverEndpointCollection); err != nil { - nodeStorage.Status.Error = dwsv1alpha2.NewResourceError("Could not get service endpoint", err).WithFatal() - log.Info(nodeStorage.Status.Error.Error()) - - return &ctrl.Result{Requeue: true}, nil + return nil, dwsv1alpha2.NewResourceError("could not get service endpoint").WithError(err).WithFatal() } // Get the Storage resource to map between compute node name and @@ -305,10 +286,7 @@ func (r *NnfNodeStorageReconciler) createBlockDevice(ctx context.Context, nodeSt storage := &dwsv1alpha2.Storage{} err := r.Get(ctx, namespacedName, storage) if err != nil { - nodeStorage.Status.Error = dwsv1alpha2.NewResourceError("Could not read storage resource", err) - log.Info(nodeStorage.Status.Error.Error()) - - return &ctrl.Result{Requeue: true}, nil + return nil, dwsv1alpha2.NewResourceError("could not read storage resource").WithError(err) } // Build a list of all nodes with access to the storage @@ -346,10 +324,7 @@ func (r *NnfNodeStorageReconciler) createBlockDevice(ctx context.Context, nodeSt } if err := r.deleteStorageGroup(ss, storageGroupID); err != nil { - nodeStorage.Status.Error = dwsv1alpha2.NewResourceError("Could not delete storage group", err).WithFatal() - log.Info(nodeStorage.Status.Error.Error()) - - return &ctrl.Result{Requeue: true}, nil + return nil, dwsv1alpha2.NewResourceError("could not delete storage group").WithError(err).WithMajor() } log.Info("Deleted storage group", "storageGroupID", storageGroupID) @@ -361,10 +336,7 @@ func (r *NnfNodeStorageReconciler) createBlockDevice(ctx context.Context, nodeSt endPoint, err := r.getEndpoint(ss, endpointID) if err != nil { - nodeStorage.Status.Error = dwsv1alpha2.NewResourceError("Could not get endpoint", err).WithFatal() - log.Info(nodeStorage.Status.Error.Error()) - - return &ctrl.Result{Requeue: true}, nil + return nil, dwsv1alpha2.NewResourceError("could not get endpoint").WithError(err).WithFatal() } // Skip the endpoints that are not ready @@ -374,8 +346,8 @@ func (r *NnfNodeStorageReconciler) createBlockDevice(ctx context.Context, nodeSt sg, err := r.createStorageGroup(ss, storageGroupID, allocationStatus.StoragePool.ID, endpointID) if err != nil { - updateError(condition, &allocationStatus.StorageGroup, err) - return r.handleCreateError(nodeStorage, "could not create 
storage group", err) + allocationStatus.StorageGroup.Status = nnfv1alpha1.ResourceFailed + return &ctrl.Result{}, dwsv1alpha2.NewResourceError("could not create storage group").WithError(err).WithMajor() } allocationStatus.StorageGroup.Status = nnfv1alpha1.ResourceStatus(sg.Status) @@ -385,10 +357,6 @@ func (r *NnfNodeStorageReconciler) createBlockDevice(ctx context.Context, nodeSt if len(allocationStatus.StorageGroup.ID) == 0 { log.Info("Created storage group", "Id", storageGroupID) allocationStatus.StorageGroup.ID = sg.Id - condition.LastTransitionTime = metav1.Now() - condition.Status = metav1.ConditionFalse // we are finished with this state - condition.Reason = nnfv1alpha1.ConditionSuccess - condition.Message = "" return &ctrl.Result{}, nil } @@ -414,29 +382,22 @@ func (r *NnfNodeStorageReconciler) formatFileSystem(ctx context.Context, nodeSto // Find the Rabbit node endpoint to collect LNet information endpoint, err := r.getEndpoint(ss, os.Getenv("RABBIT_NODE")) if err != nil { - nnfv1alpha1.SetGetResourceFailureCondition(allocationStatus.Conditions, err) - nodeStorage.Status.Error = dwsv1alpha2.NewResourceError("Could not get endpoint", err).WithFatal() + nodeStorage.Status.Error = dwsv1alpha2.NewResourceError("could not get endpoint").WithError(err).WithFatal() log.Info(nodeStorage.Status.Error.Error()) - return &ctrl.Result{}, nil + return &ctrl.Result{}, dwsv1alpha2.NewResourceError("could not get endpoint").WithError(err).WithMajor() } nnfStorageProfile, err := getPinnedStorageProfileFromLabel(ctx, r.Client, nodeStorage) if err != nil { - nnfv1alpha1.SetGetResourceFailureCondition(allocationStatus.Conditions, err) - nodeStorage.Status.Error = dwsv1alpha2.NewResourceError("Could not find pinned storage profile", err).WithFatal() + allocationStatus.FileSystem.Status = nnfv1alpha1.ResourceFailed + nodeStorage.Status.Error = dwsv1alpha2.NewResourceError("could not find pinned storage profile").WithError(err).WithFatal() log.Info(nodeStorage.Status.Error.Error()) return &ctrl.Result{}, nil } // Create the FileSystem - condition := &allocationStatus.Conditions[nnfv1alpha1.ConditionIndexCreateFileSystem] - if len(allocationStatus.FileSystem.ID) == 0 { - condition.Status = metav1.ConditionTrue - condition.LastTransitionTime = metav1.Now() - } - oem := nnfserver.FileSystemOem{ Type: nodeStorage.Spec.FileSystemType, } @@ -517,9 +478,9 @@ func (r *NnfNodeStorageReconciler) formatFileSystem(ctx context.Context, nodeSto fileSystemID := fmt.Sprintf("%s-%d", nodeStorage.Name, index) fs, err := r.createFileSystem(ss, fileSystemID, allocationStatus.StoragePool.ID, oem) if err != nil { - updateError(condition, &allocationStatus.FileSystem, err) + allocationStatus.FileSystem.Status = nnfv1alpha1.ResourceFailed - return r.handleCreateError(nodeStorage, "could not create file system", err) + return &ctrl.Result{}, dwsv1alpha2.NewResourceError("could not create file system").WithError(err).WithMajor() } allocationStatus.FileSystem.Status = nnfv1alpha1.ResourceReady @@ -529,21 +490,11 @@ func (r *NnfNodeStorageReconciler) formatFileSystem(ctx context.Context, nodeSto if len(allocationStatus.FileSystem.ID) == 0 { log.Info("Created filesystem", "Id", fs.Id) allocationStatus.FileSystem.ID = fs.Id - condition.LastTransitionTime = metav1.Now() - condition.Status = metav1.ConditionFalse - condition.Reason = nnfv1alpha1.ConditionSuccess - condition.Message = "" return &ctrl.Result{}, nil } // Create the FileShare - condition = &allocationStatus.Conditions[nnfv1alpha1.ConditionIndexCreateFileShare] - if 
len(allocationStatus.FileShare.ID) == 0 { - condition.Status = metav1.ConditionTrue - condition.LastTransitionTime = metav1.Now() - } - fileShareID := fmt.Sprintf("%s-%d", nodeStorage.Name, index) mountPath := "" @@ -560,11 +511,8 @@ func (r *NnfNodeStorageReconciler) formatFileSystem(ctx context.Context, nodeSto } else { volumeGroupName, logicalVolumeName, err = r.lvmNames(ctx, nodeStorage, index) if err != nil { - updateError(condition, &allocationStatus.FileShare, err) - nodeStorage.Status.Error = dwsv1alpha2.NewResourceError("could not get VG/LV names", err).WithFatal() - log.Info(nodeStorage.Status.Error.Error()) - - return &ctrl.Result{RequeueAfter: time.Minute * 2}, nil + allocationStatus.FileShare.Status = nnfv1alpha1.ResourceFailed + return &ctrl.Result{}, dwsv1alpha2.NewResourceError("could not get VG/LV names").WithError(err).WithFatal() } shareOptions["volumeGroupName"] = volumeGroupName @@ -575,8 +523,8 @@ func (r *NnfNodeStorageReconciler) formatFileSystem(ctx context.Context, nodeSto sh, err = r.createFileShare(ss, fileShareID, allocationStatus.FileSystem.ID, os.Getenv("RABBIT_NODE"), mountPath, shareOptions) if err != nil { - updateError(condition, &allocationStatus.FileShare, err) - return r.handleCreateError(nodeStorage, "could not create file share", err) + allocationStatus.FileShare.Status = nnfv1alpha1.ResourceFailed + return &ctrl.Result{}, dwsv1alpha2.NewResourceError("could not create file share").WithError(err).WithMajor() } nid := "" @@ -599,10 +547,6 @@ func (r *NnfNodeStorageReconciler) formatFileSystem(ctx context.Context, nodeSto allocationStatus.FileShare.ID = sh.Id allocationStatus.VolumeGroup = volumeGroupName allocationStatus.LogicalVolume = logicalVolumeName - condition.LastTransitionTime = metav1.Now() - condition.Status = metav1.ConditionFalse - condition.Reason = nnfv1alpha1.ConditionSuccess - condition.Message = "" return &ctrl.Result{}, nil } @@ -668,11 +612,6 @@ func (r *NnfNodeStorageReconciler) deleteStorage(nodeStorage *nnfv1alpha1.NnfNod return nil, nil } - condition := &allocationStatus.Conditions[nnfv1alpha1.ConditionIndexDeleteStoragePool] - - condition.Status = metav1.ConditionTrue - condition.LastTransitionTime = metav1.Now() - log.Info("Deleting storage pool", "Id", allocationStatus.StoragePool.ID) err := r.deleteStoragePool(ss, allocationStatus.StoragePool.ID) @@ -682,8 +621,8 @@ func (r *NnfNodeStorageReconciler) deleteStorage(nodeStorage *nnfv1alpha1.NnfNod // If the error is from a 404 error, then there's nothing to clean up and we // assume everything has been deleted if !ok || ecErr.StatusCode() != http.StatusNotFound { - updateError(condition, &allocationStatus.FileShare, err) - nodeStorage.Status.Error = dwsv1alpha2.NewResourceError("Could not delete storage pool", err).WithFatal() + allocationStatus.FileShare.Status = nnfv1alpha1.ResourceFailed + nodeStorage.Status.Error = dwsv1alpha2.NewResourceError("could not delete storage pool").WithError(err).WithFatal() log.Info(nodeStorage.Status.Error.Error()) return &ctrl.Result{Requeue: true}, nil @@ -730,7 +669,7 @@ func (r *NnfNodeStorageReconciler) lvmNames(ctx context.Context, nodeStorage *nn }, } if err := r.Get(ctx, client.ObjectKeyFromObject(workflow), workflow); err != nil { - return "", "", dwsv1alpha2.NewResourceError("could get workflow", err) + return "", "", dwsv1alpha2.NewResourceError("could get workflow").WithError(err) } return fmt.Sprintf("%s_%s_%d", workflow.GetUID(), directiveIndex, index), "lv", nil @@ -763,18 +702,18 @@ func (r *NnfNodeStorageReconciler) 
createStoragePool(ss nnf.StorageServiceApi, i } if err := ss.StorageServiceIdStoragePoolIdPut(ss.Id(), id, sp); err != nil { + resourceErr := dwsv1alpha2.NewResourceError("could not allocate storage pool").WithError(err) ecErr, ok := err.(*ec.ControllerError) if ok { - resourceErr := dwsv1alpha2.NewResourceError("", err) switch ecErr.Cause() { case "Insufficient capacity available": - return nil, resourceErr.WithUserMessage("Insufficient capacity available").WithFatal() + return nil, resourceErr.WithUserMessage("insufficient capacity available").WithWLM().WithFatal() default: - return nil, err + return nil, resourceErr } } - return nil, err + return nil, resourceErr } return sp, nil @@ -926,36 +865,6 @@ func (r *NnfNodeStorageReconciler) getFileSystem(ss nnf.StorageServiceApi, id st return fs, nil } -func (r *NnfNodeStorageReconciler) handleCreateError(storage *nnfv1alpha1.NnfNodeStorage, message string, err error) (*ctrl.Result, error) { - - resourceError := dwsv1alpha2.NewResourceError(message, err) - defer func() { - r.Log.WithValues("NnfNodeStorage", client.ObjectKeyFromObject(storage).String()).Info(resourceError.Error()) - storage.Status.Error = resourceError - }() - - controllerError := &ec.ControllerError{} - if errors.As(err, &controllerError) && controllerError.IsRetryable() { - return &ctrl.Result{RequeueAfter: controllerError.RetryDelay()}, nil - } - - resourceError = resourceError.WithFatal() - - // If this is really Fatal, we should not retry. But not all of nnf-ec supports the - // retryable classification of errors. Instead we mark the error as Fatal() but continue - // to retry with a modest delay. If the resource creation error occurs perpetually, an - // external entity should timeout the operation and therefore prevent future create attempts. - // Once nnf-ec has correctly classified all errors, there should be no need to requeue. - - return &ctrl.Result{RequeueAfter: time.Minute}, nil -} - -func updateError(condition *metav1.Condition, status *nnfv1alpha1.NnfResourceStatus, err error) { - status.Status = nnfv1alpha1.ResourceFailed - condition.Reason = nnfv1alpha1.ConditionFailed - condition.Message = err.Error() -} - // SetupWithManager sets up the controller with the Manager. func (r *NnfNodeStorageReconciler) SetupWithManager(mgr ctrl.Manager) error { // nnf-ec is not thread safe, so we are limited to a single reconcile thread. diff --git a/controllers/nnf_persistentstorageinstance_controller.go b/controllers/nnf_persistentstorageinstance_controller.go index f355bd47b..83f9307ec 100644 --- a/controllers/nnf_persistentstorageinstance_controller.go +++ b/controllers/nnf_persistentstorageinstance_controller.go @@ -1,5 +1,5 @@ /* - * Copyright 2022 Hewlett Packard Enterprise Development LP + * Copyright 2022, 2023 Hewlett Packard Enterprise Development LP * Other additional copyright holders may be indicated within. 
* * The entirety of this work is licensed under the Apache License, @@ -147,6 +147,22 @@ func (r *PersistentStorageReconciler) Reconcile(ctx context.Context, req ctrl.Re return ctrl.Result{Requeue: true}, nil } + // If this PersistentStorageInstance is for a standalone MGT, add a label so it can be easily found + if argsMap["type"] == "lustre" && len(pinnedProfile.Data.LustreStorage.StandaloneMGTPoolName) > 0 { + labels := persistentStorage.GetLabels() + if _, ok := labels[nnfv1alpha1.StandaloneMGTLabel]; !ok { + labels[nnfv1alpha1.StandaloneMGTLabel] = pinnedProfile.Data.LustreStorage.StandaloneMGTPoolName + persistentStorage.SetLabels(labels) + if err := r.Update(ctx, persistentStorage); err != nil { + if !apierrors.IsConflict(err) { + return ctrl.Result{}, err + } + + return ctrl.Result{Requeue: true}, nil + } + } + } + // Create the Servers resource servers, err := r.createServers(ctx, persistentStorage) if err != nil { diff --git a/controllers/nnf_port_manager_controller.go b/controllers/nnf_port_manager_controller.go index fb61d0879..1ca669090 100644 --- a/controllers/nnf_port_manager_controller.go +++ b/controllers/nnf_port_manager_controller.go @@ -1,5 +1,5 @@ /* - * Copyright 2022 Hewlett Packard Enterprise Development LP + * Copyright 2023 Hewlett Packard Enterprise Development LP * Other additional copyright holders may be indicated within. * * The entirety of this work is licensed under the Apache License, @@ -21,6 +21,7 @@ package controllers import ( "context" + "time" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" @@ -57,15 +58,12 @@ type AllocationStatus = nnfv1alpha1.NnfPortManagerAllocationStatus // Reconcile is part of the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. -// TODO(user): Modify the Reconcile function to compare the state specified by -// the NnfPortManager object against the actual cluster state, and then -// perform operations to make the cluster state reflect the state specified by -// the user. // // For more details, check Reconcile and its Result here: // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.14.1/pkg/reconcile func (r *NnfPortManagerReconciler) Reconcile(ctx context.Context, req ctrl.Request) (res ctrl.Result, err error) { log := log.FromContext(ctx) + unsatisfiedRequests := 0 mgr := &nnfv1alpha1.NnfPortManager{} if err := r.Get(ctx, req.NamespacedName, mgr); err != nil { @@ -96,19 +94,22 @@ func (r *NnfPortManagerReconciler) Reconcile(ctx context.Context, req ctrl.Reque } // Free any unused allocations - r.cleanupUnusedAllocations(log, mgr) + r.cleanupUnusedAllocations(log, mgr, config.Spec.PortsCooldownInSeconds) // For each "requester" in the mgr.Spec.Allocations, try to satisfy the request by // allocating the desired ports. 
for _, spec := range mgr.Spec.Allocations { + var ports []uint16 + var status nnfv1alpha1.NnfPortManagerAllocationStatusStatus + var allocationStatus *nnfv1alpha1.NnfPortManagerAllocationStatus - // If the specification is already included in the allocations, continue - if r.isAllocated(mgr, spec) { + // If the specification is already included in the allocations and InUse, continue + allocationStatus = r.findAllocationStatus(mgr, spec) + if allocationStatus != nil && allocationStatus.Status == nnfv1alpha1.NnfPortManagerAllocationStatusInUse { continue } - var ports []uint16 - var status nnfv1alpha1.NnfPortManagerAllocationStatusStatus + // Determine if the port manager is ready and find a free port if mgr.Status.Status != nnfv1alpha1.NnfPortManagerStatusReady { ports, status = nil, nnfv1alpha1.NnfPortManagerAllocationStatusInvalidConfiguration } else { @@ -116,19 +117,40 @@ func (r *NnfPortManagerReconciler) Reconcile(ctx context.Context, req ctrl.Reque } log.Info("Allocation", "requester", spec.Requester, "count", spec.Count, "ports", ports, "status", status) - allocationStatus := AllocationStatus{ - Requester: &corev1.ObjectReference{}, - Ports: ports, - Status: status, + + // Port could not be allocated - try again next time + if status != nnfv1alpha1.NnfPortManagerAllocationStatusInUse { + unsatisfiedRequests++ + log.Info("Allocation unsatisfied", "requester", spec.Requester, "count", spec.Count, "ports", ports, "status", status) } - spec.Requester.DeepCopyInto(allocationStatus.Requester) + // Create a new entry if not already present, otherwise update + if allocationStatus == nil { + allocationStatus := AllocationStatus{ + Requester: &corev1.ObjectReference{}, + Ports: ports, + Status: status, + } + + spec.Requester.DeepCopyInto(allocationStatus.Requester) + + if mgr.Status.Allocations == nil { + mgr.Status.Allocations = make([]nnfv1alpha1.NnfPortManagerAllocationStatus, 0) + } - if mgr.Status.Allocations == nil { - mgr.Status.Allocations = make([]nnfv1alpha1.NnfPortManagerAllocationStatus, 0) + mgr.Status.Allocations = append(mgr.Status.Allocations, allocationStatus) + } else { + allocationStatus.Status = status + allocationStatus.Ports = ports } + } - mgr.Status.Allocations = append(mgr.Status.Allocations, allocationStatus) + // If there aren't enough free ports, then requeue so that something eventually frees up + if unsatisfiedRequests > 0 { + log.Info("Unsatisfied requests are pending -- requeuing") + return ctrl.Result{ + RequeueAfter: time.Duration(config.Spec.PortsCooldownInSeconds+1) * time.Second, + }, nil } return res, nil @@ -137,7 +159,7 @@ func (r *NnfPortManagerReconciler) Reconcile(ctx context.Context, req ctrl.Reque // isAllocationNeeded returns true if the provided Port Allocation Status has a matching value // requester in the specification, and false otherwise. 
func (r *NnfPortManagerReconciler) isAllocationNeeded(mgr *nnfv1alpha1.NnfPortManager, status *AllocationStatus) bool { - if status.Status != nnfv1alpha1.NnfPortManagerAllocationStatusInUse { + if status.Status != nnfv1alpha1.NnfPortManagerAllocationStatusInUse && status.Status != nnfv1alpha1.NnfPortManagerAllocationStatusInsufficientResources { return false } @@ -154,28 +176,40 @@ func (r *NnfPortManagerReconciler) isAllocationNeeded(mgr *nnfv1alpha1.NnfPortMa return false } -func (r *NnfPortManagerReconciler) cleanupUnusedAllocations(log logr.Logger, mgr *nnfv1alpha1.NnfPortManager) { +func (r *NnfPortManagerReconciler) cleanupUnusedAllocations(log logr.Logger, mgr *nnfv1alpha1.NnfPortManager, cooldown int) { // Free unused allocations. This will check if the Status.Allocations exist in // the list of desired allocations in the Spec field and mark any unused allocations // as freed. - failedIndices := make([]int, 0) + allocsToRemove := make([]int, 0) for idx := range mgr.Status.Allocations { status := &mgr.Status.Allocations[idx] if !r.isAllocationNeeded(mgr, status) { - log.Info("Allocation unused", "requester", status.Requester, "status", status.Status) - if status.Status == nnfv1alpha1.NnfPortManagerAllocationStatusInUse { - status.Requester = nil - status.Status = nnfv1alpha1.NnfPortManagerAllocationStatusFree - } else if status.Status != nnfv1alpha1.NnfPortManagerAllocationStatusFree { - failedIndices = append(failedIndices, idx) + + // If there's no cooldown or the cooldown period has expired, remove it + // If no longer needed, set the allocation status to cooldown and record the unallocated time + now := metav1.Now() + if cooldown == 0 { + allocsToRemove = append(allocsToRemove, idx) + log.Info("Allocation unused - removing", "requester", status.Requester, "status", status.Status) + } else if status.Status == nnfv1alpha1.NnfPortManagerAllocationStatusCooldown { + period := now.Sub(status.TimeUnallocated.Time) + log.Info("Allocation unused - checking cooldown", "requester", status.Requester, "status", status.Status, "period", period, "time", status.TimeUnallocated.String()) + if period >= time.Duration(cooldown)*time.Second { + allocsToRemove = append(allocsToRemove, idx) + log.Info("Allocation unused - removing after cooldown", "requester", status.Requester, "status", status.Status) + } + } else if status.TimeUnallocated == nil { + status.TimeUnallocated = &now + status.Status = nnfv1alpha1.NnfPortManagerAllocationStatusCooldown + log.Info("Allocation unused -- cooldown set", "requester", status.Requester, "status", status.Status) } } } - for idx := range failedIndices { - failedIdx := failedIndices[len(failedIndices)-1-idx] // remove in reverse order + for idx := range allocsToRemove { + failedIdx := allocsToRemove[len(allocsToRemove)-1-idx] // remove in reverse order mgr.Status.Allocations = append(mgr.Status.Allocations[:failedIdx], mgr.Status.Allocations[failedIdx+1:]...) } } @@ -206,7 +240,8 @@ func (r *NnfPortManagerReconciler) findFreePorts(log logr.Logger, mgr *nnfv1alph portsInUse := make([]uint16, 0) for _, status := range mgr.Status.Allocations { - if status.Status == nnfv1alpha1.NnfPortManagerAllocationStatusInUse { + if status.Status == nnfv1alpha1.NnfPortManagerAllocationStatusInUse || + status.Status == nnfv1alpha1.NnfPortManagerAllocationStatusCooldown { portsInUse = append(portsInUse, status.Ports...) 
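// The port-manager hunks above give released ports a cooldown: an allocation that is no
// longer requested is first moved to Cooldown with TimeUnallocated recorded, and is only
// removed once PortsCooldownInSeconds has elapsed (or immediately when the cooldown is
// zero); ports in Cooldown still count as in use for findFreePorts. Below is a minimal,
// self-contained sketch of that lifecycle decision only -- the portState/allocation
// names are illustrative stand-ins, not the nnfv1alpha1 API.
package main

import (
	"fmt"
	"time"
)

type portState string

const (
	stateInUse    portState = "InUse"
	stateCooldown portState = "Cooldown"
	stateRemoved  portState = "Removed"
)

type allocation struct {
	state           portState
	timeUnallocated *time.Time
}

// nextState decides what happens to an allocation that is no longer requested:
// remove it immediately when no cooldown is configured, otherwise park it in
// Cooldown and remove it only after the cooldown period has expired.
func nextState(a allocation, cooldown time.Duration, now time.Time) portState {
	if cooldown == 0 {
		return stateRemoved
	}
	if a.state == stateCooldown && a.timeUnallocated != nil && now.Sub(*a.timeUnallocated) >= cooldown {
		return stateRemoved
	}
	return stateCooldown
}

func main() {
	released := time.Now().Add(-3 * time.Second)
	a := allocation{state: stateCooldown, timeUnallocated: &released}

	fmt.Println(nextState(a, 2*time.Second, time.Now()))                 // Removed: cooldown has expired
	fmt.Println(nextState(allocation{state: stateInUse}, 0, time.Now())) // Removed: no cooldown configured
}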
} } diff --git a/controllers/nnf_port_manager_controller_test.go b/controllers/nnf_port_manager_controller_test.go index dae0f0701..3c613baac 100644 --- a/controllers/nnf_port_manager_controller_test.go +++ b/controllers/nnf_port_manager_controller_test.go @@ -22,7 +22,9 @@ package controllers import ( "fmt" "reflect" + "time" + "github.com/google/uuid" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -37,33 +39,40 @@ import ( var _ = Context("NNF Port Manager Controller Setup", Ordered, func() { - var cfg *dwsv1alpha2.SystemConfiguration + var r = &NnfPortManagerReconciler{} // use this to access private reconciler methods + const portStart = 20 const portEnd = 29 - - BeforeAll(func() { - cfg = &dwsv1alpha2.SystemConfiguration{ - ObjectMeta: metav1.ObjectMeta{ - Name: "port-manager-system-config", - Namespace: corev1.NamespaceDefault, - }, - Spec: dwsv1alpha2.SystemConfigurationSpec{ - Ports: []intstr.IntOrString{ - intstr.FromString(fmt.Sprintf("%d-%d", portStart, portEnd)), - }, - }, - } - - Expect(k8sClient.Create(ctx, cfg)).To(Succeed()) - DeferCleanup(func() { Expect(k8sClient.Delete(ctx, cfg)).To(Succeed()) }) - }) + portTotal := portEnd - portStart + 1 Describe("NNF Port Manager Controller Test", func() { - + var cfg *dwsv1alpha2.SystemConfiguration var mgr *nnfv1alpha1.NnfPortManager - var r = &NnfPortManagerReconciler{} // use this to access private reconciler methods + portCooldown := 1 + + JustBeforeEach(func() { + cfg = &dwsv1alpha2.SystemConfiguration{ + ObjectMeta: metav1.ObjectMeta{ + Name: "port-manager-system-config", + Namespace: corev1.NamespaceDefault, + }, + Spec: dwsv1alpha2.SystemConfigurationSpec{ + Ports: []intstr.IntOrString{ + intstr.FromString(fmt.Sprintf("%d-%d", portStart, portEnd)), + }, + PortsCooldownInSeconds: portCooldown, + }, + } + Expect(k8sClient.Create(ctx, cfg)).To(Succeed()) + DeferCleanup(func() { + if cfg != nil { + Expect(k8sClient.Delete(ctx, cfg)).To(Succeed()) + Eventually(func() error { + return k8sClient.Get(ctx, client.ObjectKeyFromObject(cfg), cfg) + }).ShouldNot(Succeed()) + } + }) - BeforeEach(func() { mgr = &nnfv1alpha1.NnfPortManager{ ObjectMeta: metav1.ObjectMeta{ Name: "nnf-port-manager", @@ -78,14 +87,23 @@ var _ = Context("NNF Port Manager Controller Setup", Ordered, func() { Allocations: make([]nnfv1alpha1.NnfPortManagerAllocationSpec, 0), }, } - }) - - JustBeforeEach(func() { Expect(k8sClient.Create(ctx, mgr)).To(Succeed()) - DeferCleanup(func() { Expect(k8sClient.Delete(ctx, mgr)).To(Succeed()) }) + DeferCleanup(func() { + if mgr != nil { + Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(mgr), mgr)) + mgr.SetFinalizers([]string{}) + Expect(k8sClient.Update(ctx, mgr)).To(Succeed()) + Expect(k8sClient.Delete(ctx, mgr)).To(Succeed()) + Eventually(func() error { + return k8sClient.Get(ctx, client.ObjectKeyFromObject(mgr), mgr) + }).ShouldNot(Succeed()) + } + }) }) - reservePorts := func(mgr *nnfv1alpha1.NnfPortManager, name string, count int) { + // Submit an allocation and verify it has been accounted for - this doesn't mean the ports + // were successfully allocated, however. 
+ allocatePorts := func(mgr *nnfv1alpha1.NnfPortManager, name string, count int) []uint16 { By(fmt.Sprintf("Reserving %d ports for '%s'", count, name)) allocation := nnfv1alpha1.NnfPortManagerAllocationSpec{ @@ -106,10 +124,29 @@ var _ = Context("NNF Port Manager Controller Setup", Ordered, func() { return r.isAllocated(mgr, allocation) }).Should(BeTrue()) + status := r.findAllocationStatus(mgr, allocation) + return status.Ports + } + + // Submit an allocation and expect it to be successfully allocated (i.e. ports InUse) + reservePorts := func(mgr *nnfv1alpha1.NnfPortManager, name string, count int) []uint16 { + ports := allocatePorts(mgr, name, count) + + allocation := nnfv1alpha1.NnfPortManagerAllocationSpec{ + Requester: corev1.ObjectReference{Name: name}, + Count: count, + } + status := r.findAllocationStatus(mgr, allocation) Expect(status).ToNot(BeNil()) Expect(status.Ports).To(HaveLen(allocation.Count)) Expect(status.Status).To(Equal(nnfv1alpha1.NnfPortManagerAllocationStatusInUse)) + + return ports + } + + reservePortsAllowFail := func(mgr *nnfv1alpha1.NnfPortManager, name string, count int) []uint16 { + return allocatePorts(mgr, name, count) } releasePorts := func(mgr *nnfv1alpha1.NnfPortManager, name string) { @@ -130,55 +167,279 @@ var _ = Context("NNF Port Manager Controller Setup", Ordered, func() { }).Should(Succeed()) } - It("Reserves & removes a single port", func() { - const name = "single" - reservePorts(mgr, name, 1) - releasePorts(mgr, name) - }) + // Simple way to fire the reconciler to test the cooldown handling + // without having to reserve new ports. This is just to limit the scope + // of the test. + kickPortManager := func(mgr *nnfv1alpha1.NnfPortManager) { + By("Kicking port manager to force reconcile") - It("Reserves & removes all ports", func() { - const name = "all" - reservePorts(mgr, name, portEnd-portStart+1) - releasePorts(mgr, name) - }) + Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(mgr), mgr)).To(Succeed()) + finalizers := mgr.GetFinalizers() + finalizers = append(finalizers, "test-"+uuid.NewString()) + mgr.SetFinalizers(finalizers) + Eventually(func() error { + return k8sClient.Update(ctx, mgr) + }).Should(Succeed()) + } - It("Reserves from free list", func() { - const single = "single" - reservePorts(mgr, single, 1) + // Verify the number of allocations in the status allocation list that are InUse + verifyNumAllocations := func(mgr *nnfv1alpha1.NnfPortManager, status nnfv1alpha1.NnfPortManagerAllocationStatusStatus, count int) { + By(fmt.Sprintf("Verifying there are %d allocations with Status %s in the status allocation list", count, status)) - const remaining = "remaining" - reservePorts(mgr, remaining, portEnd-portStart) + Eventually(func() int { + statusCount := 0 + Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(mgr), mgr)).To(Succeed()) + for _, a := range mgr.Status.Allocations { + if a.Status == status { + statusCount++ + } + } + return statusCount + }).Should(Equal(count)) + } - releasePorts(mgr, single) + verifyNumAllocationsInUse := func(mgr *nnfv1alpha1.NnfPortManager, count int) { + verifyNumAllocations(mgr, nnfv1alpha1.NnfPortManagerAllocationStatusInUse, count) + } - reservePorts(mgr, "free", 1) - }) + verifyNumAllocationsCooldown := func(mgr *nnfv1alpha1.NnfPortManager, count int) { + verifyNumAllocations(mgr, nnfv1alpha1.NnfPortManagerAllocationStatusCooldown, count) + } - It("Fails with insufficient resources", func() { - const name = "all" - reservePorts(mgr, name, portEnd-portStart+1) + 
verifyNumAllocationsInsuffientResources := func(mgr *nnfv1alpha1.NnfPortManager, count int) { + verifyNumAllocations(mgr, nnfv1alpha1.NnfPortManagerAllocationStatusInsufficientResources, count) + } - allocation := nnfv1alpha1.NnfPortManagerAllocationSpec{ - Requester: corev1.ObjectReference{Name: "insufficient-resources"}, - Count: 1, - } + waitForCooldown := func(extra int) { + By(fmt.Sprintf("Waiting for cooldown (%ds)to expire", portCooldown)) + time.Sleep(time.Duration(portCooldown+extra) * time.Second) - Eventually(func() error { - Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(mgr), mgr)).To(Succeed()) - mgr.Spec.Allocations = append(mgr.Spec.Allocations, allocation) - return k8sClient.Update(ctx, mgr) - }).Should(Succeed()) + } - Eventually(func() bool { - Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(mgr), mgr)).To(Succeed()) - return r.isAllocated(mgr, allocation) - }).Should(BeTrue()) + When("the system configuration is missing", func() { + It("should have a status that indicates system configuration is not found", func() { + Expect(k8sClient.Delete(ctx, cfg)).To(Succeed()) + Eventually(func() error { + return k8sClient.Get(ctx, client.ObjectKeyFromObject(cfg), cfg) + }).ShouldNot(Succeed()) + cfg = nil + + kickPortManager(mgr) + + Eventually(func() nnfv1alpha1.NnfPortManagerStatusStatus { + k8sClient.Get(ctx, client.ObjectKeyFromObject(mgr), mgr) + return mgr.Status.Status + }).Should(Equal(nnfv1alpha1.NnfPortManagerStatusSystemConfigurationNotFound)) + }) + }) - status := r.findAllocationStatus(mgr, allocation) - Expect(status).ToNot(BeNil()) - Expect(status.Ports).To(BeEmpty()) - Expect(status.Status).To(Equal(nnfv1alpha1.NnfPortManagerAllocationStatusInsufficientResources)) + When("reserving ports with portCooldown", func() { + + BeforeEach(func() { + portCooldown = 2 + }) + + When("a single port is reserved and removed", func() { + It("should cooldown and then free up", func() { + const name = "single" + ports := reservePorts(mgr, name, 1) + Expect(ports[0]).To(BeEquivalentTo(portStart)) + verifyNumAllocationsInUse(mgr, 1) + releasePorts(mgr, name) + verifyNumAllocationsInUse(mgr, 0) + verifyNumAllocationsCooldown(mgr, 1) + + waitForCooldown(0) + kickPortManager(mgr) + verifyNumAllocationsInUse(mgr, 0) + verifyNumAllocationsCooldown(mgr, 0) + }) + }) + + When("reserving and releasing multiple ports, one after another", func() { + It("should use the next port since the first is still in cooldown", func() { + first := "first" + ports := reservePorts(mgr, first, 1) + Expect(ports[0]).To(BeEquivalentTo(portStart)) + verifyNumAllocationsInUse(mgr, 1) + + second := "second" + ports = reservePorts(mgr, second, 1) + Expect(ports[0]).To(BeEquivalentTo(portStart + 1)) + verifyNumAllocationsInUse(mgr, 2) + + releasePorts(mgr, first) + verifyNumAllocationsInUse(mgr, 1) + verifyNumAllocationsCooldown(mgr, 1) + + releasePorts(mgr, second) + verifyNumAllocationsInUse(mgr, 0) + verifyNumAllocationsCooldown(mgr, 2) + }) + }) + + When("reserving and releasing multiple ports, one at a time", func() { + It("should use the next port since the first is still in cooldown", func() { + first := "first" + ports := reservePorts(mgr, first, 1) + firstPort := ports[0] + Expect(ports[0]).To(BeEquivalentTo(portStart)) + verifyNumAllocationsInUse(mgr, 1) + releasePorts(mgr, first) + verifyNumAllocationsInUse(mgr, 0) + verifyNumAllocationsCooldown(mgr, 1) + + second := "second" + ports = reservePorts(mgr, second, 1) + Expect(ports[0]).To(BeEquivalentTo(firstPort + 1)) + 
verifyNumAllocationsInUse(mgr, 1) + verifyNumAllocationsCooldown(mgr, 1) + + releasePorts(mgr, second) + verifyNumAllocationsInUse(mgr, 0) + verifyNumAllocationsCooldown(mgr, 2) + }) + }) + + When("reserving all the ports in 1 allocation", func() { + It("should reserve and cooldown successfully", func() { + const name = "all" + reservePorts(mgr, name, portEnd-portStart+1) + verifyNumAllocationsInUse(mgr, 1) + verifyNumAllocationsCooldown(mgr, 0) + releasePorts(mgr, name) + verifyNumAllocationsInUse(mgr, 0) + verifyNumAllocationsCooldown(mgr, 1) + }) + }) + + XIt("Reserves from free list", func() { + const single = "single" + reservePorts(mgr, single, 1) + + const remaining = "remaining" + count := portEnd - portStart + reservePorts(mgr, remaining, count) + + releasePorts(mgr, single) + verifyNumAllocationsInUse(mgr, 1) + + reservePorts(mgr, "free", 1) + + verifyNumAllocationsInUse(mgr, 2) + }) + + When("all ports are already reserved", func() { + It("fails with insufficient resources", func() { + const name = "all" + reservePorts(mgr, name, portEnd-portStart+1) + + allocation := nnfv1alpha1.NnfPortManagerAllocationSpec{ + Requester: corev1.ObjectReference{Name: "insufficient-resources"}, + Count: 1, + } + + Eventually(func() error { + Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(mgr), mgr)).To(Succeed()) + mgr.Spec.Allocations = append(mgr.Spec.Allocations, allocation) + return k8sClient.Update(ctx, mgr) + }).Should(Succeed()) + + Eventually(func() bool { + Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(mgr), mgr)).To(Succeed()) + return r.isAllocated(mgr, allocation) + }).Should(BeTrue()) + + status := r.findAllocationStatus(mgr, allocation) + Expect(status).ToNot(BeNil()) + Expect(status.Ports).To(BeEmpty()) + Expect(status.Status).To(Equal(nnfv1alpha1.NnfPortManagerAllocationStatusInsufficientResources)) + }) + }) + + When("a single port is reserved and released", func() { + It("expires and is removed from allocations after the cooldown period", func() { + const name = "single" + ports := reservePorts(mgr, name, 1) + Expect(ports[0]).To(BeEquivalentTo(portStart)) + verifyNumAllocationsInUse(mgr, 1) + verifyNumAllocationsCooldown(mgr, 0) + + releasePorts(mgr, name) + verifyNumAllocationsInUse(mgr, 0) + verifyNumAllocationsCooldown(mgr, 1) + + waitForCooldown(0) + kickPortManager(mgr) + verifyNumAllocationsInUse(mgr, 0) + verifyNumAllocationsCooldown(mgr, 0) + }) + }) + + When("all ports are already reserved and another allocation is requested", func() { + It("should eventually free up the cooldown ports and successfully reserve", func() { + By("Reserving all available ports") + for i := 0; i < portTotal; i++ { + ports := reservePorts(mgr, fmt.Sprintf("test-%d", i), 1) + verifyNumAllocationsInUse(mgr, i+1) + Expect(ports[0]).To(BeEquivalentTo(portStart + i)) + } + verifyNumAllocationsInUse(mgr, portTotal) + + By("Attempting to reserve an additional port and failing") + ports := reservePortsAllowFail(mgr, "waiting", 1) + allocation := nnfv1alpha1.NnfPortManagerAllocationSpec{Requester: corev1.ObjectReference{Name: "waiting"}, Count: 1} + status := r.findAllocationStatus(mgr, allocation) + + Expect(ports).To(HaveLen(0)) + Expect(status).ToNot(BeNil()) + Expect(status.Status).To(Equal(nnfv1alpha1.NnfPortManagerAllocationStatusInsufficientResources)) + verifyNumAllocationsInUse(mgr, portTotal) + verifyNumAllocationsInsuffientResources(mgr, 1) + + By("Releasing one of the original ports to make room for previous request") + releasePorts(mgr, "test-0") + 
verifyNumAllocationsInUse(mgr, portTotal-1) + verifyNumAllocationsCooldown(mgr, 1) + verifyNumAllocationsInsuffientResources(mgr, 1) + + By("Verifying that the cooldown expired and the new reservation is now InUse") + waitForCooldown(0) + verifyNumAllocationsCooldown(mgr, 0) + verifyNumAllocationsInsuffientResources(mgr, 0) + verifyNumAllocationsInUse(mgr, portTotal) + }) + }) }) - }) + When("reserving ports with portCooldown", func() { + + BeforeEach(func() { + portCooldown = 0 + }) + + When("reserving and releasing multiple ports, one at a time", func() { + It("should use the same port since the first has no cooldown", func() { + first := "first" + ports := reservePorts(mgr, first, 1) + firstPort := ports[0] + Expect(ports[0]).To(BeEquivalentTo(portStart)) + verifyNumAllocationsInUse(mgr, 1) + releasePorts(mgr, first) + verifyNumAllocationsInUse(mgr, 0) + verifyNumAllocationsCooldown(mgr, 0) + + second := "second" + ports = reservePorts(mgr, second, 1) + Expect(ports[0]).To(BeEquivalentTo(firstPort)) + verifyNumAllocationsInUse(mgr, 1) + verifyNumAllocationsCooldown(mgr, 0) + + releasePorts(mgr, second) + verifyNumAllocationsInUse(mgr, 0) + verifyNumAllocationsCooldown(mgr, 0) + }) + }) + }) + }) }) diff --git a/controllers/nnf_storage_controller.go b/controllers/nnf_storage_controller.go index 14ea534cd..e407394eb 100644 --- a/controllers/nnf_storage_controller.go +++ b/controllers/nnf_storage_controller.go @@ -1,5 +1,5 @@ /* - * Copyright 2021, 2022 Hewlett Packard Enterprise Development LP + * Copyright 2021-2023 Hewlett Packard Enterprise Development LP * Other additional copyright holders may be indicated within. * * The entirety of this work is licensed under the Apache License, @@ -21,11 +21,14 @@ package controllers import ( "context" + "reflect" "runtime" "strconv" + "time" "github.com/go-logr/logr" + corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" kruntime "k8s.io/apimachinery/pkg/runtime" @@ -85,7 +88,7 @@ const ( // Reconcile is part of the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. func (r *NnfStorageReconciler) Reconcile(ctx context.Context, req ctrl.Request) (res ctrl.Result, err error) { - + log := r.Log.WithValues("NnfStorage", req.NamespacedName) metrics.NnfStorageReconcilesTotal.Inc() storage := &nnfv1alpha1.NnfStorage{} @@ -101,6 +104,11 @@ func (r *NnfStorageReconciler) Reconcile(ctx context.Context, req ctrl.Request) // occuring on the on function exit. 
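// The deferred block added below records a resource error in the status only when the
// reconciler is returning an error or exiting without a requeue, so a plain requeue does
// not touch the error field. A minimal, self-contained sketch of that control flow,
// assuming toy result/status types rather than controller-runtime and the updater package:
package main

import (
	"errors"
	"fmt"
	"time"
)

type result struct {
	Requeue      bool
	RequeueAfter time.Duration
}

type status struct {
	Error error
}

// reconcile mirrors the pattern: the deferred function runs on every return path,
// and the error is only copied into the status when this pass is not a plain requeue.
func reconcile(fail bool) (res result, err error) {
	st := &status{}

	defer func() {
		if err != nil || (!res.Requeue && res.RequeueAfter == 0) {
			st.Error = err // stands in for SetResourceErrorAndLog plus the status write-back
		}
		fmt.Printf("exit: requeueAfter=%v recordedError=%v\n", res.RequeueAfter, st.Error)
	}()

	if fail {
		return result{}, errors.New("could not create allocation")
	}

	// transient wait: requeue without recording an error
	return result{RequeueAfter: time.Minute}, nil
}

func main() {
	reconcile(false)
	reconcile(true)
}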
statusUpdater := updater.NewStatusUpdater[*nnfv1alpha1.NnfStorageStatus](storage) defer func() { err = statusUpdater.CloseWithStatusUpdate(ctx, r.Client.Status(), err) }() + defer func() { + if err != nil || (!res.Requeue && res.RequeueAfter == 0) { + storage.Status.SetResourceErrorAndLog(err, log) + } + }() // Check if the object is being deleted if !storage.GetDeletionTimestamp().IsZero() { @@ -160,7 +168,14 @@ func (r *NnfStorageReconciler) Reconcile(ctx context.Context, req ctrl.Request) storage.Status.Error = nil // For each allocation, create the NnfNodeStorage resources to fan out to the Rabbit nodes - for i := range storage.Spec.AllocationSets { + for i, allocationSet := range storage.Spec.AllocationSets { + // Add a reference to the external MGS PersistentStorageInstance if necessary + if allocationSet.NnfStorageLustreSpec.PersistentMgsReference != (corev1.ObjectReference{}) { + if err := r.addPersistentStorageReference(ctx, storage, allocationSet.NnfStorageLustreSpec.PersistentMgsReference); err != nil { + return ctrl.Result{}, err + } + } + res, err := r.createNodeStorage(ctx, storage, i) if err != nil { return ctrl.Result{}, err @@ -187,7 +202,7 @@ func (r *NnfStorageReconciler) Reconcile(ctx context.Context, req ctrl.Request) // Wait for all the allocation sets to be ready for _, allocationSet := range storage.Status.AllocationSets { if allocationSet.Status != nnfv1alpha1.ResourceReady { - return ctrl.Result{}, nil + return ctrl.Result{RequeueAfter: time.Minute}, nil } } @@ -214,6 +229,69 @@ func (r *NnfStorageReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, nil } +func (r *NnfStorageReconciler) addPersistentStorageReference(ctx context.Context, nnfStorage *nnfv1alpha1.NnfStorage, persistentMgsReference corev1.ObjectReference) error { + persistentStorage := &dwsv1alpha2.PersistentStorageInstance{ + ObjectMeta: metav1.ObjectMeta{ + Name: persistentMgsReference.Name, + Namespace: persistentMgsReference.Namespace, + }, + } + + if err := r.Get(ctx, client.ObjectKeyFromObject(persistentStorage), persistentStorage); err != nil { + return dwsv1alpha2.NewResourceError("").WithUserMessage("PersistentStorage '%v' not found", client.ObjectKeyFromObject(persistentStorage)).WithMajor() + } + + if persistentStorage.Status.State != dwsv1alpha2.PSIStateActive { + return dwsv1alpha2.NewResourceError("").WithUserMessage("PersistentStorage is not active").WithFatal() + } + + // Add a consumer reference to the persistent storage for this directive + reference := corev1.ObjectReference{ + Name: nnfStorage.Name, + Namespace: nnfStorage.Namespace, + Kind: reflect.TypeOf(nnfv1alpha1.NnfStorage{}).Name(), + } + + for _, existingReference := range persistentStorage.Spec.ConsumerReferences { + if existingReference == reference { + return nil + } + } + + persistentStorage.Spec.ConsumerReferences = append(persistentStorage.Spec.ConsumerReferences, reference) + + return r.Update(ctx, persistentStorage) +} + +func (r *NnfStorageReconciler) removePersistentStorageReference(ctx context.Context, nnfStorage *nnfv1alpha1.NnfStorage, persistentMgsReference corev1.ObjectReference) error { + persistentStorage := &dwsv1alpha2.PersistentStorageInstance{ + ObjectMeta: metav1.ObjectMeta{ + Name: persistentMgsReference.Name, + Namespace: persistentMgsReference.Namespace, + }, + } + + if err := r.Get(ctx, client.ObjectKeyFromObject(persistentStorage), persistentStorage); err != nil { + return client.IgnoreNotFound(err) + } + + // remove the consumer reference on the persistent storage 
for this directive + reference := corev1.ObjectReference{ + Name: nnfStorage.Name, + Namespace: nnfStorage.Namespace, + Kind: reflect.TypeOf(nnfv1alpha1.NnfStorage{}).Name(), + } + + for i, existingReference := range persistentStorage.Spec.ConsumerReferences { + if existingReference == reference { + persistentStorage.Spec.ConsumerReferences = append(persistentStorage.Spec.ConsumerReferences[:i], persistentStorage.Spec.ConsumerReferences[i+1:]...) + return r.Update(ctx, persistentStorage) + } + } + + return nil +} + // Create an NnfNodeStorage if it doesn't exist, or update it if it requires updating. Each // Rabbit node gets an NnfNodeStorage, and there may be multiple allocations requested in it. // This limits the number of resources that have to be broadcast to the Rabbits. @@ -279,7 +357,7 @@ func (r *NnfStorageReconciler) createNodeStorage(ctx context.Context, storage *n if err != nil { if !apierrors.IsConflict(err) { - storage.Status.AllocationSets[allocationSetIndex].Error = err.Error() + return nil, err } return &ctrl.Result{Requeue: true}, nil @@ -306,7 +384,6 @@ func (r *NnfStorageReconciler) aggregateNodeStorageStatus(ctx context.Context, s var status nnfv1alpha1.NnfResourceStatusType = nnfv1alpha1.ResourceReady allocationSet.AllocationCount = 0 - allocationSet.Error = "" nnfNodeStorageList := &nnfv1alpha1.NnfNodeStorageList{} matchLabels := dwsv1alpha2.MatchingOwner(storage) @@ -356,12 +433,6 @@ func (r *NnfStorageReconciler) aggregateNodeStorageStatus(ctx context.Context, s nodeAllocation.StorageGroup.Status.UpdateIfWorseThan(&status) nodeAllocation.FileSystem.Status.UpdateIfWorseThan(&status) nodeAllocation.FileShare.Status.UpdateIfWorseThan(&status) - - for _, condition := range nodeAllocation.Conditions { - if condition.Reason == nnfv1alpha1.ConditionFailed { - allocationSet.Error = condition.Message - } - } } if nnfNodeStorage.Status.Error != nil { @@ -434,6 +505,14 @@ func (r *NnfStorageReconciler) teardownStorage(ctx context.Context, storage *nnf return nodeStoragesExist, nil } + for _, allocationSet := range storage.Spec.AllocationSets { + if allocationSet.NnfStorageLustreSpec.PersistentMgsReference != (corev1.ObjectReference{}) { + if err := r.removePersistentStorageReference(ctx, storage, allocationSet.NnfStorageLustreSpec.PersistentMgsReference); err != nil { + return nodeStoragesExist, err + } + } + } + return nodeStoragesDeleted, nil } diff --git a/controllers/nnf_workflow_controller.go b/controllers/nnf_workflow_controller.go index a19bb76ad..5b2aee1c2 100644 --- a/controllers/nnf_workflow_controller.go +++ b/controllers/nnf_workflow_controller.go @@ -37,7 +37,6 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" kruntime "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller" @@ -124,6 +123,20 @@ func (r *NnfWorkflowReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, err } + // Delete containers and unallocate port + containerRes, err := r.deleteContainers(ctx, workflow, -1) + if err != nil { + return ctrl.Result{}, err + } else if containerRes != nil { + return containerRes.Result, nil + } + containerRes, err = r.releaseContainerPorts(ctx, workflow) + if err != nil { + return ctrl.Result{}, err + } else if containerRes != nil { + return containerRes.Result, nil + } + deleteStatus, err := dwsv1alpha2.DeleteChildren(ctx, r.Client, 
r.ChildObjects, workflow) if err != nil { return ctrl.Result{}, err @@ -205,6 +218,10 @@ func (r *NnfWorkflowReconciler) Reconcile(ctx context.Context, req ctrl.Request) log := log.WithValues("state", workflow.Status.State, "index", driverStatus.DWDIndex) log.Info("Start", "directive", workflow.Spec.DWDirectives[driverStatus.DWDIndex]) + driverStatus.Status = dwsv1alpha2.StatusRunning + driverStatus.Message = "" + driverStatus.Error = "" + result, err := startFunctions[workflow.Status.State](r, ctx, workflow, driverStatus.DWDIndex) if err != nil { handleWorkflowError(err, driverStatus) @@ -213,10 +230,6 @@ func (r *NnfWorkflowReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, err } - driverStatus.Status = dwsv1alpha2.StatusRunning - driverStatus.Message = "" - driverStatus.Error = "" - if result != nil { log.Info("Start wait", result.info()...) driverStatus.Message = result.reason @@ -243,6 +256,10 @@ func (r *NnfWorkflowReconciler) Reconcile(ctx context.Context, req ctrl.Request) log := log.WithValues("state", workflow.Status.State, "index", driverStatus.DWDIndex) log.Info("Finish", "directive", workflow.Spec.DWDirectives[driverStatus.DWDIndex]) + driverStatus.Status = dwsv1alpha2.StatusRunning + driverStatus.Message = "" + driverStatus.Error = "" + result, err := finishFunctions[workflow.Status.State](r, ctx, workflow, driverStatus.DWDIndex) if err != nil { handleWorkflowError(err, driverStatus) @@ -252,13 +269,11 @@ func (r *NnfWorkflowReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, err } - driverStatus.Status = dwsv1alpha2.StatusRunning - driverStatus.Message = "" - driverStatus.Error = "" - if result != nil { log.Info("Finish wait", result.info()...) - driverStatus.Message = result.reason + if driverStatus.Message == "" { + driverStatus.Message = result.reason + } return result.Result, nil } @@ -280,14 +295,13 @@ func (r *NnfWorkflowReconciler) startProposalState(ctx context.Context, workflow dwArgs, _ := dwdparse.BuildArgsMap(workflow.Spec.DWDirectives[index]) if err := r.validateWorkflow(ctx, workflow); err != nil { - log.Error(err, "Unable to validate workflow") - return nil, nnfv1alpha1.NewWorkflowError("Unable to validate DW directives").WithFatal().WithError(err) + return nil, dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("unable to validate DW directives") } // only jobdw, persistentdw, and create_persistent need a directive breakdown switch dwArgs["command"] { case "container": - return nil, r.createPinnedContainerProfileIfNecessary(ctx, workflow, index) + return nil, createPinnedContainerProfileIfNecessary(ctx, r.Client, r.Scheme, workflow, index, r.Log) case "jobdw", "persistentdw", "create_persistent": break default: @@ -296,7 +310,7 @@ func (r *NnfWorkflowReconciler) startProposalState(ctx context.Context, workflow directiveBreakdown, err := r.generateDirectiveBreakdown(ctx, index, workflow, log) if err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Unable to start parsing DW directive").WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not generate DirectiveBreakdown").WithError(err).WithUserMessage("unable to start parsing DW directive") } if directiveBreakdown == nil { @@ -324,7 +338,6 @@ func (r *NnfWorkflowReconciler) startProposalState(ctx context.Context, workflow } func (r *NnfWorkflowReconciler) finishProposalState(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) (*result, error) { - log := r.Log.WithValues("Workflow", 
client.ObjectKeyFromObject(workflow), "Index", index) dwArgs, _ := dwdparse.BuildArgsMap(workflow.Spec.DWDirectives[index]) // only jobdw, persistentdw, and create_persistent have a directive breakdown @@ -344,12 +357,13 @@ func (r *NnfWorkflowReconciler) finishProposalState(ctx context.Context, workflo err := r.Get(ctx, client.ObjectKeyFromObject(directiveBreakdown), directiveBreakdown) if err != nil { - log.Info("Failed to get DirectiveBreakdown", "name", directiveBreakdown.GetName(), "error", err.Error()) - return nil, nnfv1alpha1.NewWorkflowError("Unable to finish parsing DW directive").WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not get DirectiveBreakdown: %v", client.ObjectKeyFromObject(directiveBreakdown)).WithError(err).WithUserMessage("unable to finish parsing DW directive") } if directiveBreakdown.Status.Error != nil { - return nil, nnfv1alpha1.NewWorkflowError("").WithError(directiveBreakdown.Status.Error) + handleWorkflowErrorByIndex(directiveBreakdown.Status.Error, workflow, index) + + return Requeue("error").withObject(directiveBreakdown), nil } // Wait for the breakdown to be ready @@ -379,9 +393,7 @@ func (r *NnfWorkflowReconciler) startSetupState(ctx context.Context, workflow *d } err := r.Get(ctx, client.ObjectKeyFromObject(dbd), dbd) if err != nil { - log.Info("Unable to get directiveBreakdown", "dbd", client.ObjectKeyFromObject(dbd), "Message", err) - err = fmt.Errorf("Unable to get DirectiveBreakdown %v: %w", client.ObjectKeyFromObject(dbd), err) - return nil, nnfv1alpha1.NewWorkflowError("Could not read allocation request").WithError(err) + return nil, dwsv1alpha2.NewResourceError("unable to get DirectiveBreakdown: %v", client.ObjectKeyFromObject(dbd)).WithError(err).WithUserMessage("could not read allocation request") } s := &dwsv1alpha2.Servers{ @@ -392,14 +404,12 @@ func (r *NnfWorkflowReconciler) startSetupState(ctx context.Context, workflow *d } err = r.Get(ctx, client.ObjectKeyFromObject(s), s) if err != nil { - log.Info("Unable to get servers", "servers", client.ObjectKeyFromObject(s), "Message", err) - err = fmt.Errorf("Unable to get Servers %v: %w", client.ObjectKeyFromObject(s), err) - return nil, nnfv1alpha1.NewWorkflowError("Could not read allocation request").WithError(err) + return nil, dwsv1alpha2.NewResourceError("unable to get Servers: %v", client.ObjectKeyFromObject(s)).WithError(err).WithUserMessage("could not read allocation request") } if _, present := os.LookupEnv("RABBIT_TEST_ENV_BYPASS_SERVER_STORAGE_CHECK"); !present { if err := r.validateServerAllocations(ctx, dbd, s); err != nil { - return nil, err + return nil, dwsv1alpha2.NewResourceError("invalid Rabbit allocations for servers: %v", client.ObjectKeyFromObject(s)).WithError(err).WithUserMessage("invalid Rabbit allocations") } } @@ -408,42 +418,50 @@ func (r *NnfWorkflowReconciler) startSetupState(ctx context.Context, workflow *d return Requeue("conflict").withObject(storage), nil } - log.Info("Failed to create nnf storage", "Message", err) - err = fmt.Errorf("Could not create NnfStorage %w", err) - return nil, nnfv1alpha1.NewWorkflowError("Could not create allocation").WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not create NnfStorage").WithError(err).WithUserMessage("could not create allocation") } + case "container": + return r.getContainerPorts(ctx, workflow, index) } return nil, nil } func (r *NnfWorkflowReconciler) finishSetupState(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) (*result, error) { - name, namespace := 
getStorageReferenceNameFromWorkflowActual(workflow, index) + dwArgs, _ := dwdparse.BuildArgsMap(workflow.Spec.DWDirectives[index]) - // Check whether the NnfStorage has finished creating the storage. - nnfStorage := &nnfv1alpha1.NnfStorage{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: namespace, - }, - } - if err := r.Get(ctx, client.ObjectKeyFromObject(nnfStorage), nnfStorage); err != nil { - return nil, nnfv1alpha1.NewWorkflowErrorf("failed to get NNF storage resource '%s", client.ObjectKeyFromObject(nnfStorage)).WithError(err) - } + switch dwArgs["command"] { + case "container": + return r.checkContainerPorts(ctx, workflow, index) + default: + name, namespace := getStorageReferenceNameFromWorkflowActual(workflow, index) - // If the Status section has not been filled in yet, exit and wait. - if len(nnfStorage.Status.AllocationSets) != len(nnfStorage.Spec.AllocationSets) { - // RequeueAfter is necessary for persistent storage that isn't owned by this workflow - return Requeue("allocation").after(2 * time.Second).withObject(nnfStorage), nil - } + // Check whether the NnfStorage has finished creating the storage. + nnfStorage := &nnfv1alpha1.NnfStorage{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + } + if err := r.Get(ctx, client.ObjectKeyFromObject(nnfStorage), nnfStorage); err != nil { + return nil, dwsv1alpha2.NewResourceError("could not get NnfStorage: %v", client.ObjectKeyFromObject(nnfStorage)).WithError(err).WithUserMessage("could not allocate storage") + } - if nnfStorage.Status.Error != nil { - return nil, nnfv1alpha1.NewWorkflowErrorf("storage resource '%s' has error", client.ObjectKeyFromObject(nnfStorage)).WithError(nnfStorage.Status.Error) - } + // If the Status section has not been filled in yet, exit and wait. 
+ if len(nnfStorage.Status.AllocationSets) != len(nnfStorage.Spec.AllocationSets) { + // RequeueAfter is necessary for persistent storage that isn't owned by this workflow + return Requeue("allocation").after(2 * time.Second).withObject(nnfStorage), nil + } - if nnfStorage.Status.Status != nnfv1alpha1.ResourceReady { - // RequeueAfter is necessary for persistent storage that isn't owned by this workflow - return Requeue("allocation set not ready").after(2 * time.Second).withObject(nnfStorage), nil + if nnfStorage.Status.Error != nil { + handleWorkflowErrorByIndex(dwsv1alpha2.NewResourceError("storage resource error: %v", client.ObjectKeyFromObject(nnfStorage)).WithError(nnfStorage.Status.Error).WithUserMessage("could not allocate storage"), workflow, index) + return Requeue("error").withObject(nnfStorage), nil + } + + if nnfStorage.Status.Status != nnfv1alpha1.ResourceReady { + // RequeueAfter is necessary for persistent storage that isn't owned by this workflow + return Requeue("allocation set not ready").after(2 * time.Second).withObject(nnfStorage), nil + } } return nil, nil @@ -454,7 +472,7 @@ func (r *NnfWorkflowReconciler) startDataInOutState(ctx context.Context, workflo dwArgs, err := dwdparse.BuildArgsMap(workflow.Spec.DWDirectives[index]) if err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Invalid DW directive: " + workflow.Spec.DWDirectives[index]).WithFatal() + return nil, dwsv1alpha2.NewResourceError("").WithUserMessage("Invalid DW directive: %v", workflow.Spec.DWDirectives[index]).WithFatal().WithUser() } // NOTE: We don't need to check for the occurrence of a source or destination parameters since these are required fields and validated through the webhook @@ -488,7 +506,7 @@ func (r *NnfWorkflowReconciler) startDataInOutState(ctx context.Context, workflo } if parentDwIndex < 0 { - return nil, nil, nil, nnfv1alpha1.NewWorkflowError("No directive matching '" + name + "' found in workflow").WithFatal() + return nil, nil, nil, dwsv1alpha2.NewResourceError("").WithUserMessage("no directive matching '%v' found in workflow", name).WithFatal().WithUser() } // If directive specifies a persistent storage instance, `name` will be the nnfStorageName @@ -508,7 +526,7 @@ func (r *NnfWorkflowReconciler) startDataInOutState(ctx context.Context, workflo } if err := r.Get(ctx, client.ObjectKeyFromObject(storage), storage); err != nil { - return nil, nil, nil, fmt.Errorf("Could not get NnfStorage %v: %w", client.ObjectKeyFromObject(storage), err) + return nil, nil, nil, dwsv1alpha2.NewResourceError("could not get NnfStorage %v", client.ObjectKeyFromObject(storage)).WithError(err).WithUserMessage("could not find storage allocation") } storageReference = &corev1.ObjectReference{ @@ -539,7 +557,7 @@ func (r *NnfWorkflowReconciler) startDataInOutState(ctx context.Context, workflo // Setup NNF Access for the NNF Servers so we can run data movement on them. 
access, err := r.setupNnfAccessForServers(ctx, storage, workflow, index, parentDwIndex, teardownState, log) if err != nil { - return storageReference, access, nil, nnfv1alpha1.NewWorkflowError("Could not create data movement mount points").WithError(err) + return storageReference, access, nil, dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("could not create data movement mount points") } // Wait for accesses to go ready @@ -559,19 +577,19 @@ func (r *NnfWorkflowReconciler) startDataInOutState(ctx context.Context, workflo return storageReference, nil, nil, nil } - return nil, nil, nil, nnfv1alpha1.NewWorkflowError(fmt.Sprintf("Staging parameter '%s' is invalid", param)).WithFatal() + return nil, nil, nil, dwsv1alpha2.NewResourceError("").WithUserMessage("Staging parameter '%s' is invalid", param).WithFatal().WithUser() } sourceStorage, sourceAccess, result, err := prepareStagingArgumentFn(dwArgs["source"]) if err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Could not prepare data movement resources").WithError(err) + return nil, dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("could not prepare data movement resources") } else if result != nil { return result, nil } destStorage, destAccess, result, err := prepareStagingArgumentFn(dwArgs["destination"]) if err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Could not prepare data movement resources").WithError(err) + return nil, dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("Could not prepare data movement resources") } else if result != nil { return result, nil } @@ -580,7 +598,7 @@ func (r *NnfWorkflowReconciler) startDataInOutState(ctx context.Context, workflo for _, access := range []*nnfv1alpha1.NnfAccess{sourceAccess, destAccess} { if access != nil { if err := r.Get(ctx, client.ObjectKeyFromObject(access), access); err != nil { - return nil, fmt.Errorf("Could not get NnfAccess %v: %w", client.ObjectKeyFromObject(access), err) + return nil, dwsv1alpha2.NewResourceError("could not get NnfAccess %v", client.ObjectKeyFromObject(access)).WithError(err).WithUserMessage("could not create data movement mount points") } if access.Status.State != "mounted" || !access.Status.Ready { @@ -600,9 +618,14 @@ func (r *NnfWorkflowReconciler) startDataInOutState(ctx context.Context, workflo targetStorageRef = sourceStorage } - targetStorage := &nnfv1alpha1.NnfStorage{} - if err := r.Get(ctx, types.NamespacedName{Name: targetStorageRef.Name, Namespace: targetStorageRef.Namespace}, targetStorage); err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Data Movement: Failed to retrieve NNF Storage").WithError(err) + targetStorage := &nnfv1alpha1.NnfStorage{ + ObjectMeta: metav1.ObjectMeta{ + Name: targetStorageRef.Name, + Namespace: targetStorageRef.Namespace, + }, + } + if err := r.Get(ctx, client.ObjectKeyFromObject(targetStorage), targetStorage); err != nil { + return nil, dwsv1alpha2.NewResourceError("could not get NnfStorage: %v", client.ObjectKeyFromObject(targetStorage)).WithError(err).WithUserMessage("could not find storage allocations") } _, source := splitStagingArgumentIntoNameAndPath(dwArgs["source"]) @@ -630,8 +653,7 @@ func (r *NnfWorkflowReconciler) startDataInOutState(ctx context.Context, workflo // XFS & GFS2 require the individual rabbit nodes are performing the data movement. 
if len(targetStorage.Spec.AllocationSets) != 1 { - msg := fmt.Sprintf("Data Movement: File System %s has unexpected allocation sets %d", fsType, len(targetStorage.Spec.AllocationSets)) - return nil, nnfv1alpha1.NewWorkflowError(msg).WithFatal() + return nil, dwsv1alpha2.NewResourceError("file system %s has unexpected allocation sets %d", fsType, len(targetStorage.Spec.AllocationSets)).WithUserMessage("unexpected allocation count").WithFatal() } nodes := targetStorage.Spec.AllocationSets[0].Nodes @@ -666,7 +688,7 @@ func (r *NnfWorkflowReconciler) startDataInOutState(ctx context.Context, workflo log.Info("Creating NNF Data Movement", "name", client.ObjectKeyFromObject(dm).String()) if err := r.Create(ctx, dm); err != nil { if !errors.IsAlreadyExists(err) { - return nil, nnfv1alpha1.NewWorkflowError("Data Movement failed to create").WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not create DataMovement: %v", client.ObjectKeyFromObject(dm)).WithError(err).WithUserMessage("could not start data movement") } } } @@ -702,7 +724,7 @@ func (r *NnfWorkflowReconciler) startDataInOutState(ctx context.Context, workflo log.Info("Creating NNF Data Movement", "name", client.ObjectKeyFromObject(dm).String()) if err := r.Create(ctx, dm); err != nil { if !errors.IsAlreadyExists(err) { - return nil, nnfv1alpha1.NewWorkflowError("Data Movement failed to create").WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not create DataMovement: %v", client.ObjectKeyFromObject(dm)).WithError(err).WithUserMessage("could not start data movement") } } } @@ -721,7 +743,7 @@ func (r *NnfWorkflowReconciler) finishDataInOutState(ctx context.Context, workfl dataMovementList := &nnfv1alpha1.NnfDataMovementList{} if err := r.List(ctx, dataMovementList, matchingLabels); err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Could not retrieve data movements").WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not list DataMovements with labels: %v", matchingLabels).WithError(err).WithUserMessage("could not find data movement information") } // Since the Finish state is only called when copy_in / copy_out directives are present - the lack of any items @@ -740,7 +762,8 @@ func (r *NnfWorkflowReconciler) finishDataInOutState(ctx context.Context, workfl // TODO: Detailed Fail Message? for _, dm := range dataMovementList.Items { if dm.Status.Status != nnfv1alpha1.DataMovementConditionReasonSuccess { - return nil, nnfv1alpha1.NewWorkflowError(fmt.Sprintf("Staging operation failed")).WithFatal() + handleWorkflowErrorByIndex(dwsv1alpha2.NewResourceError("").WithUserMessage("data movement operation failed").WithFatal(), workflow, index) + return Requeue("error").withObject(&dm), nil } } @@ -756,7 +779,7 @@ func (r *NnfWorkflowReconciler) startPreRunState(ctx context.Context, workflow * // time. 
unmountResult, err := r.unmountNnfAccessIfNecessary(ctx, workflow, index, "servers") if err != nil { - return nil, err + return nil, dwsv1alpha2.NewResourceError("could not unmount NnfAccess index: %v", index).WithError(err).WithUserMessage("could not unmount on Rabbit nodes") } if unmountResult != nil { @@ -772,7 +795,16 @@ func (r *NnfWorkflowReconciler) startPreRunState(ctx context.Context, workflow * // Create container service and jobs if dwArgs["command"] == "container" { - return r.containerHandler(ctx, workflow, dwArgs, index, log) + result, err := r.userContainerHandler(ctx, workflow, dwArgs, index, log) + + if err != nil { + return nil, dwsv1alpha2.NewResourceError("").WithError(err).WithFatal().WithUserMessage("unable to create/update Container Jobs") + } + if result != nil { + return result, nil + } + + return nil, nil } // Create an NNFAccess for the compute clients @@ -807,8 +839,7 @@ func (r *NnfWorkflowReconciler) startPreRunState(ctx context.Context, workflow * return ctrl.SetControllerReference(workflow, access, r.Scheme) }) if err != nil { - err = fmt.Errorf("Could not CreateOrUpdate compute node NnfAccess %v: %w", client.ObjectKeyFromObject(access), err) - return nil, nnfv1alpha1.NewWorkflowError("Could not mount file system on compute nodes").WithError(err) + return nil, dwsv1alpha2.NewResourceError("Could not CreateOrUpdate compute node NnfAccess: %v", client.ObjectKeyFromObject(access)).WithError(err).WithUserMessage("could not mount file system on compute nodes") } if result == controllerutil.OperationResultCreated { @@ -826,7 +857,7 @@ func (r *NnfWorkflowReconciler) startPreRunState(ctx context.Context, workflow * fsType, err := r.getDirectiveFileSystemType(ctx, workflow, index) if err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Unable to determine directive file system type").WithError(err) + return nil, dwsv1alpha2.NewResourceError("").WithError(err).WithFatal().WithUser().WithUserMessage("Unable to determine directive file system type") } if fsType == "gfs2" || fsType == "lustre" { @@ -849,7 +880,7 @@ func (r *NnfWorkflowReconciler) startPreRunState(ctx context.Context, workflow * _, err := r.setupNnfAccessForServers(ctx, storage, workflow, index, index, teardownState, log) if err != nil { - return nil, nnfv1alpha1.NewWorkflowError(fmt.Sprintf("Could not setup NNF Access in state %s", workflow.Status.State)).WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not setup NNF Access in state %s", workflow.Status.State).WithError(err).WithUserMessage("could not mount file system on Rabbit nodes") } } @@ -875,7 +906,7 @@ func (r *NnfWorkflowReconciler) finishPreRunState(ctx context.Context, workflow case "container": return r.waitForContainersToStart(ctx, workflow, index) default: - return nil, nnfv1alpha1.NewWorkflowErrorf("Unexpected directive %v", dwArgs["command"]) + return nil, dwsv1alpha2.NewResourceError("unexpected directive: %v", dwArgs["command"]).WithFatal().WithUserMessage("could not mount file system on compute nodes") } workflow.Status.Env[envName] = buildMountPath(workflow, index) @@ -883,7 +914,7 @@ func (r *NnfWorkflowReconciler) finishPreRunState(ctx context.Context, workflow // Containers do not have NNFAccesses, so only do this after r.waitForContainersToStart() would have returned result, err := r.waitForNnfAccessStateAndReady(ctx, workflow, index, "mounted") if err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Failed to achieve NnfAccess 'mounted' state").WithError(err).WithFatal() + return nil, 
dwsv1alpha2.NewResourceError("could not mount rabbit NnfAccess for index %v", index).WithError(err).WithUserMessage("could not mount file system on compute nodes") } else if result != nil { return result, nil } @@ -902,7 +933,11 @@ func (r *NnfWorkflowReconciler) startPostRunState(ctx context.Context, workflow // Unmount the NnfAccess for the compute nodes. This will free the compute nodes to be used // in a different job even if there is data movement happening on the Rabbits. if result, err := r.unmountNnfAccessIfNecessary(ctx, workflow, index, "computes"); result != nil || err != nil { - return result, err + if err != nil { + return nil, dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("could not unmount file system from compute nodes") + } + + return result, nil } // Wait for data movement resources to complete @@ -911,7 +946,7 @@ func (r *NnfWorkflowReconciler) startPostRunState(ctx context.Context, workflow dataMovementList := &nnfv1alpha1.NnfDataMovementList{} if err := r.List(ctx, dataMovementList, matchingLabels); err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Could not retrieve data movements").WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not list DataMovements with labels: %v", matchingLabels).WithError(err).WithUserMessage("could not find data movement information") } for _, dm := range dataMovementList.Items { @@ -923,7 +958,7 @@ func (r *NnfWorkflowReconciler) startPostRunState(ctx context.Context, workflow // Unmount the NnfAccess for the servers resource if necessary. fsType, err := r.getDirectiveFileSystemType(ctx, workflow, index) if err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Unable to determine directive file system type").WithError(err) + return nil, dwsv1alpha2.NewResourceError("").WithError(err).WithFatal().WithUser().WithUserMessage("Unable to determine directive file system type") } if fsType == "gfs2" || fsType == "lustre" { @@ -944,7 +979,7 @@ func (r *NnfWorkflowReconciler) finishPostRunState(ctx context.Context, workflow result, err := r.waitForNnfAccessStateAndReady(ctx, workflow, index, "unmounted") if err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Failed to achieve NnfAccess 'unmounted' state").WithError(err).WithFatal() + return nil, dwsv1alpha2.NewResourceError("could not unmount compute NnfAccess for index %v", index).WithError(err).WithUserMessage("could not unmount file system on compute nodes") } else if result != nil { return result, nil } @@ -956,7 +991,7 @@ func (r *NnfWorkflowReconciler) finishPostRunState(ctx context.Context, workflow dataMovementList := &nnfv1alpha1.NnfDataMovementList{} if err := r.List(ctx, dataMovementList, matchingLabels); err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Could not retrieve data movements").WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not list DataMovements with labels: %v", matchingLabels).WithError(err).WithUserMessage("could not find data movement information") } for _, dm := range dataMovementList.Items { @@ -965,8 +1000,8 @@ func (r *NnfWorkflowReconciler) finishPostRunState(ctx context.Context, workflow } if dm.Status.Status == nnfv1alpha1.DataMovementConditionReasonFailed { - err := fmt.Errorf("Data movement %s failed", client.ObjectKeyFromObject(&dm).String()) - return nil, nnfv1alpha1.NewWorkflowError("Data movement unsuccessful").WithError(err).WithFatal() + handleWorkflowErrorByIndex(dwsv1alpha2.NewResourceError("data movement %v failed", client.ObjectKeyFromObject(&dm)).WithUserMessage("data movement 
failed").WithFatal(), workflow, index) + return Requeue("error").withObject(&dm), nil } } @@ -974,23 +1009,31 @@ func (r *NnfWorkflowReconciler) finishPostRunState(ctx context.Context, workflow } func (r *NnfWorkflowReconciler) startTeardownState(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) (*result, error) { + dwArgs, _ := dwdparse.BuildArgsMap(workflow.Spec.DWDirectives[index]) - // Delete the NnfDataMovement and NnfAccess for this directive before removing the NnfStorage. - // copy_in/out directives can reference NnfStorage from a different directive, so all the NnfAccesses - // need to be removed first. - childObjects := []dwsv1alpha2.ObjectList{ - &nnfv1alpha1.NnfDataMovementList{}, - &nnfv1alpha1.NnfAccessList{}, - } + switch dwArgs["command"] { + case "container": + res, err := r.deleteContainers(ctx, workflow, index) + if res != nil || err != nil { + return res, err + } + default: + // Delete the NnfDataMovement and NnfAccess for this directive before removing the NnfStorage. + // copy_in/out directives can reference NnfStorage from a different directive, so all the NnfAccesses + // need to be removed first. + childObjects := []dwsv1alpha2.ObjectList{ + &nnfv1alpha1.NnfDataMovementList{}, + &nnfv1alpha1.NnfAccessList{}, + } - deleteStatus, err := dwsv1alpha2.DeleteChildrenWithLabels(ctx, r.Client, childObjects, workflow, client.MatchingLabels{nnfv1alpha1.DirectiveIndexLabel: strconv.Itoa(index)}) - if err != nil { - err = fmt.Errorf("Could not delete NnfDataMovement and NnfAccess children: %w", err) - return nil, nnfv1alpha1.NewWorkflowError("Could not stop data movement and unmount file systems").WithError(err) - } + deleteStatus, err := dwsv1alpha2.DeleteChildrenWithLabels(ctx, r.Client, childObjects, workflow, client.MatchingLabels{nnfv1alpha1.DirectiveIndexLabel: strconv.Itoa(index)}) + if err != nil { + return nil, dwsv1alpha2.NewResourceError("could not delete NnfDataMovement and NnfAccess children").WithError(err).WithUserMessage("could not stop data movement and unmount file systems") + } - if !deleteStatus.Complete() { - return Requeue("delete").withDeleteStatus(deleteStatus), nil + if !deleteStatus.Complete() { + return Requeue("delete").withDeleteStatus(deleteStatus), nil + } } return nil, nil @@ -1014,7 +1057,7 @@ func (r *NnfWorkflowReconciler) finishTeardownState(ctx context.Context, workflo persistentStorage, err := r.findPersistentInstance(ctx, workflow, dwArgs["name"]) if err != nil { - return nil, nnfv1alpha1.NewWorkflowError(fmt.Sprintf("Could not find persistent storage %v", dwArgs["name"])).WithError(err) + return nil, dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("could not find persistent storage %v", dwArgs["name"]) } persistentStorage.SetOwnerReferences([]metav1.OwnerReference{}) @@ -1025,30 +1068,27 @@ func (r *NnfWorkflowReconciler) finishTeardownState(ctx context.Context, workflo err = r.Update(ctx, persistentStorage) if err != nil { - err = fmt.Errorf("Could not update PersistentStorage %v: %w", client.ObjectKeyFromObject(persistentStorage), err) - return nil, nnfv1alpha1.NewWorkflowError("Could not finalize peristent storage").WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not update PersistentStorage: %v", client.ObjectKeyFromObject(persistentStorage)).WithError(err).WithUserMessage("could not finalize peristent storage") } log.Info("Removed owner reference from persistent storage", "psi", persistentStorage) case "destroy_persistent": persistentStorage, err := r.findPersistentInstance(ctx, 
workflow, dwArgs["name"]) if err != nil { if !apierrors.IsNotFound(err) { - return nil, nnfv1alpha1.NewWorkflowError(fmt.Sprintf("Could not find peristent storage %v", dwArgs["name"])).WithError(err) + return nil, dwsv1alpha2.NewResourceError("").WithError(err).WithFatal().WithUser().WithUserMessage("could not find peristent storage %v", dwArgs["name"]) } return nil, nil } if persistentStorage.Spec.UserID != workflow.Spec.UserID { - err = fmt.Errorf("Existing persistent storage user ID %v does not match user ID %v", persistentStorage.Spec.UserID, workflow.Spec.UserID) - log.Info(err.Error()) - return nil, nnfv1alpha1.NewWorkflowError("user ID does not match existing persistent storage").WithError(err).WithFatal() + return nil, dwsv1alpha2.NewResourceError("Existing persistent storage user ID %v does not match user ID %v", persistentStorage.Spec.UserID, workflow.Spec.UserID).WithError(err).WithUserMessage("user ID does not match existing persistent storage").WithFatal().WithUser() } if len(persistentStorage.Spec.ConsumerReferences) != 0 { err = fmt.Errorf("PersistentStorage cannot be deleted with %v consumers", len(persistentStorage.Spec.ConsumerReferences)) log.Info(err.Error()) - return nil, nnfv1alpha1.NewWorkflowError("PersistentStorage cannot be deleted while in use").WithError(err).WithFatal() + return nil, dwsv1alpha2.NewResourceError("persistent storage cannot be deleted with %v consumers", len(persistentStorage.Spec.ConsumerReferences)).WithError(err).WithUserMessage("persistent storage cannot be deleted while in use").WithFatal().WithUser() } persistentStorage.Spec.State = dwsv1alpha2.PSIStateDestroying @@ -1057,21 +1097,24 @@ func (r *NnfWorkflowReconciler) finishTeardownState(ctx context.Context, workflo addDirectiveIndexLabel(persistentStorage, index) if err := controllerutil.SetControllerReference(workflow, persistentStorage, r.Scheme); err != nil { - log.Info("Unable to assign workflow as owner of persistentInstance", "psi", persistentStorage) - err = fmt.Errorf("Could not assign workflow as owner of PersistentInstance %v: %w", client.ObjectKeyFromObject(persistentStorage), err) - return nil, nnfv1alpha1.NewWorkflowError(fmt.Sprintf("Could not delete peristent storage %v", dwArgs["name"])).WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not assign workflow as owner of PersistentInstance: %v", client.ObjectKeyFromObject(persistentStorage)).WithError(err).WithUserMessage("could not delete persistent storage %v", dwArgs["name"]) } err = r.Update(ctx, persistentStorage) if err != nil { - err = fmt.Errorf("Could not update PersistentInstance %v: %w", client.ObjectKeyFromObject(persistentStorage), err) - return nil, nnfv1alpha1.NewWorkflowError(fmt.Sprintf("Could not delete peristent storage %v", dwArgs["name"])).WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not update PersistentInstance: %v", client.ObjectKeyFromObject(persistentStorage)).WithError(err).WithUserMessage("could not delete persistent storage %v", dwArgs["name"]) } log.Info("Add owner reference for persistent storage for deletion", "psi", persistentStorage) case "persistentdw": err := r.removePersistentStorageReference(ctx, workflow, index) if err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Could not remove persistent storage reference").WithError(err) + return nil, dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("Could not remove persistent storage reference") + } + case "container": + // Release container ports + res, err := r.releaseContainerPorts(ctx, 
workflow) + if res != nil || err != nil { + return res, err } default: } @@ -1083,7 +1126,7 @@ func (r *NnfWorkflowReconciler) finishTeardownState(ctx context.Context, workflo deleteStatus, err := dwsv1alpha2.DeleteChildrenWithLabels(ctx, r.Client, childObjects, workflow, client.MatchingLabels{nnfv1alpha1.DirectiveIndexLabel: strconv.Itoa(index)}) if err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Could not delete storage allocations").WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not delete NnfStorage and PersistentStorageInstance children").WithError(err).WithUserMessage("could not delete storage allocations") } if !deleteStatus.Complete() { diff --git a/controllers/nnf_workflow_controller_container_helpers.go b/controllers/nnf_workflow_controller_container_helpers.go new file mode 100644 index 000000000..db0d499ee --- /dev/null +++ b/controllers/nnf_workflow_controller_container_helpers.go @@ -0,0 +1,580 @@ +/* + * Copyright 2023 Hewlett Packard Enterprise Development LP + * Other additional copyright holders may be indicated within. + * + * The entirety of this work is licensed under the Apache License, + * Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. + * + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package controllers + +import ( + "context" + "fmt" + "strconv" + "strings" + + dwsv1alpha2 "github.com/HewlettPackard/dws/api/v1alpha2" + nnfv1alpha1 "github.com/NearNodeFlash/nnf-sos/api/v1alpha1" + "github.com/go-logr/logr" + mpicommonv1 "github.com/kubeflow/common/pkg/apis/common/v1" + mpiv2beta1 "github.com/kubeflow/mpi-operator/pkg/apis/kubeflow/v2beta1" + "go.openly.dev/pointy" + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +type nnfUserContainer struct { + workflow *dwsv1alpha2.Workflow + profile *nnfv1alpha1.NnfContainerProfile + nnfNodes []string + volumes []nnfContainerVolume + username string + uid, gid int64 + client client.Client + log logr.Logger + scheme *runtime.Scheme + ctx context.Context + index int +} + +// This struct contains all the necessary information for mounting container storages +type nnfContainerVolume struct { + name string + command string + directiveName string + directiveIndex int + mountPath string + envVarName string + pvcName string +} + +// MPI container workflow. In this model, we use mpi-operator to create an MPIJob, which creates +// a job for the launcher (to run mpirun) and a replicaset for the worker pods. The worker nodes +// run an ssh server tn listen for mpirun operations from the launcher pod. 
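+// The launcher is pinned to the first NNF node, one worker is scheduled on every NNF node in
+// the workflow (enforced with node affinity and pod anti-affinity below), and any host ports
+// leased from the NnfPortManager are opened on the workers and advertised to the containers
+// through the NNF_CONTAINER_PORTS environment variable.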
+func (c *nnfUserContainer) createMPIJob() error { + mpiJob := &mpiv2beta1.MPIJob{ + ObjectMeta: metav1.ObjectMeta{ + Name: c.workflow.Name, + Namespace: c.workflow.Namespace, + }, + } + + c.profile.Data.MPISpec.DeepCopyInto(&mpiJob.Spec) + c.username = nnfv1alpha1.ContainerMPIUser + + if err := c.applyLabels(&mpiJob.ObjectMeta); err != nil { + return err + } + + // Use the profile's backoff limit if not set + if mpiJob.Spec.RunPolicy.BackoffLimit == nil { + mpiJob.Spec.RunPolicy.BackoffLimit = &c.profile.Data.RetryLimit + } + + // MPIJobs have two pod specs: one for the launcher and one for the workers. The webhook ensures + // that the launcher/worker specs exist + launcher := mpiJob.Spec.MPIReplicaSpecs[mpiv2beta1.MPIReplicaTypeLauncher] + launcherSpec := &launcher.Template.Spec + worker := mpiJob.Spec.MPIReplicaSpecs[mpiv2beta1.MPIReplicaTypeWorker] + workerSpec := &worker.Template.Spec + + // Keep failed pods around for log inspection + launcher.RestartPolicy = mpicommonv1.RestartPolicyNever + worker.RestartPolicy = mpicommonv1.RestartPolicyNever + + // Add NNF node tolerations + c.applyTolerations(launcherSpec) + c.applyTolerations(workerSpec) + + // Run the launcher on the first NNF node + launcherSpec.NodeSelector = map[string]string{"kubernetes.io/hostname": c.nnfNodes[0]} + + // Target all the NNF nodes for the workers + replicas := int32(len(c.nnfNodes)) + worker.Replicas = &replicas + workerSpec.Affinity = &corev1.Affinity{ + // Ensure we run a worker on every NNF node + NodeAffinity: &corev1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{ + NodeSelectorTerms: []corev1.NodeSelectorTerm{{ + MatchExpressions: []corev1.NodeSelectorRequirement{{ + Key: "kubernetes.io/hostname", + Operator: corev1.NodeSelectorOpIn, + Values: c.nnfNodes, + }}, + }}, + }, + }, + // But make sure it's only 1 per node + PodAntiAffinity: &corev1.PodAntiAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{{ + TopologyKey: "kubernetes.io/hostname", + LabelSelector: &metav1.LabelSelector{ + MatchExpressions: []metav1.LabelSelectorRequirement{ + { + Key: "training.kubeflow.org/job-name", + Operator: metav1.LabelSelectorOpIn, + Values: []string{c.workflow.Name}, + }, + { + Key: "training.kubeflow.org/job-role", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"worker"}, + }, + }, + }}, + }, + }, + } + + // Set the appropriate permissions (UID/GID) from the workflow + c.applyPermissions(launcherSpec, &mpiJob.Spec, false) + c.applyPermissions(workerSpec, &mpiJob.Spec, true) + + // Use an Init Container to test the waters for mpi - ensure it can contact the workers before + // the launcher tries it. Since this runs as the UID/GID, this needs to happen after the + // passwd Init Container. 
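+	// Init containers run sequentially in declaration order, so appending this one after the
+	// passwd init container (added by applyPermissions above) guarantees the updated passwd
+	// file is already in the shared volume before this check runs as the non-root UID/GID.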
+ c.addInitContainerWorkerWait(launcherSpec, len(c.nnfNodes)) + + // Get the ports from the port manager + ports, err := c.getHostPorts() + if err != nil { + return err + } + // Add the ports to the worker spec and add environment variable for both launcher/worker + addHostPorts(workerSpec, ports) + addPortsEnvVars(launcherSpec, ports) + addPortsEnvVars(workerSpec, ports) + + c.addNnfVolumes(launcherSpec) + c.addNnfVolumes(workerSpec) + c.addEnvVars(launcherSpec, true) + c.addEnvVars(workerSpec, true) + + err = c.client.Create(c.ctx, mpiJob) + if err != nil { + if !apierrors.IsAlreadyExists(err) { + return err + } + } else { + c.log.Info("Created MPIJob", "name", mpiJob.Name, "namespace", mpiJob.Namespace) + } + + return nil +} + +// Non-MPI container workflow. In this model, a job is created for each NNF node which ensures +// that a pod is executed successfully (or the backOffLimit) is hit. Each container in this model +// runs the same image. +func (c *nnfUserContainer) createNonMPIJob() error { + // Use one job that we'll use as a base to create all jobs. Each NNF node will get its own job. + job := &batchv1.Job{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: c.workflow.Namespace, + }, + } + c.profile.Data.Spec.DeepCopyInto(&job.Spec.Template.Spec) + podSpec := &job.Spec.Template.Spec + + if err := c.applyLabels(&job.ObjectMeta); err != nil { + return err + } + + // Use the same labels as the job for the pods + job.Spec.Template.Labels = job.DeepCopy().Labels + + job.Spec.BackoffLimit = &c.profile.Data.RetryLimit + + podSpec.RestartPolicy = corev1.RestartPolicyNever + podSpec.Subdomain = c.workflow.Name // service name == workflow name + + // Get the ports from the port manager + ports, err := c.getHostPorts() + if err != nil { + return err + } + addHostPorts(podSpec, ports) + addPortsEnvVars(podSpec, ports) + + c.applyTolerations(podSpec) + c.applyPermissions(podSpec, nil, false) + c.addNnfVolumes(podSpec) + c.addEnvVars(podSpec, false) + + // Using the base job, create a job for each nnfNode. Only the name, hostname, and node selector is different for each node + for _, nnfNode := range c.nnfNodes { + job.ObjectMeta.Name = c.workflow.Name + "-" + nnfNode + podSpec.Hostname = nnfNode + + // In our case, the target is only 1 node for the job, so a restartPolicy of Never + // is ok because any retry (i.e. new pod) will land on the same node. 
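+		// The nodeSelector below pins every retry of this Job to the same Rabbit, so a
+		// node-level problem surfaces through the Job's backoff limit rather than through a
+		// reschedule elsewhere.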
+ podSpec.NodeSelector = map[string]string{"kubernetes.io/hostname": nnfNode} + + newJob := &batchv1.Job{} + job.DeepCopyInto(newJob) + + err := c.client.Create(c.ctx, newJob) + if err != nil { + if !apierrors.IsAlreadyExists(err) { + return err + } + } else { + c.log.Info("Created non-MPI job", "name", newJob.Name, "namespace", newJob.Namespace) + } + } + + return nil +} + +func (c *nnfUserContainer) applyLabels(job metav1.Object) error { + // Apply Job Labels/Owners + dwsv1alpha2.InheritParentLabels(job, c.workflow) + dwsv1alpha2.AddOwnerLabels(job, c.workflow) + dwsv1alpha2.AddWorkflowLabels(job, c.workflow) + + labels := job.GetLabels() + labels[nnfv1alpha1.ContainerLabel] = c.workflow.Name + labels[nnfv1alpha1.PinnedContainerProfileLabelName] = c.profile.GetName() + labels[nnfv1alpha1.PinnedContainerProfileLabelNameSpace] = c.profile.GetNamespace() + labels[nnfv1alpha1.DirectiveIndexLabel] = strconv.Itoa(c.index) + job.SetLabels(labels) + + if err := ctrl.SetControllerReference(c.workflow, job, c.scheme); err != nil { + return err + } + + return nil +} + +func (c *nnfUserContainer) applyTolerations(spec *corev1.PodSpec) { + spec.Tolerations = append(spec.Tolerations, corev1.Toleration{ + Effect: corev1.TaintEffectNoSchedule, + Key: "cray.nnf.node", + Operator: corev1.TolerationOpEqual, + Value: "true", + }) +} + +func (c *nnfUserContainer) addInitContainerPasswd(spec *corev1.PodSpec, image string) { + // This script creates an entry in /etc/passwd to map the user to the given UID/GID using an + // InitContainer. This is necessary for mpirun because it uses ssh to communicate with the + // worker nodes. ssh itself requires that the UID is tied to a username in the container. + // Since the launcher container is running as non-root, we need to make use of an InitContainer + // to edit /etc/passwd and copy it to a volume which can then be mounted into the non-root + // container to replace /etc/passwd. + script := `# tie the UID/GID to the user +sed -i '/^$USER/d' /etc/passwd +echo "$USER:x:$UID:$GID::/home/$USER:/bin/sh" >> /etc/passwd +cp /etc/passwd /config/ +exit 0 +` + // Replace the user and UID/GID + script = strings.ReplaceAll(script, "$USER", c.username) + script = strings.ReplaceAll(script, "$UID", fmt.Sprintf("%d", c.uid)) + script = strings.ReplaceAll(script, "$GID", fmt.Sprintf("%d", c.gid)) + + spec.InitContainers = append(spec.InitContainers, corev1.Container{ + Name: "mpi-init-passwd", + Image: image, + Command: []string{ + "/bin/sh", + "-c", + script, + }, + VolumeMounts: []corev1.VolumeMount{ + {Name: "passwd", MountPath: "/config"}, + }, + }) +} + +func (c *nnfUserContainer) addInitContainerWorkerWait(spec *corev1.PodSpec, numWorkers int) { + // Add an initContainer to ensure that the worker pods are up and discoverable via mpirun. + script := `# use mpirun to contact workers +echo "contacting $HOSTS..." +for i in $(seq 1 100); do + sleep 1 + echo "attempt $i of 100..." + echo "mpirun -H $HOSTS hostname" + mpirun -H $HOSTS hostname + if [ $? -eq 0 ]; then + echo "successfully contacted $HOSTS; done" + exit 0 + fi +done +echo "failed to contact $HOSTS" +exit 1 +` + // Build a slice of the workers' hostname.domain (e.g. nnf-container-example-worker-0.nnf-container-example-worker.default.svc) + // This hostname comes from mpi-operator. 
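As a rough standalone illustration of the host list this produces (the workflow name "nnf-container-example", the "default" namespace, and a worker count of two are assumed values, not taken from a real workflow):

package main

import (
	"fmt"
	"strings"
)

func main() {
	name, namespace, numWorkers := "nnf-container-example", "default", 2

	// Same format as the loop below: <job>-worker-<i>.<job>-worker.<namespace>.svc
	workers := []string{}
	for i := 0; i < numWorkers; i++ {
		workers = append(workers, strings.ToLower(fmt.Sprintf(
			"%s-worker-%d.%s-worker.%s.svc", name, i, name, namespace)))
	}

	// mpirun is handed the comma separated list via -H, e.g.
	// mpirun -H nnf-container-example-worker-0.nnf-container-example-worker.default.svc,... hostname
	fmt.Println("mpirun -H " + strings.Join(workers, ",") + " hostname")
}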
+ workers := []string{} + for i := 0; i < numWorkers; i++ { + host := strings.ToLower(fmt.Sprintf( + "%s-worker-%d.%s-worker.%s.svc", c.workflow.Name, i, c.workflow.Name, c.workflow.Namespace)) + workers = append(workers, host) + } + // mpirun takes a comma separated list of hosts (-H) + script = strings.ReplaceAll(script, "$HOSTS", strings.Join(workers, ",")) + + spec.InitContainers = append(spec.InitContainers, corev1.Container{ + Name: fmt.Sprintf("mpi-wait-for-worker-%d", numWorkers), + Image: spec.Containers[0].Image, + Command: []string{ + "/bin/sh", + "-c", + script, + }, + // mpirun needs this environment variable to use DNS hostnames + Env: []corev1.EnvVar{{Name: "OMPI_MCA_orte_keep_fqdn_hostnames", Value: "true"}}, + // Run this initContainer as the same UID/GID as the launcher + SecurityContext: &corev1.SecurityContext{ + RunAsUser: &c.uid, + RunAsGroup: &c.gid, + RunAsNonRoot: pointy.Bool(true), + }, + // And use the necessary volumes to support the UID/GID + VolumeMounts: []corev1.VolumeMount{ + {MountPath: "/etc/passwd", Name: "passwd", SubPath: "passwd"}, + {MountPath: "/home/mpiuser/.ssh", Name: "ssh-auth"}, + }, + }) +} + +func (c *nnfUserContainer) applyPermissions(spec *corev1.PodSpec, mpiJobSpec *mpiv2beta1.MPIJobSpec, worker bool) { + + // Add volume for /etc/passwd to map user to UID/GID + spec.Volumes = append(spec.Volumes, corev1.Volume{ + Name: "passwd", + VolumeSource: corev1.VolumeSource{ + EmptyDir: &corev1.EmptyDirVolumeSource{}, + }, + }) + + if !worker { + // Add SecurityContext if necessary + if spec.SecurityContext == nil { + spec.SecurityContext = &corev1.PodSecurityContext{} + } + + // Add spec level security context to apply FSGroup to all containers. This keeps the + // volumes safe from root actions. + spec.SecurityContext.FSGroup = &c.gid + + // Set the ssh key path for non-root users. Defaults to root. + if mpiJobSpec != nil { + mpiJobSpec.SSHAuthMountPath = fmt.Sprintf("/home/%s/.ssh", c.username) + } + } + + // Add user permissions to each container. This needs to be done for each container because + // we do not want these permissions on the init container. + for idx := range spec.Containers { + container := &spec.Containers[idx] + + // Add an InitContainer to map the user to the provided uid/gid using /etc/passwd + c.addInitContainerPasswd(spec, container.Image) + + // Add a mount to copy the modified /etc/passwd to + container.VolumeMounts = append(container.VolumeMounts, corev1.VolumeMount{ + Name: "passwd", + MountPath: "/etc/passwd", + SubPath: "passwd", + }) + + // Create SecurityContext if necessary + if container.SecurityContext == nil { + container.SecurityContext = &corev1.SecurityContext{} + } + + // Add non-root permissions from the workflow's user/group ID for the launcher, but not + // the worker. The worker needs to run an ssh daemon, which requires root. Commands on + // the worker are executed via the launcher as the `mpiuser` and not root. + if !worker { + container.SecurityContext.RunAsUser = &c.uid + container.SecurityContext.RunAsGroup = &c.gid + container.SecurityContext.RunAsNonRoot = pointy.Bool(true) + container.SecurityContext.AllowPrivilegeEscalation = pointy.Bool(false) + } else { + // For the worker nodes, we need to ensure we have the appropriate linux capabilities to + // allow for ssh access for mpirun. Drop all capabilities and only add what is + // necessary. Only do this if the Capabilities have not been set by the user. 
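+			// Roughly: SETUID/SETGID and SYS_CHROOT cover sshd privilege separation, AUDIT_WRITE
+			// lets it record logins, and NET_BIND_SERVICE allows binding a privileged port if the
+			// ssh daemon is configured with one.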
+ container.SecurityContext.AllowPrivilegeEscalation = pointy.Bool(true) + if container.SecurityContext.Capabilities == nil { + container.SecurityContext.Capabilities = &corev1.Capabilities{ + Drop: []corev1.Capability{"ALL"}, + Add: []corev1.Capability{"NET_BIND_SERVICE", "SYS_CHROOT", "AUDIT_WRITE", "SETUID", "SETGID"}, + } + } + } + } +} + +func (c *nnfUserContainer) getHostPorts() ([]uint16, error) { + ports := []uint16{} + expectedPorts := int(c.profile.Data.NumPorts) + + if expectedPorts < 1 { + return ports, nil + } + + pm, err := getContainerPortManager(c.ctx, c.client) + if err != nil { + return nil, err + } + + // Get the ports from the port manager for this workflow + for _, alloc := range pm.Status.Allocations { + if alloc.Requester != nil && alloc.Requester.UID == c.workflow.UID && alloc.Status == nnfv1alpha1.NnfPortManagerAllocationStatusInUse { + ports = append(ports, alloc.Ports...) + } + } + + // Make sure we found the number of ports in the port manager that we expect + if len(ports) != expectedPorts { + return nil, dwsv1alpha2.NewResourceError( + "number of ports found in NnfPortManager's allocation (%d) does not equal the profile's requested ports (%d)", + len(ports), expectedPorts). + WithUserMessage("requested ports do not meet the number of allocated ports").WithFatal() + } + + return ports, nil +} + +// Given a list of ports, add HostPort entries for all containers in a PodSpec +func addHostPorts(spec *corev1.PodSpec, ports []uint16) { + + // Nothing to add + if len(ports) < 1 { + return + } + + // Add the ports to the containers + for idx := range spec.Containers { + container := &spec.Containers[idx] + + for _, port := range ports { + container.Ports = append(container.Ports, corev1.ContainerPort{ + ContainerPort: int32(port), + HostPort: int32(port), + }) + } + } +} + +// Given a list of ports, convert it into an environment variable name and comma separated value +func getContainerPortsEnvVar(ports []uint16) (string, string) { + portStr := []string{} + for _, port := range ports { + portStr = append(portStr, strconv.Itoa(int(port))) + } + + return "NNF_CONTAINER_PORTS", strings.Join(portStr, ",") +} + +// Add a environment variable for the container ports to all containers in a PodSpec +func addPortsEnvVars(spec *corev1.PodSpec, ports []uint16) { + if len(ports) < 1 { + return + } + + // Add port environment variable to containers + for idx := range spec.Containers { + container := &spec.Containers[idx] + + name, val := getContainerPortsEnvVar(ports) + container.Env = append(container.Env, corev1.EnvVar{ + Name: name, + Value: val, + }) + } +} + +func (c *nnfUserContainer) addNnfVolumes(spec *corev1.PodSpec) { + for _, vol := range c.volumes { + + var volSource corev1.VolumeSource + + // If global lustre, use a PVC, otherwise use a HostPath on the rabbit to the mounts that + // already exist. 
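+		// Note that the HostPathDirectory type (unlike HostPathDirectoryOrCreate) requires the
+		// directory to already exist on the Rabbit; the pod will not start if the mount path is
+		// missing.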
+ if vol.command == "globaldw" { + volSource = corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: vol.pvcName, + }, + } + } else { + hostPathType := corev1.HostPathDirectory + volSource = corev1.VolumeSource{ + HostPath: &corev1.HostPathVolumeSource{ + Path: vol.mountPath, + Type: &hostPathType, + }, + } + } + spec.Volumes = append(spec.Volumes, corev1.Volume{Name: vol.name, VolumeSource: volSource}) + + // Add VolumeMounts and Volume environment variables for all containers + for idx := range spec.Containers { + container := &spec.Containers[idx] + + container.VolumeMounts = append(container.VolumeMounts, corev1.VolumeMount{ + Name: vol.name, + MountPath: vol.mountPath, + }) + + container.Env = append(container.Env, corev1.EnvVar{ + Name: vol.envVarName, + Value: vol.mountPath, + }) + } + } +} + +func (c *nnfUserContainer) addEnvVars(spec *corev1.PodSpec, mpi bool) { + // Add in non-volume environment variables for all containers + for idx := range spec.Containers { + container := &spec.Containers[idx] + + // Jobs/hostnames and services/subdomains are named differently based on mpi or not. For + // MPI, there are launcher/worker pods and the service is named after the worker. For + // non-MPI, the jobs are named after the rabbit node. + subdomain := "" + domain := c.workflow.Namespace + ".svc.cluster.local" + hosts := []string{} + + if mpi { + launcher := c.workflow.Name + "-launcher" + worker := c.workflow.Name + "-worker" + subdomain = worker + + hosts = append(hosts, launcher) + for i, _ := range c.nnfNodes { + hosts = append(hosts, fmt.Sprintf("%s-%d", worker, i)) + } + } else { + subdomain = spec.Subdomain + hosts = append(hosts, c.nnfNodes...) + } + + container.Env = append(container.Env, + corev1.EnvVar{Name: "NNF_CONTAINER_SUBDOMAIN", Value: subdomain}, + corev1.EnvVar{Name: "NNF_CONTAINER_DOMAIN", Value: domain}, + corev1.EnvVar{Name: "NNF_CONTAINER_HOSTNAMES", Value: strings.Join(hosts, " ")}) + } +} diff --git a/controllers/nnf_workflow_controller_helpers.go b/controllers/nnf_workflow_controller_helpers.go index 993494188..87767188d 100644 --- a/controllers/nnf_workflow_controller_helpers.go +++ b/controllers/nnf_workflow_controller_helpers.go @@ -22,6 +22,7 @@ package controllers import ( "context" "fmt" + "math/rand" "os" "reflect" "strconv" @@ -34,7 +35,6 @@ import ( nnfv1alpha1 "github.com/NearNodeFlash/nnf-sos/api/v1alpha1" "github.com/go-logr/logr" - mpicommonv1 "github.com/kubeflow/common/pkg/apis/common/v1" mpiv2beta1 "github.com/kubeflow/mpi-operator/pkg/apis/kubeflow/v2beta1" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" @@ -56,16 +56,6 @@ type result struct { deleteStatus *dwsv1alpha2.DeleteStatus } -// This struct contains all the necessary information for mounting container storages -type nnfContainerVolume struct { - name string - command string - directiveName string - directiveIndex int - mountPath string - envVarName string -} - // When workflow stages cannot advance they return a Requeue result with a particular reason. 
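A minimal usage sketch of this result builder, mirroring calls that already appear in the state handlers above (it assumes this package's existing imports, such as time and nnfv1alpha1):

func exampleHandler(nnfStorage *nnfv1alpha1.NnfStorage) (*result, error) {
	if nnfStorage.Status.Status != nnfv1alpha1.ResourceReady {
		// Not ready yet: poll again shortly and record which object we are waiting on.
		return Requeue("allocation set not ready").after(2 * time.Second).withObject(nnfStorage), nil
	}

	// A nil result with a nil error tells the reconciler this state is finished.
	return nil, nil
}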
func Requeue(reason string) *result { return &result{Result: ctrl.Result{}, reason: reason} @@ -115,6 +105,8 @@ func (r *result) info() []interface{} { // Validate the workflow and return any error found func (r *NnfWorkflowReconciler) validateWorkflow(ctx context.Context, wf *dwsv1alpha2.Workflow) error { + log := r.Log.WithValues("Workflow", types.NamespacedName{Name: wf.Name, Namespace: wf.Namespace}) + var createPersistentCount, deletePersistentCount, directiveCount, containerCount int for index, directive := range wf.Spec.DWDirectives { @@ -128,7 +120,7 @@ func (r *NnfWorkflowReconciler) validateWorkflow(ctx context.Context, wf *dwsv1a case "copy_in", "copy_out": if err := r.validateStagingDirective(ctx, wf, directive); err != nil { - return nnfv1alpha1.NewWorkflowError("Invalid staging Directive: " + directive).WithFatal().WithError(err) + return dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("invalid staging Directive: '%v'", directive) } case "create_persistent": @@ -139,27 +131,28 @@ func (r *NnfWorkflowReconciler) validateWorkflow(ctx context.Context, wf *dwsv1a case "persistentdw": if err := r.validatePersistentInstanceDirective(ctx, wf, directive); err != nil { - return nnfv1alpha1.NewWorkflowError("Could not validate persistent instance: " + directive).WithFatal().WithError(err) + return dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("could not validate persistent instance: '%s'", directive) } case "container": containerCount++ if err := r.validateContainerDirective(ctx, wf, index); err != nil { - return nnfv1alpha1.NewWorkflowError("Could not validate container directive: " + directive).WithFatal().WithError(err) + return dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("could not validate container directive: '%s'", directive) } } } + log.Info("counts", "directive", directiveCount, "create", createPersistentCount, "delete", deletePersistentCount) if directiveCount > 1 { // Ensure create_persistent or destroy_persistent are singletons in the workflow if createPersistentCount+deletePersistentCount > 0 { - return nnfv1alpha1.NewWorkflowError("Only a single create_persistent or destroy_persistent directive is allowed per workflow").WithFatal() + return dwsv1alpha2.NewResourceError("").WithUserMessage("only a single create_persistent or destroy_persistent directive is allowed per workflow").WithFatal().WithUser() } // Only allow 1 container directive (for now) if containerCount > 1 { - return nnfv1alpha1.NewWorkflowError("Only a single container directive is supported per workflow").WithFatal() + return dwsv1alpha2.NewResourceError("").WithUserMessage("only a single container directive is supported per workflow").WithFatal().WithUser() } } @@ -181,32 +174,32 @@ func (r *NnfWorkflowReconciler) validateStagingDirective(ctx context.Context, wf if strings.HasPrefix(arg, "$DW_JOB_") { index := findDirectiveIndexByName(wf, name, "jobdw") if index == -1 { - return nnfv1alpha1.NewWorkflowError(fmt.Sprintf("Job storage instance '%s' not found", name)).WithFatal() + return dwsv1alpha2.NewResourceError("").WithUserMessage("job storage instance '%s' not found", name).WithFatal().WithUser() } args, err := dwdparse.BuildArgsMap(wf.Spec.DWDirectives[index]) if err != nil { - return nnfv1alpha1.NewWorkflowError("Invalid DW directive: " + wf.Spec.DWDirectives[index]).WithFatal() + return dwsv1alpha2.NewResourceError("").WithUserMessage("invalid DW directive: '%s'", wf.Spec.DWDirectives[index]).WithFatal() } fsType, exists := args["type"] if !exists 
{ - return nnfv1alpha1.NewWorkflowError("Invalid DW directive match for staging argument") + return dwsv1alpha2.NewResourceError("").WithUserMessage("invalid DW directive match for staging argument").WithFatal() } if fsType == "raw" { - return nnfv1alpha1.NewWorkflowError("Data movement can not be used with raw allocations").WithFatal() + return dwsv1alpha2.NewResourceError("").WithUserMessage("data movement can not be used with raw allocations").WithFatal().WithUser() } } else if strings.HasPrefix(arg, "$DW_PERSISTENT_") { if err := r.validatePersistentInstanceForStaging(ctx, name, wf.Namespace); err != nil { - return nnfv1alpha1.NewWorkflowError(fmt.Sprintf("Persistent storage instance '%s' not found", name)).WithFatal() + return dwsv1alpha2.NewResourceError("").WithUserMessage("persistent storage instance '%s' not found", name).WithFatal().WithUser() } if findDirectiveIndexByName(wf, name, "persistentdw") == -1 { - return nnfv1alpha1.NewWorkflowError(fmt.Sprintf("persistentdw directive mentioning '%s' not found", name)).WithFatal() + return dwsv1alpha2.NewResourceError("").WithUserMessage("persistentdw directive mentioning '%s' not found", name).WithFatal().WithUser() } } else { if r.findLustreFileSystemForPath(ctx, arg, r.Log) == nil { - return nnfv1alpha1.NewWorkflowError(fmt.Sprintf("global Lustre file system containing '%s' not found", arg)).WithFatal() + return dwsv1alpha2.NewResourceError("").WithUserMessage("global Lustre file system containing '%s' not found", arg).WithFatal().WithUser() } } @@ -215,15 +208,15 @@ func (r *NnfWorkflowReconciler) validateStagingDirective(ctx context.Context, wf args, err := dwdparse.BuildArgsMap(directive) if err != nil { - return nnfv1alpha1.NewWorkflowError("Invalid DW directive: " + directive).WithFatal() + return dwsv1alpha2.NewResourceError("").WithUserMessage("invalid DW directive: '%s'", directive).WithFatal() } if err := validateStagingArgument(args["source"]); err != nil { - return err + return dwsv1alpha2.NewResourceError("Invalid source argument: '%s'", args["source"]).WithError(err) } if err := validateStagingArgument(args["destination"]); err != nil { - return err + return dwsv1alpha2.NewResourceError("Invalid destination argument: '%s'", args["destination"]).WithError(err) } return nil @@ -233,13 +226,13 @@ func (r *NnfWorkflowReconciler) validateStagingDirective(ctx context.Context, wf func (r *NnfWorkflowReconciler) validateContainerDirective(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) error { args, err := dwdparse.BuildArgsMap(workflow.Spec.DWDirectives[index]) if err != nil { - return nnfv1alpha1.NewWorkflowError("invalid DW directive: " + workflow.Spec.DWDirectives[index]).WithFatal() + return dwsv1alpha2.NewResourceError("").WithUserMessage("invalid DW directive: '%s'", workflow.Spec.DWDirectives[index]).WithFatal() } - // Ensure the supplied profile exists or use the default - profile, err := r.findContainerProfile(ctx, workflow, index) + // Ensure the supplied profile exists + profile, err := findContainerProfile(ctx, r.Client, workflow, index) if err != nil { - return nnfv1alpha1.NewWorkflowError(err.Error()).WithFatal() + return dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("no valid container profile found").WithError(err).WithFatal() } // Check to see if the container storage argument is in the list of storages in the container profile @@ -249,7 +242,7 @@ func (r *NnfWorkflowReconciler) validateContainerDirective(ctx context.Context, return nil } } - return fmt.Errorf("storage '%s' 
not found in container profile '%s'", storageName, profile.Name) + return dwsv1alpha2.NewResourceError("").WithUserMessage("storage '%s' not found in container profile '%s'", storageName, profile.Name).WithFatal().WithUser() } checkContainerFs := func(idx int) error { @@ -260,7 +253,7 @@ func (r *NnfWorkflowReconciler) validateContainerDirective(ctx context.Context, if args["command"] == "persistentdw" { psi, err := r.getPersistentStorageInstance(ctx, args["name"], workflow.Namespace) if err != nil { - return "", fmt.Errorf("could not retrieve persistent instance '%s' for container directive: %s", args["name"], err) + return "", fmt.Errorf("could not retrieve persistent instance %s for container directive: %v", args["name"], err) } return psi.Spec.FsType, nil @@ -275,7 +268,7 @@ func (r *NnfWorkflowReconciler) validateContainerDirective(ctx context.Context, } if strings.ToLower(t) != "lustre" && strings.ToLower(t) != "gfs2" { - return fmt.Errorf("unsupported container filesystem: %s", t) + return dwsv1alpha2.NewResourceError("").WithUserMessage("unsupported container filesystem: %s", t).WithFatal().WithUser() } return nil @@ -292,32 +285,41 @@ func (r *NnfWorkflowReconciler) validateContainerDirective(ctx context.Context, if strings.HasPrefix(arg, "DW_JOB_") { idx := findDirectiveIndexByName(workflow, storageName, "jobdw") if idx == -1 { - return nnfv1alpha1.NewWorkflowError(fmt.Sprintf("jobdw directive mentioning '%s' not found", storageName)).WithFatal() + return dwsv1alpha2.NewResourceError("").WithUserMessage("jobdw directive mentioning '%s' not found", storageName).WithFatal().WithUser() } if err := checkContainerFs(idx); err != nil { - return nnfv1alpha1.NewWorkflowError(err.Error()).WithFatal() + return err } if err := checkStorageIsInProfile(arg); err != nil { - return nnfv1alpha1.NewWorkflowError(err.Error()).WithFatal() + return err } suppliedStorageArguments = append(suppliedStorageArguments, arg) } else if strings.HasPrefix(arg, "DW_PERSISTENT_") { - if err := r.validatePersistentInstanceForStaging(ctx, storageName, workflow.Namespace); err != nil { - return nnfv1alpha1.NewWorkflowError(fmt.Sprintf("persistent storage instance '%s' not found", storageName)).WithFatal() + if err := r.validatePersistentInstance(ctx, storageName, workflow.Namespace); err != nil { + return dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("persistent storage instance '%s' not found", storageName).WithFatal() } idx := findDirectiveIndexByName(workflow, storageName, "persistentdw") if idx == -1 { - return nnfv1alpha1.NewWorkflowError(fmt.Sprintf("persistentdw directive mentioning '%s' not found", storageName)).WithFatal() + return dwsv1alpha2.NewResourceError("").WithUserMessage("persistentdw directive mentioning '%s' not found", storageName).WithFatal().WithUser() } if err := checkContainerFs(idx); err != nil { - return nnfv1alpha1.NewWorkflowError(err.Error()).WithFatal() + return err + } + if err := checkStorageIsInProfile(arg); err != nil { + return err + } + suppliedStorageArguments = append(suppliedStorageArguments, arg) + } else if strings.HasPrefix(arg, "DW_GLOBAL_") { + // Look up the global lustre fs by path rather than LustreFilesystem name + if globalLustre := r.findLustreFileSystemForPath(ctx, storageName, r.Log); globalLustre == nil { + return dwsv1alpha2.NewResourceError("").WithUserMessage("global Lustre file system containing '%s' not found", storageName).WithFatal().WithUser() } if err := checkStorageIsInProfile(arg); err != nil { - return 
nnfv1alpha1.NewWorkflowError(err.Error()).WithFatal() + return dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("storage '%s' is not present in the container profile", arg).WithUser().WithFatal() } suppliedStorageArguments = append(suppliedStorageArguments, arg) } else { - return nnfv1alpha1.NewWorkflowError(fmt.Sprintf("unrecognized container argument: %s", arg)).WithFatal() + return dwsv1alpha2.NewResourceError("").WithUserMessage("unrecognized container argument: %s", arg).WithFatal().WithUser() } } } @@ -336,8 +338,8 @@ func (r *NnfWorkflowReconciler) validateContainerDirective(ctx context.Context, for _, storage := range profile.Data.Storages { if !storage.Optional { if !findInStorageArguments(storage.Name) { - return fmt.Errorf("storage '%s' in container profile '%s' is not optional: storage argument not found in the supplied arguments", - storage.Name, profile.Name) + return dwsv1alpha2.NewResourceError("").WithUserMessage("storage '%s' in container profile '%s' is not optional: storage argument not found in the supplied arguments", + storage.Name, profile.Name).WithUser().WithFatal() } } } @@ -346,7 +348,7 @@ func (r *NnfWorkflowReconciler) validateContainerDirective(ctx context.Context, } if err := checkNonOptionalStorages(suppliedStorageArguments); err != nil { - return nnfv1alpha1.NewWorkflowError(err.Error()).WithFatal() + return err } return nil @@ -356,11 +358,29 @@ func (r *NnfWorkflowReconciler) validateContainerDirective(ctx context.Context, func (r *NnfWorkflowReconciler) validatePersistentInstanceForStaging(ctx context.Context, name string, namespace string) error { psi, err := r.getPersistentStorageInstance(ctx, name, namespace) if err != nil { - return err + return dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("could not get PersistentStorageInstance '%s'", name).WithFatal().WithUser() } if psi.Spec.FsType == "raw" { - return nnfv1alpha1.NewWorkflowError("Data movement can not be used with raw allocations").WithFatal() + return dwsv1alpha2.NewResourceError("").WithUserMessage("data movement can not be used with raw allocations").WithFatal().WithUser() + } + + if !psi.DeletionTimestamp.IsZero() { + return dwsv1alpha2.NewResourceError("").WithUserMessage("Persistent storage instance '%s' is deleting", name).WithUser().WithFatal() + } + + return nil +} + +// validatePersistentInstance validates the persistentdw directive. +func (r *NnfWorkflowReconciler) validatePersistentInstance(ctx context.Context, name string, namespace string) error { + psi, err := r.getPersistentStorageInstance(ctx, name, namespace) + if err != nil { + return dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("could not get PersistentStorageInstance %s", name).WithFatal().WithUser() + } + + if !psi.DeletionTimestamp.IsZero() { + return dwsv1alpha2.NewResourceError("").WithUserMessage("Persistent storage instance '%s' is deleting", name).WithUser().WithFatal() } return nil @@ -371,16 +391,16 @@ func (r *NnfWorkflowReconciler) validatePersistentInstanceDirective(ctx context. 
// Validate that the persistent instance is available and not in the process of being deleted args, err := dwdparse.BuildArgsMap(directive) if err != nil { - return nnfv1alpha1.NewWorkflowError("Invalid DW directive: " + directive).WithFatal() + return dwsv1alpha2.NewResourceError("invalid DW directive: %s", directive).WithFatal() } psi, err := r.getPersistentStorageInstance(ctx, args["name"], wf.Namespace) if err != nil { - return err + return dwsv1alpha2.NewResourceError("").WithError(err).WithUserMessage("could not get PersistentStorageInstance '%s'", args["name"]).WithFatal().WithUser() } if !psi.DeletionTimestamp.IsZero() { - return nnfv1alpha1.NewWorkflowError("Persistent storage instance " + args["name"] + " is deleting").WithFatal() + return dwsv1alpha2.NewResourceError("").WithUserMessage("Persistent storage instance '%s' is deleting", args["name"]).WithUser().WithFatal() } return nil @@ -458,8 +478,7 @@ func (r *NnfWorkflowReconciler) generateDirectiveBreakdown(ctx context.Context, }) if err != nil { - log.Error(err, "failed to create or update DirectiveBreakdown", "name", directiveBreakdown.Name) - return nil, fmt.Errorf("CreateOrUpdate failed for DirectiveBreakdown %v: %w", client.ObjectKeyFromObject(directiveBreakdown), err) + return nil, dwsv1alpha2.NewResourceError("CreateOrUpdate failed for DirectiveBreakdown: %v", client.ObjectKeyFromObject(directiveBreakdown)).WithError(err) } if result == controllerutil.OperationResultCreated { @@ -480,8 +499,7 @@ func (r *NnfWorkflowReconciler) generateDirectiveBreakdown(ctx context.Context, func (r *NnfWorkflowReconciler) validateServerAllocations(ctx context.Context, dbd *dwsv1alpha2.DirectiveBreakdown, servers *dwsv1alpha2.Servers) error { if len(dbd.Status.Storage.AllocationSets) != 0 && len(dbd.Status.Storage.AllocationSets) != len(servers.Spec.AllocationSets) { - err := fmt.Errorf("Servers resource does not meet storage requirements for directive '%s'", dbd.Spec.Directive) - return nnfv1alpha1.NewWorkflowError("Allocation request does not meet directive requirements").WithFatal().WithError(err) + return dwsv1alpha2.NewResourceError("Servers resource does not meet storage requirements for directive '%s'", dbd.Spec.Directive).WithUserMessage("Allocation request does not meet directive requirements").WithWLM().WithFatal() } for _, breakdownAllocationSet := range dbd.Status.Storage.AllocationSets { @@ -495,8 +513,7 @@ func (r *NnfWorkflowReconciler) validateServerAllocations(ctx context.Context, d if breakdownAllocationSet.AllocationStrategy == dwsv1alpha2.AllocateSingleServer { if len(serverAllocationSet.Storage) != 1 || serverAllocationSet.Storage[0].AllocationCount != 1 { - err := fmt.Errorf("Allocation set %s expected single allocation", breakdownAllocationSet.Label) - return nnfv1alpha1.NewWorkflowError("Allocation request does not meet directive requirements").WithFatal().WithError(err) + return dwsv1alpha2.NewResourceError("allocation set %s expected single allocation", breakdownAllocationSet.Label).WithUserMessage("storage directive requirements were not satisfied").WithWLM().WithFatal() } } @@ -511,8 +528,7 @@ func (r *NnfWorkflowReconciler) validateServerAllocations(ctx context.Context, d } if totalCapacity < breakdownAllocationSet.MinimumCapacity { - err := fmt.Errorf("Allocation set %s specified insufficient capacity", breakdownAllocationSet.Label) - return nnfv1alpha1.NewWorkflowError("Allocation request does not meet directive requirements").WithFatal().WithError(err) + return dwsv1alpha2.NewResourceError("allocation set 
%s specified insufficient capacity", breakdownAllocationSet.Label).WithUserMessage("storage directive requirements were not satisfied").WithWLM().WithFatal() } // Look up each of the storages specified to make sure they exist @@ -525,18 +541,13 @@ func (r *NnfWorkflowReconciler) validateServerAllocations(ctx context.Context, d } if err := r.Get(ctx, client.ObjectKeyFromObject(storage), storage); err != nil { - if apierrors.IsNotFound(err) { - return nnfv1alpha1.NewWorkflowError("Allocation request did not specify valid storage").WithFatal().WithError(err) - } - - return nnfv1alpha1.NewWorkflowError("Could not validate allocation request").WithError(err) + return dwsv1alpha2.NewResourceError("could not get storage: %s", client.ObjectKeyFromObject(storage)).WithError(err).WithUserMessage("storage directive requirements were not satisfied").WithFatal() } } } if !found { - err := fmt.Errorf("Allocation set %s not found in Servers resource", breakdownAllocationSet.Label) - return nnfv1alpha1.NewWorkflowError("Allocation request does not meet directive requirements").WithFatal().WithError(err) + return dwsv1alpha2.NewResourceError("allocation set %s not found in Servers resource", breakdownAllocationSet.Label).WithUserMessage("storage directive requirements were not satisfied").WithWLM().WithFatal() } } @@ -554,21 +565,20 @@ func (r *NnfWorkflowReconciler) createNnfStorage(ctx context.Context, workflow * dwArgs, err := dwdparse.BuildArgsMap(workflow.Spec.DWDirectives[index]) if err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Invalid DW directive: " + workflow.Spec.DWDirectives[index]).WithFatal() + return nil, dwsv1alpha2.NewResourceError("").WithUserMessage("invalid DW directive: %s", workflow.Spec.DWDirectives[index]).WithFatal().WithUser() } pinnedName, pinnedNamespace := getStorageReferenceNameFromWorkflowActual(workflow, index) nnfStorageProfile, err := findPinnedProfile(ctx, r.Client, pinnedNamespace, pinnedName) if err != nil { - log.Error(err, "Unable to find pinned NnfStorageProfile", "name", pinnedName) - return nil, fmt.Errorf("Could not find pinned NnfStorageProfile %v: %w", types.NamespacedName{Name: pinnedName, Namespace: pinnedNamespace}, err) + return nil, dwsv1alpha2.NewResourceError("could not find pinned NnfStorageProfile: %v", types.NamespacedName{Name: pinnedName, Namespace: pinnedNamespace}).WithError(err).WithFatal() } var owner metav1.Object = workflow if dwArgs["command"] == "create_persistent" { psi, err := r.findPersistentInstance(ctx, workflow, dwArgs["name"]) if err != nil { - return nil, fmt.Errorf("Could not find PersistentStorageInstance %v for 'create_persistent' directive: %w", dwArgs["name"], err) + return nil, dwsv1alpha2.NewResourceError("could not find PersistentStorageInstance: %v", dwArgs["name"]).WithError(err).WithFatal() } owner = psi @@ -585,6 +595,33 @@ func (r *NnfWorkflowReconciler) createNnfStorage(ctx context.Context, workflow * nnfStorage.Spec.UserID = workflow.Spec.UserID nnfStorage.Spec.GroupID = workflow.Spec.GroupID + // determine the NID of the external MGS if necessary + mgsNid := "" + persistentMgsReference := corev1.ObjectReference{} + + if dwArgs["type"] == "lustre" && len(nnfStorageProfile.Data.LustreStorage.ExternalMGS) > 0 { + // If the prefix on the ExternalMGS field is "pool:", then this is pool name instead of a NID. 
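+		// For example (values hypothetical): "pool:example-mgs-pool" picks one persistent MGS at
+		// random from that pool, while a plain NID such as "10.1.1.5@tcp" is used directly as the
+		// external MGS.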
+ if strings.HasPrefix(nnfStorageProfile.Data.LustreStorage.ExternalMGS, "pool:") { + // Copy the existing PersistentStorageInstance data if present to prevent picking a different + // MGS + for _, allocationSet := range nnfStorage.Spec.AllocationSets { + mgsNid = allocationSet.NnfStorageLustreSpec.ExternalMgsNid + persistentMgsReference = allocationSet.NnfStorageLustreSpec.PersistentMgsReference + break + } + + // If no MGS was picked yet, pick one randomly from the pool of PersistentStorageInstances with the right label + if mgsNid == "" { + persistentMgsReference, mgsNid, err = r.getLustreMgsFromPool(ctx, strings.TrimPrefix(nnfStorageProfile.Data.LustreStorage.ExternalMGS, "pool:")) + if err != nil { + return err + } + } + + } else { + mgsNid = nnfStorageProfile.Data.LustreStorage.ExternalMGS + } + } // Need to remove all of the AllocationSets in the NnfStorage object before we begin nnfStorage.Spec.AllocationSets = []nnfv1alpha1.NnfStorageAllocationSetSpec{} @@ -598,8 +635,9 @@ func (r *NnfWorkflowReconciler) createNnfStorage(ctx context.Context, workflow * nnfAllocSet.NnfStorageLustreSpec.TargetType = strings.ToUpper(s.Spec.AllocationSets[i].Label) nnfAllocSet.NnfStorageLustreSpec.BackFs = "zfs" nnfAllocSet.NnfStorageLustreSpec.FileSystemName = "z" + string(s.GetUID())[:7] - if len(nnfStorageProfile.Data.LustreStorage.ExternalMGS) > 0 { - nnfAllocSet.NnfStorageLustreSpec.ExternalMgsNid = nnfStorageProfile.Data.LustreStorage.ExternalMGS + if len(mgsNid) > 0 { + nnfAllocSet.NnfStorageLustreSpec.ExternalMgsNid = mgsNid + nnfAllocSet.NnfStorageLustreSpec.PersistentMgsReference = persistentMgsReference } } @@ -628,8 +666,7 @@ func (r *NnfWorkflowReconciler) createNnfStorage(ctx context.Context, workflow * }) if err != nil { - log.Error(err, "Failed to create or update NnfStorage", "name", nnfStorage.Name) - return nnfStorage, fmt.Errorf("CreateOrUpdate failed for NnfStorage %v: %w", client.ObjectKeyFromObject(nnfStorage), err) + return nil, dwsv1alpha2.NewResourceError("CreateOrUpdate failed for NnfStorage: %v", client.ObjectKeyFromObject(nnfStorage)).WithError(err) } if result == controllerutil.OperationResultCreated { @@ -643,6 +680,46 @@ func (r *NnfWorkflowReconciler) createNnfStorage(ctx context.Context, workflow * return nnfStorage, nil } +func (r *NnfWorkflowReconciler) getLustreMgsFromPool(ctx context.Context, pool string) (corev1.ObjectReference, string, error) { + persistentStorageList := &dwsv1alpha2.PersistentStorageInstanceList{} + if err := r.List(ctx, persistentStorageList, client.MatchingLabels(map[string]string{nnfv1alpha1.StandaloneMGTLabel: pool})); err != nil { + return corev1.ObjectReference{}, "", err + } + + // Choose an MGS at random from the list of persistent storages + persistentStorage := persistentStorageList.Items[rand.Intn(len(persistentStorageList.Items))] + + // Find the NnfStorage for the PersistentStorage so we can get the LNid + nnfStorage := &nnfv1alpha1.NnfStorage{ + ObjectMeta: metav1.ObjectMeta{ + Name: persistentStorage.Name, + Namespace: persistentStorage.Namespace, + }, + } + + if err := r.Get(ctx, client.ObjectKeyFromObject(nnfStorage), nnfStorage); err != nil { + return corev1.ObjectReference{}, "", dwsv1alpha2.NewResourceError("could not get persistent NnfStorage %v for MGS", client.ObjectKeyFromObject(nnfStorage)).WithError(err) + } + + if nnfStorage.Spec.FileSystemType != "lustre" { + return corev1.ObjectReference{}, "", dwsv1alpha2.NewResourceError("invalid file systems type '%s' for persistent MGS", 
nnfStorage.Spec.FileSystemType).WithFatal() + } + + if len(nnfStorage.Spec.AllocationSets) != 1 { + return corev1.ObjectReference{}, "", dwsv1alpha2.NewResourceError("unexpected number of allocation sets '%d' for persistent MGS", len(nnfStorage.Spec.AllocationSets)).WithFatal() + } + + if len(nnfStorage.Status.MgsNode) == 0 { + return corev1.ObjectReference{}, "", dwsv1alpha2.NewResourceError("no LNid listed for persistent MGS").WithFatal() + } + + return corev1.ObjectReference{ + Kind: reflect.TypeOf(dwsv1alpha2.PersistentStorageInstance{}).Name(), + Name: persistentStorage.Name, + Namespace: persistentStorage.Namespace, + }, nnfStorage.Status.MgsNode, nil +} + func (r *NnfWorkflowReconciler) findLustreFileSystemForPath(ctx context.Context, path string, log logr.Logger) *lusv1beta1.LustreFileSystem { lustres := &lusv1beta1.LustreFileSystemList{} if err := r.List(ctx, lustres); err != nil { @@ -695,7 +772,7 @@ func (r *NnfWorkflowReconciler) setupNnfAccessForServers(ctx context.Context, st }) if err != nil { - return nil, fmt.Errorf("CreateOrUpdate failed for NnfAccess %v: %w", client.ObjectKeyFromObject(access), err) + return nil, dwsv1alpha2.NewResourceError("CreateOrUpdate failed for NnfAccess: %v", client.ObjectKeyFromObject(access)).WithError(err) } if result == controllerutil.OperationResultCreated { @@ -722,12 +799,12 @@ func (r *NnfWorkflowReconciler) getDirectiveFileSystemType(ctx context.Context, } if err := r.Get(ctx, client.ObjectKeyFromObject(nnfStorage), nnfStorage); err != nil { - return "", fmt.Errorf("Could not get persistent NnfStorage %v to determine file system type: %w", client.ObjectKeyFromObject(nnfStorage), err) + return "", dwsv1alpha2.NewResourceError("could not get persistent NnfStorage %v to determine file system type", client.ObjectKeyFromObject(nnfStorage)).WithError(err) } return nnfStorage.Spec.FileSystemType, nil default: - return "", fmt.Errorf("Invalid directive '%s' to get file system type", workflow.Spec.DWDirectives[index]) + return "", dwsv1alpha2.NewResourceError("invalid directive '%s' to get file system type", workflow.Spec.DWDirectives[index]).WithFatal() } } @@ -749,13 +826,22 @@ func (r *NnfWorkflowReconciler) findPersistentInstance(ctx context.Context, wf * return nil, err } - return psi, err + return psi, nil } func handleWorkflowError(err error, driverStatus *dwsv1alpha2.WorkflowDriverStatus) { - e, ok := err.(*nnfv1alpha1.WorkflowError) + e, ok := err.(*dwsv1alpha2.ResourceErrorInfo) if ok { - e.Inject(driverStatus) + status, err := e.Severity.ToStatus() + if err != nil { + driverStatus.Status = dwsv1alpha2.StatusError + driverStatus.Message = "Internal error: " + err.Error() + driverStatus.Error = err.Error() + } else { + driverStatus.Status = status + driverStatus.Message = e.GetUserMessage() + driverStatus.Error = e.Error() + } } else { driverStatus.Status = dwsv1alpha2.StatusError driverStatus.Message = "Internal error: " + err.Error() @@ -763,6 +849,41 @@ func handleWorkflowError(err error, driverStatus *dwsv1alpha2.WorkflowDriverStat } } +func handleWorkflowErrorByIndex(err error, workflow *dwsv1alpha2.Workflow, index int) { + // Create a list of the driverStatus array elements that correspond to the current state + // of the workflow and are targeted for the Rabbit driver + driverList := []*dwsv1alpha2.WorkflowDriverStatus{} + driverID := os.Getenv("DWS_DRIVER_ID") + + for i := range workflow.Status.Drivers { + driverStatus := &workflow.Status.Drivers[i] + + if driverStatus.DriverID != driverID { + continue + } + if 
workflow.Status.State != driverStatus.WatchState { + continue + } + if driverStatus.Completed { + continue + } + + driverList = append(driverList, driverStatus) + } + + for _, driverStatus := range driverList { + if driverStatus.DWDIndex != index { + continue + } + + handleWorkflowError(err, driverStatus) + + return + } + + panic(index) +} + // Returns the directive index with the 'name' argument matching name, or -1 if not found func findDirectiveIndexByName(workflow *dwsv1alpha2.Workflow, name string, command string) int { for idx, directive := range workflow.Spec.DWDirectives { @@ -908,8 +1029,7 @@ func (r *NnfWorkflowReconciler) unmountNnfAccessIfNecessary(ctx context.Context, if err := r.Update(ctx, access); err != nil { if !apierrors.IsConflict(err) { - err = fmt.Errorf("Could not update NnfAccess %v: %w", client.ObjectKeyFromObject(access), err) - return nil, nnfv1alpha1.NewWorkflowError("Unable to request compute node unmount").WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not update NnfAccess: %v", client.ObjectKeyFromObject(access)).WithError(err) } return Requeue("conflict").withObject(access), nil @@ -932,7 +1052,7 @@ func (r *NnfWorkflowReconciler) waitForNnfAccessStateAndReady(ctx context.Contex // Check if we should also wait on the NnfAccess for the servers fsType, err := r.getDirectiveFileSystemType(ctx, workflow, index) if err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Unable to determine directive file system type").WithError(err) + return nil, dwsv1alpha2.NewResourceError("unable to determine directive file system type").WithError(err).WithFatal() } if fsType == "gfs2" || fsType == "lustre" { @@ -949,13 +1069,13 @@ func (r *NnfWorkflowReconciler) waitForNnfAccessStateAndReady(ctx context.Contex } if err := r.Get(ctx, client.ObjectKeyFromObject(access), access); err != nil { - err = fmt.Errorf("Could not get NnfAccess %s: %w", client.ObjectKeyFromObject(access).String(), err) - return nil, nnfv1alpha1.NewWorkflowError("Could not access file system on nodes").WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not get NnfAccess: %v", client.ObjectKeyFromObject(access)).WithError(err) } if access.Status.Error != nil { - err = fmt.Errorf("Error on NnfAccess %s: %w", client.ObjectKeyFromObject(access).String(), access.Status.Error) - return nil, nnfv1alpha1.NewWorkflowError("Could not access file system on nodes").WithError(err) + handleWorkflowErrorByIndex(access.Status.Error, workflow, index) + + return Requeue("mount/unmount error").withObject(access), nil } if state == "mounted" { @@ -984,17 +1104,18 @@ func (r *NnfWorkflowReconciler) addPersistentStorageReference(ctx context.Contex persistentStorage, err := r.findPersistentInstance(ctx, workflow, dwArgs["name"]) if err != nil { - return err + return dwsv1alpha2.NewResourceError("").WithUserMessage("PersistentStorage '%v' not found", dwArgs["name"]).WithMajor().WithUser() } if persistentStorage.Status.State != dwsv1alpha2.PSIStateActive { - return fmt.Errorf("PersistentStorage is not active") + return dwsv1alpha2.NewResourceError("").WithUserMessage("PersistentStorage is not active").WithFatal().WithUser() } // Add a consumer reference to the persistent storage for this directive reference := corev1.ObjectReference{ Name: indexedResourceName(workflow, index), Namespace: workflow.Namespace, + Kind: reflect.TypeOf(dwsv1alpha2.Workflow{}).Name(), } found := false @@ -1025,6 +1146,7 @@ func (r *NnfWorkflowReconciler) removePersistentStorageReference(ctx context.Con reference := 
corev1.ObjectReference{ Name: indexedResourceName(workflow, index), Namespace: workflow.Namespace, + Kind: reflect.TypeOf(dwsv1alpha2.Workflow{}).Name(), } for i, existingReference := range persistentStorage.Spec.ConsumerReferences { @@ -1051,419 +1173,55 @@ func (r *NnfWorkflowReconciler) removeAllPersistentStorageReferences(ctx context return nil } -func (r *NnfWorkflowReconciler) containerHandler(ctx context.Context, workflow *dwsv1alpha2.Workflow, dwArgs map[string]string, index int, log logr.Logger) (*result, error) { - profile, err := r.getContainerProfile(ctx, workflow, index) +func (r *NnfWorkflowReconciler) userContainerHandler(ctx context.Context, workflow *dwsv1alpha2.Workflow, dwArgs map[string]string, index int, log logr.Logger) (*result, error) { + profile, err := getContainerProfile(ctx, r.Client, workflow, index) if err != nil { return nil, err } mpiJob := profile.Data.MPISpec != nil - username := nnfv1alpha1.ContainerUser - // Get the targeted NNF nodes for the container jobs nnfNodes, err := r.getNnfNodesFromComputes(ctx, workflow) if err != nil || len(nnfNodes) <= 0 { - return nil, nnfv1alpha1.NewWorkflowError("error obtaining the target NNF nodes for containers:").WithError(err).WithFatal() + return nil, dwsv1alpha2.NewResourceError("error obtaining the target NNF nodes for containers").WithError(err).WithMajor() } // Get the NNF volumes to mount into the containers - volumes, result, err := r.getContainerVolumes(ctx, workflow, dwArgs) + volumes, result, err := r.getContainerVolumes(ctx, workflow, dwArgs, profile) if err != nil { - return nil, nnfv1alpha1.NewWorkflowErrorf("could not determine the list of volumes need to create container job for workflow: %s", workflow.Name).WithError(err).WithFatal() + return nil, dwsv1alpha2.NewResourceError("could not determine the list of volumes needed to create container job for workflow: %s", workflow.Name).WithError(err).WithFatal() } if result != nil { return result, nil } - applyLabels := func(job metav1.Object) error { - - // Apply Job Labels/Owners - dwsv1alpha2.InheritParentLabels(job, workflow) - dwsv1alpha2.AddOwnerLabels(job, workflow) - dwsv1alpha2.AddWorkflowLabels(job, workflow) - - labels := job.GetLabels() - labels[nnfv1alpha1.ContainerLabel] = workflow.Name - labels[nnfv1alpha1.PinnedContainerProfileLabelName] = profile.GetName() - labels[nnfv1alpha1.PinnedContainerProfileLabelNameSpace] = profile.GetNamespace() - labels[nnfv1alpha1.DirectiveIndexLabel] = strconv.Itoa(index) - job.SetLabels(labels) - - if err := ctrl.SetControllerReference(workflow, job, r.Scheme); err != nil { - return nnfv1alpha1.NewWorkflowErrorf("setting Job controller reference failed for '%s':", job.GetName()).WithError(err) - } - - return nil - } - - applyTolerations := func(spec *corev1.PodSpec) { - spec.Tolerations = append(spec.Tolerations, corev1.Toleration{ - Effect: corev1.TaintEffectNoSchedule, - Key: "cray.nnf.node", - Operator: corev1.TolerationOpEqual, - Value: "true", - }) - } - - addInitContainerPasswd := func(spec *corev1.PodSpec, user string, uid, gid int64, image string) { - // This script creates an entry in /etc/passwd to map the user to the given UID/GID using an - // InitContainer. This is necessary for mpirun because it uses ssh to communicate with the - // worker nodes. ssh itself requires that the UID is tied to a username in the container. 
- // Since the launcher container is running as non-root, we need to make use of an InitContainer - // to edit /etc/passwd and copy it to a volume which can then be mounted into the non-root - // container to replace /etc/passwd. - script := `# tie the UID/GID to the user -sed -i '/^$USER/d' /etc/passwd -echo "$USER:x:$UID:$GID::/home/$USER:/bin/sh" >> /etc/passwd -cp /etc/passwd /config/ -exit 0 -` - // Replace the user and UID/GID - script = strings.ReplaceAll(script, "$USER", user) - script = strings.ReplaceAll(script, "$UID", fmt.Sprintf("%d", uid)) - script = strings.ReplaceAll(script, "$GID", fmt.Sprintf("%d", gid)) - - spec.InitContainers = append(spec.InitContainers, corev1.Container{ - Name: "mpi-init-passwd", - Image: image, - Command: []string{ - "/bin/sh", - "-c", - script, - }, - VolumeMounts: []corev1.VolumeMount{ - {Name: "passwd", MountPath: "/config"}, - }, - }) - } - - addInitContainerWorkerWait := func(spec *corev1.PodSpec, worker int) { - // Add an initContainer to ensure that a worker pod is up and discoverable via dns. This - // assumes nslookup is available in the container. The nnf-mfu image provides this. - script := `# use nslookup to contact workers -echo "contacting $HOST..." -for i in $(seq 1 100); do - sleep 1 - echo "attempt $i of 100..." - nslookup $HOST - if [ $? -eq 0 ]; then - echo "successfully contacted $HOST; done" - exit 0 - fi -done -echo "failed to contact $HOST" -exit 1 -` - // Build the worker's hostname.domain (e.g. nnf-container-example-worker-0.nnf-container-example-worker.default.svc) - // This name comes from mpi-operator. - host := strings.ToLower(fmt.Sprintf( - "%s-worker-%d.%s-worker.%s.svc", workflow.Name, worker, workflow.Name, workflow.Namespace)) - script = strings.ReplaceAll(script, "$HOST", host) - - spec.InitContainers = append(spec.InitContainers, corev1.Container{ - Name: fmt.Sprintf("mpi-wait-for-worker-%d", worker), - Image: spec.Containers[0].Image, - Command: []string{ - "/bin/sh", - "-c", - script, - }, - }) - } - - applyPermissions := func(spec *corev1.PodSpec, mpiJobSpec *mpiv2beta1.MPIJobSpec, user string, worker bool) { - uid := int64(workflow.Spec.UserID) - gid := int64(workflow.Spec.GroupID) - - // Add volume for /etc/passwd to map user to UID/GID - spec.Volumes = append(spec.Volumes, corev1.Volume{ - Name: "passwd", - VolumeSource: corev1.VolumeSource{ - EmptyDir: &corev1.EmptyDirVolumeSource{}, - }, - }) - - if !worker { - // Add SecurityContext if necessary - if spec.SecurityContext == nil { - spec.SecurityContext = &corev1.PodSecurityContext{} - } - - // Add spec level security context to apply FSGroup to all containers. This keeps the - // volumes safe from root actions. - spec.SecurityContext.FSGroup = &gid - - // Set the ssh key path for non-root users. Defaults to root. - if mpiJobSpec != nil { - mpiJobSpec.SSHAuthMountPath = fmt.Sprintf("/home/%s/.ssh", username) - } - } - - // Add user permissions to each container. This needs to be done for each container because - // we do not want these permissions on the init container. 
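The removed block above (and the user-container helper this change relocates it into) applies permissions per container rather than pod-wide, because the init containers must keep their defaults and the MPI worker still needs root for sshd. As a self-contained illustration only, not the helper's actual implementation, the launcher-side pattern condenses to roughly the following sketch:

package main

import corev1 "k8s.io/api/core/v1"

// applyNonRootPermissions illustrates the pattern described above: application containers run
// as the workflow's UID/GID with privilege escalation disabled, and a pod-level FSGroup keeps
// mounted volumes accessible to that group. Init containers are intentionally left untouched.
func applyNonRootPermissions(spec *corev1.PodSpec, uid, gid int64) {
	nonRoot := true
	noEscalate := false

	if spec.SecurityContext == nil {
		spec.SecurityContext = &corev1.PodSecurityContext{}
	}
	spec.SecurityContext.FSGroup = &gid

	for i := range spec.Containers {
		c := &spec.Containers[i]
		if c.SecurityContext == nil {
			c.SecurityContext = &corev1.SecurityContext{}
		}
		c.SecurityContext.RunAsUser = &uid
		c.SecurityContext.RunAsGroup = &gid
		c.SecurityContext.RunAsNonRoot = &nonRoot
		c.SecurityContext.AllowPrivilegeEscalation = &noEscalate
	}
}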
- for idx := range spec.Containers { - container := &spec.Containers[idx] - - // Add an InitContainer to map the user to the provided uid/gid using /etc/passwd - addInitContainerPasswd(spec, user, uid, gid, container.Image) - - // Add a mount to copy the modified /etc/passwd to - container.VolumeMounts = append(container.VolumeMounts, corev1.VolumeMount{ - Name: "passwd", - MountPath: "/etc/passwd", - SubPath: "passwd", - }) - - // Add non-root permissions from the workflow's user/group ID for the launcher, but not - // the worker. The worker needs to run an ssh daemon, which requires root. Commands on - // the worker are executed via the launcher as the `mpiuser` and not root. - if !worker { - if container.SecurityContext == nil { - container.SecurityContext = &corev1.SecurityContext{} - } - container.SecurityContext.RunAsUser = &uid - container.SecurityContext.RunAsGroup = &gid - nonRoot := true - container.SecurityContext.RunAsNonRoot = &nonRoot - su := false - container.SecurityContext.AllowPrivilegeEscalation = &su - } - } - } - - addNNFVolumes := func(spec *corev1.PodSpec) { - for _, vol := range volumes { - // Volumes - hostPathType := corev1.HostPathDirectory - spec.Volumes = append(spec.Volumes, corev1.Volume{ - Name: vol.name, - VolumeSource: corev1.VolumeSource{ - HostPath: &corev1.HostPathVolumeSource{ - Path: vol.mountPath, - Type: &hostPathType, - }, - }, - }) - - // Add VolumeMounts and Volume environment variables for all containers - for idx := range spec.Containers { - container := &spec.Containers[idx] - - container.VolumeMounts = append(container.VolumeMounts, corev1.VolumeMount{ - Name: vol.name, - MountPath: vol.mountPath, - }) - - container.Env = append(container.Env, corev1.EnvVar{ - Name: vol.envVarName, - Value: vol.mountPath, - }) - } - } - } - - addEnvVars := func(spec *corev1.PodSpec, mpi bool) { - // Add in non-volume environment variables for all containers - for idx := range spec.Containers { - container := &spec.Containers[idx] - - // Jobs/hostnames and services/subdomains are named differently based on mpi or not. For - // MPI, there are launcher/worker pods and the service is named after the worker. For - // non-MPI, the jobs are named after the rabbit node. - subdomain := "" - domain := workflow.Namespace + ".svc.cluster.local" - hosts := []string{} - - if mpi { - launcher := workflow.Name + "-launcher" - worker := workflow.Name + "-worker" - subdomain = worker - - hosts = append(hosts, launcher) - for i, _ := range nnfNodes { - hosts = append(hosts, fmt.Sprintf("%s-%d", worker, i)) - } - } else { - subdomain = spec.Subdomain - hosts = append(hosts, nnfNodes...) - } - - container.Env = append(container.Env, - corev1.EnvVar{Name: "NNF_CONTAINER_SUBDOMAIN", Value: subdomain}, - corev1.EnvVar{Name: "NNF_CONTAINER_DOMAIN", Value: domain}, - corev1.EnvVar{Name: "NNF_CONTAINER_HOSTNAMES", Value: strings.Join(hosts, " ")}) - } - } - - // MPI container workflow. In this model, we use mpi-operator to create an MPIJob, which creates - // a job for the launcher (to run mpirun) and a replicaset for the worker pods. The worker nodes - // run an ssh server to listen for mpirun operations from the launcher pod. 
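The NNF_CONTAINER_SUBDOMAIN, NNF_CONTAINER_DOMAIN, and NNF_CONTAINER_HOSTNAMES variables set above are intended for the user application rather than the controller. A minimal consumer-side sketch, assuming the hostname.subdomain.domain DNS convention used by the headless service (this helper is not part of the change itself):

package main

import (
	"fmt"
	"os"
	"strings"
)

// peerFQDNs expands the space-separated NNF_CONTAINER_HOSTNAMES into fully qualified names
// using the subdomain and domain injected by the controller.
func peerFQDNs() []string {
	hosts := strings.Fields(os.Getenv("NNF_CONTAINER_HOSTNAMES"))
	subdomain := os.Getenv("NNF_CONTAINER_SUBDOMAIN")
	domain := os.Getenv("NNF_CONTAINER_DOMAIN")

	fqdns := make([]string, 0, len(hosts))
	for _, h := range hosts {
		fqdns = append(fqdns, fmt.Sprintf("%s.%s.%s", h, subdomain, domain))
	}
	return fqdns
}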
- createMPIJob := func() error { - mpiJob := &mpiv2beta1.MPIJob{ - ObjectMeta: metav1.ObjectMeta{ - Name: workflow.Name, - Namespace: workflow.Namespace, - }, - } - profile.Data.MPISpec.DeepCopyInto(&mpiJob.Spec) - username = nnfv1alpha1.ContainerMPIUser - - if err := applyLabels(&mpiJob.ObjectMeta); err != nil { - return err - } - - // Use the profile's backoff limit if not set - if mpiJob.Spec.RunPolicy.BackoffLimit == nil { - mpiJob.Spec.RunPolicy.BackoffLimit = &profile.Data.RetryLimit - } - - // MPIJobs have two pod specs: one for the launcher and one for the workers - launcher := mpiJob.Spec.MPIReplicaSpecs[mpiv2beta1.MPIReplicaTypeLauncher] - launcherSpec := &launcher.Template.Spec - worker := mpiJob.Spec.MPIReplicaSpecs[mpiv2beta1.MPIReplicaTypeWorker] - workerSpec := &worker.Template.Spec - - // Keep failed pods around for log inspection - launcher.RestartPolicy = mpicommonv1.RestartPolicyNever - worker.RestartPolicy = mpicommonv1.RestartPolicyNever - - // Add NNF node tolerations - applyTolerations(launcherSpec) - applyTolerations(workerSpec) - - // Run the launcher on the first NNF node - launcherSpec.NodeSelector = map[string]string{"kubernetes.io/hostname": nnfNodes[0]} - - // Use initContainers to ensure the workers are up and discoverable before running the launcher command - for i := range nnfNodes { - addInitContainerWorkerWait(launcherSpec, i) - } - - // Target all the NNF nodes for the workers - replicas := int32(len(nnfNodes)) - worker.Replicas = &replicas - workerSpec.Affinity = &corev1.Affinity{ - // Ensure we run a worker on every NNF node - NodeAffinity: &corev1.NodeAffinity{ - RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{ - NodeSelectorTerms: []corev1.NodeSelectorTerm{{ - MatchExpressions: []corev1.NodeSelectorRequirement{{ - Key: "kubernetes.io/hostname", - Operator: corev1.NodeSelectorOpIn, - Values: nnfNodes, - }}, - }}, - }, - }, - // But make sure it's only 1 per node - PodAntiAffinity: &corev1.PodAntiAffinity{ - RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{{ - TopologyKey: "kubernetes.io/hostname", - LabelSelector: &metav1.LabelSelector{ - MatchExpressions: []metav1.LabelSelectorRequirement{ - { - Key: "training.kubeflow.org/job-name", - Operator: metav1.LabelSelectorOpIn, - Values: []string{workflow.Name}, - }, - { - Key: "training.kubeflow.org/job-role", - Operator: metav1.LabelSelectorOpIn, - Values: []string{"worker"}, - }, - }, - }}, - }, - }, - } - - // Set the appropriate permissions (UID/GID) from the workflow - applyPermissions(launcherSpec, &mpiJob.Spec, username, false) - applyPermissions(workerSpec, &mpiJob.Spec, username, true) - - addNNFVolumes(launcherSpec) - addNNFVolumes(workerSpec) - addEnvVars(launcherSpec, true) - addEnvVars(workerSpec, true) - - err = r.Create(ctx, mpiJob) - if err != nil { - if !apierrors.IsAlreadyExists(err) { - return err - } - } else { - log.Info("Created MPIJob", "name", mpiJob.Name, "namespace", mpiJob.Namespace) - } - - return nil - } - - // Non-MPI container workflow. In this model, a job is created for each NNF node which ensures - // that a pod is executed successfully (or the backOffLimit) is hit. Each container in this model - // runs the same image. - createNonMPIJob := func() error { - // Use one job that we'll use as a base to create all jobs. Each NNF node will get its own job. 
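For the non-MPI path, pod-to-pod DNS relies on a headless Service named after the workflow; the new code below calls createContainerService for this, but its body is outside this diff. A sketch of what such a Service plausibly looks like; the selector is an assumption, since the real labels are chosen by that implementation:

package main

import (
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// headlessServiceFor returns a headless Service so that each per-node pod is resolvable at
// <hostname>.<workflow>.<namespace>.svc, matching the podSpec.Subdomain set in the job spec.
func headlessServiceFor(workflowName, namespace string, selector map[string]string) *corev1.Service {
	return &corev1.Service{
		ObjectMeta: metav1.ObjectMeta{
			Name:      workflowName,
			Namespace: namespace,
		},
		Spec: corev1.ServiceSpec{
			ClusterIP: corev1.ClusterIPNone, // headless: per-pod DNS records, no virtual IP
			Selector:  selector,
		},
	}
}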
- job := &batchv1.Job{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: workflow.Namespace, - }, - } - profile.Data.Spec.DeepCopyInto(&job.Spec.Template.Spec) - podSpec := &job.Spec.Template.Spec - - if err := applyLabels(&job.ObjectMeta); err != nil { - return err - } - - // Use the same labels as the job for the pods - job.Spec.Template.Labels = job.DeepCopy().Labels - - job.Spec.BackoffLimit = &profile.Data.RetryLimit - - podSpec.RestartPolicy = corev1.RestartPolicyNever - podSpec.Subdomain = workflow.Name // service name == workflow name - - applyTolerations(podSpec) - applyPermissions(podSpec, nil, username, false) - addNNFVolumes(podSpec) - addEnvVars(podSpec, false) - - // Using the base job, create a job for each nnfNode. Only the name, hostname, and node selector is different for each node - for _, nnfNode := range nnfNodes { - job.ObjectMeta.Name = workflow.Name + "-" + nnfNode - podSpec.Hostname = nnfNode - - // In our case, the target is only 1 node for the job, so a restartPolicy of Never - // is ok because any retry (i.e. new pod) will land on the same node. - podSpec.NodeSelector = map[string]string{"kubernetes.io/hostname": nnfNode} - - newJob := &batchv1.Job{} - job.DeepCopyInto(newJob) - - err = r.Create(ctx, newJob) - if err != nil { - if !apierrors.IsAlreadyExists(err) { - return err - } - } else { - log.Info("Created non-MPI job", "name", newJob.Name, "namespace", newJob.Namespace) - } - } - - return nil + c := nnfUserContainer{ + workflow: workflow, + profile: profile, + nnfNodes: nnfNodes, + volumes: volumes, + username: nnfv1alpha1.ContainerUser, + uid: int64(workflow.Spec.UserID), + gid: int64(workflow.Spec.GroupID), + index: index, + client: r.Client, + log: r.Log, + scheme: r.Scheme, + ctx: ctx, } if mpiJob { - if err := createMPIJob(); err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Unable to create/update MPIJob").WithFatal().WithError(err) + if err := c.createMPIJob(); err != nil { + return nil, dwsv1alpha2.NewResourceError("unable to create/update MPIJob").WithMajor().WithError(err) } } else { + // For non-MPI jobs, we need to create a service ourselves if err := r.createContainerService(ctx, workflow); err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Unable to create/update Container Service").WithFatal().WithError(err) + return nil, dwsv1alpha2.NewResourceError("unable to create/update Container Service").WithMajor().WithError(err) } - if err := createNonMPIJob(); err != nil { - return nil, nnfv1alpha1.NewWorkflowError("Unable to create/update Container Jobs").WithFatal().WithError(err) + if err := c.createNonMPIJob(); err != nil { + return nil, dwsv1alpha2.NewResourceError("unable to create/update Container Jobs").WithMajor().WithError(err) } } @@ -1513,7 +1271,7 @@ func (r *NnfWorkflowReconciler) getNnfNodesFromComputes(ctx context.Context, wor }, } if err := r.Get(ctx, client.ObjectKeyFromObject(&computes), &computes); err != nil { - return ret, nnfv1alpha1.NewWorkflowError("could not find Computes resource for workflow") + return ret, dwsv1alpha2.NewResourceError("could not find Computes resource for workflow") } // Build the list of computes @@ -1521,12 +1279,12 @@ func (r *NnfWorkflowReconciler) getNnfNodesFromComputes(ctx context.Context, wor computeNodes = append(computeNodes, c.Name) } if len(computeNodes) == 0 { - return computeNodes, nnfv1alpha1.NewWorkflowError("the Computes resources does not specify any compute nodes") + return computeNodes, dwsv1alpha2.NewResourceError("the Computes resources does not specify any compute 
nodes").WithWLM().WithFatal() } systemConfig := &dwsv1alpha2.SystemConfiguration{} if err := r.Get(ctx, types.NamespacedName{Name: "default", Namespace: corev1.NamespaceDefault}, systemConfig); err != nil { - return ret, nnfv1alpha1.NewWorkflowError("could not get system configuration") + return ret, dwsv1alpha2.NewResourceError("could not get system configuration") } // The SystemConfiguration is organized by rabbit. Make a map of computes:rabbit for easy lookup. @@ -1544,7 +1302,7 @@ func (r *NnfWorkflowReconciler) getNnfNodesFromComputes(ctx context.Context, wor for _, c := range computeNodes { nnfNode, found := computeMap[c] if !found { - return ret, nnfv1alpha1.NewWorkflowErrorf("supplied compute node '%s' not found in SystemConfiguration", c) + return ret, dwsv1alpha2.NewResourceError("supplied compute node '%s' not found in SystemConfiguration", c).WithFatal() } // Add the node to the map @@ -1563,14 +1321,40 @@ func (r *NnfWorkflowReconciler) getNnfNodesFromComputes(ctx context.Context, wor func (r *NnfWorkflowReconciler) waitForContainersToStart(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) (*result, error) { // Get profile to determine container job type (MPI or not) - profile, err := r.getContainerProfile(ctx, workflow, index) + profile, err := getContainerProfile(ctx, r.Client, workflow, index) if err != nil { return nil, err } + isMPIJob := profile.Data.MPISpec != nil + + // Timeouts - If the containers don't start after PreRunTimeoutSeconds, we need to send an error + // up to the workflow in every one of our return cases. Each return path will check for + // timeoutElapsed and bubble up a fatal error. + // We must also set the Jobs' activeDeadline timeout so that the containers are stopped once the + // timeout is hit. This needs to be handled slightly differently depending on if the job is MPI + // or not. Once set, k8s will take care of stopping the pods for us. + timeoutElapsed := false + timeout := time.Duration(0) + if profile.Data.PreRunTimeoutSeconds != nil { + timeout = time.Duration(*profile.Data.PreRunTimeoutSeconds) * time.Second + } + timeoutMessage := fmt.Sprintf("user container(s) failed to start after %d seconds", int(timeout.Seconds())) - if profile.Data.MPISpec != nil { + // Check if PreRunTimeoutSeconds has elapsed and set the flag. The logic will check once more to + // see if it started or not. If not, then the job(s) activeDeadline will be set to stop the + // jobs/pods. + if timeout > 0 && metav1.Now().Sub(workflow.Status.DesiredStateChange.Time) >= timeout { + timeoutElapsed = true + } + + if isMPIJob { mpiJob, result := r.getMPIJobConditions(ctx, workflow, index, 1) if result != nil { + // If timeout, don't allow requeue and return an error + if timeoutElapsed { + return nil, dwsv1alpha2.NewResourceError("could not retrieve MPIJobs to set timeout"). + WithUserMessage(timeoutMessage).WithFatal() + } return result, nil } @@ -1583,21 +1367,53 @@ func (r *NnfWorkflowReconciler) waitForContainersToStart(ctx context.Context, wo } } + // Jobs are not running. Check to see if timeout elapsed and have k8s stop the jobs for us. + // If no timeout, then just requeue. if !running { + if timeoutElapsed { + r.Log.Info("container prerun timeout occurred, attempting to set MPIJob activeDeadlineSeconds") + if err := r.setMPIJobTimeout(ctx, workflow, mpiJob, time.Duration(1*time.Millisecond)); err != nil { + return nil, dwsv1alpha2.NewResourceError("could not set timeout on MPIJobs"). 
+ WithUserMessage(timeoutMessage).WithError(err).WithFatal() + } else { + return nil, dwsv1alpha2.NewResourceError("MPIJob timeout set").WithUserMessage(timeoutMessage).WithFatal() + } + } return Requeue(fmt.Sprintf("pending MPIJob start for workflow '%s', index: %d", workflow.Name, index)).after(2 * time.Second), nil } } else { jobList, err := r.getContainerJobs(ctx, workflow, index) if err != nil { + if timeoutElapsed { + return nil, dwsv1alpha2.NewResourceError("could not retrieve Jobs to set timeout"). + WithUserMessage(timeoutMessage).WithFatal().WithError(err) + } return nil, err } // Jobs may not be queryable yet, so requeue if len(jobList.Items) < 1 { + // If timeout, don't allow a requeue and return an error + if timeoutElapsed { + return nil, dwsv1alpha2.NewResourceError("no Jobs found in JobList to set timeout"). + WithUserMessage(timeoutMessage).WithFatal() + } return Requeue(fmt.Sprintf("pending job creation for workflow '%s', index: %d", workflow.Name, index)).after(2 * time.Second), nil } for _, job := range jobList.Items { + + // Attempt to set the timeout on all the Jobs in the list + if timeoutElapsed { + r.Log.Info("container prerun timeout occurred, attempting to set Job activeDeadlineSeconds") + if err := r.setJobTimeout(ctx, job, time.Duration(1*time.Millisecond)); err != nil { + return nil, dwsv1alpha2.NewResourceError("could not set timeout on MPIJobs"). + WithUserMessage(timeoutMessage).WithError(err).WithFatal() + } else { + continue + } + } + // If we have any conditions, the job already finished if len(job.Status.Conditions) > 0 { continue @@ -1608,11 +1424,76 @@ func (r *NnfWorkflowReconciler) waitForContainersToStart(ctx context.Context, wo return Requeue(fmt.Sprintf("pending container start for job '%s'", job.Name)).after(2 * time.Second), nil } } + + // Report the timeout error + if timeoutElapsed { + return nil, dwsv1alpha2.NewResourceError("job(s) timeout set").WithUserMessage(timeoutMessage).WithFatal() + } } return nil, nil } +func (r *NnfWorkflowReconciler) deleteContainers(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) (*result, error) { + doneMpi := false + doneNonMpi := false + + // Set the delete propagation + policy := metav1.DeletePropagationBackground + deleteAllOptions := &client.DeleteAllOfOptions{ + DeleteOptions: client.DeleteOptions{ + PropagationPolicy: &policy, + }, + } + // Add workflow matchLabels + directive index (if desired) + matchLabels := dwsv1alpha2.MatchingWorkflow(workflow) + if index >= 0 { + matchLabels[nnfv1alpha1.DirectiveIndexLabel] = strconv.Itoa(index) + } + + // Delete MPIJobs + mpiJobList, err := r.getMPIJobs(ctx, workflow, index) + if err != nil { + if strings.Contains(err.Error(), "no kind is registered for the type") || apierrors.IsNotFound(err) { + doneMpi = true + } else { + return nil, dwsv1alpha2.NewResourceError("could not delete container MPIJob(s)").WithError(err).WithMajor().WithInternal() + } + } else if len(mpiJobList.Items) > 0 { + if err := r.DeleteAllOf(ctx, &mpiJobList.Items[0], client.InNamespace(workflow.Namespace), matchLabels, deleteAllOptions); err != nil { + if !apierrors.IsNotFound(err) { + return nil, dwsv1alpha2.NewResourceError("could not delete container MPIJob(s)").WithError(err).WithMajor().WithInternal() + } + } + } else { + doneMpi = true + } + + // Delete non-MPI Jobs + jobList, err := r.getContainerJobs(ctx, workflow, index) + if err != nil { + if apierrors.IsNotFound(err) { + doneNonMpi = true + } else { + return nil, dwsv1alpha2.NewResourceError("could not delete 
container Job(s)").WithError(err).WithMajor().WithInternal() + } + } else if len(jobList.Items) > 0 { + if err := r.DeleteAllOf(ctx, &jobList.Items[0], client.InNamespace(workflow.Namespace), matchLabels, deleteAllOptions); err != nil { + if !apierrors.IsNotFound(err) { + return nil, dwsv1alpha2.NewResourceError("could not delete container Job(s)").WithError(err).WithMajor().WithInternal() + } + } + } else { + doneNonMpi = true + } + + if doneMpi && doneNonMpi { + return nil, nil + } + + return Requeue("pending container deletion"), nil +} + func (r *NnfWorkflowReconciler) getMPIJobConditions(ctx context.Context, workflow *dwsv1alpha2.Workflow, index, expected int) (*mpiv2beta1.MPIJob, *result) { mpiJob := &mpiv2beta1.MPIJob{ ObjectMeta: metav1.ObjectMeta{ @@ -1633,64 +1514,70 @@ func (r *NnfWorkflowReconciler) getMPIJobConditions(ctx context.Context, workflo return mpiJob, nil } -func (r *NnfWorkflowReconciler) waitForContainersToFinish(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) (*result, error) { - // Get profile to determine container job type (MPI or not) - profile, err := r.getContainerProfile(ctx, workflow, index) - if err != nil { - return nil, err - } - timeout := time.Duration(profile.Data.PostRunTimeoutSeconds) * time.Second - - setTimeout := func(job batchv1.Job) error { - // If desired, set the ActiveDeadline on the job to kill pods. Use the job's creation - // timestamp to determine how long the job/pod has been running at this point. Then, add - // the desired timeout to that value. k8s Job's ActiveDeadLineSeconds will then - // terminate the pods once the deadline is hit. - if timeout > 0 && job.Spec.ActiveDeadlineSeconds == nil { - deadline := int64((metav1.Now().Sub(job.CreationTimestamp.Time) + timeout).Seconds()) - - // Update the job with the deadline - err := retry.RetryOnConflict(retry.DefaultRetry, func() error { - j := &batchv1.Job{ObjectMeta: metav1.ObjectMeta{Name: job.Name, Namespace: job.Namespace}} - if err := r.Get(ctx, client.ObjectKeyFromObject(j), j); err != nil { - return client.IgnoreNotFound(err) - } +func (r *NnfWorkflowReconciler) setJobTimeout(ctx context.Context, job batchv1.Job, timeout time.Duration) error { + // If desired, set the ActiveDeadline on the job to kill pods. Use the job's creation + // timestamp to determine how long the job/pod has been running at this point. Then, add + // the desired timeout to that value. k8s Job's ActiveDeadLineSeconds will then + // terminate the pods once the deadline is hit. 
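To make the deadline arithmetic concrete: ActiveDeadlineSeconds counts from when the Job started, so the elapsed runtime has to be added to the desired timeout. A small standalone sketch of the same calculation (illustrative only, not part of the change):

package main

import (
	"fmt"
	"time"
)

// activeDeadlineFor mirrors the calculation implemented below: the time the Job has already
// been running (approximated by its creation timestamp) is added to the desired timeout, so
// the pods get roughly the full timeout measured from now.
func activeDeadlineFor(created time.Time, timeout time.Duration) int64 {
	return int64((time.Since(created) + timeout).Seconds())
}

func main() {
	created := time.Now().Add(-90 * time.Second)           // the Job has been running ~90s
	fmt.Println(activeDeadlineFor(created, 5*time.Minute)) // prints roughly 390
}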
+ if timeout > 0 && job.Spec.ActiveDeadlineSeconds == nil { + var deadline int64 + deadline = int64((metav1.Now().Sub(job.CreationTimestamp.Time) + timeout).Seconds()) + + // Update the job with the deadline + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + j := &batchv1.Job{ObjectMeta: metav1.ObjectMeta{Name: job.Name, Namespace: job.Namespace}} + if err := r.Get(ctx, client.ObjectKeyFromObject(j), j); err != nil { + return client.IgnoreNotFound(err) + } - j.Spec.ActiveDeadlineSeconds = &deadline - return r.Update(ctx, j) - }) + j.Spec.ActiveDeadlineSeconds = &deadline + return r.Update(ctx, j) + }) - if err != nil { - return nnfv1alpha1.NewWorkflowErrorf("error updating job '%s' activeDeadlineSeconds:", job.Name) - } + if err != nil { + return dwsv1alpha2.NewResourceError("error updating job '%s' activeDeadlineSeconds:", job.Name) } + } - return nil + return nil +} + +func (r *NnfWorkflowReconciler) setMPIJobTimeout(ctx context.Context, workflow *dwsv1alpha2.Workflow, mpiJob *mpiv2beta1.MPIJob, timeout time.Duration) error { + // Set the ActiveDeadLineSeconds on each of the k8s jobs created by MPIJob/mpi-operator. We + // need to retrieve the jobs in a different way than non-MPI jobs since the jobs are created + // by the MPIJob. + jobList, err := r.getMPIJobChildrenJobs(ctx, workflow, mpiJob) + if err != nil { + return dwsv1alpha2.NewResourceError("setMPIJobTimeout: no MPIJob JobList found for workflow '%s'", workflow.Name).WithMajor() } - setMPITimeout := func(mpiJob *mpiv2beta1.MPIJob) error { - // Set the ActiveDeadLineSeconds on each of the k8s jobs created by MPIJob/mpi-operator. We - // need to retrieve the jobs in a different way than non-MPI jobs since the jobs are created - // by the MPIJob. - jobList, err := r.getMPIJobList(ctx, workflow, mpiJob) - if err != nil { - return nnfv1alpha1.NewWorkflowErrorf("waitForContainersToFinish: no MPIJob JobList found for workflow '%s', index: %d", workflow.Name, index) - } + if len(jobList.Items) < 1 { + return dwsv1alpha2.NewResourceError("setMPIJobTimeout: no MPIJob jobs found for workflow '%s'", workflow.Name).WithMajor() + } - if len(jobList.Items) < 1 { - return nnfv1alpha1.NewWorkflowErrorf("waitForContainersToFinish: no MPIJob jobs found for workflow '%s', index: %d", workflow.Name, index) + for _, job := range jobList.Items { + if err := r.setJobTimeout(ctx, job, timeout); err != nil { + return err } + } - for _, job := range jobList.Items { - if err := setTimeout(job); err != nil { - return err - } - } + return nil +} - return nil +func (r *NnfWorkflowReconciler) waitForContainersToFinish(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) (*result, error) { + // Get profile to determine container job type (MPI or not) + profile, err := getContainerProfile(ctx, r.Client, workflow, index) + if err != nil { + return nil, err + } + isMPIJob := profile.Data.MPISpec != nil + + timeout := time.Duration(0) + if profile.Data.PostRunTimeoutSeconds != nil { + timeout = time.Duration(*profile.Data.PostRunTimeoutSeconds) * time.Second } - if profile.Data.MPISpec != nil { + if isMPIJob { // We should expect at least 2 conditions: created and running mpiJob, result := r.getMPIJobConditions(ctx, workflow, index, 2) if result != nil { @@ -1707,7 +1594,7 @@ func (r *NnfWorkflowReconciler) waitForContainersToFinish(ctx context.Context, w } if !finished { - if err := setMPITimeout(mpiJob); err != nil { + if err := r.setMPIJobTimeout(ctx, workflow, mpiJob, timeout); err != nil { return nil, err } return 
Requeue(fmt.Sprintf("pending MPIJob completion for workflow '%s', index: %d", workflow.Name, index)).after(2 * time.Second), nil @@ -1720,14 +1607,14 @@ func (r *NnfWorkflowReconciler) waitForContainersToFinish(ctx context.Context, w } if len(jobList.Items) < 1 { - return nil, nnfv1alpha1.NewWorkflowErrorf("waitForContainersToFinish: no container jobs found for workflow '%s', index: %d", workflow.Name, index) + return nil, dwsv1alpha2.NewResourceError("waitForContainersToFinish: no container jobs found for workflow '%s', index: %d", workflow.Name, index).WithMajor() } // Ensure all the jobs are done running before we check the conditions. for _, job := range jobList.Items { // Jobs will have conditions when finished if len(job.Status.Conditions) <= 0 { - if err := setTimeout(job); err != nil { + if err := r.setJobTimeout(ctx, job, timeout); err != nil { return nil, err } return Requeue("pending container finish").after(2 * time.Second).withObject(&job), nil @@ -1740,12 +1627,19 @@ func (r *NnfWorkflowReconciler) waitForContainersToFinish(ctx context.Context, w func (r *NnfWorkflowReconciler) checkContainersResults(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) (*result, error) { // Get profile to determine container job type (MPI or not) - profile, err := r.getContainerProfile(ctx, workflow, index) + profile, err := getContainerProfile(ctx, r.Client, workflow, index) if err != nil { return nil, err } + isMPIJob := profile.Data.MPISpec != nil + + timeout := time.Duration(0) + if profile.Data.PostRunTimeoutSeconds != nil { + timeout = time.Duration(*profile.Data.PostRunTimeoutSeconds) * time.Second + } + timeoutMessage := fmt.Sprintf("user container(s) failed to complete after %d seconds", int(timeout.Seconds())) - if profile.Data.MPISpec != nil { + if isMPIJob { mpiJob, result := r.getMPIJobConditions(ctx, workflow, index, 2) if result != nil { return result, nil @@ -1753,7 +1647,12 @@ func (r *NnfWorkflowReconciler) checkContainersResults(ctx context.Context, work for _, c := range mpiJob.Status.Conditions { if c.Type == mpiv2beta1.JobFailed { - return nil, nnfv1alpha1.NewWorkflowErrorf("container MPIJob %s (%s): %s", c.Type, c.Reason, c.Message) + if c.Reason == "DeadlineExceeded" { + return nil, dwsv1alpha2.NewResourceError("container MPIJob %s (%s): %s", c.Type, c.Reason, c.Message).WithFatal(). + WithUserMessage(timeoutMessage) + } + return nil, dwsv1alpha2.NewResourceError("container MPIJob %s (%s): %s", c.Type, c.Reason, c.Message).WithFatal(). 
+ WithUserMessage("user container(s) failed to run successfully after %d attempts", profile.Data.RetryLimit+1) } } } else { @@ -1763,13 +1662,16 @@ func (r *NnfWorkflowReconciler) checkContainersResults(ctx context.Context, work } if len(jobList.Items) < 1 { - return nil, nnfv1alpha1.NewWorkflowErrorf("checkContainersResults: no container jobs found for workflow '%s', index: %d", workflow.Name, index) + return nil, dwsv1alpha2.NewResourceError("checkContainersResults: no container jobs found for workflow '%s', index: %d", workflow.Name, index).WithMajor() } for _, job := range jobList.Items { for _, condition := range job.Status.Conditions { if condition.Type != batchv1.JobComplete { - return nil, nnfv1alpha1.NewWorkflowErrorf("container job %s (%s): %s", condition.Type, condition.Reason, condition.Message) + if condition.Reason == "DeadlineExceeded" { + return nil, dwsv1alpha2.NewResourceError("container job %s (%s): %s", condition.Type, condition.Reason, condition.Message).WithFatal().WithUserMessage(timeoutMessage) + } + return nil, dwsv1alpha2.NewResourceError("container job %s (%s): %s", condition.Type, condition.Reason, condition.Message).WithFatal() } } } @@ -1778,7 +1680,8 @@ func (r *NnfWorkflowReconciler) checkContainersResults(ctx context.Context, work return nil, nil } -func (r *NnfWorkflowReconciler) getMPIJobList(ctx context.Context, workflow *dwsv1alpha2.Workflow, mpiJob *mpiv2beta1.MPIJob) (*batchv1.JobList, error) { +// Given an MPIJob, return a list of all the k8s Jobs owned by the MPIJob +func (r *NnfWorkflowReconciler) getMPIJobChildrenJobs(ctx context.Context, workflow *dwsv1alpha2.Workflow, mpiJob *mpiv2beta1.MPIJob) (*batchv1.JobList, error) { // The k8s jobs that are spawned off by MPIJob do not have labels tied to the workflow. // Therefore, we need to get the k8s jobs manually. To do this, we can query the jobs by the // name of the MPIJob. However, this doesn't account for the namespace. We need another way. 
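The hunk that follows lists candidate Jobs and then filters them, but the exact selection criteria are not visible here. One common approach, shown purely as an assumption rather than as this controller's implementation, is to keep only the Jobs whose controller owner reference points back at the MPIJob:

package main

import (
	batchv1 "k8s.io/api/batch/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
)

// jobsOwnedBy keeps only the Jobs controlled by the given owner UID (e.g. the MPIJob's UID),
// which sidesteps the missing workflow labels mentioned above.
func jobsOwnedBy(list *batchv1.JobList, owner types.UID) []batchv1.Job {
	out := []batchv1.Job{}
	for i := range list.Items {
		if ref := metav1.GetControllerOf(&list.Items[i]); ref != nil && ref.UID == owner {
			out = append(out, list.Items[i])
		}
	}
	return out
}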
@@ -1788,7 +1691,7 @@ func (r *NnfWorkflowReconciler) getMPIJobList(ctx context.Context, workflow *dws jobList := &batchv1.JobList{} if err := r.List(ctx, jobList, matchLabels); err != nil { - return nil, nnfv1alpha1.NewWorkflowErrorf("could not retrieve Jobs for MPIJob %s", mpiJob.Name).WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not retrieve Jobs for MPIJob %s", mpiJob.Name).WithError(err).WithMajor() } // Create a new list so we don't alter the loop iterator @@ -1809,176 +1712,267 @@ func (r *NnfWorkflowReconciler) getMPIJobList(ctx context.Context, workflow *dws return jobList, nil } +func (r *NnfWorkflowReconciler) getMPIJobs(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) (*mpiv2beta1.MPIJobList, error) { + // Get the MPIJobs for this workflow and directive index + matchLabels := dwsv1alpha2.MatchingWorkflow(workflow) + if index >= 0 { + matchLabels[nnfv1alpha1.DirectiveIndexLabel] = strconv.Itoa(index) + } + + jobList := &mpiv2beta1.MPIJobList{} + if err := r.List(ctx, jobList, matchLabels); err != nil { + return nil, dwsv1alpha2.NewResourceError("could not retrieve MPIJobs").WithError(err).WithMajor() + } + + return jobList, nil +} + func (r *NnfWorkflowReconciler) getContainerJobs(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) (*batchv1.JobList, error) { // Get the jobs for this workflow and directive index matchLabels := dwsv1alpha2.MatchingWorkflow(workflow) - matchLabels[nnfv1alpha1.DirectiveIndexLabel] = strconv.Itoa(index) + if index >= 0 { + matchLabels[nnfv1alpha1.DirectiveIndexLabel] = strconv.Itoa(index) + } jobList := &batchv1.JobList{} if err := r.List(ctx, jobList, matchLabels); err != nil { - return nil, nnfv1alpha1.NewWorkflowErrorf("could not retrieve Jobs for index %d", index).WithError(err) + return nil, dwsv1alpha2.NewResourceError("could not retrieve Jobs").WithError(err).WithMajor() } return jobList, nil } -func (r *NnfWorkflowReconciler) getContainerProfile(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) (*nnfv1alpha1.NnfContainerProfile, error) { - profile, err := r.findPinnedContainerProfile(ctx, workflow, index) - if err != nil { - return nil, err - } +// Create a list of volumes to be mounted inside of the containers based on the DW_JOB/DW_PERSISTENT arguments +func (r *NnfWorkflowReconciler) getContainerVolumes(ctx context.Context, workflow *dwsv1alpha2.Workflow, dwArgs map[string]string, profile *nnfv1alpha1.NnfContainerProfile) ([]nnfContainerVolume, *result, error) { + volumes := []nnfContainerVolume{} - if profile == nil { - return nil, nnfv1alpha1.NewWorkflowErrorf("container profile '%s' not found", indexedResourceName(workflow, index)).WithFatal() - } + for arg, val := range dwArgs { + volName, cmd := "", "" - if !profile.Data.Pinned { - return nil, nnfv1alpha1.NewWorkflowErrorf("expected pinned container profile '%s'", indexedResourceName(workflow, index)).WithFatal() - } + // Find any DW_(JOB|PERSISTENT) arguments + if strings.HasPrefix(arg, "DW_JOB_") { + volName = strings.TrimPrefix(arg, "DW_JOB_") + cmd = "jobdw" + } else if strings.HasPrefix(arg, "DW_PERSISTENT_") { + volName = strings.TrimPrefix(arg, "DW_PERSISTENT_") + cmd = "persistentdw" + } else if strings.HasPrefix(arg, "DW_GLOBAL_") { + volName = strings.TrimPrefix(arg, "DW_GLOBAL_") + cmd = "globaldw" + } else { + continue + } - return profile, nil -} + // k8s resources can't have underscores + volName = strings.ReplaceAll(volName, "_", "-") -func (r *NnfWorkflowReconciler) findPinnedContainerProfile(ctx 
context.Context, workflow *dwsv1alpha2.Workflow, index int) (*nnfv1alpha1.NnfContainerProfile, error) { - profile := &nnfv1alpha1.NnfContainerProfile{ - ObjectMeta: metav1.ObjectMeta{ - Name: indexedResourceName(workflow, index), - Namespace: workflow.Namespace, - }, - } + vol := nnfContainerVolume{ + name: volName, + command: cmd, + directiveName: val, + directiveIndex: -1, + // and env vars can't have hyphens + envVarName: strings.ReplaceAll(arg, "-", "_"), + } - if err := r.Get(ctx, client.ObjectKeyFromObject(profile), profile); err != nil { - return nil, err + // For global lustre, a namespace that matches the workflow's namespace must be present in + // the LustreFilesystem's Spec.Namespaces list. This results in a matching PVC that can + // then be mounted into containers in that namespace. + if cmd == "globaldw" { + globalLustre := r.findLustreFileSystemForPath(ctx, val, r.Log) + if globalLustre == nil { + return nil, nil, dwsv1alpha2.NewResourceError("").WithUserMessage("global Lustre file system containing '%s' not found", val).WithUser().WithFatal() + } + + ns, nsFound := globalLustre.Spec.Namespaces[workflow.Namespace] + if !nsFound || len(ns.Modes) < 1 { + return nil, nil, dwsv1alpha2.NewResourceError("").WithUserMessage("global Lustre file system containing '%s' is not configured for the '%s' namespace", val, workflow.Namespace).WithUser().WithFatal() + } + + // Retrieve the desired PVC mode from the container profile. Default to readwritemany. + modeStr := strings.ToLower(string(corev1.ReadWriteMany)) + if profile != nil { + for _, storage := range profile.Data.Storages { + if storage.Name == arg && storage.PVCMode != "" { + modeStr = strings.ToLower(string(storage.PVCMode)) + } + } + } + + // e.g. PVC name: global-default-readwritemany-pvc + vol.pvcName = strings.ToLower(fmt.Sprintf("%s-%s-%s-pvc", globalLustre.Name, globalLustre.Namespace, modeStr)) + vol.mountPath = globalLustre.Spec.MountRoot + } else { + // Find the directive index for the given name so we can retrieve its NnfAccess + vol.directiveIndex = findDirectiveIndexByName(workflow, vol.directiveName, vol.command) + if vol.directiveIndex < 0 { + return nil, nil, dwsv1alpha2.NewResourceError("could not retrieve the directive breakdown for '%s'", vol.directiveName).WithMajor() + } + + nnfAccess := &nnfv1alpha1.NnfAccess{ + ObjectMeta: metav1.ObjectMeta{ + Name: workflow.Name + "-" + strconv.Itoa(vol.directiveIndex) + "-servers", + Namespace: workflow.Namespace, + }, + } + if err := r.Get(ctx, client.ObjectKeyFromObject(nnfAccess), nnfAccess); err != nil { + return nil, nil, dwsv1alpha2.NewResourceError("could not retrieve the NnfAccess '%s'", nnfAccess.Name).WithMajor() + } + + if !nnfAccess.Status.Ready { + return nil, Requeue(fmt.Sprintf("NnfAccess '%s' is not ready to be mounted into container", nnfAccess.Name)).after(2 * time.Second), nil + } + + vol.mountPath = nnfAccess.Spec.MountPath + } + volumes = append(volumes, vol) } - return profile, nil + return volumes, nil, nil } -func (r *NnfWorkflowReconciler) findContainerProfile(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) (*nnfv1alpha1.NnfContainerProfile, error) { - args, err := dwdparse.BuildArgsMap(workflow.Spec.DWDirectives[index]) +// Use the container profile to determine how many ports are needed and request them from the default NnfPortManager +func (r *NnfWorkflowReconciler) getContainerPorts(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) (*result, error) { + profile, err := getContainerProfile(ctx, r.Client, 
workflow, index) if err != nil { return nil, err } - name, found := args["profile"] - if !found { - return nil, fmt.Errorf("container directive '%s' has no profile key", workflow.Spec.DWDirectives[index]) - } + // Nothing to do here if ports are not requested + if profile.Data.NumPorts > 0 { + pm, err := getContainerPortManager(ctx, r.Client) + if err != nil { + return nil, err + } - profile := &nnfv1alpha1.NnfContainerProfile{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: os.Getenv("NNF_CONTAINER_PROFILE_NAMESPACE"), - }, - } + // Check to see if we've already made an allocation + for _, alloc := range pm.Spec.Allocations { + if alloc.Requester.UID == workflow.UID { + return nil, nil + } + } - if err := r.Get(ctx, client.ObjectKeyFromObject(profile), profile); err != nil { - return nil, err - } + // Add a port allocation request to the manager for the number of ports specified by the + // container profile + pm.Spec.Allocations = append(pm.Spec.Allocations, nnfv1alpha1.NnfPortManagerAllocationSpec{ + Requester: corev1.ObjectReference{ + Name: workflow.Name, + Namespace: workflow.Namespace, + Kind: reflect.TypeOf(dwsv1alpha2.Workflow{}).Name(), + UID: workflow.UID, + }, + Count: int(profile.Data.NumPorts), + }) - return profile, nil -} + if err := r.Update(ctx, pm); err != nil { + if !apierrors.IsConflict(err) { + return nil, err + } + return Requeue("update port manager allocation"), nil + } -func (r *NnfWorkflowReconciler) createPinnedContainerProfileIfNecessary(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) error { - profile, err := r.findPinnedContainerProfile(ctx, workflow, index) - if err != nil && !apierrors.IsNotFound(err) { - return err + r.Log.Info("Ports Requested", "numPorts", profile.Data.NumPorts) } - if profile != nil { - if !profile.Data.Pinned { - return fmt.Errorf("expected pinned container profile, but it was not pinned: %s", profile.Name) - } + return nil, nil +} - return nil - } +// Ensure that the default NnfPortManager has assigned the appropriate number of requested ports +func (r *NnfWorkflowReconciler) checkContainerPorts(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) (*result, error) { - profile, err = r.findContainerProfile(ctx, workflow, index) + profile, err := getContainerProfile(ctx, r.Client, workflow, index) if err != nil { - return err + return nil, err } - pinnedProfile := &nnfv1alpha1.NnfContainerProfile{ - ObjectMeta: metav1.ObjectMeta{ - Name: indexedResourceName(workflow, index), - Namespace: workflow.Namespace, - }, - } + // Nothing to do here if ports are not requested + r.Log.Info("Checking for requested ports", "numPorts", profile.Data.NumPorts) + if profile.Data.NumPorts > 0 { + pm, err := getContainerPortManager(ctx, r.Client) + if err != nil { + return nil, err + } + + for _, alloc := range pm.Status.Allocations { + if alloc.Requester != nil && alloc.Requester.UID == workflow.UID { + if alloc.Status == nnfv1alpha1.NnfPortManagerAllocationStatusInUse && len(alloc.Ports) == int(profile.Data.NumPorts) { + // Add workflow env var for the ports + name, val := getContainerPortsEnvVar(alloc.Ports) + workflow.Status.Env[name] = val + return nil, nil // done + } else if alloc.Status == nnfv1alpha1.NnfPortManagerAllocationStatusInvalidConfiguration { + return nil, dwsv1alpha2.NewResourceError("").WithUserMessage("could not request ports for container workflow: Invalid NnfPortManager configuration").WithFatal().WithUser() + } else if alloc.Status == 
nnfv1alpha1.NnfPortManagerAllocationStatusInsufficientResources { + return nil, dwsv1alpha2.NewResourceError("").WithUserMessage("could not request ports for container workflow: InsufficientResources").WithFatal() + } + } + } - profile.Data.DeepCopyInto(&pinnedProfile.Data) + return Requeue("NnfPortManager allocation not ready").after(2 * time.Second).withObject(pm), nil + } - pinnedProfile.Data.Pinned = true + return nil, nil +} - dwsv1alpha2.AddOwnerLabels(pinnedProfile, workflow) +// Retrieve the default NnfPortManager for user containers. Allow a client to be passed in as this +// is meant to be used by reconcilers or container helpers. +func getContainerPortManager(ctx context.Context, cl client.Client) (*nnfv1alpha1.NnfPortManager, error) { + portManagerName := os.Getenv("NNF_PORT_MANAGER_NAME") + portManagerNamespace := os.Getenv("NNF_PORT_MANAGER_NAMESPACE") - if err := controllerutil.SetControllerReference(workflow, pinnedProfile, r.Scheme); err != nil { - r.Log.Error(err, "failed to set controller reference on profile", "profile", pinnedProfile) - return fmt.Errorf("failed to set controller reference on profile %s", client.ObjectKeyFromObject(pinnedProfile)) + pm := &nnfv1alpha1.NnfPortManager{ + ObjectMeta: metav1.ObjectMeta{ + Name: portManagerName, + Namespace: portManagerNamespace, + }, } - - r.Log.Info("Creating pinned container profile", "resource", client.ObjectKeyFromObject(pinnedProfile)) - if err := r.Create(ctx, pinnedProfile); err != nil { - return err + if err := cl.Get(ctx, client.ObjectKeyFromObject(pm), pm); err != nil { + return nil, err } - return nil + return pm, nil } -// Create a list of volumes to be mounted inside of the containers based on the DW_JOB/DW_PERSISTENT arguments -func (r *NnfWorkflowReconciler) getContainerVolumes(ctx context.Context, workflow *dwsv1alpha2.Workflow, dwArgs map[string]string) ([]nnfContainerVolume, *result, error) { - volumes := []nnfContainerVolume{} - - // TODO: ssh is necessary for mpi see setupSSHAuthVolumes(manager, podSpec) in nnf-dm - - for arg, val := range dwArgs { - volName, cmd := "", "" +// Tell the NnfPortManager that the ports are no longer needed +// func (r *NnfWorkflowReconciler) releaseContainerPorts(ctx context.Context, workflow *dwsv1alpha2.Workflow, index int) (*result, error) { +func (r *NnfWorkflowReconciler) releaseContainerPorts(ctx context.Context, workflow *dwsv1alpha2.Workflow) (*result, error) { + found := false - // Find any DW_(JOB|PERSISTENT) arguments - if strings.HasPrefix(arg, "DW_JOB_") { - volName = strings.TrimPrefix(arg, "DW_JOB_") - cmd = "jobdw" - } else if strings.HasPrefix(arg, "DW_PERSISTENT_") { - volName = strings.TrimPrefix(arg, "DW_PERSISTENT_") - cmd = "persistentdw" + pm, err := getContainerPortManager(ctx, r.Client) + if err != nil { + if apierrors.IsNotFound(err) { + return nil, nil } else { - continue - } - - // k8s resources can't have underscores - volName = strings.ReplaceAll(volName, "_", "-") - - vol := nnfContainerVolume{ - name: volName, - command: cmd, - directiveName: val, - directiveIndex: -1, - // and env vars can't have hyphens - envVarName: strings.ReplaceAll(arg, "-", "_"), + return nil, err } + } - // Find the directive index for the given name so we can retrieve its NnfAccess - vol.directiveIndex = findDirectiveIndexByName(workflow, vol.directiveName, vol.command) - if vol.directiveIndex < 0 { - return nil, nil, nnfv1alpha1.NewWorkflowErrorf("could not retrieve the directive breakdown for '%s'", vol.directiveName) + // Find the allocation in the Status + for _, 
alloc := range pm.Status.Allocations { + if alloc.Requester.UID == workflow.UID && alloc.Status == nnfv1alpha1.NnfPortManagerAllocationStatusInUse { + found = true + break } + } - nnfAccess := &nnfv1alpha1.NnfAccess{ - ObjectMeta: metav1.ObjectMeta{ - Name: workflow.Name + "-" + strconv.Itoa(vol.directiveIndex) + "-servers", - Namespace: workflow.Namespace, - }, - } - if err := r.Get(ctx, client.ObjectKeyFromObject(nnfAccess), nnfAccess); err != nil { - return nil, nil, nnfv1alpha1.NewWorkflowErrorf("could not retrieve the NnfAccess '%s'", nnfAccess.Name) + if found { + // Remove the allocation request from the Spec + // TODO: For cooldowns, change the status to cooldown/time_wait rather than delete. Can we + // even do that from here? + for idx, alloc := range pm.Spec.Allocations { + if alloc.Requester.UID == workflow.UID { + pm.Spec.Allocations = append(pm.Spec.Allocations[:idx], pm.Spec.Allocations[idx+1:]...) + } } - if !nnfAccess.Status.Ready { - return nil, Requeue(fmt.Sprintf("NnfAccess '%s' is not ready to be mounted into container", nnfAccess.Name)).after(2 * time.Second), nil + if err := r.Update(ctx, pm); err != nil { + if !apierrors.IsConflict(err) { + return nil, err + } } - vol.mountPath = nnfAccess.Spec.MountPath - volumes = append(volumes, vol) + return Requeue("pending port de-allocation"), nil + } else { + return nil, nil } - - return volumes, nil, nil } diff --git a/controllers/nnf_workflow_controller_test.go b/controllers/nnf_workflow_controller_test.go index 859453794..5a79eeeab 100644 --- a/controllers/nnf_workflow_controller_test.go +++ b/controllers/nnf_workflow_controller_test.go @@ -47,16 +47,26 @@ import ( // BeforeEach - initialize the workflow // AfterEach - destroy the workflow +var ( + baseWorkflowUserID uint32 = 1042 + baseWorkflowGroupID uint32 = 1043 + + altWorkflowUserID uint32 = 1044 + altWorkflowGroupID uint32 = 1045 +) + var _ = Describe("NNF Workflow Unit Tests", func() { var ( - key types.NamespacedName - workflow *dwsv1alpha2.Workflow - storageProfile *nnfv1alpha1.NnfStorageProfile + key types.NamespacedName + workflow *dwsv1alpha2.Workflow + storageProfile *nnfv1alpha1.NnfStorageProfile + persistentStorageName string ) BeforeEach(func() { wfid := uuid.NewString()[0:8] + persistentStorageName = "persistent-" + uuid.NewString()[:8] key = types.NamespacedName{ Name: "nnf-workflow-" + wfid, @@ -72,6 +82,8 @@ var _ = Describe("NNF Workflow Unit Tests", func() { DesiredState: dwsv1alpha2.StateProposal, JobID: intstr.FromString("job 1244"), WLMID: uuid.NewString(), + UserID: baseWorkflowUserID, + GroupID: baseWorkflowGroupID, }, } @@ -122,7 +134,7 @@ var _ = Describe("NNF Workflow Unit Tests", func() { return nil } - createPersistentStorageInstance := func(name string) { + createPersistentStorageInstance := func(name, fsType string) { By("Fabricate the persistent storage instance") // Create a persistent storage instance to be found @@ -131,7 +143,7 @@ var _ = Describe("NNF Workflow Unit Tests", func() { ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: workflow.Namespace}, Spec: dwsv1alpha2.PersistentStorageInstanceSpec{ Name: name, - FsType: "lustre", + FsType: fsType, // DWDirective: workflow.Spec.DWDirectives[0], DWDirective: "#DW persistentdw name=" + name, State: dwsv1alpha2.PSIStateActive, @@ -152,26 +164,15 @@ var _ = Describe("NNF Workflow Unit Tests", func() { Namespace: workflow.Namespace, }, Spec: nnfv1alpha1.NnfStorageSpec{ - FileSystemType: "lustre", + FileSystemType: fsType, AllocationSets: []nnfv1alpha1.NnfStorageAllocationSetSpec{}, }, 
- Status: nnfv1alpha1.NnfStorageStatus{ - MgsNode: "", - AllocationSets: []nnfv1alpha1.NnfStorageAllocationSetStatus{{ - Status: "Ready", - Health: "OK", - Error: "", - AllocationCount: 0, - }}, - }, } Expect(k8sClient.Create(context.TODO(), nnfStorage)).To(Succeed()) } deletePersistentStorageInstance := func(name string) { - By("Fabricate the nnfStorage as if the persistent storage instance exists") - - // Delete persistent storage instance + By("delete persistent storage instance") psi := &dwsv1alpha2.PersistentStorageInstance{ ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: workflow.Namespace}, } @@ -490,15 +491,13 @@ var _ = Describe("NNF Workflow Unit Tests", func() { }) When("using $DW_PERSISTENT_ references", func() { - persistentStorageName := "my-persistent-storage" - BeforeEach(func() { workflow.Spec.DWDirectives = []string{ fmt.Sprintf("#DW persistentdw name=%s", persistentStorageName), fmt.Sprintf("#DW copy_in source=/lus/maui/my-file.in destination=$DW_PERSISTENT_%s/my-persistent-file.out", strings.ReplaceAll(persistentStorageName, "-", "_")), } - createPersistentStorageInstance(persistentStorageName) + createPersistentStorageInstance(persistentStorageName, "lustre") }) // Create/Delete the "nnf-system" namespace as part of the test life-cycle; the persistent storage instances are @@ -1028,16 +1027,28 @@ var _ = Describe("NNF Workflow Unit Tests", func() { }) When("Using container directives", func() { - var ns *corev1.Namespace + var ( + ns *corev1.Namespace - persistentStorageName := "container-persistent" - createPersistent := true + createPersistent bool + createPersistentType string - var containerProfile *nnfv1alpha1.NnfContainerProfile - var containerProfileStorages []nnfv1alpha1.NnfContainerProfileStorage = nil - createContainerProfile := true + createGlobalLustre bool + globalLustre *lusv1beta1.LustreFileSystem + + containerProfile *nnfv1alpha1.NnfContainerProfile + containerProfileStorages []nnfv1alpha1.NnfContainerProfileStorage + createContainerProfile bool + ) BeforeEach(func() { + createPersistent = true + createPersistentType = "lustre" + createGlobalLustre = false + containerProfile = nil + containerProfileStorages = nil + createContainerProfile = true + // Create/Delete the "nnf-system" namespace as part of the test life-cycle; the persistent storage instances are // placed in the "nnf-system" namespace so it must be present. // EnvTest does not support namespace deletion, so this could already exist. Ignore any errors. 
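The copy_in directive above refers to the persistent allocation as $DW_PERSISTENT_... with underscores even though the instance name uses hyphens: environment-variable names cannot contain hyphens, while Kubernetes resource names cannot contain underscores, so the tests normalize in both directions (the same rule appears in the container volume code earlier in this patch). A small hedged sketch of that mapping; envName and directive are illustrative locals, not part of the change:

// The persistent storage instance keeps its hyphenated k8s name; only the
// environment-variable form swaps '-' for '_'.
envName := "DW_PERSISTENT_" + strings.ReplaceAll(persistentStorageName, "-", "_")
directive := fmt.Sprintf("#DW copy_in source=/lus/maui/my-file.in destination=$%s/my-file.out", envName)
_ = directive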
@@ -1048,9 +1059,6 @@ var _ = Describe("NNF Workflow Unit Tests", func() { } k8sClient.Create(context.TODO(), ns) - if createPersistent { - createPersistentStorageInstance(persistentStorageName) - } }) JustBeforeEach(func() { @@ -1058,10 +1066,30 @@ var _ = Describe("NNF Workflow Unit Tests", func() { if createContainerProfile { containerProfile = createBasicNnfContainerProfile(containerProfileStorages) } + + if createPersistent { + createPersistentStorageInstance(persistentStorageName, createPersistentType) + } + + if createGlobalLustre { + globalLustre = &lusv1beta1.LustreFileSystem{ + ObjectMeta: metav1.ObjectMeta{ + Name: "sawbill", + Namespace: corev1.NamespaceDefault, + }, + Spec: lusv1beta1.LustreFileSystemSpec{ + Name: "sawbill", + MountRoot: "/lus/sawbill", + MgsNids: "10.0.0.2@tcp", + }, + } + Expect(k8sClient.Create(context.TODO(), globalLustre)).To(Succeed()) + } }) AfterEach(func() { if containerProfile != nil { + By("delete NnfContainerProfile") Expect(k8sClient.Delete(context.TODO(), containerProfile)).Should(Succeed()) Eventually(func() error { return k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(containerProfile), containerProfile) @@ -1071,30 +1099,77 @@ var _ = Describe("NNF Workflow Unit Tests", func() { if createPersistent { deletePersistentStorageInstance(persistentStorageName) } + + if createGlobalLustre { + Expect(k8sClient.Get(context.TODO(), client.ObjectKeyFromObject(globalLustre), globalLustre)).To(Succeed()) + Expect(k8sClient.Delete(context.TODO(), globalLustre)).To(Succeed()) + } }) - Context("when a container workflow has everything in order", func() { - // This means that: - // - A persistent instance is available prior to the container workflow - // - The provided storage arguments are included in the preceding directives - // - The supplied container profile exists and the supplied storage arguments are in the profiles list of required storages + Context("with container restrictions", func() { + BeforeEach(func() { + createContainerProfile = false // We'll make a custom version. + createGlobalLustre = true + }) - It("should go to Proposal Ready with required storages present", func() { + // buildRestrictedContainerProfile will create a NnfContainerProfile that + // is restricted to a specific user ID or group ID. 
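Profile restrictions are plain optional pointers, so a nil UserID/GroupID means "not restricted". A hedged sketch of building such a profile outside the test helper, using the pointy module this change adds to go.mod; the profile name is made up, and taking the address of a local uint32 works just as well:

restricted := basicNnfContainerProfile("restricted-example", nil)
restricted.Data.UserID = pointy.Uint32(1042)  // only workflows with UserID 1042 may use it
restricted.Data.GroupID = pointy.Uint32(1043) // only workflows with GroupID 1043 may use it
containerProfile = createNnfContainerProfile(restricted, true)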
+ buildRestrictedContainerProfile := func(userID *uint32, groupID *uint32) { + By("Create a restricted NnfContainerProfile") + tempProfile := basicNnfContainerProfile("restricted-"+uuid.NewString()[:8], containerProfileStorages) + if userID != nil { + tempProfile.Data.UserID = userID + } + if groupID != nil { + tempProfile.Data.GroupID = groupID + } + + containerProfile = createNnfContainerProfile(tempProfile, true) + } + + buildWorkflowWithCorrectDirectives := func() { + By("creating the workflow") workflow.Spec.DWDirectives = []string{ "#DW jobdw name=container-storage type=gfs2 capacity=1GB", "#DW persistentdw name=" + persistentStorageName, fmt.Sprintf("#DW container name=container profile=%s "+ "DW_JOB_foo_local_storage=container-storage "+ - "DW_PERSISTENT_foo_persistent_storage=container-persistent", - containerProfile.Name), + "DW_PERSISTENT_foo_persistent_storage=%s "+ + "DW_GLOBAL_foo_global_lustre=%s", + containerProfile.Name, persistentStorageName, globalLustre.Spec.MountRoot), } Expect(k8sClient.Create(context.TODO(), workflow)).Should(Succeed()) + } - Eventually(func(g Gomega) bool { - g.Expect(k8sClient.Get(context.TODO(), key, workflow)).To(Succeed()) - return workflow.Status.Ready && workflow.Status.State == dwsv1alpha2.StateProposal - }).Should(BeTrue(), "reach desired Proposal state") - }) + DescribeTable("should go to Proposal Ready when everything is in order", + func(containerUserID *uint32, containerGroupID *uint32) { + buildRestrictedContainerProfile(containerUserID, containerGroupID) + buildWorkflowWithCorrectDirectives() + Eventually(func(g Gomega) bool { + g.Expect(k8sClient.Get(context.TODO(), key, workflow)).To(Succeed()) + return workflow.Status.Ready && workflow.Status.State == dwsv1alpha2.StateProposal + }).Should(BeTrue(), "reach desired Proposal state") + Expect(verifyPinnedContainerProfile(context.TODO(), k8sClient, workflow, 2)).To(Succeed()) + }, + Entry("when not restricted to a user ID or group ID", nil, nil), + Entry("when restricted to a matching user ID", &baseWorkflowUserID, nil), + Entry("when restricted to a matching group ID", nil, &baseWorkflowGroupID), + Entry("when restricted to a matching user ID and group ID", &baseWorkflowUserID, &baseWorkflowGroupID), + ) + + DescribeTable("should not go to Proposal Ready when profile restriction is not satisfied", + func(containerUserID *uint32, containerGroupID *uint32) { + buildRestrictedContainerProfile(containerUserID, containerGroupID) + buildWorkflowWithCorrectDirectives() + Eventually(func(g Gomega) bool { + g.Expect(k8sClient.Get(context.TODO(), key, workflow)).To(Succeed()) + return workflow.Status.Status == dwsv1alpha2.StatusError && strings.Contains(workflow.Status.Message, "container profile") && strings.Contains(workflow.Status.Message, "is restricted to") + }).Should(BeTrue(), "does not reach desired Proposal state") + }, + Entry("when restricted to non-matching user ID", &altWorkflowUserID, nil), + Entry("when restricted to non-matching group ID", nil, &altWorkflowGroupID), + Entry("when restricted to non-matching user ID and group ID", &altWorkflowUserID, &altWorkflowGroupID), + ) }) Context("when an optional storage in the container profile is not present in the container arguments", func() { @@ -1102,6 +1177,7 @@ var _ = Describe("NNF Workflow Unit Tests", func() { containerProfileStorages = []nnfv1alpha1.NnfContainerProfileStorage{ {Name: "DW_JOB_foo_local_storage", Optional: false}, {Name: "DW_PERSISTENT_foo_persistent_storage", Optional: true}, + {Name: 
"DW_GLOBAL_foo_global_lustre", Optional: true}, } }) @@ -1118,6 +1194,7 @@ var _ = Describe("NNF Workflow Unit Tests", func() { g.Expect(k8sClient.Get(context.TODO(), key, workflow)).To(Succeed()) return workflow.Status.Ready && workflow.Status.State == dwsv1alpha2.StateProposal }).Should(BeTrue(), "reach desired Proposal state") + Expect(verifyPinnedContainerProfile(context.TODO(), k8sClient, workflow, 1)).To(Succeed()) }) }) @@ -1128,8 +1205,8 @@ var _ = Describe("NNF Workflow Unit Tests", func() { "#DW jobdw name=container-storage type=gfs2 capacity=1GB", fmt.Sprintf("#DW container name=container profile=%s "+ "DW_JOB_foo_local_storage=container-storage "+ - "DW_PERSISTENT_foo_persistent_storage=container-persistent", - containerProfile.Name), + "DW_PERSISTENT_foo_persistent_storage=%s", + containerProfile.Name, persistentStorageName), } Expect(k8sClient.Create(context.TODO(), workflow)).Should(Succeed()) @@ -1141,14 +1218,20 @@ var _ = Describe("NNF Workflow Unit Tests", func() { }) Context("when a required storage in the container profile is not present in the arguments", func() { + BeforeEach(func() { + containerProfileStorages = []nnfv1alpha1.NnfContainerProfileStorage{ + {Name: "DW_JOB_foo_local_storage", Optional: false}, + {Name: "DW_PERSISTENT_foo_persistent_storage", Optional: true}, + } + }) It("should go to error", func() { workflow.Spec.DWDirectives = []string{ "#DW jobdw name=container-storage type=gfs2 capacity=1GB", "#DW persistentdw name=" + persistentStorageName, fmt.Sprintf("#DW container name=container profile=%s "+ // local storage is missing - "DW_PERSISTENT_foo_persistent_storage=container-persistent", - containerProfile.Name), + "DW_PERSISTENT_foo_persistent_storage=%s", + containerProfile.Name, persistentStorageName), } Expect(k8sClient.Create(context.TODO(), workflow)).Should(Succeed()) @@ -1159,26 +1242,145 @@ var _ = Describe("NNF Workflow Unit Tests", func() { }) }) - Context("when a argument is not in the container profile", func() { + Context("when an argument is present in the container directive but not in the container profile", func() { + var storageArgsList []string + localStorageName := "local-storage" + BeforeEach(func() { - containerProfileStorages = []nnfv1alpha1.NnfContainerProfileStorage{ - {Name: "DW_PERSISTENT_foo_persistent_storage", Optional: true}, + createContainerProfile = false // We'll make a custom version. + createGlobalLustre = true + }) + + JustBeforeEach(func() { + // Build a list of storage arguments for the test. This is necessary because things + // like persistentStorageName are not initialized until the parent's BeforeEach() + // block, and the Entry() in the DescribeTable() will be translated well before + // then. So create a list of canned directive arguments for use in the Entries. 
+ storageArgsList = []string{ + fmt.Sprintf("DW_JOB_foo_local_storage=%s", localStorageName), + fmt.Sprintf("DW_PERSISTENT_foo_persistent_storage=%s", persistentStorageName), + fmt.Sprintf("DW_GLOBAL_foo_global_lustre=%s", globalLustre.Spec.MountRoot), } }) - It("should go to error", func() { + + buildContainerProfile := func(storages []nnfv1alpha1.NnfContainerProfileStorage) { + By("Creating a profile with specific storages") + tempProfile := basicNnfContainerProfile("restricted-"+uuid.NewString()[:8], storages) + containerProfile = createNnfContainerProfile(tempProfile, true) + } + + buildContainerWorkflowWithArgs := func(args string) { + By("creating the workflow") workflow.Spec.DWDirectives = []string{ - "#DW jobdw name=container-storage type=gfs2 capacity=1GB", - fmt.Sprintf("#DW container name=container profile=%s "+ - "DW_JOB_foo_local_storage=container-storage ", - containerProfile.Name), + fmt.Sprintf("#DW jobdw name=%s type=gfs2 capacity=1GB", localStorageName), + fmt.Sprintf("#DW persistentdw name=%s", persistentStorageName), + fmt.Sprintf("#DW container name=container profile=%s %s", containerProfile.Name, args), } Expect(k8sClient.Create(context.TODO(), workflow)).Should(Succeed()) + } - Eventually(func(g Gomega) bool { - g.Expect(k8sClient.Get(context.TODO(), key, workflow)).To(Succeed()) - return !workflow.Status.Ready && workflow.Status.Status == dwsv1alpha2.StatusError - }).Should(BeTrue(), "be in error state") + DescribeTable("should not go to Proposal Ready", + func(argIdx int, storages []nnfv1alpha1.NnfContainerProfileStorage) { + buildContainerProfile(storages) + buildContainerWorkflowWithArgs(storageArgsList[argIdx]) + Eventually(func(g Gomega) bool { + g.Expect(k8sClient.Get(context.TODO(), key, workflow)).To(Succeed()) + return workflow.Status.Status == dwsv1alpha2.StatusError && + strings.Contains(workflow.Status.Message, "not found in container profile") + }).Should(BeTrue(), "does not reach desired Proposal state") + }, + + Entry("when DW_JOB_ not present in the container profile", 0, + []nnfv1alpha1.NnfContainerProfileStorage{ + {Name: "DW_PERSISTENT_foo_persistent_storage", Optional: true}, + {Name: "DW_GLOBAL_foo_global_lustre", Optional: true}, + }, + ), + Entry("when DW_PERSISTENT_ not present in the container profile", 1, + []nnfv1alpha1.NnfContainerProfileStorage{ + {Name: "DW_JOB_foo_local_storage", Optional: true}, + {Name: "DW_GLOBAL_foo_global_lustre", Optional: true}, + }, + ), + Entry("when DW_GLOBAL_ not present in the container profile", 2, + []nnfv1alpha1.NnfContainerProfileStorage{ + {Name: "DW_JOB_foo_local_storage", Optional: true}, + {Name: "DW_PERSISTENT_foo_persistent_storage", Optional: true}, + }, + ), + ) + }) + + Context("when an unsupported jobdw container filesystem type is specified", func() { + localStorageName := "local-storage" + + buildContainerWorkflowWithJobDWType := func(fsType string) { + By("creating the workflow") + workflow.Spec.DWDirectives = []string{ + fmt.Sprintf("#DW jobdw name=%s type=%s capacity=1GB", localStorageName, fsType), + fmt.Sprintf("#DW container name=container profile=%s DW_JOB_foo_local_storage=%s", + containerProfile.Name, localStorageName), + } + Expect(k8sClient.Create(context.TODO(), workflow)).Should(Succeed()) + } + + DescribeTable("should reach the desired Proposal state", + func(fsType string, shouldError bool) { + buildContainerWorkflowWithJobDWType(fsType) + Eventually(func(g Gomega) bool { + g.Expect(k8sClient.Get(context.TODO(), key, workflow)).To(Succeed()) + if shouldError { + return 
workflow.Status.Status == dwsv1alpha2.StatusError && + strings.Contains(workflow.Status.Message, "unsupported container filesystem") + } else { + return workflow.Status.Ready == true + } + }).Should(BeTrue(), "should reach desired Proposal state") + + }, + Entry("when gfs2 jobdw storage is used", "gfs2", false), + Entry("when lustre jobdw storage is used", "lustre", false), + Entry("when xfs jobdw storage is used", "xfs", true), + Entry("when raw jobdw storage is used", "raw", true), + ) + }) + + Context("when an unsupported persistentdw container filesystem type is specified", func() { + + BeforeEach(func() { + createPersistent = false }) + + buildContainerWorkflowWithPersistentDWType := func(fsType string) { + By("creating the workflow") + workflow.Spec.DWDirectives = []string{ + fmt.Sprintf("#DW persistentdw name=%s", persistentStorageName), + fmt.Sprintf("#DW container name=container profile=%s DW_PERSISTENT_foo_persistent_storage=%s", + containerProfile.Name, persistentStorageName), + } + Expect(k8sClient.Create(context.TODO(), workflow)).Should(Succeed()) + } + + DescribeTable("should reach the desired Proposal state", + func(fsType string, shouldError bool) { + createPersistentStorageInstance(persistentStorageName, fsType) + buildContainerWorkflowWithPersistentDWType(fsType) + Eventually(func(g Gomega) bool { + g.Expect(k8sClient.Get(context.TODO(), key, workflow)).To(Succeed()) + if shouldError { + return workflow.Status.Status == dwsv1alpha2.StatusError && + strings.Contains(workflow.Status.Message, "unsupported container filesystem: "+fsType) + } else { + return workflow.Status.Ready == true + } + }).Should(BeTrue(), "should reach desired Proposal state") + + }, + Entry("when gfs2 persistentdw storage is used", "gfs2", false), + Entry("when lustre persistentdw storage is used", "lustre", false), + Entry("when xfs persistentdw storage is used", "xfs", true), + Entry("when raw persistentdw storage is used", "raw", true), + ) }) }) }) @@ -1187,9 +1389,10 @@ var _ = Describe("NnfContainerProfile Webhook test", func() { // The nnfcontainer_webhook_test.go covers testing of the webhook. // This spec exists only to verify that the webhook is also running for // the controller tests. - It("Fails to create an invalid profile, to verify that the webhook is installed", func() { - profileInvalid := basicNnfContainerProfile("an-invalid-profile", nil) - profileInvalid.Data.RetryLimit = -100 + It("fails to create an invalid profile to verify that the webhook is installed", func() { + profileInvalid := basicNnfContainerProfile("invalid-"+uuid.NewString()[:8], nil) + profileInvalid.Data.Spec = nil + profileInvalid.Data.MPISpec = nil Expect(createNnfContainerProfile(profileInvalid, false)).To(BeNil()) }) }) @@ -1198,8 +1401,8 @@ var _ = Describe("NnfStorageProfile Webhook test", func() { // The nnfstorageprofile_webhook_test.go covers testing of the webhook. // This spec exists only to verify that the webhook is also running for // the controller tests. 
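The filesystem-type tables above expect gfs2 and lustre to be accepted for container-mounted storage and xfs/raw to be rejected with an "unsupported container filesystem" message. The check itself lives in the workflow controller and is not part of this hunk; a minimal sketch of that kind of validation, with a hypothetical helper name, built on the ResourceError API from this change:

func checkContainerFsType(fsType string) error {
	switch strings.ToLower(fsType) {
	case "gfs2", "lustre":
		// Filesystems that can be bind-mounted into user containers.
		return nil
	default:
		return dwsv1alpha2.NewResourceError("").WithUserMessage("unsupported container filesystem: %s", fsType).WithUser().WithFatal()
	}
}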
- It("Fails to create an invalid profile, to verify that the webhook is installed", func() { - profileInvalid := basicNnfStorageProfile("an-invalid-profile") + It("fails to create an invalid profile to verify that the webhook is installed", func() { + profileInvalid := basicNnfStorageProfile("invalid-" + uuid.NewString()[:8]) profileInvalid.Data.LustreStorage.ExternalMGS = "10.0.0.1@tcp" profileInvalid.Data.LustreStorage.CombinedMGTMDT = true Expect(createNnfStorageProfile(profileInvalid, false)).To(BeNil()) diff --git a/controllers/nnfcontainerprofile_helpers.go b/controllers/nnfcontainerprofile_helpers.go new file mode 100644 index 000000000..fcbb2a502 --- /dev/null +++ b/controllers/nnfcontainerprofile_helpers.go @@ -0,0 +1,148 @@ +/* + * Copyright 2023 Hewlett Packard Enterprise Development LP + * Other additional copyright holders may be indicated within. + * + * The entirety of this work is licensed under the Apache License, + * Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. + * + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package controllers + +import ( + "context" + "fmt" + "os" + + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + kruntime "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + + dwsv1alpha2 "github.com/HewlettPackard/dws/api/v1alpha2" + "github.com/HewlettPackard/dws/utils/dwdparse" + nnfv1alpha1 "github.com/NearNodeFlash/nnf-sos/api/v1alpha1" + "github.com/go-logr/logr" +) + +func getContainerProfile(ctx context.Context, clnt client.Client, workflow *dwsv1alpha2.Workflow, index int) (*nnfv1alpha1.NnfContainerProfile, error) { + profile, err := findPinnedContainerProfile(ctx, clnt, workflow, index) + if err != nil { + return nil, err + } + + if profile == nil { + return nil, dwsv1alpha2.NewResourceError("container profile '%s' not found", indexedResourceName(workflow, index)).WithFatal() + } + + return profile, nil +} + +func findPinnedContainerProfile(ctx context.Context, clnt client.Client, workflow *dwsv1alpha2.Workflow, index int) (*nnfv1alpha1.NnfContainerProfile, error) { + profile := &nnfv1alpha1.NnfContainerProfile{ + ObjectMeta: metav1.ObjectMeta{ + Name: indexedResourceName(workflow, index), + Namespace: workflow.Namespace, + }, + } + + if err := clnt.Get(ctx, client.ObjectKeyFromObject(profile), profile); err != nil { + return nil, err + } + + if !profile.Data.Pinned { + return nil, dwsv1alpha2.NewResourceError("expected a pinned container profile '%s', but found one that is not pinned", indexedResourceName(workflow, index)).WithFatal() + } + + return profile, nil +} + +func findContainerProfile(ctx context.Context, clnt client.Client, workflow *dwsv1alpha2.Workflow, index int) (*nnfv1alpha1.NnfContainerProfile, error) { + args, err := dwdparse.BuildArgsMap(workflow.Spec.DWDirectives[index]) + if err != nil { + return nil, err + } + + name, found := args["profile"] + if !found { + return nil, fmt.Errorf("container directive '%s' has no profile key", workflow.Spec.DWDirectives[index]) + 
} + + profile := &nnfv1alpha1.NnfContainerProfile{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: os.Getenv("NNF_CONTAINER_PROFILE_NAMESPACE"), + }, + } + + if err := clnt.Get(ctx, client.ObjectKeyFromObject(profile), profile); err != nil { + return nil, err + } + + if profile.Data.Pinned { + return nil, dwsv1alpha2.NewResourceError("expected container profile that is not pinned '%s', but found one that is pinned", indexedResourceName(workflow, index)).WithFatal() + } + + // Determine whether the profile is restricted to a UserID/GroupID. + restrictedMsg := "container profile '%s' is restricted to %s %d" + if profile.Data.UserID != nil && *profile.Data.UserID != workflow.Spec.UserID { + return nil, dwsv1alpha2.NewResourceError("").WithUserMessage(restrictedMsg, profile.Name, "UserID", *profile.Data.UserID).WithUser().WithFatal() + } + if profile.Data.GroupID != nil && *profile.Data.GroupID != workflow.Spec.GroupID { + return nil, dwsv1alpha2.NewResourceError("").WithUserMessage(restrictedMsg, profile.Name, "GroupID", *profile.Data.GroupID).WithUser().WithFatal() + + } + + return profile, nil +} + +func createPinnedContainerProfileIfNecessary(ctx context.Context, clnt client.Client, scheme *kruntime.Scheme, workflow *dwsv1alpha2.Workflow, index int, log logr.Logger) error { + profile, err := findPinnedContainerProfile(ctx, clnt, workflow, index) + if err != nil && !apierrors.IsNotFound(err) { + return err + } + if profile != nil { + return nil + } + + profile, err = findContainerProfile(ctx, clnt, workflow, index) + if err != nil { + return err + } + + pinnedProfile := &nnfv1alpha1.NnfContainerProfile{ + ObjectMeta: metav1.ObjectMeta{ + Name: indexedResourceName(workflow, index), + Namespace: workflow.Namespace, + }, + } + + profile.Data.DeepCopyInto(&pinnedProfile.Data) + + pinnedProfile.Data.Pinned = true + + dwsv1alpha2.AddOwnerLabels(pinnedProfile, workflow) + + if err := controllerutil.SetControllerReference(workflow, pinnedProfile, scheme); err != nil { + log.Error(err, "failed to set controller reference on profile", "profile", pinnedProfile) + return fmt.Errorf("failed to set controller reference on profile %s", client.ObjectKeyFromObject(pinnedProfile)) + } + + if err := clnt.Create(ctx, pinnedProfile); err != nil { + return err + } + log.Info("Created pinned container profile", "resource", client.ObjectKeyFromObject(pinnedProfile)) + + return nil +} diff --git a/controllers/nnfcontainerprofile_test.go b/controllers/nnfcontainerprofile_test.go index 6064d85a1..fa1277cdd 100644 --- a/controllers/nnfcontainerprofile_test.go +++ b/controllers/nnfcontainerprofile_test.go @@ -26,29 +26,35 @@ import ( "github.com/google/uuid" corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" + dwsv1alpha2 "github.com/HewlettPackard/dws/api/v1alpha2" nnfv1alpha1 "github.com/NearNodeFlash/nnf-sos/api/v1alpha1" ) // createNnfContainerProfile creates the given profile in the "default" namespace. +// When expectSuccess=false, we expect to find that it was failed by the webhook. func createNnfContainerProfile(containerProfile *nnfv1alpha1.NnfContainerProfile, expectSuccess bool) *nnfv1alpha1.NnfContainerProfile { // Place NnfContainerProfiles in "default" for the test environment. 
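A hedged sketch of how the proposal phase is expected to use these helpers (the call sites are in the workflow controller, outside this hunk; r and log stand in for the reconciler's client, scheme, and logger): the referenced profile is pinned once, and later phases read only the pinned copy so that edits to the original profile cannot affect a running workflow.

if err := createPinnedContainerProfileIfNecessary(ctx, r.Client, r.Scheme, workflow, index, log); err != nil {
	return err
}
// From here on, only the pinned copy is consulted.
profile, err := getContainerProfile(ctx, r.Client, workflow, index)
if err != nil {
	return err
}
log.Info("using pinned container profile", "name", profile.Name, "pinned", profile.Data.Pinned)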
containerProfile.ObjectMeta.Namespace = corev1.NamespaceDefault profKey := client.ObjectKeyFromObject(containerProfile) profExpected := &nnfv1alpha1.NnfContainerProfile{} - Expect(k8sClient.Get(context.TODO(), profKey, profExpected)).ToNot(Succeed()) + err := k8sClient.Get(context.TODO(), profKey, profExpected) + Expect(err).ToNot(BeNil()) + Expect(apierrors.IsNotFound(err)).To(BeTrue()) if expectSuccess { Expect(k8sClient.Create(context.TODO(), containerProfile)).To(Succeed(), "create nnfcontainerprofile") - //err := k8sClient.Create(context.TODO(), containerProfile) Eventually(func(g Gomega) { g.Expect(k8sClient.Get(context.TODO(), profKey, profExpected)).To(Succeed()) }, "3s", "1s").Should(Succeed(), "wait for create of NnfContainerProfile") } else { - Expect(k8sClient.Create(context.TODO(), containerProfile)).ToNot(Succeed(), "expect to fail to create nnfcontainerprofile") + err = k8sClient.Create(context.TODO(), containerProfile) + Expect(err).ToNot(BeNil()) + Expect(err.Error()).To(MatchRegexp("webhook .* denied the request")) containerProfile = nil } @@ -63,6 +69,7 @@ func basicNnfContainerProfile(name string, storages []nnfv1alpha1.NnfContainerPr storages = []nnfv1alpha1.NnfContainerProfileStorage{ {Name: "DW_JOB_foo_local_storage", Optional: true}, {Name: "DW_PERSISTENT_foo_persistent_storage", Optional: true}, + {Name: "DW_GLOBAL_foo_global_lustre", Optional: true}, } } @@ -90,9 +97,9 @@ func createBasicNnfContainerProfile(storages []nnfv1alpha1.NnfContainerProfileSt return createNnfContainerProfile(containerProfile, true) } -func verifyPinnedContainerProfile(ctx context.Context, clnt client.Client, namespace string, profileName string) error { +func verifyPinnedContainerProfile(ctx context.Context, clnt client.Client, workflow *dwsv1alpha2.Workflow, index int) error { - nnfContainerProfile, err := findPinnedProfile(ctx, clnt, namespace, profileName) + nnfContainerProfile, err := findPinnedContainerProfile(ctx, clnt, workflow, index) ExpectWithOffset(1, err).NotTo(HaveOccurred()) ExpectWithOffset(1, nnfContainerProfile.Data.Pinned).To(BeTrue()) refs := nnfContainerProfile.GetOwnerReferences() diff --git a/controllers/nnfstorageprofile_helpers.go b/controllers/nnfstorageprofile_helpers.go index 859fc5134..5a9aea87e 100644 --- a/controllers/nnfstorageprofile_helpers.go +++ b/controllers/nnfstorageprofile_helpers.go @@ -21,7 +21,6 @@ package controllers import ( "context" - "fmt" "os" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -61,14 +60,14 @@ func findProfileToUse(ctx context.Context, clnt client.Client, args map[string]s } // Require that there be one and only one default. 
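The lookup failures in this helper now come back as fatal ResourceErrors carrying a user message instead of plain fmt.Errorf values. A hedged sketch of a caller adding its own debug context while keeping that user message and severity intact (dwArgs and r are illustrative names):

nnfStorageProfile, err := findProfileToUse(ctx, r.Client, dwArgs)
if err != nil {
	// WithError inherits the child's severity, type, and user message.
	return nil, dwsv1alpha2.NewResourceError("could not find an NnfStorageProfile to use").WithError(err)
}
_ = nnfStorageProfile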
if len(profilesFound) == 0 { - return nil, fmt.Errorf("Unable to find a default NnfStorageProfile to use") + return nil, dwsv1alpha2.NewResourceError("").WithUserMessage("Unable to find a default NnfStorageProfile to use").WithFatal() } else if len(profilesFound) > 1 { - return nil, fmt.Errorf("More than one default NnfStorageProfile found; unable to pick one: %v", profilesFound) + return nil, dwsv1alpha2.NewResourceError("").WithUserMessage("More than one default NnfStorageProfile found; unable to pick one: %v", profilesFound).WithFatal() } profileName = profilesFound[0] } if len(profileName) == 0 { - return nil, fmt.Errorf("Unable to find an NnfStorageProfile name") + return nil, dwsv1alpha2.NewResourceError("").WithUserMessage("Unable to find an NnfStorageProfile name").WithUser().WithFatal() } err := clnt.Get(ctx, types.NamespacedName{Namespace: profileNamespace, Name: profileName}, nnfStorageProfile) if err != nil { @@ -86,7 +85,7 @@ func findPinnedProfile(ctx context.Context, clnt client.Client, namespace string return nil, err } if !nnfStorageProfile.Data.Pinned { - return nil, fmt.Errorf("Expected pinned NnfStorageProfile, but it was not pinned: %s", pinnedName) + return nil, dwsv1alpha2.NewResourceError("Expected pinned NnfStorageProfile, but it was not pinned: %s", pinnedName).WithFatal() } return nnfStorageProfile, nil } @@ -150,16 +149,16 @@ func addPinnedStorageProfileLabel(object metav1.Object, nnfStorageProfile *nnfv1 func getPinnedStorageProfileFromLabel(ctx context.Context, clnt client.Client, object metav1.Object) (*nnfv1alpha1.NnfStorageProfile, error) { labels := object.GetLabels() if labels == nil { - return nil, fmt.Errorf("unable to find labels") + return nil, dwsv1alpha2.NewResourceError("unable to find labels").WithFatal() } pinnedName, okName := labels[nnfv1alpha1.PinnedStorageProfileLabelName] if !okName { - return nil, fmt.Errorf("unable to find %s label", nnfv1alpha1.PinnedStorageProfileLabelName) + return nil, dwsv1alpha2.NewResourceError("unable to find %s label", nnfv1alpha1.PinnedStorageProfileLabelName).WithFatal() } pinnedNamespace, okNamespace := labels[nnfv1alpha1.PinnedStorageProfileLabelNameSpace] if !okNamespace { - return nil, fmt.Errorf("unable to find %s label", nnfv1alpha1.PinnedStorageProfileLabelNameSpace) + return nil, dwsv1alpha2.NewResourceError("unable to find %s label", nnfv1alpha1.PinnedStorageProfileLabelNameSpace).WithFatal() } return findPinnedProfile(ctx, clnt, pinnedNamespace, pinnedName) diff --git a/controllers/nnfstorageprofile_test.go b/controllers/nnfstorageprofile_test.go index 07e20d20d..19e5bae3b 100644 --- a/controllers/nnfstorageprofile_test.go +++ b/controllers/nnfstorageprofile_test.go @@ -7,6 +7,7 @@ import ( "github.com/google/uuid" corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" @@ -14,22 +15,26 @@ import ( ) // createNnfStorageProfile creates the given profile in the "default" namespace. +// When expectSuccess=false, we expect to find that it was failed by the webhook. func createNnfStorageProfile(storageProfile *nnfv1alpha1.NnfStorageProfile, expectSuccess bool) *nnfv1alpha1.NnfStorageProfile { // Place NnfStorageProfiles in "default" for the test environment. 
storageProfile.ObjectMeta.Namespace = corev1.NamespaceDefault profKey := client.ObjectKeyFromObject(storageProfile) profExpected := &nnfv1alpha1.NnfStorageProfile{} - Expect(k8sClient.Get(context.TODO(), profKey, profExpected)).ToNot(Succeed()) + err := k8sClient.Get(context.TODO(), profKey, profExpected) + Expect(err).ToNot(BeNil()) + Expect(apierrors.IsNotFound(err)).To(BeTrue()) if expectSuccess { Expect(k8sClient.Create(context.TODO(), storageProfile)).To(Succeed(), "create nnfstorageprofile") - //err := k8sClient.Create(context.TODO(), storageProfile) Eventually(func(g Gomega) { g.Expect(k8sClient.Get(context.TODO(), profKey, profExpected)).To(Succeed()) }, "3s", "1s").Should(Succeed(), "wait for create of NnfStorageProfile") } else { - Expect(k8sClient.Create(context.TODO(), storageProfile)).ToNot(Succeed(), "expect to fail to create nnfstorageprofile") + err = k8sClient.Create(context.TODO(), storageProfile) + Expect(err).ToNot(BeNil()) + Expect(err.Error()).To(MatchRegexp("webhook .* denied the request")) storageProfile = nil } diff --git a/controllers/suite_test.go b/controllers/suite_test.go index d042e2677..9cd1b1bfa 100644 --- a/controllers/suite_test.go +++ b/controllers/suite_test.go @@ -75,6 +75,8 @@ var envVars = []envSetting{ {"POD_NAMESPACE", "default"}, {"NNF_STORAGE_PROFILE_NAMESPACE", "default"}, {"NNF_CONTAINER_PROFILE_NAMESPACE", "default"}, + {"NNF_PORT_MANAGER_NAME", "nnf-port-manager"}, + {"NNF_PORT_MANAGER_NAMESPACE", "default"}, {"NNF_POD_IP", "172.0.0.1"}, {"NNF_NODE_NAME", "nnf-test-node"}, {"ACK_GINKGO_DEPRECATIONS", "1.16.4"}, diff --git a/deploy.sh b/deploy.sh index 94e924215..2366d78ef 100755 --- a/deploy.sh +++ b/deploy.sh @@ -52,9 +52,18 @@ if [[ $CMD == 'deploy' ]]; then # MPIJobSpec (with large annotations). (cd config/examples && $KUSTOMIZE edit set image nnf-mfu=$NNFMFU_IMG) $KUSTOMIZE build config/examples | kubectl apply --server-side=true --force-conflicts -f - + + # Deploy the nnfportmanager after everything else + echo "Waiting for the nnfportmanager CRD to become ready..."
+ while :; do + sleep 1 + kubectl get crds nnfportmanagers.nnf.cray.hpe.com && break + done + $KUSTOMIZE build config/ports| kubectl apply --server-side=true --force-conflicts -f - fi if [[ $CMD == 'undeploy' ]]; then + $KUSTOMIZE build config/ports | kubectl delete --ignore-not-found -f - $KUSTOMIZE build config/examples | kubectl delete --ignore-not-found -f - $KUSTOMIZE build config/$OVERLAY | kubectl delete --ignore-not-found -f - fi diff --git a/go.mod b/go.mod index 3c79b7ebb..918fc5066 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/NearNodeFlash/nnf-sos go 1.19 require ( - github.com/HewlettPackard/dws v0.0.1-0.20230613201835-73abc41bd83c + github.com/HewlettPackard/dws v0.0.1-0.20230907181649-2f6d9fca4249 github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20230613180840-6178f2b04900 github.com/NearNodeFlash/nnf-ec v0.0.0-20230526161255-cfb2d89b35d7 github.com/ghodss/yaml v1.0.0 @@ -14,6 +14,7 @@ require ( github.com/onsi/ginkgo/v2 v2.9.1 github.com/onsi/gomega v1.27.3 github.com/prometheus/client_golang v1.14.0 + go.openly.dev/pointy v1.3.0 go.uber.org/zap v1.24.0 golang.org/x/sync v0.1.0 k8s.io/api v0.26.1 diff --git a/go.sum b/go.sum index 6ae120757..2089d52e3 100644 --- a/go.sum +++ b/go.sum @@ -1,8 +1,8 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/HewlettPackard/dws v0.0.1-0.20230613201835-73abc41bd83c h1:atwVAI9Gslf501a4ADo/nkJol141DgF8YR4AiMtj4E8= -github.com/HewlettPackard/dws v0.0.1-0.20230613201835-73abc41bd83c/go.mod h1:YvNzcgAPmwhl/YQj6dMwsB9OpwbI5bp/41kINfFiXX8= +github.com/HewlettPackard/dws v0.0.1-0.20230907181649-2f6d9fca4249 h1:t5ibQcHcEL374lxAVVXtHqXOZbPvDVSDSrrAVl7yzBA= +github.com/HewlettPackard/dws v0.0.1-0.20230907181649-2f6d9fca4249/go.mod h1:YvNzcgAPmwhl/YQj6dMwsB9OpwbI5bp/41kINfFiXX8= github.com/HewlettPackard/structex v1.0.4 h1:RVTdN5FWhDWr1IkjllU8wxuLjISo4gr6u5ryZpzyHcA= github.com/HewlettPackard/structex v1.0.4/go.mod h1:3frC4RY/cPsP/4+N8rkxsNAGlQwHV+zDC7qvrN+N+rE= github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20230613180840-6178f2b04900 h1:jOrP2H+D5amgHIONcucYS3/kJm6QfmqAG23Ke7elunI= @@ -233,8 +233,8 @@ github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5 github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ= @@ -247,6 +247,8 @@ go.chromium.org/luci v0.0.0-20230227223707-c4460eb434d8/go.mod h1:vTpW7gzqLQ9mhM go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk= go.opencensus.io v0.24.0 
h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= +go.openly.dev/pointy v1.3.0 h1:keht3ObkbDNdY8PWPwB7Kcqk+MAlNStk5kXZTxukE68= +go.openly.dev/pointy v1.3.0/go.mod h1:rccSKiQDQ2QkNfSVT2KG8Budnfhf3At8IWxy/3ElYes= go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= diff --git a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/clientmount_types.go b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/clientmount_types.go index 6380e23d8..e4f375df3 100644 --- a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/clientmount_types.go +++ b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/clientmount_types.go @@ -189,6 +189,8 @@ type ClientMountStatus struct { //+kubebuilder:object:root=true //+kubebuilder:storageversion //+kubebuilder:subresource:status +//+kubebuilder:printcolumn:name="ERROR",type="string",JSONPath=".status.error.severity" +//+kubebuilder:printcolumn:name="AGE",type="date",JSONPath=".metadata.creationTimestamp" // ClientMount is the Schema for the clientmounts API type ClientMount struct { diff --git a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/directivebreakdown_types.go b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/directivebreakdown_types.go index 19301de04..df5c95bcb 100644 --- a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/directivebreakdown_types.go +++ b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/directivebreakdown_types.go @@ -190,6 +190,7 @@ type DirectiveBreakdownStatus struct { //+kubebuilder:storageversion //+kubebuilder:subresource:status //+kubebuilder:printcolumn:name="READY",type="boolean",JSONPath=".status.ready",description="True if allocation sets have been generated" +//+kubebuilder:printcolumn:name="ERROR",type="string",JSONPath=".status.error.severity" //+kubebuilder:printcolumn:name="AGE",type="date",JSONPath=".metadata.creationTimestamp" // DirectiveBreakdown is the Schema for the directivebreakdown API diff --git a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/persistentstorageinstance_types.go b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/persistentstorageinstance_types.go index 34e26f202..90e196b5e 100644 --- a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/persistentstorageinstance_types.go +++ b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/persistentstorageinstance_types.go @@ -91,6 +91,8 @@ type PersistentStorageInstanceStatus struct { //+kubebuilder:object:root=true //+kubebuilder:storageversion //+kubebuilder:subresource:status +//+kubebuilder:printcolumn:name="ERROR",type="string",JSONPath=".status.error.severity" +//+kubebuilder:printcolumn:name="AGE",type="date",JSONPath=".metadata.creationTimestamp" // PersistentStorageInstance is the Schema for the Persistentstorageinstances API type PersistentStorageInstance struct { diff --git a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/resource_error.go b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/resource_error.go index 29dd3084b..fdcac3588 100644 --- a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/resource_error.go +++ b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/resource_error.go @@ -1,5 +1,5 @@ /* - * Copyright 2022 Hewlett Packard Enterprise Development LP + * Copyright 2022-2023 Hewlett Packard Enterprise Development LP * Other additional copyright holders may be 
indicated within. * * The entirety of this work is licensed under the Apache License, @@ -19,6 +19,42 @@ package v1alpha2 +import ( + "fmt" + "strings" + + "github.com/go-logr/logr" +) + +type ResourceErrorSeverity string +type ResourceErrorType string + +const ( + // Minor errors are very likely to eventually succeed (e.g., errors caused by a stale cache) + // The WLM doesn't see these errors directly. The workflow stays in the DriverWait state, and + // the error string is put in workflow.Status.Message. + SeverityMinor ResourceErrorSeverity = "Minor" + + // Major errors may or may not succeed. These are transient errors that could be persistent + // due to an underlying problem (e.g., errors from OS calls) + SeverityMajor ResourceErrorSeverity = "Major" + + // Fatal errors will never succeed. This is for situations where we can guarantee that retrying + // will not fix the error (e.g., a DW directive that is not valid) + SeverityFatal ResourceErrorSeverity = "Fatal" +) + +const ( + // Internal errors are due to an error in the DWS/driver code + TypeInternal ResourceErrorType = "Internal" + + // WLM errors are due to an error with the input from the WLM + TypeWLM ResourceErrorType = "WLM" + + // User errors are due to an error with the input from a user + TypeUser ResourceErrorType = "User" +) + type ResourceErrorInfo struct { // Optional user facing message if the error is relevant to an end user UserMessage string `json:"userMessage,omitempty"` @@ -26,8 +62,14 @@ type ResourceErrorInfo struct { // Internal debug message for the error DebugMessage string `json:"debugMessage"` - // Indication if the error is likely recoverable or not - Recoverable bool `json:"recoverable"` + // Internal or user error + // +kubebuilder:validation:Enum=Internal;User + Type ResourceErrorType `json:"type"` + + // Indication of how severe the error is. Minor will likely succeed, Major may + // succeed, and Fatal will never succeed. + // +kubebuilder:validation:Enum=Minor;Major;Fatal + Severity ResourceErrorSeverity `json:"severity"` } type ResourceError struct { @@ -35,54 +77,131 @@ type ResourceError struct { Error *ResourceErrorInfo `json:"error,omitempty"` } -func NewResourceError(message string, err error) *ResourceErrorInfo { - resourceError := &ResourceErrorInfo{ - Recoverable: true, +func NewResourceError(format string, a ...any) *ResourceErrorInfo { + return &ResourceErrorInfo{ + Type: TypeInternal, + Severity: SeverityMinor, + DebugMessage: fmt.Sprintf(format, a...), } +} - if err != nil { - // If the error provided is already a ResourceError, use it and concatenate - // the debug messages - _, ok := err.(*ResourceErrorInfo) - if ok { - resourceError = err.(*ResourceErrorInfo) - } +// A resource error can have an optional user message that is displayed in the workflow.Status.Message +// field. The user message of the lowest level error is all that's displayed. +func (e *ResourceErrorInfo) WithUserMessage(format string, a ...any) *ResourceErrorInfo { + // Only set the user message if it's empty. This prevents upper layers + // from overriding a user message set by a lower layer + if e.UserMessage == "" { + e.UserMessage = fmt.Sprintf(format, a...) 
+ } + + return e +} + +func (e *ResourceErrorInfo) WithError(err error) *ResourceErrorInfo { + if err == nil { + return e + } + + // Concatenate the parent and child debug messages + debugMessageList := []string{} + if e.DebugMessage != "" { + debugMessageList = append(debugMessageList, e.DebugMessage) + } + + childError, ok := err.(*ResourceErrorInfo) + if ok { + // Inherit the severity and the user message if the child error is a ResourceError + e.Severity = childError.Severity + e.UserMessage = childError.UserMessage + e.Type = childError.Type - if message == "" { - message = err.Error() + // If the child resource error doesn't have a debug message, use the user message instead + if childError.DebugMessage == "" { + debugMessageList = append(debugMessageList, childError.UserMessage) } else { - message = message + ": " + err.Error() + debugMessageList = append(debugMessageList, childError.DebugMessage) } + } else { + debugMessageList = append(debugMessageList, err.Error()) } - resourceError.DebugMessage = message + e.DebugMessage = strings.Join(debugMessageList, ": ") - return resourceError + return e } func (e *ResourceErrorInfo) WithFatal() *ResourceErrorInfo { - e.Recoverable = false + e.Severity = SeverityFatal return e } -func (e *ResourceErrorInfo) WithUserMessage(message string) *ResourceErrorInfo { - // Only set the user message if it's empty. This prevents upper layers - // from overriding a user message set by a lower layer - if e.UserMessage == "" { - e.UserMessage = message +func (e *ResourceErrorInfo) WithMajor() *ResourceErrorInfo { + if e.Severity != SeverityFatal { + e.Severity = SeverityMajor } + return e +} + +func (e *ResourceErrorInfo) WithMinor() *ResourceErrorInfo { + if e.Severity != SeverityFatal && e.Severity != SeverityMajor { + e.Severity = SeverityMinor + } + return e +} + +func (e *ResourceErrorInfo) WithInternal() *ResourceErrorInfo { + e.Type = TypeInternal + return e +} + +func (e *ResourceErrorInfo) WithWLM() *ResourceErrorInfo { + e.Type = TypeWLM + return e +} +func (e *ResourceErrorInfo) WithUser() *ResourceErrorInfo { + e.Type = TypeUser return e } func (e *ResourceErrorInfo) Error() string { - return e.DebugMessage + message := "" + if e.DebugMessage == "" { + message = e.UserMessage + } else { + message = e.DebugMessage + } + return fmt.Sprintf("%s error: %s", strings.ToLower(string(e.Type)), message) +} + +func (e *ResourceErrorInfo) GetUserMessage() string { + return fmt.Sprintf("%s error: %s", string(e.Type), e.UserMessage) +} + +func (e *ResourceError) SetResourceErrorAndLog(err error, log logr.Logger) { + e.SetResourceError(err) + if err == nil { + return + } + + childError, ok := err.(*ResourceErrorInfo) + if ok { + if childError.Severity == SeverityFatal { + log.Error(err, "Fatal error") + return + } + + log.Info("Recoverable Error", "Severity", childError.Severity, "Message", err.Error()) + return + } + + log.Info("Recoverable Error", "Message", err.Error()) } func (e *ResourceError) SetResourceError(err error) { if err == nil { e.Error = nil } else { - e.Error = NewResourceError("", err) + e.Error = NewResourceError("").WithError(err) } } diff --git a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/servers_types.go b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/servers_types.go index f3f2cc922..3ce6bc6f7 100644 --- a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/servers_types.go +++ b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/servers_types.go @@ -20,6 +20,8 @@ package v1alpha2 import ( + 
"github.com/HewlettPackard/dws/utils/updater" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -79,12 +81,16 @@ type ServersStatus struct { Ready bool `json:"ready"` LastUpdate *metav1.MicroTime `json:"lastUpdate,omitempty"` AllocationSets []ServersStatusAllocationSet `json:"allocationSets,omitempty"` + + // Error information + ResourceError `json:",inline"` } //+kubebuilder:object:root=true //+kubebuilder:storageversion //+kubebuilder:subresource:status //+kubebuilder:printcolumn:name="READY",type="boolean",JSONPath=".status.ready",description="True if allocation sets have been generated" +//+kubebuilder:printcolumn:name="ERROR",type="string",JSONPath=".status.error.severity" //+kubebuilder:printcolumn:name="AGE",type="date",JSONPath=".metadata.creationTimestamp" // Servers is the Schema for the servers API @@ -96,6 +102,10 @@ type Servers struct { Status ServersStatus `json:"status,omitempty"` } +func (s *Servers) GetStatus() updater.Status[*ServersStatus] { + return &s.Status +} + //+kubebuilder:object:root=true // ServersList contains a list of Servers diff --git a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/systemconfiguration_types.go b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/systemconfiguration_types.go index 8217718d2..c89ad18d7 100644 --- a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/systemconfiguration_types.go +++ b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/systemconfiguration_types.go @@ -68,6 +68,12 @@ type SystemConfigurationSpec struct { // START is an integer value that represents the start of a port range and END is an // integer value that represents the end of the port range (inclusive). Ports []intstr.IntOrString `json:"ports,omitempty"` + + // PortsCooldownInSeconds is the number of seconds to wait before a port can be reused. Defaults + // to 60 seconds (to match the typical value for the kernel's TIME_WAIT). A value of 0 means the + // ports can be reused immediately. + // +kubebuilder:default:=60 + PortsCooldownInSeconds int `json:"portsCooldownInSeconds"` } // SystemConfigurationStatus defines the status of SystemConfiguration diff --git a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/workflow_types.go b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/workflow_types.go index 25a5b3386..388dfa523 100644 --- a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/workflow_types.go +++ b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/workflow_types.go @@ -1,5 +1,5 @@ /* - * Copyright 2021, 2022 Hewlett Packard Enterprise Development LP + * Copyright 2021-2023 Hewlett Packard Enterprise Development LP * Other additional copyright holders may be indicated within. 
* * The entirety of this work is licensed under the Apache License, @@ -20,6 +20,9 @@ package v1alpha2 import ( + "fmt" + "strings" + "github.com/HewlettPackard/dws/utils/updater" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -92,14 +95,46 @@ func (s WorkflowState) after(t WorkflowState) bool { // Strings associated with workflow statuses const ( - StatusPending = "Pending" - StatusQueued = "Queued" - StatusRunning = "Running" - StatusCompleted = "Completed" - StatusError = "Error" - StatusDriverWait = "DriverWait" + StatusPending = "Pending" + StatusQueued = "Queued" + StatusRunning = "Running" + StatusCompleted = "Completed" + StatusTransientCondition = "TransientCondition" + StatusError = "Error" + StatusDriverWait = "DriverWait" ) +// ToStatus will return a Status* string that goes with +// the given severity. +func (severity ResourceErrorSeverity) ToStatus() (string, error) { + switch severity { + case SeverityMinor: + return StatusRunning, nil + case SeverityMajor: + return StatusTransientCondition, nil + case SeverityFatal: + return StatusError, nil + default: + return "", fmt.Errorf("unknown severity: %s", string(severity)) + } +} + +// SeverityStringToStatus will return a Status* string that goes with +// the given severity. +// An empty severity string will be considered a minor severity. +func SeverityStringToStatus(severity string) (string, error) { + switch strings.ToLower(severity) { + case "", "minor": + return SeverityMinor.ToStatus() + case "major": + return SeverityMajor.ToStatus() + case "fatal": + return SeverityFatal.ToStatus() + default: + return "", fmt.Errorf("unknown severity: %s", severity) + } +} + // WorkflowSpec defines the desired state of Workflow type WorkflowSpec struct { // Desired state for the workflow to be in. Unless progressing to the teardown state, @@ -147,8 +182,8 @@ type WorkflowDriverStatus struct { // User readable reason. // For the CDS driver, this could be the state of the underlying - // data movement request: Pending, Queued, Running, Completed or Error - // +kubebuilder:validation:Enum=Pending;Queued;Running;Completed;Error;DriverWait + // data movement request + // +kubebuilder:validation:Enum=Pending;Queued;Running;Completed;TransientCondition;Error;DriverWait Status string `json:"status,omitempty"` // Message provides additional details on the current status of the resource @@ -172,8 +207,12 @@ type WorkflowStatus struct { // Indicates whether State has been reached. Ready bool `json:"ready"` - // User readable reason and status message - // +kubebuilder:validation:Enum=Completed;DriverWait;Error + // User readable reason and status message. + // - Completed: The workflow has reached the state in workflow.Status.State. + // - DriverWait: The underlying drivers are currently running. + // - TransientCondition: A driver has encountered an error that might be recoverable. + // - Error: A driver has encountered an error that will not recover. 
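A hedged sketch of how a driver is expected to choose between these status strings: the severity recorded on a ResourceError maps directly onto them through the ToStatus()/SeverityStringToStatus() helpers added earlier in this file (workflowError and driverStatus are illustrative names, not fields introduced by this change):

status, err := workflowError.Severity.ToStatus() // Minor=>Running, Major=>TransientCondition, Fatal=>Error
if err != nil {
	return err
}
driverStatus.Status = status
driverStatus.Message = workflowError.GetUserMessage()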
+ // +kubebuilder:validation:Enum=Completed;DriverWait;TransientCondition;Error Status string `json:"status,omitempty"` // Message provides additional details on the current status of the resource diff --git a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/zz_generated.deepcopy.go b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/zz_generated.deepcopy.go index 2a7278155..96f72a977 100644 --- a/vendor/github.com/HewlettPackard/dws/api/v1alpha2/zz_generated.deepcopy.go +++ b/vendor/github.com/HewlettPackard/dws/api/v1alpha2/zz_generated.deepcopy.go @@ -891,6 +891,7 @@ func (in *ServersStatus) DeepCopyInto(out *ServersStatus) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + in.ResourceError.DeepCopyInto(&out.ResourceError) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ServersStatus. diff --git a/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_clientmounts.yaml b/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_clientmounts.yaml index 698b6ea28..716467db0 100644 --- a/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_clientmounts.yaml +++ b/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_clientmounts.yaml @@ -269,7 +269,14 @@ spec: storage: false subresources: status: {} - - name: v1alpha2 + - additionalPrinterColumns: + - jsonPath: .status.error.severity + name: ERROR + type: string + - jsonPath: .metadata.creationTimestamp + name: AGE + type: date + name: v1alpha2 schema: openAPIV3Schema: description: ClientMount is the Schema for the clientmounts API @@ -484,17 +491,28 @@ spec: debugMessage: description: Internal debug message for the error type: string - recoverable: - description: Indication if the error is likely recoverable or - not - type: boolean + severity: + description: Indication of how severe the error is. Minor will + likely succeed, Major may succeed, and Fatal will never succeed. + enum: + - Minor + - Major + - Fatal + type: string + type: + description: Internal or user error + enum: + - Internal + - User + type: string userMessage: description: Optional user facing message if the error is relevant to an end user type: string required: - debugMessage - - recoverable + - severity + - type type: object mounts: description: List of mount statuses diff --git a/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_directivebreakdowns.yaml b/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_directivebreakdowns.yaml index d13dd663d..ede580e6e 100644 --- a/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_directivebreakdowns.yaml +++ b/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_directivebreakdowns.yaml @@ -319,6 +319,9 @@ spec: jsonPath: .status.ready name: READY type: boolean + - jsonPath: .status.error.severity + name: ERROR + type: string - jsonPath: .metadata.creationTimestamp name: AGE type: date @@ -448,17 +451,28 @@ spec: debugMessage: description: Internal debug message for the error type: string - recoverable: - description: Indication if the error is likely recoverable or - not - type: boolean + severity: + description: Indication of how severe the error is. Minor will + likely succeed, Major may succeed, and Fatal will never succeed. 
+ enum: + - Minor + - Major + - Fatal + type: string + type: + description: Internal or user error + enum: + - Internal + - User + type: string userMessage: description: Optional user facing message if the error is relevant to an end user type: string required: - debugMessage - - recoverable + - severity + - type type: object ready: description: Ready indicates whether AllocationSets have been generated diff --git a/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_persistentstorageinstances.yaml b/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_persistentstorageinstances.yaml index 4253ebee6..274055ad4 100644 --- a/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_persistentstorageinstances.yaml +++ b/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_persistentstorageinstances.yaml @@ -207,7 +207,14 @@ spec: storage: false subresources: status: {} - - name: v1alpha2 + - additionalPrinterColumns: + - jsonPath: .status.error.severity + name: ERROR + type: string + - jsonPath: .metadata.creationTimestamp + name: AGE + type: date + name: v1alpha2 schema: openAPIV3Schema: description: PersistentStorageInstance is the Schema for the Persistentstorageinstances @@ -335,17 +342,28 @@ spec: debugMessage: description: Internal debug message for the error type: string - recoverable: - description: Indication if the error is likely recoverable or - not - type: boolean + severity: + description: Indication of how severe the error is. Minor will + likely succeed, Major may succeed, and Fatal will never succeed. + enum: + - Minor + - Major + - Fatal + type: string + type: + description: Internal or user error + enum: + - Internal + - User + type: string userMessage: description: Optional user facing message if the error is relevant to an end user type: string required: - debugMessage - - recoverable + - severity + - type type: object servers: description: Servers refers to the Servers resource that provides diff --git a/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_servers.yaml b/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_servers.yaml index bb9c2dd4b..74649af1b 100644 --- a/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_servers.yaml +++ b/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_servers.yaml @@ -135,6 +135,9 @@ spec: jsonPath: .status.ready name: READY type: boolean + - jsonPath: .status.error.severity + name: ERROR + type: string - jsonPath: .metadata.creationTimestamp name: AGE type: date @@ -233,6 +236,35 @@ spec: - storage type: object type: array + error: + description: Error information + properties: + debugMessage: + description: Internal debug message for the error + type: string + severity: + description: Indication of how severe the error is. Minor will + likely succeed, Major may succeed, and Fatal will never succeed. 
+ enum: + - Minor + - Major + - Fatal + type: string + type: + description: Internal or user error + enum: + - Internal + - User + type: string + userMessage: + description: Optional user facing message if the error is relevant + to an end user + type: string + required: + - debugMessage + - severity + - type + type: object lastUpdate: format: date-time type: string diff --git a/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_systemconfigurations.yaml b/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_systemconfigurations.yaml index 85f65967a..acfad5282 100644 --- a/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_systemconfigurations.yaml +++ b/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_systemconfigurations.yaml @@ -177,6 +177,13 @@ spec: - type: string x-kubernetes-int-or-string: true type: array + portsCooldownInSeconds: + default: 60 + description: PortsCooldownInSeconds is the number of seconds to wait + before a port can be reused. Defaults to 60 seconds (to match the + typical value for the kernel's TIME_WAIT). A value of 0 means the + ports can be reused immediately. + type: integer storageNodes: description: StorageNodes is the list of storage nodes on the system items: @@ -212,6 +219,8 @@ spec: - type type: object type: array + required: + - portsCooldownInSeconds type: object status: description: SystemConfigurationStatus defines the status of SystemConfiguration diff --git a/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_workflows.yaml b/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_workflows.yaml index 62735bb69..7cf158bcb 100644 --- a/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_workflows.yaml +++ b/vendor/github.com/HewlettPackard/dws/config/crd/bases/dws.cray.hpe.com_workflows.yaml @@ -603,14 +603,14 @@ spec: status of the resource type: string status: - description: 'User readable reason. For the CDS driver, this - could be the state of the underlying data movement request: Pending, - Queued, Running, Completed or Error' + description: User readable reason. For the CDS driver, this + could be the state of the underlying data movement request enum: - Pending - Queued - Running - Completed + - TransientCondition - Error - DriverWait type: string @@ -673,10 +673,15 @@ spec: - Teardown type: string status: - description: User readable reason and status message + description: 'User readable reason and status message. - Completed: + The workflow has reached the state in workflow.Status.State. - DriverWait: + The underlying drivers are currently running. - TransientCondition: + A driver has encountered an error that might be recoverable. - Error: + A driver has encountered an error that will not recover.' 
enum: - Completed - DriverWait + - TransientCondition - Error type: string required: diff --git a/vendor/github.com/HewlettPackard/dws/controllers/clientmount_controller.go b/vendor/github.com/HewlettPackard/dws/controllers/clientmount_controller.go index 7e99b6754..87bbc2107 100644 --- a/vendor/github.com/HewlettPackard/dws/controllers/clientmount_controller.go +++ b/vendor/github.com/HewlettPackard/dws/controllers/clientmount_controller.go @@ -66,6 +66,7 @@ func (r *ClientMountReconciler) Reconcile(ctx context.Context, req ctrl.Request) // in clientMount.Status{} change statusUpdater := updater.NewStatusUpdater[*dwsv1alpha2.ClientMountStatus](clientMount) defer func() { err = statusUpdater.CloseWithStatusUpdate(ctx, r.Client.Status(), err) }() + defer func() { clientMount.Status.SetResourceError(err) }() // Handle cleanup if the resource is being deleted if !clientMount.GetDeletionTimestamp().IsZero() { diff --git a/vendor/github.com/HewlettPackard/dws/controllers/workflow_controller.go b/vendor/github.com/HewlettPackard/dws/controllers/workflow_controller.go index 78df0f8c2..fb1576a60 100644 --- a/vendor/github.com/HewlettPackard/dws/controllers/workflow_controller.go +++ b/vendor/github.com/HewlettPackard/dws/controllers/workflow_controller.go @@ -24,6 +24,7 @@ import ( "fmt" "reflect" "runtime" + "sort" "time" "github.com/go-logr/logr" @@ -162,6 +163,9 @@ func (r *WorkflowReconciler) Reconcile(ctx context.Context, req ctrl.Request) (r err = r.Update(ctx, workflow) if err != nil { + if apierrors.IsConflict(err) { + return ctrl.Result{}, nil + } log.Error(err, "Failed to add computes reference") } return ctrl.Result{}, err @@ -179,25 +183,47 @@ func (r *WorkflowReconciler) Reconcile(ctx context.Context, req ctrl.Request) (r workflow.Status.Status = dwsv1alpha2.StatusCompleted workflow.Status.Message = "" - // Loop through the driver status array and update the workflow - // status as necessary + // Loop through the driver status array and find the entries that are for the current state + drivers := []dwsv1alpha2.WorkflowDriverStatus{} + for _, driver := range workflow.Status.Drivers { if driver.WatchState != workflow.Status.State { continue } - if driver.Completed == false { - workflow.Status.Ready = false - workflow.Status.Status = dwsv1alpha2.StatusDriverWait - } + drivers = append(drivers, driver) + } - if driver.Message != "" { - workflow.Status.Message = fmt.Sprintf("DW Directive %d: %s", driver.DWDIndex, driver.Message) - } + if len(drivers) > 0 { + // Sort the driver entries by the priority of their status + sort.Slice(drivers, func(i, j int) bool { + return statusPriority(drivers[i].Status) > statusPriority(drivers[j].Status) + }) + + // Pull info from the driver entries with the highest priority. This means + // we'll only report status info in the workflow status section based on the + // most important driver status. Error > TransientCondition > Running > Completed. This + // keeps us from overwriting the workflow.Status.Message with a message from + // a less interesting driver entry.
+ priority := statusPriority(drivers[0].Status) + for _, driver := range drivers { + if driver.Completed == false { + workflow.Status.Ready = false + } + + if statusPriority(driver.Status) < priority { + break + } + + if driver.Message != "" { + workflow.Status.Message = fmt.Sprintf("DW Directive %d: %s", driver.DWDIndex, driver.Message) + } - if driver.Status == dwsv1alpha2.StatusError { - workflow.Status.Status = dwsv1alpha2.StatusError - break + if driver.Status == dwsv1alpha2.StatusTransientCondition || driver.Status == dwsv1alpha2.StatusError || driver.Status == dwsv1alpha2.StatusCompleted { + workflow.Status.Status = driver.Status + } else { + workflow.Status.Status = dwsv1alpha2.StatusDriverWait + } } } @@ -244,6 +270,29 @@ func (r *WorkflowReconciler) createComputes(ctx context.Context, wf *dwsv1alpha2 return computes, nil } +// statusPriority returns the priority of a driver's status. Errors have +// the highest priority and completed entries have the lowest priority. +func statusPriority(status string) int { + switch status { + case dwsv1alpha2.StatusCompleted: + return 1 + case dwsv1alpha2.StatusDriverWait: + fallthrough + case dwsv1alpha2.StatusPending: + fallthrough + case dwsv1alpha2.StatusQueued: + fallthrough + case dwsv1alpha2.StatusRunning: + return 2 + case dwsv1alpha2.StatusTransientCondition: + return 3 + case dwsv1alpha2.StatusError: + return 4 + } + + panic(status) +} + type workflowStatusUpdater struct { workflow *dwsv1alpha2.Workflow existingStatus dwsv1alpha2.WorkflowStatus diff --git a/vendor/go.openly.dev/pointy/.gitignore b/vendor/go.openly.dev/pointy/.gitignore new file mode 100644 index 000000000..f1c181ec9 --- /dev/null +++ b/vendor/go.openly.dev/pointy/.gitignore @@ -0,0 +1,12 @@ +# Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib + +# Test binary, build with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out diff --git a/vendor/go.openly.dev/pointy/LICENSE b/vendor/go.openly.dev/pointy/LICENSE new file mode 100644 index 000000000..4f639d4b8 --- /dev/null +++ b/vendor/go.openly.dev/pointy/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2018 Mateusz Wielbut + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE.
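As a reading aid for the severity and status changes above: a minimal, self-contained sketch of how the new severity-to-status mapping and the controller's statusPriority ordering fit together. It restates the mapping locally rather than importing the dws packages, and the sample driver statuses are illustrative only.

```golang
package main

import (
	"fmt"
	"sort"
	"strings"
)

// severityToStatus mirrors dwsv1alpha2.SeverityStringToStatus: an empty or
// "minor" severity keeps the workflow Running, "major" maps to
// TransientCondition, and "fatal" maps to Error.
func severityToStatus(severity string) (string, error) {
	switch strings.ToLower(severity) {
	case "", "minor":
		return "Running", nil
	case "major":
		return "TransientCondition", nil
	case "fatal":
		return "Error", nil
	default:
		return "", fmt.Errorf("unknown severity: %s", severity)
	}
}

// statusPriority mirrors the controller's ordering of driver statuses:
// Error > TransientCondition > in-progress states > Completed.
// (The controller panics on an unknown status; this sketch returns 0.)
func statusPriority(status string) int {
	switch status {
	case "Completed":
		return 1
	case "DriverWait", "Pending", "Queued", "Running":
		return 2
	case "TransientCondition":
		return 3
	case "Error":
		return 4
	}
	return 0
}

func main() {
	// Driver entries for the current state, in arbitrary order.
	drivers := []string{"Completed", "TransientCondition", "Running"}

	// Sort so the most important status comes first, as the controller does.
	sort.Slice(drivers, func(i, j int) bool {
		return statusPriority(drivers[i]) > statusPriority(drivers[j])
	})
	fmt.Println(drivers[0]) // TransientCondition outranks Running and Completed

	status, _ := severityToStatus("major")
	fmt.Println(status) // TransientCondition
}
```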
diff --git a/vendor/go.openly.dev/pointy/README.md b/vendor/go.openly.dev/pointy/README.md new file mode 100644 index 000000000..1426a5a70 --- /dev/null +++ b/vendor/go.openly.dev/pointy/README.md @@ -0,0 +1,154 @@ +# pointy + +Simple helper functions to provide a shorthand to get a pointer to a variable holding a constant...because it's annoying when you have to do it hundreds of times in unit tests: + +```golang + +val := 42 +pointerToVal := &val +// vs. +pointerToVal := pointy.Int(42) // if using Go 1.17 or earlier w/o generics +pointerToVal := pointy.Pointer(42) // if using Go 1.18+ w/ generics +``` + +### New in release 2.0.0 + +🚨 Breaking change + +Package has changed to `go.openly.dev`. Please use +``` +import "go.openly.dev/pointy" +``` + +### New in release 1.2.0 + +Generic implementation of the pointer-to-value and value-to-pointer functions. *Requires Go 1.18+.* +The type-specific functions are still available for backwards-compatibility. + +```golang +pointerToInt := pointy.Pointer(42) +pointerToString := pointy.Pointer("foo") +// then later in your code.. +intValue := pointy.PointerValue(pointerToInt, 99) +stringValue := pointy.PointerValue(pointerToString, "bar") +``` + +Convenience functions to safely compare pointers by their dereferenced values: + +```golang +// when both values are pointers +a := pointy.Int(1) +b := pointy.Int(1) +if pointy.PointersValueEqual(a, b) { + fmt.Println("a and b contain equal dereferenced values") +} + +// or if just one is a pointer +a := pointy.Int(1) +b := 1 +if pointy.PointerValueEqual(a, b) { + fmt.Println("a and b contain equal dereferenced values") +} +``` + +### New in release 1.1.0 + +Additional helper functions have been added to safely dereference pointers +or return a fallback value: + +```golang +val := 42 +pointerToVal := &val +// then later in your code.. 
+myVal := pointy.IntValue(pointerToVal, 99) // returns 42 (or 99 if pointerToVal was nil) +``` + +## GoDoc + +[https://godoc.org/github.com/openly-engineering/pointy](https://pkg.go.dev/github.com/openly-engineering/pointy) + +## Installation + +`go get go.openly.dev/pointy` + +## Example + +```golang +package main + +import ( + "fmt" + + "go.openly.dev/pointy" +) + +func main() { + foo := pointy.Pointer(2018) + fmt.Println("foo is a pointer to:", *foo) + + bar := pointy.Pointer("point to me") + fmt.Println("bar is a pointer to:", *bar) + + // get the value back out (new in v1.1.0) + barVal := pointy.PointerValue(bar, "empty!") + fmt.Println("bar's value is:", barVal) +} +``` + +## Available Functions + +`Pointer[T any](x T) *T` +`PointerValue[T any](p *T, fallback T) T` +`Bool(x bool) *bool` +`BoolValue(p *bool, fallback bool) bool` +`Byte(x byte) *byte` +`ByteValue(p *byte, fallback byte) byte` +`Complex128(x complex128) *complex128` +`Complex128Value(p *complex128, fallback complex128) complex128` +`Complex64(x complex64) *complex64` +`Complex64Value(p *complex64, fallback complex64) complex64` +`Float32(x float32) *float32` +`Float32Value(p *float32, fallback float32) float32` +`Float64(x float64) *float64` +`Float64Value(p *float64, fallback float64) float64` +`Int(x int) *int` +`IntValue(p *int, fallback int) int` +`Int8(x int8) *int8` +`Int8Value(p *int8, fallback int8) int8` +`Int16(x int16) *int16` +`Int16Value(p *int16, fallback int16) int16` +`Int32(x int32) *int32` +`Int32Value(p *int32, fallback int32) int32` +`Int64(x int64) *int64` +`Int64Value(p *int64, fallback int64) int64` +`Uint(x uint) *uint` +`UintValue(p *uint, fallback uint) uint` +`Uint8(x uint8) *uint8` +`Uint8Value(p *uint8, fallback uint8) uint8` +`Uint16(x uint16) *uint16` +`Uint16Value(p *uint16, fallback uint16) uint16` +`Uint32(x uint32) *uint32` +`Uint32Value(p *uint32, fallback uint32) uint32` +`Uint64(x uint64) *uint64` +`Uint64Value(p *uint64, fallback uint64) uint64` +`String(x string) *string` +`StringValue(p *string, fallback string) string` +`Rune(x rune) *rune` +`RuneValue(p *rune, fallback rune) rune` +`PointersValueEqual[T comparable](a *T, b *T) bool` +`PointerValueEqual[T comparable](a *T, b T) bool` +## Motivation + +Creating pointers to literal constant values is useful, especially in unit tests. Go doesn't support simply using the address operator (&) to reference the location of e.g. `value := &int64(42)` so we're forced to [create](https://stackoverflow.com/questions/35146286/find-address-of-constant-in-go/35146856#35146856) [little](https://stackoverflow.com/questions/34197248/how-can-i-store-reference-to-the-result-of-an-operation-in-go/34197367#34197367) [workarounds](https://stackoverflow.com/questions/30716354/how-do-i-do-a-literal-int64-in-go/30716481#30716481). A common solution is to create a helper function: + +```golang +func createInt64Pointer(x int64) *int64 { + return &x +} +// now you can create a pointer to 42 inline +value := createInt64Pointer(42) +``` + +This package provides a library of these simple little helper functions for every native Go primitive. + +Made @ Openly. [Join us](https://careers.openly.com/) and use Go to build cool stuff. 
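The vendored README above covers the basics; as a small, hypothetical sketch of the pattern this module is typically used for in operator code, here is an optional field dereferenced with a fallback. The ExampleSpec type and its field are illustrative and not part of this changeset.

```golang
package main

import (
	"fmt"

	"go.openly.dev/pointy"
)

// ExampleSpec is a hypothetical spec with an optional field, following the
// common Kubernetes convention that a nil pointer means "not set".
type ExampleSpec struct {
	Replicas *int `json:"replicas,omitempty"`
}

func main() {
	spec := ExampleSpec{Replicas: pointy.Pointer(3)}

	// Dereference with a fallback for the nil ("not set") case.
	fmt.Println(pointy.PointerValue(spec.Replicas, 1)) // prints 3
}
```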
diff --git a/vendor/go.openly.dev/pointy/comparison.go b/vendor/go.openly.dev/pointy/comparison.go new file mode 100644 index 000000000..4541ab1ff --- /dev/null +++ b/vendor/go.openly.dev/pointy/comparison.go @@ -0,0 +1,25 @@ +package pointy + +// PointersValueEqual returns true if both pointer parameters are nil or contain the same dereferenced value. +func PointersValueEqual[T comparable](a *T, b *T) bool { + if a == nil && b == nil { + return true + } + if a != nil && b != nil && *a == *b { + return true + } + + return false +} + +// PointerValueEqual returns true if the pointer parameter is not nil and contains the same dereferenced value as the value parameter. +func PointerValueEqual[T comparable](a *T, b T) bool { + if a == nil { + return false + } + if *a == b { + return true + } + + return false +} diff --git a/vendor/go.openly.dev/pointy/pointy.go b/vendor/go.openly.dev/pointy/pointy.go new file mode 100644 index 000000000..0bbe4988c --- /dev/null +++ b/vendor/go.openly.dev/pointy/pointy.go @@ -0,0 +1,250 @@ +// Package pointy is a set of simple helper functions to provide a shorthand to +// get a pointer to a variable holding a constant. +package pointy + +// Bool returns a pointer to a variable holding the supplied bool constant +func Bool(x bool) *bool { + return &x +} + +// BoolValue returns the bool value pointed to by p or fallback if p is nil +func BoolValue(p *bool, fallback bool) bool { + if p == nil { + return fallback + } + return *p +} + +// Byte returns a pointer to a variable holding the supplied byte constant +func Byte(x byte) *byte { + return &x +} + +// ByteValue returns the byte value pointed to by p or fallback if p is nil +func ByteValue(p *byte, fallback byte) byte { + if p == nil { + return fallback + } + return *p +} + +// Complex128 returns a pointer to a variable holding the supplied complex128 constant +func Complex128(x complex128) *complex128 { + return &x +} + +// Complex128Value returns the complex128 value pointed to by p or fallback if p is nil +func Complex128Value(p *complex128, fallback complex128) complex128 { + if p == nil { + return fallback + } + return *p +} + +// Complex64 returns a pointer to a variable holding the supplied complex64 constant +func Complex64(x complex64) *complex64 { + return &x +} + +// Complex64Value returns the complex64 value pointed to by p or fallback if p is nil +func Complex64Value(p *complex64, fallback complex64) complex64 { + if p == nil { + return fallback + } + return *p +} + +// Float32 returns a pointer to a variable holding the supplied float32 constant +func Float32(x float32) *float32 { + return &x +} + +// Float32Value returns the float32 value pointed to by p or fallback if p is nil +func Float32Value(p *float32, fallback float32) float32 { + if p == nil { + return fallback + } + return *p +} + +// Float64 returns a pointer to a variable holding the supplied float64 constant +func Float64(x float64) *float64 { + return &x +} + +// Float64Value returns the float64 value pointed to by p or fallback if p is nil +func Float64Value(p *float64, fallback float64) float64 { + if p == nil { + return fallback + } + return *p +} + +// Int returns a pointer to a variable holding the supplied int constant +func Int(x int) *int { + return &x +} + +// IntValue returns the int value pointed to by p or fallback if p is nil +func IntValue(p *int, fallback int) int { + if p == nil { + return fallback + } + return *p +} + +// Int8 returns a pointer to a variable holding the supplied int8 constant +func Int8(x int8) *int8 
{ + return &x +} + +// Int8Value returns the int8 value pointed to by p or fallback if p is nil +func Int8Value(p *int8, fallback int8) int8 { + if p == nil { + return fallback + } + return *p +} + +// Int16 returns a pointer to a variable holding the supplied int16 constant +func Int16(x int16) *int16 { + return &x +} + +// Int16Value returns the int16 value pointed to by p or fallback if p is nil +func Int16Value(p *int16, fallback int16) int16 { + if p == nil { + return fallback + } + return *p +} + +// Int32 returns a pointer to a variable holding the supplied int32 constant +func Int32(x int32) *int32 { + return &x +} + +// Int32Value returns the int32 value pointed to by p or fallback if p is nil +func Int32Value(p *int32, fallback int32) int32 { + if p == nil { + return fallback + } + return *p +} + +// Int64 returns a pointer to a variable holding the supplied int64 constant +func Int64(x int64) *int64 { + return &x +} + +// Int64Value returns the int64 value pointed to by p or fallback if p is nil +func Int64Value(p *int64, fallback int64) int64 { + if p == nil { + return fallback + } + return *p +} + +// Uint returns a pointer to a variable holding the supplied uint constant +func Uint(x uint) *uint { + return &x +} + +// UintValue returns the uint value pointed to by p or fallback if p is nil +func UintValue(p *uint, fallback uint) uint { + if p == nil { + return fallback + } + return *p +} + +// Uint8 returns a pointer to a variable holding the supplied uint8 constant +func Uint8(x uint8) *uint8 { + return &x +} + +// Uint8Value returns the uint8 value pointed to by p or fallback if p is nil +func Uint8Value(p *uint8, fallback uint8) uint8 { + if p == nil { + return fallback + } + return *p +} + +// Uint16 returns a pointer to a variable holding the supplied uint16 constant +func Uint16(x uint16) *uint16 { + return &x +} + +// Uint16Value returns the uint16 value pointed to by p or fallback if p is nil +func Uint16Value(p *uint16, fallback uint16) uint16 { + if p == nil { + return fallback + } + return *p +} + +// Uint32 returns a pointer to a variable holding the supplied uint32 constant +func Uint32(x uint32) *uint32 { + return &x +} + +// Uint32Value returns the uint32 value pointed to by p or fallback if p is nil +func Uint32Value(p *uint32, fallback uint32) uint32 { + if p == nil { + return fallback + } + return *p +} + +// Uint64 returns a pointer to a variable holding the supplied uint64 constant +func Uint64(x uint64) *uint64 { + return &x +} + +// Uint64Value returns the uint64 value pointed to by p or fallback if p is nil +func Uint64Value(p *uint64, fallback uint64) uint64 { + if p == nil { + return fallback + } + return *p +} + +// String returns a pointer to a variable holding the supplied string constant +func String(x string) *string { + return &x +} + +// StringValue returns the string value pointed to by p or fallback if p is nil +func StringValue(p *string, fallback string) string { + if p == nil { + return fallback + } + return *p +} + +// Rune returns a pointer to a variable holding the supplied rune constant +func Rune(x rune) *rune { + return &x +} + +// RuneValue returns the rune value pointed to by p or fallback if p is nil +func RuneValue(p *rune, fallback rune) rune { + if p == nil { + return fallback + } + return *p +} + +// Pointer returns a pointer to a variable holding the supplied T constant +func Pointer[T any](x T) *T { + return &x +} + +// PointerValue returns the T value pointed to by p or fallback if p is nil +func PointerValue[T any](p *T, 
fallback T) T { + if p == nil { + return fallback + } + return *p +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 74e03a88b..e5a25c19d 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1,4 +1,4 @@ -# github.com/HewlettPackard/dws v0.0.1-0.20230613201835-73abc41bd83c +# github.com/HewlettPackard/dws v0.0.1-0.20230907181649-2f6d9fca4249 ## explicit; go 1.19 github.com/HewlettPackard/dws/api/v1alpha2 github.com/HewlettPackard/dws/config/crd/bases @@ -287,6 +287,9 @@ go.opencensus.io/internal go.opencensus.io/trace go.opencensus.io/trace/internal go.opencensus.io/trace/tracestate +# go.openly.dev/pointy v1.3.0 +## explicit; go 1.18 +go.openly.dev/pointy # go.uber.org/atomic v1.11.0 ## explicit; go 1.18 go.uber.org/atomic
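Finally, a standalone sketch of the status.error shape accepted by the updated CRD schemas above: debugMessage, severity (Minor, Major, or Fatal), and type (Internal or User) are required, while userMessage is optional. The struct and the sample messages below are illustrative; they are not taken from the dws API types.

```golang
package main

import (
	"encoding/json"
	"fmt"
)

// resourceError mirrors the error schema added to the CRDs above.
type resourceError struct {
	DebugMessage string `json:"debugMessage"`
	Severity     string `json:"severity"` // Minor | Major | Fatal
	Type         string `json:"type"`     // Internal | User
	UserMessage  string `json:"userMessage,omitempty"`
}

func main() {
	e := resourceError{
		DebugMessage: "example: could not create file system",
		Severity:     "Major", // surfaced by the new ERROR printer column (.status.error.severity)
		Type:         "Internal",
	}

	out, _ := json.MarshalIndent(e, "", "  ")
	fmt.Println(string(out))
}
```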