From 2ca478c68f103c89b42b99e998f2d48e9501ecb6 Mon Sep 17 00:00:00 2001 From: Blake Devcich Date: Tue, 3 Dec 2024 11:30:30 -0600 Subject: [PATCH] Add lustre component count to NnfStorage/NnfNodeStorage API --- api/v1alpha2/conversion.go | 21 +++- api/v1alpha3/conversion.go | 21 +++- api/v1alpha4/nnfnodestorage_types.go | 3 + api/v1alpha4/nnfstorage_types.go | 12 ++ api/v1alpha4/zz_generated.deepcopy.go | 48 +++++++- .../nnf.cray.hpe.com_nnfnodestorages.yaml | 24 ++++ .../bases/nnf.cray.hpe.com_nnfstorages.yaml | 24 ++++ internal/controller/filesystem_helpers.go | 106 ++--------------- .../controller/nnf_clientmount_controller.go | 108 ++++++++++++++++-- .../nnf_clientmount_controller_test.go | 7 +- internal/controller/nnf_storage_controller.go | 43 +++++++ .../controller/nnf_storage_controller_test.go | 77 +++++++++++++ 12 files changed, 374 insertions(+), 120 deletions(-) create mode 100644 internal/controller/nnf_storage_controller_test.go diff --git a/api/v1alpha2/conversion.go b/api/v1alpha2/conversion.go index 8806dd0d..95954a29 100644 --- a/api/v1alpha2/conversion.go +++ b/api/v1alpha2/conversion.go @@ -343,12 +343,20 @@ func (src *NnfNodeStorage) ConvertTo(dstRaw conversion.Hub) error { // Manually restore data. restored := &nnfv1alpha4.NnfNodeStorage{} - if ok, err := utilconversion.UnmarshalData(src, restored); err != nil || !ok { + hasAnno, err := utilconversion.UnmarshalData(src, restored) + if err != nil { return err } // EDIT THIS FUNCTION! If the annotation is holding anything that is // hub-specific then copy it into 'dst' from 'restored'. // Otherwise, you may comment out UnmarshalData() until it's needed. + if hasAnno { + dst.Spec.LustreStorage.LustreComponents.MDTs = append([]string(nil), restored.Spec.LustreStorage.LustreComponents.MDTs...) + dst.Spec.LustreStorage.LustreComponents.MGTs = append([]string(nil), restored.Spec.LustreStorage.LustreComponents.MGTs...) 
+ dst.Spec.LustreStorage.LustreComponents.MGTMDTs = append([]string(nil), restored.Spec.LustreStorage.LustreComponents.MGTMDTs...) + dst.Spec.LustreStorage.LustreComponents.OSTs = append([]string(nil), restored.Spec.LustreStorage.LustreComponents.OSTs...) + dst.Spec.LustreStorage.LustreComponents.NNFNodes = append([]string(nil), restored.Spec.LustreStorage.LustreComponents.NNFNodes...) + } return nil } @@ -407,12 +415,21 @@ func (src *NnfStorage) ConvertTo(dstRaw conversion.Hub) error { // Manually restore data. restored := &nnfv1alpha4.NnfStorage{} - if ok, err := utilconversion.UnmarshalData(src, restored); err != nil || !ok { + hasAnno, err := utilconversion.UnmarshalData(src, restored) + if err != nil { return err } + // EDIT THIS FUNCTION! If the annotation is holding anything that is // hub-specific then copy it into 'dst' from 'restored'. // Otherwise, you may comment out UnmarshalData() until it's needed. + if hasAnno { + dst.Status.LustreComponents.MDTs = append([]string(nil), restored.Status.LustreComponents.MDTs...) + dst.Status.LustreComponents.MGTs = append([]string(nil), restored.Status.LustreComponents.MGTs...) + dst.Status.LustreComponents.MGTMDTs = append([]string(nil), restored.Status.LustreComponents.MGTMDTs...) + dst.Status.LustreComponents.OSTs = append([]string(nil), restored.Status.LustreComponents.OSTs...) + dst.Status.LustreComponents.NNFNodes = append([]string(nil), restored.Status.LustreComponents.NNFNodes...) + } return nil } diff --git a/api/v1alpha3/conversion.go b/api/v1alpha3/conversion.go index 14e5819c..0b5141b7 100644 --- a/api/v1alpha3/conversion.go +++ b/api/v1alpha3/conversion.go @@ -342,12 +342,20 @@ func (src *NnfNodeStorage) ConvertTo(dstRaw conversion.Hub) error { // Manually restore data. restored := &nnfv1alpha4.NnfNodeStorage{} - if ok, err := utilconversion.UnmarshalData(src, restored); err != nil || !ok { + hasAnno, err := utilconversion.UnmarshalData(src, restored) + if err != nil { return err } // EDIT THIS FUNCTION! 
If the annotation is holding anything that is // hub-specific then copy it into 'dst' from 'restored'. // Otherwise, you may comment out UnmarshalData() until it's needed. + if hasAnno { + dst.Spec.LustreStorage.LustreComponents.MDTs = append([]string(nil), restored.Spec.LustreStorage.LustreComponents.MDTs...) + dst.Spec.LustreStorage.LustreComponents.MGTs = append([]string(nil), restored.Spec.LustreStorage.LustreComponents.MGTs...) + dst.Spec.LustreStorage.LustreComponents.MGTMDTs = append([]string(nil), restored.Spec.LustreStorage.LustreComponents.MGTMDTs...) + dst.Spec.LustreStorage.LustreComponents.OSTs = append([]string(nil), restored.Spec.LustreStorage.LustreComponents.OSTs...) + dst.Spec.LustreStorage.LustreComponents.NNFNodes = append([]string(nil), restored.Spec.LustreStorage.LustreComponents.NNFNodes...) + } return nil } @@ -406,12 +414,21 @@ func (src *NnfStorage) ConvertTo(dstRaw conversion.Hub) error { // Manually restore data. restored := &nnfv1alpha4.NnfStorage{} - if ok, err := utilconversion.UnmarshalData(src, restored); err != nil || !ok { + hasAnno, err := utilconversion.UnmarshalData(src, restored) + if err != nil { return err } + // EDIT THIS FUNCTION! If the annotation is holding anything that is // hub-specific then copy it into 'dst' from 'restored'. // Otherwise, you may comment out UnmarshalData() until it's needed. + if hasAnno { + dst.Status.LustreComponents.MDTs = append([]string(nil), restored.Status.LustreComponents.MDTs...) + dst.Status.LustreComponents.MGTs = append([]string(nil), restored.Status.LustreComponents.MGTs...) + dst.Status.LustreComponents.MGTMDTs = append([]string(nil), restored.Status.LustreComponents.MGTMDTs...) + dst.Status.LustreComponents.OSTs = append([]string(nil), restored.Status.LustreComponents.OSTs...) + dst.Status.LustreComponents.NNFNodes = append([]string(nil), restored.Status.LustreComponents.NNFNodes...) 
+ } return nil } diff --git a/api/v1alpha4/nnfnodestorage_types.go b/api/v1alpha4/nnfnodestorage_types.go index 6b6b014b..f86f1fc1 100644 --- a/api/v1alpha4/nnfnodestorage_types.go +++ b/api/v1alpha4/nnfnodestorage_types.go @@ -88,6 +88,9 @@ type LustreStorageSpec struct { // BackFs is the type of backing filesystem to use. // +kubebuilder:validation:Enum=ldiskfs;zfs BackFs string `json:"backFs,omitempty"` + + // Component info for the Lustre filesystem + LustreComponents NnfStorageLustreComponents `json:"lustreComponents,omitempty"` } // NnfNodeStorageStatus defines the status for NnfNodeStorage diff --git a/api/v1alpha4/nnfstorage_types.go b/api/v1alpha4/nnfstorage_types.go index b9ab275c..ed133365 100644 --- a/api/v1alpha4/nnfstorage_types.go +++ b/api/v1alpha4/nnfstorage_types.go @@ -60,6 +60,15 @@ type NnfStorageLustreSpec struct { PersistentMgsReference corev1.ObjectReference `json:"persistentMgsReference,omitempty"` } +// NnfStorageLustreComponents lists, for each Lustre component type (MDT, MGT, MGTMDT, OST), the names of the nodes providing that component, plus the de-duplicated list of NNF nodes involved. +type NnfStorageLustreComponents struct { + MDTs []string `json:"mdts,omitempty"` + MGTs []string `json:"mgts,omitempty"` + MGTMDTs []string `json:"mgtmdts,omitempty"` + OSTs []string `json:"osts,omitempty"` + NNFNodes []string `json:"nnfNodes,omitempty"` +} + // NnfStorageAllocationSetSpec defines the details for an allocation set type NnfStorageAllocationSetSpec struct { // Name is a human readable label for this set of allocations (e.g., xfs) @@ -124,6 +133,9 @@ type NnfStorageLustreStatus struct { // LustgreMgtReference is an object reference to the NnfLustreMGT resource used // by the NnfStorage LustreMgtReference corev1.ObjectReference `json:"lustreMgtReference,omitempty"` + + // Component info for the Lustre filesystem + LustreComponents NnfStorageLustreComponents `json:"lustreComponents,omitempty"` } // NnfStorageStatus defines the observed status of NNF Storage. 
diff --git a/api/v1alpha4/zz_generated.deepcopy.go b/api/v1alpha4/zz_generated.deepcopy.go index dbbf2b92..a581ab62 100644 --- a/api/v1alpha4/zz_generated.deepcopy.go +++ b/api/v1alpha4/zz_generated.deepcopy.go @@ -32,6 +32,7 @@ import ( // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *LustreStorageSpec) DeepCopyInto(out *LustreStorageSpec) { *out = *in + in.LustreComponents.DeepCopyInto(&out.LustreComponents) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LustreStorageSpec. @@ -1188,7 +1189,7 @@ func (in *NnfNodeStorage) DeepCopyInto(out *NnfNodeStorage) { *out = *in out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - out.Spec = in.Spec + in.Spec.DeepCopyInto(&out.Spec) in.Status.DeepCopyInto(&out.Status) } @@ -1260,7 +1261,7 @@ func (in *NnfNodeStorageList) DeepCopyObject() runtime.Object { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *NnfNodeStorageSpec) DeepCopyInto(out *NnfNodeStorageSpec) { *out = *in - out.LustreStorage = in.LustreStorage + in.LustreStorage.DeepCopyInto(&out.LustreStorage) out.BlockReference = in.BlockReference } @@ -1583,6 +1584,46 @@ func (in *NnfStorageList) DeepCopyObject() runtime.Object { return nil } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *NnfStorageLustreComponents) DeepCopyInto(out *NnfStorageLustreComponents) { + *out = *in + if in.MDTs != nil { + in, out := &in.MDTs, &out.MDTs + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.MGTs != nil { + in, out := &in.MGTs, &out.MGTs + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.MGTMDTs != nil { + in, out := &in.MGTMDTs, &out.MGTMDTs + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.OSTs != nil { + in, out := &in.OSTs, &out.OSTs + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.NNFNodes != nil { + in, out := &in.NNFNodes, &out.NNFNodes + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NnfStorageLustreComponents. +func (in *NnfStorageLustreComponents) DeepCopy() *NnfStorageLustreComponents { + if in == nil { + return nil + } + out := new(NnfStorageLustreComponents) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *NnfStorageLustreSpec) DeepCopyInto(out *NnfStorageLustreSpec) { *out = *in @@ -1603,6 +1644,7 @@ func (in *NnfStorageLustreSpec) DeepCopy() *NnfStorageLustreSpec { func (in *NnfStorageLustreStatus) DeepCopyInto(out *NnfStorageLustreStatus) { *out = *in out.LustreMgtReference = in.LustreMgtReference + in.LustreComponents.DeepCopyInto(&out.LustreComponents) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NnfStorageLustreStatus. @@ -1915,7 +1957,7 @@ func (in *NnfStorageSpec) DeepCopy() *NnfStorageSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *NnfStorageStatus) DeepCopyInto(out *NnfStorageStatus) { *out = *in - out.NnfStorageLustreStatus = in.NnfStorageLustreStatus + in.NnfStorageLustreStatus.DeepCopyInto(&out.NnfStorageLustreStatus) if in.AllocationSets != nil { in, out := &in.AllocationSets, &out.AllocationSets *out = make([]NnfStorageAllocationSetStatus, len(*in)) diff --git a/config/crd/bases/nnf.cray.hpe.com_nnfnodestorages.yaml b/config/crd/bases/nnf.cray.hpe.com_nnfnodestorages.yaml index 2b0ddcb1..983ead17 100644 --- a/config/crd/bases/nnf.cray.hpe.com_nnfnodestorages.yaml +++ b/config/crd/bases/nnf.cray.hpe.com_nnfnodestorages.yaml @@ -576,6 +576,30 @@ spec: filesystem. maxLength: 8 type: string + lustreComponents: + description: Component info for the Lustre filesystem + properties: + mdts: + items: + type: string + type: array + mgtmdts: + items: + type: string + type: array + mgts: + items: + type: string + type: array + nnfNodes: + items: + type: string + type: array + osts: + items: + type: string + type: array + type: object mgsAddress: description: |- MgsAddress is the NID of the MGS to use. This is used only when diff --git a/config/crd/bases/nnf.cray.hpe.com_nnfstorages.yaml b/config/crd/bases/nnf.cray.hpe.com_nnfstorages.yaml index 3bad19ad..246619c4 100644 --- a/config/crd/bases/nnf.cray.hpe.com_nnfstorages.yaml +++ b/config/crd/bases/nnf.cray.hpe.com_nnfstorages.yaml @@ -841,6 +841,30 @@ spec: filesystem. 
maxLength: 8 type: string + lustreComponents: + description: Component info for the Lustre filesystem + properties: + mdts: + items: + type: string + type: array + mgtmdts: + items: + type: string + type: array + mgts: + items: + type: string + type: array + nnfNodes: + items: + type: string + type: array + osts: + items: + type: string + type: array + type: object lustreMgtReference: description: |- LustgreMgtReference is an object reference to the NnfLustreMGT resource used diff --git a/internal/controller/filesystem_helpers.go b/internal/controller/filesystem_helpers.go index dc5d0261..422b91ec 100644 --- a/internal/controller/filesystem_helpers.go +++ b/internal/controller/filesystem_helpers.go @@ -450,21 +450,16 @@ func newLustreFileSystem(ctx context.Context, c client.Client, nnfNodeStorage *n fs.CommandArgs.PreDeactivate = cmdLines.PreDeactivate fs.TempDir = fmt.Sprintf("/mnt/temp/%s-%d", nnfNodeStorage.Name, index) - log.Info("BLAKE labels - before filesystem", "labels", nnfNodeStorage.Labels) - server, err := getServerForClientMountOrNnfNodeStorage(ctx, c, nnfNodeStorage.Labels) - if err != nil { - return nil, dwsv1alpha2.NewResourceError("could not retrieve corresponding NnfServer resource for this NnfNodeStorage").WithError(err).WithMajor() - } - storages := getLustreMapping(server) + components := nnfNodeStorage.Spec.LustreStorage.LustreComponents fs.CommandArgs.Vars = map[string]string{ - "$USERID": fmt.Sprintf("%d", nnfNodeStorage.Spec.UserID), - "$GROUPID": fmt.Sprintf("%d", nnfNodeStorage.Spec.GroupID), - "$NUM_MGTS": fmt.Sprintf("%d", len(storages["mgt"])), - "$NUM_MDTS": fmt.Sprintf("%d", len(storages["mdt"])), - "$NUM_MGTMDTS": fmt.Sprintf("%d", len(storages["mgtmdt"])), - "$NUM_OSTS": fmt.Sprintf("%d", len(storages["ost"])), - "$NUM_RABBITS": fmt.Sprintf("%d", len(storages["rabbits"])), + "$USERID": fmt.Sprintf("%d", nnfNodeStorage.Spec.UserID), + "$GROUPID": fmt.Sprintf("%d", nnfNodeStorage.Spec.GroupID), + "$NUM_MDTS": fmt.Sprintf("%d", 
len(components.MDTs)), + "$NUM_MGTS": fmt.Sprintf("%d", len(components.MGTs)), + "$NUM_MGTMDTS": fmt.Sprintf("%d", len(components.MGTMDTs)), + "$NUM_OSTS": fmt.Sprintf("%d", len(components.OSTs)), + "$NUM_NNFNODES": fmt.Sprintf("%d", len(components.NNFNodes)), } log.Info("BLAKE", "Vars", fs.CommandArgs.Vars) @@ -554,88 +549,3 @@ func logicalVolumeName(ctx context.Context, c client.Client, nnfNodeStorage *nnf return "lv", nil } - -// Retrieve the ClientMount's corresponding NnfServer resource. To do this, we first need to get the corresponding NnfStorage resource. That is done by -// looking at the owner of the ClientMount resource. It should be NnfStorage. Then, we inspect the NnfStorage resource's owner. In this case, there can -// be two different owners: -// -// 1. Workflow (non-persistent storage case) -// 2. PersistentStorageInstance (persistent storage case) -// -// Once we understand who owns the NnfStorage resource, we can then obtain the NnfServer resource through slightly different methods. 
-func getServerForClientMountOrNnfNodeStorage(ctx context.Context, c client.Client, labels map[string]string) (*dwsv1alpha2.Servers, error) { - storageKind := "NnfStorage" - persistentKind := "PersistentStorageInstance" - workflowKind := "Workflow" - - // Get the owner and directive index from ClientMount's labels - ownerKind, ownerExists := labels[dwsv1alpha2.OwnerKindLabel] - ownerName, ownerNameExists := labels[dwsv1alpha2.OwnerNameLabel] - ownerNS, ownerNSExists := labels[dwsv1alpha2.OwnerNamespaceLabel] - _, idxExists := labels[nnfv1alpha4.DirectiveIndexLabel] - - // log.FromContext(ctx).Info("BLAKE labels - after", "labels", labels) - - // We should expect the owner to be NnfStorage and have the expected labels - if !ownerExists || !ownerNameExists || !ownerNSExists || !idxExists || ownerKind != storageKind { - return nil, dwsv1alpha2.NewResourceError(fmt.Sprintf("expected owner to be of kind NnfStorage and have the expected labels: %v", labels)).WithMajor() - } - - // Retrieve the NnfStorage resource - storage := &nnfv1alpha4.NnfStorage{ - ObjectMeta: metav1.ObjectMeta{ - Name: ownerName, - Namespace: ownerNS, - }, - } - if err := c.Get(ctx, client.ObjectKeyFromObject(storage), storage); err != nil { - return nil, dwsv1alpha2.NewResourceError("unable retrieve NnfStorage resource").WithError(err).WithMajor() - } - - // Get the owner and directive index from NnfStorage's labels - ownerKind, ownerExists = storage.Labels[dwsv1alpha2.OwnerKindLabel] - ownerName, ownerNameExists = storage.Labels[dwsv1alpha2.OwnerNameLabel] - ownerNS, ownerNSExists = storage.Labels[dwsv1alpha2.OwnerNamespaceLabel] - idx, idxExists := storage.Labels[nnfv1alpha4.DirectiveIndexLabel] - - // We should expect the owner of the NnfStorage to be Workflow or PersistentStorageInstance and - // have the expected labels - if !ownerExists || !ownerNameExists || !ownerNSExists || !idxExists || (ownerKind != workflowKind && ownerKind != persistentKind) { - return nil, 
dwsv1alpha2.NewResourceError("expected owner to be of kind Workflow or PersistentStorageInstance and have the expected labels").WithMajor() - } - - // If the owner is a workflow, then we can use the workflow labels and directive index to get - // the Servers Resource. - var listOptions []client.ListOption - if ownerKind == workflowKind { - listOptions = []client.ListOption{ - client.MatchingLabels(map[string]string{ - dwsv1alpha2.WorkflowNameLabel: ownerName, - dwsv1alpha2.WorkflowNamespaceLabel: ownerNS, - nnfv1alpha4.DirectiveIndexLabel: idx, - }), - } - } else { - // Otherwise the owner is a PersistentStorageInstance and we'll need to use the owner - // labels. It also will not have a directive index. - listOptions = []client.ListOption{ - client.MatchingLabels(map[string]string{ - dwsv1alpha2.OwnerKindLabel: ownerKind, - dwsv1alpha2.OwnerNameLabel: ownerName, - dwsv1alpha2.OwnerNamespaceLabel: ownerNS, - }), - } - } - - serversList := &dwsv1alpha2.ServersList{} - if err := c.List(ctx, serversList, listOptions...); err != nil { - return nil, dwsv1alpha2.NewResourceError("unable retrieve NnfServers resource").WithError(err).WithMajor() - } - - // We should only have 1 - if len(serversList.Items) != 1 { - return nil, dwsv1alpha2.NewResourceError(fmt.Sprintf("wrong number of NnfServers resources: expected 1, got %d", len(serversList.Items))).WithMajor() - } - - return &serversList.Items[0], nil -} diff --git a/internal/controller/nnf_clientmount_controller.go b/internal/controller/nnf_clientmount_controller.go index b2f6f484..99f802bb 100644 --- a/internal/controller/nnf_clientmount_controller.go +++ b/internal/controller/nnf_clientmount_controller.go @@ -22,6 +22,7 @@ package controller import ( "context" "encoding/json" + "fmt" "os" "path/filepath" "runtime" @@ -306,7 +307,7 @@ func (r *NnfClientMountReconciler) dumpServersToFile(ctx context.Context, client // Get the NnfServers Resource log.FromContext(ctx).Info("BLAKE labels - before clientmount", "labels", 
clientMount.Labels) - server, err := getServerForClientMountOrNnfNodeStorage(ctx, r.Client, clientMount.Labels) + server, err := r.getServerForClientMount(ctx, clientMount) if err != nil { return dwsv1alpha2.NewResourceError("could not retrieve corresponding NnfServer resource for this ClientMount").WithError(err).WithMajor() } @@ -320,7 +321,7 @@ func (r *NnfClientMountReconciler) dumpServersToFile(ctx context.Context, client encoder := json.NewEncoder(file) - components := getLustreMapping(server) + components := getLustreMappingFromServer(server) err = encoder.Encode(components) if err != nil { return dwsv1alpha2.NewResourceError("could not write JSON to file").WithError(err).WithMajor() @@ -334,16 +335,99 @@ func (r *NnfClientMountReconciler) dumpServersToFile(ctx context.Context, client return nil } +// Retrieve the ClientMount's corresponding NnfServer resource. To do this, we first need to get the corresponding NnfStorage resource. That is done by +// looking at the owner of the ClientMount resource. It should be NnfStorage. Then, we inspect the NnfStorage resource's owner. In this case, there can +// be two different owners: +// +// 1. Workflow (non-persistent storage case) +// 2. PersistentStorageInstance (persistent storage case) +// +// Once we understand who owns the NnfStorage resource, we can then obtain the NnfServer resource through slightly different methods. 
+func (r *NnfClientMountReconciler) getServerForClientMount(ctx context.Context, clientMount *dwsv1alpha2.ClientMount) (*dwsv1alpha2.Servers, error) { + storageKind := "NnfStorage" + persistentKind := "PersistentStorageInstance" + workflowKind := "Workflow" + + // Get the owner and directive index from ClientMount's labels + ownerKind, ownerExists := clientMount.Labels[dwsv1alpha2.OwnerKindLabel] + ownerName, ownerNameExists := clientMount.Labels[dwsv1alpha2.OwnerNameLabel] + ownerNS, ownerNSExists := clientMount.Labels[dwsv1alpha2.OwnerNamespaceLabel] + _, idxExists := clientMount.Labels[nnfv1alpha4.DirectiveIndexLabel] + + // We should expect the owner to be NnfStorage and have the expected labels + if !ownerExists || !ownerNameExists || !ownerNSExists || !idxExists || ownerKind != storageKind { + return nil, dwsv1alpha2.NewResourceError("expected owner to be of kind NnfStorage and have the expected labels").WithMajor() + } + + // Retrieve the NnfStorage resource + storage := &nnfv1alpha4.NnfStorage{ + ObjectMeta: metav1.ObjectMeta{ + Name: ownerName, + Namespace: ownerNS, + }, + } + if err := r.Get(ctx, client.ObjectKeyFromObject(storage), storage); err != nil { + return nil, dwsv1alpha2.NewResourceError("unable retrieve NnfStorage resource").WithError(err).WithMajor() + } + + // Get the owner and directive index from NnfStorage's labels + ownerKind, ownerExists = storage.Labels[dwsv1alpha2.OwnerKindLabel] + ownerName, ownerNameExists = storage.Labels[dwsv1alpha2.OwnerNameLabel] + ownerNS, ownerNSExists = storage.Labels[dwsv1alpha2.OwnerNamespaceLabel] + idx, idxExists := storage.Labels[nnfv1alpha4.DirectiveIndexLabel] + + // We should expect the owner of the NnfStorage to be Workflow or PersistentStorageInstance and + // have the expected labels + if !ownerExists || !ownerNameExists || !ownerNSExists || !idxExists || (ownerKind != workflowKind && ownerKind != persistentKind) { + return nil, dwsv1alpha2.NewResourceError("expected owner to be of kind Workflow or 
PersistentStorageInstance and have the expected labels").WithMajor() + } + + // If the owner is a workflow, then we can use the workflow labels and directive index to get + // the Servers Resource. + var listOptions []client.ListOption + if ownerKind == workflowKind { + listOptions = []client.ListOption{ + client.MatchingLabels(map[string]string{ + dwsv1alpha2.WorkflowNameLabel: ownerName, + dwsv1alpha2.WorkflowNamespaceLabel: ownerNS, + nnfv1alpha4.DirectiveIndexLabel: idx, + }), + } + } else { + // Otherwise the owner is a PersistentStorageInstance and we'll need to use the owner + // labels. It also will not have a directive index. + listOptions = []client.ListOption{ + client.MatchingLabels(map[string]string{ + dwsv1alpha2.OwnerKindLabel: ownerKind, + dwsv1alpha2.OwnerNameLabel: ownerName, + dwsv1alpha2.OwnerNamespaceLabel: ownerNS, + }), + } + } + + serversList := &dwsv1alpha2.ServersList{} + if err := r.List(ctx, serversList, listOptions...); err != nil { + return nil, dwsv1alpha2.NewResourceError("unable retrieve NnfServers resource").WithError(err).WithMajor() + } + + // We should only have 1 + if len(serversList.Items) != 1 { + return nil, dwsv1alpha2.NewResourceError(fmt.Sprintf("wrong number of NnfServers resources: expected 1, got %d", len(serversList.Items))).WithMajor() + } + + return &serversList.Items[0], nil +} + // Go through the Server's allocation sets to determine the number of Lustre components and rabbit // nodes. Returns a map with keys for each lustre component type and also the rabbits involved. The // list of rabbits is kept unique. 
-func getLustreMapping(server *dwsv1alpha2.Servers) map[string][]string { - storages := map[string][]string{ - "mgt": []string{}, - "mdt": []string{}, - "mgtmdt": []string{}, - "ost": []string{}, - "rabbits": []string{}, +func getLustreMappingFromServer(server *dwsv1alpha2.Servers) map[string][]string { + components := map[string][]string{ + "mdt": []string{}, + "mgt": []string{}, + "mgtmdt": []string{}, + "ost": []string{}, + "nnfNodes": []string{}, } rabbitMap := make(map[string]bool) // use a map to keep the list unique @@ -354,17 +438,17 @@ func getLustreMapping(server *dwsv1alpha2.Servers) map[string][]string { node := storage.Name // add to the list for that lustre component - storages[label] = append(storages[label], node) + components[label] = append(components[label], node) // add to the unique list of rabbits if _, found := rabbitMap[node]; !found { rabbitMap[node] = true - storages["rabbits"] = append(storages["rabbits"], node) + components["nnfNodes"] = append(components["nnfNodes"], node) } } } - return storages + return components } // fakeNnfNodeStorage creates an NnfNodeStorage resource filled in with only the fields diff --git a/internal/controller/nnf_clientmount_controller_test.go b/internal/controller/nnf_clientmount_controller_test.go index 8e1ba760..19a6cf95 100644 --- a/internal/controller/nnf_clientmount_controller_test.go +++ b/internal/controller/nnf_clientmount_controller_test.go @@ -55,7 +55,8 @@ var _ = Describe("Clientmount Controller Test", func() { }, } - m := getLustreMapping(&s) + Expect(s.Spec.AllocationSets).To(HaveLen(5)) + m := getLustreMappingFromServer(&s) Expect(m).To(HaveLen(5)) // should have keys for 4 lustre components (i.e. 
ost, mdt, mgt, mgtmdt) + rabbits Expect(m["ost"]).To(HaveLen(3)) @@ -70,7 +71,7 @@ var _ = Describe("Clientmount Controller Test", func() { Expect(m["mgtmdt"]).To(HaveLen(1)) Expect(m["mgtmdt"]).Should(ContainElements("rabbit-node-4")) - Expect(m["rabbits"]).To(HaveLen(5)) - Expect(m["rabbits"]).Should(ContainElements("rabbit-node-1", "rabbit-node-2", "rabbit-node-3", "rabbit-node-4", "rabbit-node-8")) + Expect(m["nnfNodes"]).To(HaveLen(5)) + Expect(m["nnfNodes"]).Should(ContainElements("rabbit-node-1", "rabbit-node-2", "rabbit-node-3", "rabbit-node-4", "rabbit-node-8")) }) }) diff --git a/internal/controller/nnf_storage_controller.go b/internal/controller/nnf_storage_controller.go index d9cd6e3e..90acc35a 100644 --- a/internal/controller/nnf_storage_controller.go +++ b/internal/controller/nnf_storage_controller.go @@ -192,6 +192,18 @@ func (r *NnfStorageReconciler) Reconcile(ctx context.Context, req ctrl.Request) } } + // Collect the lists of nodes for each lustre component used for the filesystem + if storage.Spec.FileSystemType == "lustre" { + components := getLustreMappingFromStorage(storage) + storage.Status.LustreComponents = nnfv1alpha4.NnfStorageLustreComponents{ + MDTs: components["mdt"], + MGTs: components["mgt"], + MGTMDTs: components["mgtmdt"], + OSTs: components["ost"], + NNFNodes: components["nnfNodes"], + } + } + // For each allocation, create the NnfNodeStorage resources to fan out to the Rabbit nodes for i, allocationSet := range storage.Spec.AllocationSets { // Add a reference to the external MGS PersistentStorageInstance if necessary @@ -639,6 +651,7 @@ func (r *NnfStorageReconciler) createNodeStorage(ctx context.Context, storage *n nnfNodeStorage.Spec.LustreStorage.TargetType = allocationSet.TargetType nnfNodeStorage.Spec.LustreStorage.FileSystemName = storage.Status.FileSystemName nnfNodeStorage.Spec.LustreStorage.MgsAddress = storage.Status.MgsAddress + nnfNodeStorage.Spec.LustreStorage.LustreComponents = storage.Status.LustreComponents // If 
this isn't the first allocation, then change MGTMDT to MDT so that we only get a single MGT if allocationSet.TargetType == "mgtmdt" && startIndex != 0 { @@ -1272,6 +1285,36 @@ func (r *NnfStorageReconciler) getLustreOST0(ctx context.Context, storage *nnfv1 return nil, nil } +func getLustreMappingFromStorage(storage *nnfv1alpha4.NnfStorage) map[string][]string { + componentMap := map[string][]string{ + "mdt": {}, + "mgt": {}, + "mgtmdt": {}, + "ost": {}, + "nnfNodes": {}, + } + rabbitMap := make(map[string]bool) // use a map to keep the list unique + + // Gather the info from the allocation set + for _, allocationSet := range storage.Spec.AllocationSets { + name := allocationSet.Name + for _, storage := range allocationSet.Nodes { + node := storage.Name + + // add to the list for that lustre component + componentMap[name] = append(componentMap[name], node) + + // add to the unique list of rabbits + if _, found := rabbitMap[node]; !found { + rabbitMap[node] = true + componentMap["nnfNodes"] = append(componentMap["nnfNodes"], node) + } + } + } + + return componentMap +} + // SetupWithManager sets up the controller with the Manager. func (r *NnfStorageReconciler) SetupWithManager(mgr ctrl.Manager) error { r.ChildObjects = []dwsv1alpha2.ObjectList{ diff --git a/internal/controller/nnf_storage_controller_test.go b/internal/controller/nnf_storage_controller_test.go new file mode 100644 index 00000000..0b6e1ef4 --- /dev/null +++ b/internal/controller/nnf_storage_controller_test.go @@ -0,0 +1,77 @@ +/* + * Copyright 2024 Hewlett Packard Enterprise Development LP + * Other additional copyright holders may be indicated within. + * + * The entirety of this work is licensed under the Apache License, + * Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. 
+ * + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package controller + +import ( + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + nnfv1alpha4 "github.com/NearNodeFlash/nnf-sos/api/v1alpha4" +) + +var _ = Describe("NNFStorage Controller Test", func() { + + It("It should correctly create a human-readable lustre mapping for NnfStorage", func() { + s := nnfv1alpha4.NnfStorage{ + Spec: nnfv1alpha4.NnfStorageSpec{ + AllocationSets: []nnfv1alpha4.NnfStorageAllocationSetSpec{ + {Name: "ost", Nodes: []nnfv1alpha4.NnfStorageAllocationNodes{ + {Name: "rabbit-node-1", Count: 1}, + {Name: "rabbit-node-2", Count: 1}}, + }, + // throw another OST on rabbit-node-2 + {Name: "ost", Nodes: []nnfv1alpha4.NnfStorageAllocationNodes{ + {Name: "rabbit-node-2", Count: 1}}, + }, + {Name: "mdt", Nodes: []nnfv1alpha4.NnfStorageAllocationNodes{ + {Name: "rabbit-node-3", Count: 1}, + {Name: "rabbit-node-4", Count: 1}, + {Name: "rabbit-node-8", Count: 1}}, + }, + {Name: "mgt", Nodes: []nnfv1alpha4.NnfStorageAllocationNodes{ + {Name: "rabbit-node-3", Count: 1}}, + }, + {Name: "mgtmdt", Nodes: []nnfv1alpha4.NnfStorageAllocationNodes{ + {Name: "rabbit-node-4", Count: 1}}, + }, + }, + }, + } + + Expect(s.Spec.AllocationSets).To(HaveLen(5)) + m := getLustreMappingFromStorage(&s) + Expect(m).To(HaveLen(5)) // should have keys for 4 lustre components (i.e. 
ost, mdt, mgt, mgtmdt) + rabbits + + Expect(m["ost"]).To(HaveLen(3)) + Expect(m["ost"]).Should(ContainElements("rabbit-node-1", "rabbit-node-2", "rabbit-node-2")) + + Expect(m["mdt"]).To(HaveLen(3)) + Expect(m["mdt"]).Should(ContainElements("rabbit-node-3", "rabbit-node-4", "rabbit-node-8")) + + Expect(m["mgt"]).To(HaveLen(1)) + Expect(m["mgt"]).Should(ContainElements("rabbit-node-3")) + + Expect(m["mgtmdt"]).To(HaveLen(1)) + Expect(m["mgtmdt"]).Should(ContainElements("rabbit-node-4")) + + Expect(m["nnfNodes"]).To(HaveLen(5)) + Expect(m["nnfNodes"]).Should(ContainElements("rabbit-node-1", "rabbit-node-2", "rabbit-node-3", "rabbit-node-4", "rabbit-node-8")) + }) +})