diff --git a/config/kind/manager_volumes_patch.yaml b/config/kind/manager_volumes_patch.yaml index 0cfb1067..a6bf6487 100644 --- a/config/kind/manager_volumes_patch.yaml +++ b/config/kind/manager_volumes_patch.yaml @@ -20,7 +20,7 @@ spec: hostPath: path: /mnt - name: localdisk - type: DirectoryOrCreate hostPath: + type: DirectoryOrCreate path: /localdisk diff --git a/go.mod b/go.mod index cc8b4197..edb41aea 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/NearNodeFlash/nnf-sos go 1.21 require ( - github.com/DataWorkflowServices/dws v0.0.1-0.20240916174522-9062a91241cd + github.com/DataWorkflowServices/dws v0.0.1-0.20241029172011-d5898d0b8640 github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20240925185149-26d9d6071a1c github.com/NearNodeFlash/nnf-ec v0.0.1-0.20241017152925-afc4d0cf1a4b github.com/ghodss/yaml v1.0.0 diff --git a/go.sum b/go.sum index 96657563..7bb263ea 100644 --- a/go.sum +++ b/go.sum @@ -1,7 +1,7 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/DataWorkflowServices/dws v0.0.1-0.20240916174522-9062a91241cd h1:/bSL8rV9cB13b3I5SCf20t4q3ab91i533sPcYIISI+8= -github.com/DataWorkflowServices/dws v0.0.1-0.20240916174522-9062a91241cd/go.mod h1:6MrEEHISskyooSKcKU6R3mFqH6Yh6KzWgajhcw2s+nM= +github.com/DataWorkflowServices/dws v0.0.1-0.20241029172011-d5898d0b8640 h1:JSjgesWkPo9sAc7QkjWisNDOlIOGR0MQX/hxXL56FTA= +github.com/DataWorkflowServices/dws v0.0.1-0.20241029172011-d5898d0b8640/go.mod h1:6MrEEHISskyooSKcKU6R3mFqH6Yh6KzWgajhcw2s+nM= github.com/HewlettPackard/structex v1.0.4 h1:RVTdN5FWhDWr1IkjllU8wxuLjISo4gr6u5ryZpzyHcA= github.com/HewlettPackard/structex v1.0.4/go.mod h1:3frC4RY/cPsP/4+N8rkxsNAGlQwHV+zDC7qvrN+N+rE= github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20240925185149-26d9d6071a1c h1:fSuMz3j8UzlYZI59Ded8XuUjYd7C5IyLB55jwgSTIew= diff --git a/internal/controller/nnf_access_controller.go b/internal/controller/nnf_access_controller.go index 4fd42426..426b793e 100644 --- a/internal/controller/nnf_access_controller.go +++ b/internal/controller/nnf_access_controller.go @@ -1039,25 +1039,28 @@ func (r *NnfAccessReconciler) getClientMountStatus(ctx context.Context, access * return true, nil } - clientMounts := []dwsv1alpha2.ClientMount{} + clientMountList := &dwsv1alpha2.ClientMountList{} + matchLabels := dwsv1alpha2.MatchingOwner(access) + + listOptions := []client.ListOption{ + matchLabels, + } + + if err := r.List(ctx, clientMountList, listOptions...); err != nil { + return false, dwsv1alpha2.NewResourceError("could not list ClientMounts").WithError(err) + } + // make a map with empty data of the client names to allow easy searching + clientNameMap := map[string]struct{}{} for _, clientName := range clientList { - clientMount := &dwsv1alpha2.ClientMount{ - ObjectMeta: metav1.ObjectMeta{ - Name: clientMountName(access), - Namespace: clientName, - }, - } - err := r.Get(ctx, client.ObjectKeyFromObject(clientMount), clientMount) - if err != nil { - if apierrors.IsNotFound(err) { - return false, nil - } + clientNameMap[clientName] = struct{}{} + } - return false, err + clientMounts := []dwsv1alpha2.ClientMount{} + for _, clientMount := range clientMountList.Items { + if _, exists := clientNameMap[clientMount.GetNamespace()]; exists { + clientMounts = append(clientMounts, clientMount) } - - clientMounts = append(clientMounts, *clientMount) } // Check the clientmounts for any errors first @@ -1106,6 +1109,12 @@ func (r *NnfAccessReconciler) getClientMountStatus(ctx context.Context, access * } } + if len(clientMounts) != len(clientList) { + if access.GetDeletionTimestamp().IsZero() { + log.Info("unexpected number of ClientMounts", "found", len(clientMounts), "expected", len(clientList)) + } + return false, nil + } return true, nil } diff --git a/internal/controller/nnf_storage_controller.go b/internal/controller/nnf_storage_controller.go index 560d2d06..360ae828 100644 --- a/internal/controller/nnf_storage_controller.go +++ b/internal/controller/nnf_storage_controller.go @@ -418,7 +418,22 @@ func (r *NnfStorageReconciler) aggregateNodeBlockStorageStatus(ctx context.Conte return &ctrl.Result{}, dwsv1alpha2.NewResourceError("could not list NnfNodeBlockStorages").WithError(err) } + // make a map with empty data of the Rabbit names to allow easy searching + nodeNameMap := map[string]struct{}{} + for _, node := range nnfStorage.Spec.AllocationSets[allocationSetIndex].Nodes { + nodeNameMap[node.Name] = struct{}{} + } + + // prune out any entries that aren't in the NnfStorage. This can happen if the NnfStorage was modified + // after it was created, as is the case with NnfStorages from an NnfSystemStorage + nnfNodeBlockStorages := []nnfv1alpha3.NnfNodeBlockStorage{} for _, nnfNodeBlockStorage := range nnfNodeBlockStorageList.Items { + if _, exists := nodeNameMap[nnfNodeBlockStorage.GetNamespace()]; exists { + nnfNodeBlockStorages = append(nnfNodeBlockStorages, nnfNodeBlockStorage) + } + } + + for _, nnfNodeBlockStorage := range nnfNodeBlockStorages { for _, nodeAllocation := range nnfNodeBlockStorage.Status.Allocations { if nodeAllocation.CapacityAllocated > 0 { allocationSet.AllocationCount++ @@ -426,7 +441,7 @@ func (r *NnfStorageReconciler) aggregateNodeBlockStorageStatus(ctx context.Conte } } - for _, nnfNodeBlockStorage := range nnfNodeBlockStorageList.Items { + for _, nnfNodeBlockStorage := range nnfNodeBlockStorages { if nnfNodeBlockStorage.Status.Error != nil { return &ctrl.Result{}, dwsv1alpha2.NewResourceError("Node: %s", nnfNodeBlockStorage.GetNamespace()).WithError(nnfNodeBlockStorage.Status.Error) } @@ -440,7 +455,7 @@ func (r *NnfStorageReconciler) aggregateNodeBlockStorageStatus(ctx context.Conte childTimeout = 300 } - for _, nnfNodeBlockStorage := range nnfNodeBlockStorageList.Items { + for _, nnfNodeBlockStorage := range nnfNodeBlockStorages { // check if the finalizer has been added by the controller on the Rabbit if len(nnfNodeBlockStorage.GetFinalizers()) > 0 { continue @@ -454,7 +469,7 @@ func (r *NnfStorageReconciler) aggregateNodeBlockStorageStatus(ctx context.Conte } } - for _, nnfNodeBlockStorage := range nnfNodeBlockStorageList.Items { + for _, nnfNodeBlockStorage := range nnfNodeBlockStorages { if nnfNodeBlockStorage.Status.Ready == false { return &ctrl.Result{}, nil } @@ -463,9 +478,9 @@ func (r *NnfStorageReconciler) aggregateNodeBlockStorageStatus(ctx context.Conte // Ensure that we found all the NnfNodeBlockStorage resources we were expecting. This can be expected // transiently as it takes time for the client cache to be updated. Log a message in case the count // never reaches the expected value. - if len(nnfNodeBlockStorageList.Items) != len(nnfStorage.Spec.AllocationSets[allocationSetIndex].Nodes) { + if len(nnfNodeBlockStorages) != len(nnfStorage.Spec.AllocationSets[allocationSetIndex].Nodes) { if nnfStorage.GetDeletionTimestamp().IsZero() { - log.Info("unexpected number of NnfNodeBlockStorages", "found", len(nnfNodeBlockStorageList.Items), "expected", len(nnfStorage.Spec.AllocationSets[allocationSetIndex].Nodes)) + log.Info("unexpected number of NnfNodeBlockStorages", "found", len(nnfNodeBlockStorages), "expected", len(nnfStorage.Spec.AllocationSets[allocationSetIndex].Nodes)) } return &ctrl.Result{}, nil } @@ -642,7 +657,22 @@ func (r *NnfStorageReconciler) aggregateNodeStorageStatus(ctx context.Context, s return &ctrl.Result{}, dwsv1alpha2.NewResourceError("could not list NnfNodeStorages").WithError(err) } + // make a map with empty data of the Rabbit names to allow easy searching + nodeNameMap := map[string]struct{}{} + for _, node := range storage.Spec.AllocationSets[allocationSetIndex].Nodes { + nodeNameMap[node.Name] = struct{}{} + } + + // prune out any entries that aren't in the NnfStorage. This can happen if the NnfStorage was modified + // after it was created, as is the case with NnfStorages from an NnfSystemStorage + nnfNodeStorages := []nnfv1alpha3.NnfNodeStorage{} for _, nnfNodeStorage := range nnfNodeStorageList.Items { + if _, exists := nodeNameMap[nnfNodeStorage.GetNamespace()]; exists { + nnfNodeStorages = append(nnfNodeStorages, nnfNodeStorage) + } + } + + for _, nnfNodeStorage := range nnfNodeStorages { // If we're in the delete path, only propagate errors for storages that are deleting. Errors // from creation aren't interesting anymore if deleting && nnfNodeStorage.GetDeletionTimestamp().IsZero() { @@ -661,7 +691,7 @@ func (r *NnfStorageReconciler) aggregateNodeStorageStatus(ctx context.Context, s childTimeout = 300 } - for _, nnfNodeStorage := range nnfNodeStorageList.Items { + for _, nnfNodeStorage := range nnfNodeStorages { // check if the finalizer has been added by the controller on the Rabbit if len(nnfNodeStorage.GetFinalizers()) > 0 { continue @@ -675,7 +705,7 @@ func (r *NnfStorageReconciler) aggregateNodeStorageStatus(ctx context.Context, s } } - for _, nnfNodeStorage := range nnfNodeStorageList.Items { + for _, nnfNodeStorage := range nnfNodeStorages { if nnfNodeStorage.Status.Ready == false { return &ctrl.Result{}, nil } @@ -684,9 +714,9 @@ func (r *NnfStorageReconciler) aggregateNodeStorageStatus(ctx context.Context, s // Ensure that we found all the NnfNodeStorage resources we were expecting. This can be expected // transiently as it takes time for the client cache to be updated. Log a message in case the count // never reaches the expected value. - if len(nnfNodeStorageList.Items) != len(storage.Spec.AllocationSets[allocationSetIndex].Nodes) { + if len(nnfNodeStorages) != len(storage.Spec.AllocationSets[allocationSetIndex].Nodes) { if storage.GetDeletionTimestamp().IsZero() { - log.Info("unexpected number of NnfNodeStorages", "found", len(nnfNodeStorageList.Items), "expected", len(storage.Spec.AllocationSets[allocationSetIndex].Nodes)) + log.Info("unexpected number of NnfNodeStorages", "found", len(nnfNodeStorages), "expected", len(storage.Spec.AllocationSets[allocationSetIndex].Nodes)) } return &ctrl.Result{}, nil } diff --git a/pkg/blockdevice/lvm/volume_groups.go b/pkg/blockdevice/lvm/volume_groups.go index a2bbdfb0..acff13d2 100644 --- a/pkg/blockdevice/lvm/volume_groups.go +++ b/pkg/blockdevice/lvm/volume_groups.go @@ -95,7 +95,7 @@ func (vg *VolumeGroup) WaitForAppearance(ctx context.Context) (bool, error) { } } - return false, fmt.Errorf("timeout waiting for VG %w", err) + return false, fmt.Errorf("timeout waiting for VG") } func (vg *VolumeGroup) parseArgs(args string) (string, error) { diff --git a/vendor/github.com/DataWorkflowServices/dws/api/v1alpha2/owner_labels.go b/vendor/github.com/DataWorkflowServices/dws/api/v1alpha2/owner_labels.go index 2af9d95b..313f618c 100644 --- a/vendor/github.com/DataWorkflowServices/dws/api/v1alpha2/owner_labels.go +++ b/vendor/github.com/DataWorkflowServices/dws/api/v1alpha2/owner_labels.go @@ -63,6 +63,7 @@ func MatchingOwner(owner metav1.Object) client.MatchingLabels { OwnerKindLabel: reflect.Indirect(reflect.ValueOf(owner)).Type().Name(), OwnerNameLabel: owner.GetName(), OwnerNamespaceLabel: owner.GetNamespace(), + OwnerUidLabel: string(owner.GetUID()), }) } diff --git a/vendor/modules.txt b/vendor/modules.txt index fbdd4a5a..d14d11a7 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1,4 +1,4 @@ -# github.com/DataWorkflowServices/dws v0.0.1-0.20240916174522-9062a91241cd +# github.com/DataWorkflowServices/dws v0.0.1-0.20241029172011-d5898d0b8640 ## explicit; go 1.21 github.com/DataWorkflowServices/dws/api/v1alpha2 github.com/DataWorkflowServices/dws/config/crd/bases