Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pre v1alpha4 rel #428

Merged
merged 28 commits into from
Dec 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
9f1340e
Add timeout when creating fan-out child resources (#395)
matthew-richerson Sep 26, 2024
11f7bfc
Wait for persistent storage owner label changes in teardown (#396)
matthew-richerson Oct 3, 2024
82d9db5
Role rules to monitor API Priority and Fairness
roehrich-hpe Oct 3, 2024
afc04df
Merge pull request #397 from NearNodeFlash/apf-role
roehrich-hpe Oct 3, 2024
f0849b9
CRDBUMPER-create-apis
matthew-richerson Oct 8, 2024
c73a5b0
CRDBUMPER-copy-api-content
matthew-richerson Oct 8, 2024
721c093
CRDBUMPER-mv-webhooks
matthew-richerson Oct 8, 2024
21dd163
CRDBUMPER-conversion-webhooks
matthew-richerson Oct 8, 2024
b788c68
CRDBUMPER-conversion-gen
matthew-richerson Oct 8, 2024
29a7892
CRDBUMPER-bump-controllers
matthew-richerson Oct 8, 2024
9682427
CRDBUMPER-bump-apis
matthew-richerson Oct 8, 2024
47824f0
CRDBUMPER-auto-gens
matthew-richerson Oct 8, 2024
de11dae
Merge pull request #398 from NearNodeFlash/api-v1alpha3
matthew-richerson Oct 9, 2024
bcb26d1
Mark the v1alpha1 API as unserved.
roehrich-hpe Oct 16, 2024
49d8beb
Merge pull request #399 from NearNodeFlash/v1alpha1-unserved
roehrich-hpe Oct 16, 2024
b547906
Add ability repair the state of the NoExecute/NoSchedule taints (#403)
roehrich-hpe Oct 21, 2024
48a2e9d
Allow commands to run on the Rabbit (#402)
matthew-richerson Oct 21, 2024
30475bc
Use a file based database for nnf-ec (#401)
matthew-richerson Oct 21, 2024
d6cc12a
Create nnf-servers file for lustre filesystems
bdevcich Oct 16, 2024
8dcdc4b
Remove an unhelpful info message (#404)
roehrich-hpe Oct 21, 2024
a10aedb
Fix missing conversion changes
bdevcich Oct 21, 2024
f69b96f
Verify generated Go conversions (#406)
roehrich-hpe Oct 21, 2024
0cc1ba6
Update dependencies for release
bdevcich Oct 21, 2024
3f48984
Fix localdisk hostPath type for KIND (#409)
roehrich-hpe Oct 24, 2024
ed4c94b
Nil err message after waiting for VG (#411)
roehrich-hpe Oct 31, 2024
aba25da
Only aggregate status from children that would be created using the c…
matthew-richerson Nov 7, 2024
4ef2c58
re-vendor dws (#413)
roehrich-hpe Nov 7, 2024
b5d510a
Merge branch 'pre-v1alpha4' into pre-v1alpha4-rel
roehrich-hpe Dec 9, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion config/kind/manager_volumes_patch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ spec:
hostPath:
path: /mnt
- name: localdisk
type: DirectoryOrCreate
hostPath:
type: DirectoryOrCreate
path: /localdisk

2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module github.com/NearNodeFlash/nnf-sos
go 1.21

require (
github.com/DataWorkflowServices/dws v0.0.1-0.20240916174522-9062a91241cd
github.com/DataWorkflowServices/dws v0.0.1-0.20241029172011-d5898d0b8640
github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20240925185149-26d9d6071a1c
github.com/NearNodeFlash/nnf-ec v0.0.1-0.20241017152925-afc4d0cf1a4b
github.com/ghodss/yaml v1.0.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/DataWorkflowServices/dws v0.0.1-0.20240916174522-9062a91241cd h1:/bSL8rV9cB13b3I5SCf20t4q3ab91i533sPcYIISI+8=
github.com/DataWorkflowServices/dws v0.0.1-0.20240916174522-9062a91241cd/go.mod h1:6MrEEHISskyooSKcKU6R3mFqH6Yh6KzWgajhcw2s+nM=
github.com/DataWorkflowServices/dws v0.0.1-0.20241029172011-d5898d0b8640 h1:JSjgesWkPo9sAc7QkjWisNDOlIOGR0MQX/hxXL56FTA=
github.com/DataWorkflowServices/dws v0.0.1-0.20241029172011-d5898d0b8640/go.mod h1:6MrEEHISskyooSKcKU6R3mFqH6Yh6KzWgajhcw2s+nM=
github.com/HewlettPackard/structex v1.0.4 h1:RVTdN5FWhDWr1IkjllU8wxuLjISo4gr6u5ryZpzyHcA=
github.com/HewlettPackard/structex v1.0.4/go.mod h1:3frC4RY/cPsP/4+N8rkxsNAGlQwHV+zDC7qvrN+N+rE=
github.com/NearNodeFlash/lustre-fs-operator v0.0.1-0.20240925185149-26d9d6071a1c h1:fSuMz3j8UzlYZI59Ded8XuUjYd7C5IyLB55jwgSTIew=
Expand Down
39 changes: 24 additions & 15 deletions internal/controller/nnf_access_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -1039,25 +1039,28 @@ func (r *NnfAccessReconciler) getClientMountStatus(ctx context.Context, access *
return true, nil
}

clientMounts := []dwsv1alpha2.ClientMount{}
clientMountList := &dwsv1alpha2.ClientMountList{}
matchLabels := dwsv1alpha2.MatchingOwner(access)

listOptions := []client.ListOption{
matchLabels,
}

if err := r.List(ctx, clientMountList, listOptions...); err != nil {
return false, dwsv1alpha2.NewResourceError("could not list ClientMounts").WithError(err)
}

// make a map with empty data of the client names to allow easy searching
clientNameMap := map[string]struct{}{}
for _, clientName := range clientList {
clientMount := &dwsv1alpha2.ClientMount{
ObjectMeta: metav1.ObjectMeta{
Name: clientMountName(access),
Namespace: clientName,
},
}
err := r.Get(ctx, client.ObjectKeyFromObject(clientMount), clientMount)
if err != nil {
if apierrors.IsNotFound(err) {
return false, nil
}
clientNameMap[clientName] = struct{}{}
}

return false, err
clientMounts := []dwsv1alpha2.ClientMount{}
for _, clientMount := range clientMountList.Items {
if _, exists := clientNameMap[clientMount.GetNamespace()]; exists {
clientMounts = append(clientMounts, clientMount)
}

clientMounts = append(clientMounts, *clientMount)
}

// Check the clientmounts for any errors first
Expand Down Expand Up @@ -1106,6 +1109,12 @@ func (r *NnfAccessReconciler) getClientMountStatus(ctx context.Context, access *
}
}

if len(clientMounts) != len(clientList) {
if access.GetDeletionTimestamp().IsZero() {
log.Info("unexpected number of ClientMounts", "found", len(clientMounts), "expected", len(clientList))
}
return false, nil
}
return true, nil
}

Expand Down
48 changes: 39 additions & 9 deletions internal/controller/nnf_storage_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -418,15 +418,30 @@ func (r *NnfStorageReconciler) aggregateNodeBlockStorageStatus(ctx context.Conte
return &ctrl.Result{}, dwsv1alpha2.NewResourceError("could not list NnfNodeBlockStorages").WithError(err)
}

// make a map with empty data of the Rabbit names to allow easy searching
nodeNameMap := map[string]struct{}{}
for _, node := range nnfStorage.Spec.AllocationSets[allocationSetIndex].Nodes {
nodeNameMap[node.Name] = struct{}{}
}

// prune out any entries that aren't in the NnfStorage. This can happen if the NnfStorage was modified
// after it was created, as is the case with NnfStorages from an NnfSystemStorage
nnfNodeBlockStorages := []nnfv1alpha3.NnfNodeBlockStorage{}
for _, nnfNodeBlockStorage := range nnfNodeBlockStorageList.Items {
if _, exists := nodeNameMap[nnfNodeBlockStorage.GetNamespace()]; exists {
nnfNodeBlockStorages = append(nnfNodeBlockStorages, nnfNodeBlockStorage)
}
}

for _, nnfNodeBlockStorage := range nnfNodeBlockStorages {
for _, nodeAllocation := range nnfNodeBlockStorage.Status.Allocations {
if nodeAllocation.CapacityAllocated > 0 {
allocationSet.AllocationCount++
}
}
}

for _, nnfNodeBlockStorage := range nnfNodeBlockStorageList.Items {
for _, nnfNodeBlockStorage := range nnfNodeBlockStorages {
if nnfNodeBlockStorage.Status.Error != nil {
return &ctrl.Result{}, dwsv1alpha2.NewResourceError("Node: %s", nnfNodeBlockStorage.GetNamespace()).WithError(nnfNodeBlockStorage.Status.Error)
}
Expand All @@ -440,7 +455,7 @@ func (r *NnfStorageReconciler) aggregateNodeBlockStorageStatus(ctx context.Conte
childTimeout = 300
}

for _, nnfNodeBlockStorage := range nnfNodeBlockStorageList.Items {
for _, nnfNodeBlockStorage := range nnfNodeBlockStorages {
// check if the finalizer has been added by the controller on the Rabbit
if len(nnfNodeBlockStorage.GetFinalizers()) > 0 {
continue
Expand All @@ -454,7 +469,7 @@ func (r *NnfStorageReconciler) aggregateNodeBlockStorageStatus(ctx context.Conte
}
}

for _, nnfNodeBlockStorage := range nnfNodeBlockStorageList.Items {
for _, nnfNodeBlockStorage := range nnfNodeBlockStorages {
if nnfNodeBlockStorage.Status.Ready == false {
return &ctrl.Result{}, nil
}
Expand All @@ -463,9 +478,9 @@ func (r *NnfStorageReconciler) aggregateNodeBlockStorageStatus(ctx context.Conte
// Ensure that we found all the NnfNodeBlockStorage resources we were expecting. This can be expected
// transiently as it takes time for the client cache to be updated. Log a message in case the count
// never reaches the expected value.
if len(nnfNodeBlockStorageList.Items) != len(nnfStorage.Spec.AllocationSets[allocationSetIndex].Nodes) {
if len(nnfNodeBlockStorages) != len(nnfStorage.Spec.AllocationSets[allocationSetIndex].Nodes) {
if nnfStorage.GetDeletionTimestamp().IsZero() {
log.Info("unexpected number of NnfNodeBlockStorages", "found", len(nnfNodeBlockStorageList.Items), "expected", len(nnfStorage.Spec.AllocationSets[allocationSetIndex].Nodes))
log.Info("unexpected number of NnfNodeBlockStorages", "found", len(nnfNodeBlockStorages), "expected", len(nnfStorage.Spec.AllocationSets[allocationSetIndex].Nodes))
}
return &ctrl.Result{}, nil
}
Expand Down Expand Up @@ -642,7 +657,22 @@ func (r *NnfStorageReconciler) aggregateNodeStorageStatus(ctx context.Context, s
return &ctrl.Result{}, dwsv1alpha2.NewResourceError("could not list NnfNodeStorages").WithError(err)
}

// make a map with empty data of the Rabbit names to allow easy searching
nodeNameMap := map[string]struct{}{}
for _, node := range storage.Spec.AllocationSets[allocationSetIndex].Nodes {
nodeNameMap[node.Name] = struct{}{}
}

// prune out any entries that aren't in the NnfStorage. This can happen if the NnfStorage was modified
// after it was created, as is the case with NnfStorages from an NnfSystemStorage
nnfNodeStorages := []nnfv1alpha3.NnfNodeStorage{}
for _, nnfNodeStorage := range nnfNodeStorageList.Items {
if _, exists := nodeNameMap[nnfNodeStorage.GetNamespace()]; exists {
nnfNodeStorages = append(nnfNodeStorages, nnfNodeStorage)
}
}

for _, nnfNodeStorage := range nnfNodeStorages {
// If we're in the delete path, only propagate errors for storages that are deleting. Errors
// from creation aren't interesting anymore
if deleting && nnfNodeStorage.GetDeletionTimestamp().IsZero() {
Expand All @@ -661,7 +691,7 @@ func (r *NnfStorageReconciler) aggregateNodeStorageStatus(ctx context.Context, s
childTimeout = 300
}

for _, nnfNodeStorage := range nnfNodeStorageList.Items {
for _, nnfNodeStorage := range nnfNodeStorages {
// check if the finalizer has been added by the controller on the Rabbit
if len(nnfNodeStorage.GetFinalizers()) > 0 {
continue
Expand All @@ -675,7 +705,7 @@ func (r *NnfStorageReconciler) aggregateNodeStorageStatus(ctx context.Context, s
}
}

for _, nnfNodeStorage := range nnfNodeStorageList.Items {
for _, nnfNodeStorage := range nnfNodeStorages {
if nnfNodeStorage.Status.Ready == false {
return &ctrl.Result{}, nil
}
Expand All @@ -684,9 +714,9 @@ func (r *NnfStorageReconciler) aggregateNodeStorageStatus(ctx context.Context, s
// Ensure that we found all the NnfNodeStorage resources we were expecting. This can be expected
// transiently as it takes time for the client cache to be updated. Log a message in case the count
// never reaches the expected value.
if len(nnfNodeStorageList.Items) != len(storage.Spec.AllocationSets[allocationSetIndex].Nodes) {
if len(nnfNodeStorages) != len(storage.Spec.AllocationSets[allocationSetIndex].Nodes) {
if storage.GetDeletionTimestamp().IsZero() {
log.Info("unexpected number of NnfNodeStorages", "found", len(nnfNodeStorageList.Items), "expected", len(storage.Spec.AllocationSets[allocationSetIndex].Nodes))
log.Info("unexpected number of NnfNodeStorages", "found", len(nnfNodeStorages), "expected", len(storage.Spec.AllocationSets[allocationSetIndex].Nodes))
}
return &ctrl.Result{}, nil
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/blockdevice/lvm/volume_groups.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ func (vg *VolumeGroup) WaitForAppearance(ctx context.Context) (bool, error) {
}
}

return false, fmt.Errorf("timeout waiting for VG %w", err)
return false, fmt.Errorf("timeout waiting for VG")
}

func (vg *VolumeGroup) parseArgs(args string) (string, error) {
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion vendor/modules.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# github.com/DataWorkflowServices/dws v0.0.1-0.20240916174522-9062a91241cd
# github.com/DataWorkflowServices/dws v0.0.1-0.20241029172011-d5898d0b8640
## explicit; go 1.21
github.com/DataWorkflowServices/dws/api/v1alpha2
github.com/DataWorkflowServices/dws/config/crd/bases
Expand Down
Loading