From afc4d0cf1a4b8d51ecaed8983ecd66636e507d4f Mon Sep 17 00:00:00 2001 From: matthew-richerson <82597529+matthew-richerson@users.noreply.github.com> Date: Thu, 17 Oct 2024 10:29:25 -0500 Subject: [PATCH] Return success on delete storage pool failure (#111) A storage pool delete failure can result in double deleting of the NVMe namespaces. Return success even if some of the namespaces were not removed. We'll leak these namespaces for now. Signed-off-by: Matt Richerson --- pkg/manager-nnf/manager.go | 14 ++++++++++++-- pkg/manager-nvme/manager.go | 10 +++++++++- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/pkg/manager-nnf/manager.go b/pkg/manager-nnf/manager.go index cd2d78b..4060cb0 100644 --- a/pkg/manager-nnf/manager.go +++ b/pkg/manager-nnf/manager.go @@ -838,11 +838,21 @@ func (*StorageService) StorageServiceIdStoragePoolIdDelete(storageServiceId, sto } deleteFunc := func() error { - return p.deallocateVolumes() + err := p.deallocateVolumes() + if err != nil { + log.Error(err, "deallocateVolumes failed, but returning success anyway") + } + + return nil } if err := s.persistentController.DeletePersistentObject(p, deleteFunc, storagePoolStorageDeleteStartLogEntryType, storagePoolStorageDeleteCompleteLogEntryType); err != nil { - return ec.NewErrInternalServerError().WithResourceType(StoragePoolOdataType).WithError(err).WithCause(fmt.Sprintf("Failed to delete storage pool")) + err := ec.NewErrInternalServerError().WithResourceType(StoragePoolOdataType).WithError(err).WithCause(fmt.Sprintf("Failed to delete storage pool")) + if err != nil { + log.Error(err, "DeletePersistentObject failed, but returning success anyway") + } + + return nil } event.EventManager.PublishResourceEvent(msgreg.ResourceRemovedResourceEvent(), p) diff --git a/pkg/manager-nvme/manager.go b/pkg/manager-nvme/manager.go index c9cbfa5..0483cdf 100644 --- a/pkg/manager-nvme/manager.go +++ b/pkg/manager-nvme/manager.go @@ -692,6 +692,7 @@ func (v *Volume) WaitFormatComplete() error { return err } + stalledCount := 0 for ns.Utilization != 0 { log.V(3).Info("Namespace in use", "utilization", ns.Utilization) @@ -708,7 +709,14 @@ func (v *Volume) WaitFormatComplete() error { } if lastUtilization == ns.Utilization { - return fmt.Errorf("Device %s Format Stalled: Namespace: %d Delay: %s Utilization: %d", v.storage.id, v.namespaceId, delay.String(), ns.Utilization) + stalledCount++ + if stalledCount == 10 { + return fmt.Errorf("Device %s Format Stalled: Namespace: %d Delay: %s Stall Count: %d Utilization: %d", v.storage.id, v.namespaceId, delay.String(), stalledCount, ns.Utilization) + } + + log.V(1).Info("Format stalled", "device", v.storage.id, "Namespace", v.namespaceId, "utilization", ns.Utilization, "stall count", stalledCount) + } else { + stalledCount = 0 } }