diff --git a/pkg/nvme/initiator.go b/pkg/nvme/initiator.go index e0b1e75..8845f2d 100644 --- a/pkg/nvme/initiator.go +++ b/pkg/nvme/initiator.go @@ -22,17 +22,19 @@ const ( LockFile = "/var/run/longhorn-spdk.lock" LockTimeout = 120 * time.Second - maxNumRetries = 15 - retryInterval = 1 * time.Second - - maxNumWaitDeviceRetries = 60 - waitDeviceInterval = 1 * time.Second - HostProc = "/host/proc" validateDiskCreationTimeout = 30 // seconds ) +const ( + maxConnectTargetRetries = 15 + retryConnectTargetInterval = 1 * time.Second + + maxWaitDeviceRetries = 60 + waitDeviceInterval = 1 * time.Second +) + type Initiator struct { Name string SubsystemNQN string @@ -52,10 +54,14 @@ type Initiator struct { logger logrus.FieldLogger } -// NewInitiator creates a new NVMe initiator +// NewInitiator creates a new NVMe-oF initiator func NewInitiator(name, subsystemNQN, hostProc string) (*Initiator, error) { - if name == "" || subsystemNQN == "" { - return nil, fmt.Errorf("empty name or subsystem for initiator creation") + if name == "" { + return nil, fmt.Errorf("empty name for NVMe-oF initiator creation") + } + + if subsystemNQN == "" { + return nil, fmt.Errorf("empty subsystem for NVMe-oF initiator creation") } // If transportAddress or transportServiceID is empty, the initiator is still valid for stopping @@ -132,8 +138,8 @@ func (i *Initiator) DisconnectTarget() error { return DisconnectTarget(i.SubsystemNQN, i.executor) } -// WaitForConnect waits for the NVMe initiator to connect -func (i *Initiator) WaitForConnect(maxNumRetries int, retryInterval time.Duration) (err error) { +// WaitForConnect waits for the NVMe-oF initiator to connect +func (i *Initiator) WaitForConnect(maxRetries int, retryInterval time.Duration) (err error) { if i.hostProc != "" { lock, err := i.newLock() if err != nil { @@ -142,7 +148,7 @@ func (i *Initiator) WaitForConnect(maxNumRetries int, retryInterval time.Duratio defer lock.Unlock() } - for r := 0; r < maxNumRetries; r++ { + for r := 0; r < maxRetries; r++ { err = i.loadNVMeDeviceInfoWithoutLock(i.TransportAddress, i.TransportServiceID, i.SubsystemNQN) if err == nil { return nil @@ -153,8 +159,8 @@ func (i *Initiator) WaitForConnect(maxNumRetries int, retryInterval time.Duratio return err } -// WaitForDisconnect waits for the NVMe initiator to disconnect -func (i *Initiator) WaitForDisconnect(maxNumRetries int, retryInterval time.Duration) (err error) { +// WaitForDisconnect waits for the NVMe-oF initiator to disconnect +func (i *Initiator) WaitForDisconnect(maxRetries int, retryInterval time.Duration) (err error) { if i.hostProc != "" { lock, err := i.newLock() if err != nil { @@ -163,7 +169,7 @@ func (i *Initiator) WaitForDisconnect(maxNumRetries int, retryInterval time.Dura defer lock.Unlock() } - for r := 0; r < maxNumRetries; r++ { + for r := 0; r < maxRetries; r++ { err = i.loadNVMeDeviceInfoWithoutLock(i.TransportAddress, i.TransportServiceID, i.SubsystemNQN) if types.ErrorIsValidNvmeDeviceNotFound(err) { return nil @@ -174,7 +180,7 @@ func (i *Initiator) WaitForDisconnect(maxNumRetries int, retryInterval time.Dura return err } -// Suspend suspends the device mapper device for the NVMe initiator +// Suspend suspends the device mapper device for the NVMe-oF initiator func (i *Initiator) Suspend(noflush, nolockfs bool) error { if i.hostProc != "" { lock, err := i.newLock() @@ -186,19 +192,19 @@ func (i *Initiator) Suspend(noflush, nolockfs bool) error { suspended, err := i.IsSuspended() if err != nil { - return errors.Wrapf(err, "failed to check if linear dm device is suspended for NVMe initiator %s", i.Name) + return errors.Wrapf(err, "failed to check if linear dm device is suspended for NVMe-oF initiator %s", i.Name) } if !suspended { if err := i.suspendLinearDmDevice(noflush, nolockfs); err != nil { - return errors.Wrapf(err, "failed to suspend device mapper device for NVMe initiator %s", i.Name) + return errors.Wrapf(err, "failed to suspend linear dm device for NVMe-oF initiator %s", i.Name) } } return nil } -// Resume resumes the device mapper device for the NVMe initiator +// Resume resumes the device mapper device for the NVMe-oF initiator func (i *Initiator) Resume() error { if i.hostProc != "" { lock, err := i.newLock() @@ -209,7 +215,7 @@ func (i *Initiator) Resume() error { } if err := i.resumeLinearDmDevice(); err != nil { - return errors.Wrapf(err, "failed to resume device mapper device for NVMe initiator %s", i.Name) + return errors.Wrapf(err, "failed to resume linear dm device for NVMe-oF initiator %s", i.Name) } return nil @@ -224,30 +230,30 @@ func (i *Initiator) resumeLinearDmDevice() error { func (i *Initiator) replaceDmDeviceTarget() error { suspended, err := i.IsSuspended() if err != nil { - return errors.Wrapf(err, "failed to check if linear dm device is suspended for NVMe initiator %s", i.Name) + return errors.Wrapf(err, "failed to check if linear dm device is suspended for NVMe-oF initiator %s", i.Name) } if !suspended { if err := i.suspendLinearDmDevice(true, false); err != nil { - return errors.Wrapf(err, "failed to suspend linear dm device for NVMe initiator %s", i.Name) + return errors.Wrapf(err, "failed to suspend linear dm device for NVMe-oF initiator %s", i.Name) } } if err := i.reloadLinearDmDevice(); err != nil { - return errors.Wrapf(err, "failed to reload linear dm device for NVMe initiator %s", i.Name) + return errors.Wrapf(err, "failed to reload linear dm device for NVMe-oF initiator %s", i.Name) } if err := i.resumeLinearDmDevice(); err != nil { - return errors.Wrapf(err, "failed to resume linear dm device for NVMe initiator %s", i.Name) + return errors.Wrapf(err, "failed to resume linear dm device for NVMe-oF initiator %s", i.Name) } return nil } -// Start starts the NVMe initiator with the given transportAddress and transportServiceID +// Start starts the NVMe-oF initiator with the given transportAddress and transportServiceID func (i *Initiator) Start(transportAddress, transportServiceID string, dmDeviceAndEndpointCleanupRequired bool) (dmDeviceIsBusy bool, err error) { defer func() { if err != nil { - err = errors.Wrapf(err, "failed to start NVMe initiator %s", i.Name) + err = errors.Wrapf(err, "failed to start NVMe-oF initiator %s", i.Name) } }() @@ -259,7 +265,7 @@ func (i *Initiator) Start(transportAddress, transportServiceID string, dmDeviceA "transportAddress": transportAddress, "transportServiceID": transportServiceID, "dmDeviceAndEndpointCleanupRequired": dmDeviceAndEndpointCleanupRequired, - }).Info("Starting NVMe initiator") + }).Info("Starting NVMe-oF initiator") if i.hostProc != "" { lock, err := i.newLock() @@ -269,81 +275,80 @@ func (i *Initiator) Start(transportAddress, transportServiceID string, dmDeviceA defer lock.Unlock() } - // Check if the initiator/NVMe device is already launched and matches the params + // Check if the initiator/NVMe-oF device is already launched and matches the params err = i.loadNVMeDeviceInfoWithoutLock(i.TransportAddress, i.TransportServiceID, i.SubsystemNQN) if err == nil { if i.TransportAddress == transportAddress && i.TransportServiceID == transportServiceID { err = i.LoadEndpoint(false) if err == nil { - i.logger.Info("NVMe initiator is already launched with correct params") + i.logger.Info("NVMe-oF initiator is already launched with correct params") return false, nil } - i.logger.WithError(err).Warnf("NVMe initiator is launched with failed to load the endpoint") + i.logger.WithError(err).Warnf("NVMe-oF initiator is launched with failed to load the endpoint") } else { - i.logger.Warnf("NVMe initiator is launched but with incorrect address, the required one is %s:%s, will try to stop then relaunch it", transportAddress, transportServiceID) + i.logger.Warnf("NVMe-oF initiator is launched but with incorrect address, the required one is %s:%s, will try to stop then relaunch it", transportAddress, transportServiceID) } } - i.logger.Info("Stopping NVMe initiator blindly before starting") + i.logger.Info("Stopping NVMe-oF initiator blindly before starting") dmDeviceIsBusy, err = i.stopWithoutLock(dmDeviceAndEndpointCleanupRequired, false, false) if err != nil { - return dmDeviceIsBusy, errors.Wrapf(err, "failed to stop the mismatching NVMe initiator %s before starting", i.Name) + return dmDeviceIsBusy, errors.Wrapf(err, "failed to stop the mismatching NVMe-oF initiator %s before starting", i.Name) } - i.logger.Info("Launching NVMe initiator") + i.logger.Info("Launching NVMe-oF initiator") - i.connectTarget(transportAddress, transportServiceID) + i.connectTarget(transportAddress, transportServiceID, maxConnectTargetRetries, retryConnectTargetInterval) if i.ControllerName == "" { - return dmDeviceIsBusy, fmt.Errorf("failed to start NVMe initiator %s within %d * %v sec retries", i.Name, maxNumRetries, retryInterval.Seconds()) + return dmDeviceIsBusy, fmt.Errorf("failed to start NVMe-oF initiator %s within %d * %v sec retries", i.Name, maxConnectTargetRetries, retryConnectTargetInterval.Seconds()) } err = i.waitAndLoadNVMeDeviceInfoWithoutLock(transportAddress, transportServiceID) if err != nil { - return dmDeviceIsBusy, errors.Wrapf(err, "failed to load device info after connecting target for NVMe initiator %s", i.Name) + return dmDeviceIsBusy, errors.Wrapf(err, "failed to load device info after connecting target for NVMe-oF initiator %s", i.Name) } if dmDeviceAndEndpointCleanupRequired { if dmDeviceIsBusy { // Endpoint is already created, just replace the target device - i.logger.Info("Linear dm device is busy, trying the best to replace the target device for NVMe initiator") + i.logger.Info("Linear dm device is busy, trying the best to replace the target device for NVMe-oF initiator") if err := i.replaceDmDeviceTarget(); err != nil { - i.logger.WithError(err).Warnf("Failed to replace the target device for NVMe initiator") + i.logger.WithError(err).Warnf("Failed to replace the target device for NVMe-oF initiator") } else { - i.logger.Info("Successfully replaced the target device for NVMe initiator") + i.logger.Info("Successfully replaced the target device for NVMe-oF initiator") dmDeviceIsBusy = false } } else { - i.logger.Info("Creating linear dm device for NVMe initiator") + i.logger.Info("Creating linear dm device for NVMe-oF initiator") if err := i.createLinearDmDevice(); err != nil { - return false, errors.Wrapf(err, "failed to create linear dm device for NVMe initiator %s", i.Name) + return false, errors.Wrapf(err, "failed to create linear dm device for NVMe-oF initiator %s", i.Name) } } } else { - i.logger.Info("Skipping creating linear dm device for NVMe initiator") + i.logger.Info("Skipping creating linear dm device for NVMe-oF initiator") i.dev.Export = i.dev.Nvme } i.logger.Infof("Creating endpoint %v", i.Endpoint) exist, err := i.isEndpointExist() if err != nil { - return dmDeviceIsBusy, errors.Wrapf(err, "failed to check if endpoint %v exists for NVMe initiator %s", i.Endpoint, i.Name) + return dmDeviceIsBusy, errors.Wrapf(err, "failed to check if endpoint %v exists for NVMe-oF initiator %s", i.Endpoint, i.Name) } if exist { - i.logger.Infof("Skipping endpoint %v creation for NVMe initiator", i.Endpoint) + i.logger.Infof("Skipping endpoint %v creation for NVMe-oF initiator", i.Endpoint) } else { if err := i.makeEndpoint(); err != nil { return dmDeviceIsBusy, err } } - i.logger.Infof("Launched NVMe initiator: %+v", i) + i.logger.Infof("Launched NVMe-oF initiator: %+v", i) return dmDeviceIsBusy, nil } func (i *Initiator) waitAndLoadNVMeDeviceInfoWithoutLock(transportAddress, transportServiceID string) (err error) { - for r := 0; r < maxNumWaitDeviceRetries; r++ { - //err = i.loadNVMeDeviceInfoWithoutLock(i.TransportAddress, i.TransportServiceID, i.SubsystemNQN) + for r := 0; r < maxWaitDeviceRetries; r++ { err = i.loadNVMeDeviceInfoWithoutLock(transportAddress, transportServiceID, i.SubsystemNQN) if err == nil { break @@ -353,8 +358,8 @@ func (i *Initiator) waitAndLoadNVMeDeviceInfoWithoutLock(transportAddress, trans return err } -func (i *Initiator) connectTarget(transportAddress, transportServiceID string) { - for r := 0; r < maxNumRetries; r++ { +func (i *Initiator) connectTarget(transportAddress, transportServiceID string, maxRetries int, retryInterval time.Duration) { + for r := 0; r < maxRetries; r++ { // Rerun this API for a discovered target should be fine subsystemNQN, err := DiscoverTarget(transportAddress, transportServiceID, i.executor) if err != nil { @@ -376,6 +381,7 @@ func (i *Initiator) connectTarget(transportAddress, transportServiceID string) { } } +// Stop stops the NVMe-oF initiator func (i *Initiator) Stop(dmDeviceAndEndpointCleanupRequired, deferDmDeviceCleanup, returnErrorForBusyDevice bool) (bool, error) { if i.hostProc != "" { lock, err := i.newLock() @@ -414,7 +420,7 @@ func (i *Initiator) stopWithoutLock(dmDeviceAndEndpointCleanupRequired, deferDmD err = DisconnectTarget(i.SubsystemNQN, i.executor) if err != nil { - return dmDeviceIsBusy, errors.Wrapf(err, "failed to disconnect target for NVMe initiator %s", i.Name) + return dmDeviceIsBusy, errors.Wrapf(err, "failed to disconnect target for NVMe-oF initiator %s", i.Name) } i.ControllerName = "" @@ -425,22 +431,27 @@ func (i *Initiator) stopWithoutLock(dmDeviceAndEndpointCleanupRequired, deferDmD return dmDeviceIsBusy, nil } +// GetControllerName returns the controller name func (i *Initiator) GetControllerName() string { return i.ControllerName } +// GetNamespaceName returns the namespace name func (i *Initiator) GetNamespaceName() string { return i.NamespaceName } +// GetTransportAddress returns the transport address func (i *Initiator) GetTransportAddress() string { return i.TransportAddress } +// GetTransportServiceID returns the transport service ID func (i *Initiator) GetTransportServiceID() string { return i.TransportServiceID } +// GetEndpoint returns the endpoint func (i *Initiator) GetEndpoint() string { if i.isUp { return i.Endpoint @@ -448,6 +459,7 @@ func (i *Initiator) GetEndpoint() string { return "" } +// GetDevice returns the device information func (i *Initiator) LoadNVMeDeviceInfo(transportAddress, transportServiceID, subsystemNQN string) (err error) { if i.hostProc != "" { lock, err := i.newLock() @@ -466,13 +478,13 @@ func (i *Initiator) loadNVMeDeviceInfoWithoutLock(transportAddress, transportSer return err } if len(nvmeDevices) != 1 { - return fmt.Errorf("found zero or multiple devices NVMe initiator %s", i.Name) + return fmt.Errorf("found zero or multiple devices NVMe-oF initiator %s", i.Name) } if len(nvmeDevices[0].Namespaces) != 1 { - return fmt.Errorf("found zero or multiple devices for NVMe initiator %s", i.Name) + return fmt.Errorf("found zero or multiple devices for NVMe-oF initiator %s", i.Name) } if i.ControllerName != "" && i.ControllerName != nvmeDevices[0].Controllers[0].Controller { - return fmt.Errorf("found mismatching between the detected controller name %s and the recorded value %s for NVMe initiator %s", nvmeDevices[0].Controllers[0].Controller, i.ControllerName, i.Name) + return fmt.Errorf("found mismatching between the detected controller name %s and the recorded value %s for NVMe-oF initiator %s", nvmeDevices[0].Controllers[0].Controller, i.ControllerName, i.Name) } i.ControllerName = nvmeDevices[0].Controllers[0].Controller @@ -488,7 +500,7 @@ func (i *Initiator) loadNVMeDeviceInfoWithoutLock(transportAddress, transportSer devPath := filepath.Join("/dev", i.NamespaceName) dev, err := util.DetectDevice(devPath, i.executor) if err != nil { - return errors.Wrapf(err, "cannot find the device for NVMe initiator %s with namespace name %s", i.Name, i.NamespaceName) + return errors.Wrapf(err, "cannot find the device for NVMe-oF initiator %s with namespace name %s", i.Name, i.NamespaceName) } i.dev = &util.LonghornBlockDevice{ @@ -514,6 +526,7 @@ func (i *Initiator) findDependentDevices(devName string) ([]string, error) { return depDevices, nil } +// LoadEndpoint loads the endpoint func (i *Initiator) LoadEndpoint(dmDeviceIsBusy bool) error { dev, err := util.DetectDevice(i.Endpoint, i.executor) if err != nil { @@ -529,7 +542,7 @@ func (i *Initiator) LoadEndpoint(dmDeviceIsBusy bool) error { i.logger.Debugf("Skipping endpoint %v loading due to device busy", i.Endpoint) } else { if i.NamespaceName != "" && !i.isNamespaceExist(depDevices) { - return fmt.Errorf("detected device %s name mismatching from endpoint %v for NVMe initiator %s", dev.Name, i.Endpoint, i.Name) + return fmt.Errorf("detected device %s name mismatching from endpoint %v for NVMe-oF initiator %s", dev.Name, i.Endpoint, i.Name) } }