diff --git a/pkg/spdk/engine.go b/pkg/spdk/engine.go index 6c79c7ff..862b979a 100644 --- a/pkg/spdk/engine.go +++ b/pkg/spdk/engine.go @@ -10,20 +10,22 @@ import ( "github.com/pkg/errors" "github.com/sirupsen/logrus" + grpccodes "google.golang.org/grpc/codes" grpcstatus "google.golang.org/grpc/status" + "github.com/longhorn/go-spdk-helper/pkg/jsonrpc" + "github.com/longhorn/go-spdk-helper/pkg/nvme" + "github.com/longhorn/types/pkg/generated/spdkrpc" + commonbitmap "github.com/longhorn/go-common-libs/bitmap" commonnet "github.com/longhorn/go-common-libs/net" commontypes "github.com/longhorn/go-common-libs/types" commonutils "github.com/longhorn/go-common-libs/utils" - "github.com/longhorn/go-spdk-helper/pkg/jsonrpc" - "github.com/longhorn/go-spdk-helper/pkg/nvme" spdkclient "github.com/longhorn/go-spdk-helper/pkg/spdk/client" spdktypes "github.com/longhorn/go-spdk-helper/pkg/spdk/types" helpertypes "github.com/longhorn/go-spdk-helper/pkg/types" helperutil "github.com/longhorn/go-spdk-helper/pkg/util" - "github.com/longhorn/types/pkg/generated/spdkrpc" "github.com/longhorn/longhorn-spdk-engine/pkg/api" "github.com/longhorn/longhorn-spdk-engine/pkg/client" @@ -47,6 +49,9 @@ type Engine struct { Nqn string Nguid string + ctrlrLossTimeout int + fastIOFailTimeoutSec int + ReplicaStatusMap map[string]*EngineReplicaStatus initiator *nvme.Initiator @@ -92,6 +97,9 @@ func NewEngine(engineName, volumeName, frontend string, specSize uint64, engineU Frontend: frontend, SpecSize: specSize, + ctrlrLossTimeout: helpertypes.DefaultReplicaCtrlrLossTimeoutSec, + fastIOFailTimeoutSec: helpertypes.DefaultReplicaFastIOFailTimeoutSec, + ReplicaStatusMap: map[string]*EngineReplicaStatus{}, State: types.InstanceStatePending, @@ -207,7 +215,7 @@ func (e *Engine) Create(spdkClient *spdkclient.Client, replicaAddressMap map[str Address: replicaAddr, } - bdevName, err := connectNVMfBdev(spdkClient, replicaName, replicaAddr) + bdevName, err := connectNVMfBdev(spdkClient, replicaName, replicaAddr, e.ctrlrLossTimeout, e.fastIOFailTimeoutSec) if err != nil { e.log.WithError(err).Warnf("Failed to get bdev from replica %s with address %s during creation, will mark the mode to ERR and continue", replicaName, replicaAddr) e.ReplicaStatusMap[replicaName].Mode = types.ModeERR @@ -1078,7 +1086,7 @@ func (e *Engine) ReplicaAdd(spdkClient *spdkclient.Client, dstReplicaName, dstRe } // Add rebuilding replica head bdev to the base bdev list of the RAID bdev - dstHeadLvolBdevName, err := connectNVMfBdev(spdkClient, dstReplicaName, dstHeadLvolAddress) + dstHeadLvolBdevName, err := connectNVMfBdev(spdkClient, dstReplicaName, dstHeadLvolAddress, e.ctrlrLossTimeout, e.fastIOFailTimeoutSec) if err != nil { return err } @@ -1648,7 +1656,7 @@ func (e *Engine) replicaSnapshotOperation(spdkClient *spdkclient.Client, replica if err := replicaClient.ReplicaSnapshotRevert(replicaName, snapshotName); err != nil { return err } - bdevName, err := connectNVMfBdev(spdkClient, replicaName, replicaStatus.Address) + bdevName, err := connectNVMfBdev(spdkClient, replicaName, replicaStatus.Address, e.ctrlrLossTimeout, e.fastIOFailTimeoutSec) if err != nil { return err } @@ -1954,8 +1962,10 @@ func (e *Engine) BackupRestoreFinish(spdkClient *spdkclient.Client) error { return err } e.log.Infof("Attaching replica %s with address %s before finishing restoration", replicaName, replicaAddress) - _, err = spdkClient.BdevNvmeAttachController(replicaName, helpertypes.GetNQN(replicaName), replicaIP, replicaPort, spdktypes.NvmeTransportTypeTCP, spdktypes.NvmeAddressFamilyIPv4, - helpertypes.DefaultCtrlrLossTimeoutSec, helpertypes.DefaultReconnectDelaySec, helpertypes.DefaultFastIOFailTimeoutSec, helpertypes.DefaultMultipath) + _, err = spdkClient.BdevNvmeAttachController(replicaName, helpertypes.GetNQN(replicaName), replicaIP, replicaPort, + spdktypes.NvmeTransportTypeTCP, spdktypes.NvmeAddressFamilyIPv4, + int32(e.ctrlrLossTimeout), helpertypes.DefaultReplicaReconnectDelaySec, int32(e.fastIOFailTimeoutSec), + helpertypes.DefaultMultipath) if err != nil { return err } diff --git a/pkg/spdk/replica.go b/pkg/spdk/replica.go index d257c00b..fc626abd 100644 --- a/pkg/spdk/replica.go +++ b/pkg/spdk/replica.go @@ -15,17 +15,18 @@ import ( grpcstatus "google.golang.org/grpc/status" "github.com/longhorn/backupstore" + "github.com/longhorn/go-spdk-helper/pkg/jsonrpc" + "github.com/longhorn/types/pkg/generated/spdkrpc" + btypes "github.com/longhorn/backupstore/types" butil "github.com/longhorn/backupstore/util" commonbitmap "github.com/longhorn/go-common-libs/bitmap" commonnet "github.com/longhorn/go-common-libs/net" commonutils "github.com/longhorn/go-common-libs/utils" - "github.com/longhorn/go-spdk-helper/pkg/jsonrpc" spdkclient "github.com/longhorn/go-spdk-helper/pkg/spdk/client" spdktypes "github.com/longhorn/go-spdk-helper/pkg/spdk/types" helpertypes "github.com/longhorn/go-spdk-helper/pkg/types" helperutil "github.com/longhorn/go-spdk-helper/pkg/util" - "github.com/longhorn/types/pkg/generated/spdkrpc" "github.com/longhorn/longhorn-spdk-engine/pkg/api" "github.com/longhorn/longhorn-spdk-engine/pkg/types" @@ -1325,7 +1326,8 @@ func (r *Replica) RebuildingSrcAttach(spdkClient *spdkclient.Client, dstReplicaN return nil } - r.rebuildingSrcCache.dstRebuildingBdevName, err = connectNVMfBdev(spdkClient, dstRebuildingLvolName, dstRebuildingLvolAddress) + r.rebuildingSrcCache.dstRebuildingBdevName, err = connectNVMfBdev(spdkClient, dstRebuildingLvolName, dstRebuildingLvolAddress, + helpertypes.DefaultReplicaCtrlrLossTimeoutSec, helpertypes.DefaultReplicaFastIOFailTimeoutSec) if err != nil { return errors.Wrapf(err, "failed to connect rebuilding lvol %s with address %s as a NVMe bdev for replica %s rebuilding src attach", dstRebuildingLvolName, dstRebuildingLvolAddress, r.Name) } @@ -1432,7 +1434,8 @@ func (r *Replica) RebuildingDstStart(spdkClient *spdkclient.Client, srcReplicaNa r.rebuildingDstCache.srcReplicaAddress = srcReplicaAddress externalSnapshotLvolName := GetReplicaSnapshotLvolName(srcReplicaName, externalSnapshotName) - externalSnapshotBdevName, err := connectNVMfBdev(spdkClient, externalSnapshotLvolName, externalSnapshotAddress) + externalSnapshotBdevName, err := connectNVMfBdev(spdkClient, externalSnapshotLvolName, externalSnapshotAddress, + helpertypes.DefaultReplicaCtrlrLossTimeoutSec, helpertypes.DefaultReplicaFastIOFailTimeoutSec) if err != nil { return "", errors.Wrapf(err, "failed to connect the external src snapshot lvol %s with address %s as a NVMf bdev for dst replica %v rebuilding start", externalSnapshotLvolName, externalSnapshotAddress, r.Name) } diff --git a/pkg/spdk/server.go b/pkg/spdk/server.go index 24222192..21e613d9 100644 --- a/pkg/spdk/server.go +++ b/pkg/spdk/server.go @@ -65,12 +65,9 @@ func NewServer(ctx context.Context, portStart, portEnd int32) (*Server, error) { } if _, err = cli.BdevNvmeSetOptions( - helpertypes.DefaultCtrlrLossTimeoutSec, - helpertypes.DefaultReconnectDelaySec, - helpertypes.DefaultFastIOFailTimeoutSec, helpertypes.DefaultTransportAckTimeout, helpertypes.DefaultKeepAliveTimeoutMs); err != nil { - return nil, errors.Wrap(err, "failed to set nvme options") + return nil, errors.Wrap(err, "failed to set NVMe options") } broadcasters := map[types.InstanceType]*broadcaster.Broadcaster{} diff --git a/pkg/spdk/util.go b/pkg/spdk/util.go index 0aabbe00..987f8475 100644 --- a/pkg/spdk/util.go +++ b/pkg/spdk/util.go @@ -77,7 +77,7 @@ func splitHostPort(address string) (string, int32, error) { // connectNVMfBdev connects to the NVMe-oF target, which is exposed by a remote lvol bdev. // controllerName is typically the lvol name, and address is the IP:port of the NVMe-oF target. -func connectNVMfBdev(spdkClient *spdkclient.Client, controllerName, address string) (bdevName string, err error) { +func connectNVMfBdev(spdkClient *spdkclient.Client, controllerName, address string, ctrlrLossTimeout, fastIOFailTimeoutSec int) (bdevName string, err error) { if controllerName == "" || address == "" { return "", fmt.Errorf("controllerName or address is empty") } @@ -89,7 +89,7 @@ func connectNVMfBdev(spdkClient *spdkclient.Client, controllerName, address stri nvmeBdevNameList, err := spdkClient.BdevNvmeAttachController(controllerName, helpertypes.GetNQN(controllerName), ip, port, spdktypes.NvmeTransportTypeTCP, spdktypes.NvmeAddressFamilyIPv4, - helpertypes.DefaultCtrlrLossTimeoutSec, helpertypes.DefaultReconnectDelaySec, helpertypes.DefaultFastIOFailTimeoutSec, + int32(ctrlrLossTimeout), helpertypes.DefaultReplicaReconnectDelaySec, int32(fastIOFailTimeoutSec), helpertypes.DefaultMultipath) if err != nil { return "", err