diff --git a/lxd/storage/drivers/driver_powerflex.go b/lxd/storage/drivers/driver_powerflex.go index 6552809d76f9..92012469f847 100644 --- a/lxd/storage/drivers/driver_powerflex.go +++ b/lxd/storage/drivers/driver_powerflex.go @@ -8,6 +8,7 @@ import ( "github.com/canonical/lxd/lxd/migration" "github.com/canonical/lxd/lxd/operations" + "github.com/canonical/lxd/lxd/storage/connectors" "github.com/canonical/lxd/shared" "github.com/canonical/lxd/shared/api" "github.com/canonical/lxd/shared/validate" @@ -19,10 +20,10 @@ const powerFlexDefaultUser = "admin" // powerFlexDefaultSize represents the default PowerFlex volume size. const powerFlexDefaultSize = "8GiB" -const ( - powerFlexModeNVMe = "nvme" - powerFlexModeSDC = "sdc" -) +var powerflexSupportedConnectors = []string{ + connectors.TypeNVME, + connectors.TypeSDC, +} var powerFlexLoaded bool var powerFlexVersion string @@ -30,6 +31,10 @@ var powerFlexVersion string type powerflex struct { common + // Holds the low level connector for the PowerFlex driver. + // Use powerflex.connector() to retrieve the initialized connector. + storageConnector connectors.Connector + // Holds the low level HTTP client for the PowerFlex API. // Use powerflex.client() to retrieve the client struct. httpClient *powerFlexClient @@ -46,28 +51,29 @@ func (d *powerflex) load() error { return nil } - // Detect and record the version. - // The NVMe CLI is shipped with the snap. - out, err := shared.RunCommand("nvme", "version") - if err != nil { - return fmt.Errorf("Failed to get nvme-cli version: %w", err) - } - - fields := strings.Split(strings.TrimSpace(out), " ") - if strings.HasPrefix(out, "nvme version ") && len(fields) > 2 { - powerFlexVersion = fmt.Sprintf("%s (nvme-cli)", fields[2]) - } + versions := connectors.GetSupportedVersions(powerflexSupportedConnectors) + powerFlexVersion = strings.Join(versions, " / ") + powerFlexLoaded = true - // Load the NVMe/TCP kernel modules. + // Load the kernel modules of the respective connector. // Ignore if the modules cannot be loaded. - // Support for the NVMe/TCP mode is checked during pool creation. + // Support for a specific connector is checked during pool creation. // When a LXD host gets rebooted this ensures that the kernel modules are still loaded. - _ = d.loadNVMeModules() + _ = d.connector().LoadModules() - powerFlexLoaded = true return nil } +// connector retrieves an initialized storage connector based on the configured +// PowerFlex mode. The connector is cached in the driver struct. +func (d *powerflex) connector() connectors.Connector { + if d.storageConnector == nil { + d.storageConnector = connectors.NewConnector(d.config["powerflex.mode"], d.state.ServerUUID) + } + + return d.storageConnector +} + // isRemote returns true indicating this driver uses remote storage. func (d *powerflex) isRemote() bool { return true @@ -103,10 +109,13 @@ func (d *powerflex) FillConfig() error { // First try if the NVMe/TCP kernel modules can be loaed. // Second try if the SDC kernel module is setup. if d.config["powerflex.mode"] == "" { - if d.loadNVMeModules() { - d.config["powerflex.mode"] = powerFlexModeNVMe + // Create temporary connector to check if NVMe/TCP kernel modules can be loaded. + nvmeConnector := connectors.NewConnector(connectors.TypeNVME, "") + + if nvmeConnector.LoadModules() { + d.config["powerflex.mode"] = connectors.TypeNVME } else if goscaleio.DrvCfgIsSDCInstalled() { - d.config["powerflex.mode"] = powerFlexModeSDC + d.config["powerflex.mode"] = connectors.TypeSDC } } @@ -140,7 +149,7 @@ func (d *powerflex) Create() error { client := d.client() switch d.config["powerflex.mode"] { - case powerFlexModeNVMe: + case connectors.TypeNVME: // Discover one of the storage pools SDT services. if d.config["powerflex.sdt"] == "" { pool, err := d.resolvePool() @@ -164,7 +173,7 @@ func (d *powerflex) Create() error { d.config["powerflex.sdt"] = relations[0].IPList[0].IP } - case powerFlexModeSDC: + case connectors.TypeSDC: if d.config["powerflex.sdt"] != "" { return fmt.Errorf("The powerflex.sdt config key is specific to the NVMe/TCP mode") } @@ -296,8 +305,8 @@ func (d *powerflex) Validate(config map[string]string) error { // on the other cluster members too. This can be done here since Validate // gets executed on every cluster member when receiving the cluster // notification to finally create the pool. - if d.config["powerflex.mode"] == powerFlexModeNVMe && !d.loadNVMeModules() { - return fmt.Errorf("NVMe/TCP is not supported") + if newMode != "" && !connectors.NewConnector(newMode, "").LoadModules() { + return fmt.Errorf("PowerFlex mode %q is not supported due to missing kernel modules", newMode) } return nil diff --git a/lxd/storage/drivers/driver_powerflex_utils.go b/lxd/storage/drivers/driver_powerflex_utils.go index 96b3785f60e6..dd01b626c897 100644 --- a/lxd/storage/drivers/driver_powerflex_utils.go +++ b/lxd/storage/drivers/driver_powerflex_utils.go @@ -2,27 +2,20 @@ package drivers import ( "bytes" - "context" "crypto/tls" "encoding/base64" "encoding/json" - "errors" "fmt" "io" "net/http" - "os" - "path/filepath" "strconv" "strings" - "time" "github.com/dell/goscaleio" "github.com/google/uuid" - "golang.org/x/sys/unix" "github.com/canonical/lxd/lxd/locking" - "github.com/canonical/lxd/lxd/resources" - "github.com/canonical/lxd/lxd/util" + "github.com/canonical/lxd/lxd/storage/connectors" "github.com/canonical/lxd/shared" "github.com/canonical/lxd/shared/api" "github.com/canonical/lxd/shared/revert" @@ -747,18 +740,6 @@ func (p *powerFlexClient) getHostVolumeMappings(hostID string) ([]powerFlexVolum return actualResponse, nil } -// loadNVMeModules loads the NVMe/TCP kernel modules. -// Returns true if the modules can be loaded. -func (d *powerflex) loadNVMeModules() bool { - err := util.LoadModule("nvme_fabrics") - if err != nil { - return false - } - - err = util.LoadModule("nvme_tcp") - return err == nil -} - // client returns the drivers PowerFlex client. // A new client gets created if it not yet exists. func (d *powerflex) client() *powerFlexClient { @@ -769,13 +750,6 @@ func (d *powerflex) client() *powerFlexClient { return d.httpClient } -// getHostNQN returns the unique NVMe nqn for the current host. -// A custom one is generated from the servers UUID since getting the nqn from /etc/nvme/hostnqn -// requires the nvme-cli package to be installed on the host. -func (d *powerflex) getHostNQN() string { - return fmt.Sprintf("nqn.2014-08.org.nvmexpress:uuid:%s", d.state.ServerUUID) -} - // getHostGUID returns the SDC GUID. // The GUID is unique for a single host. // Cache the GUID as it never changes for a single host. @@ -810,13 +784,17 @@ func (d *powerflex) getVolumeType(vol Volume) powerFlexVolumeType { // createNVMeHost creates this NVMe host in PowerFlex. func (d *powerflex) createNVMeHost() (string, revert.Hook, error) { var hostID string - nqn := d.getHostNQN() + + targetNQN, err := d.connector().QualifiedName() + if err != nil { + return "", nil, err + } revert := revert.New() defer revert.Fail() client := d.client() - host, err := client.getNVMeHostByNQN(nqn) + host, err := client.getNVMeHostByNQN(targetNQN) if err != nil { if !api.StatusErrorCheck(err, http.StatusNotFound) { return "", nil, err @@ -827,7 +805,7 @@ func (d *powerflex) createNVMeHost() (string, revert.Hook, error) { return "", nil, err } - hostID, err = client.createHost(hostname, nqn) + hostID, err = client.createHost(hostname, targetNQN) if err != nil { return "", nil, err } @@ -847,8 +825,13 @@ func (d *powerflex) createNVMeHost() (string, revert.Hook, error) { // deleteNVMeHost deletes this NVMe host in PowerFlex. func (d *powerflex) deleteNVMeHost() error { client := d.client() - nqn := d.getHostNQN() - host, err := client.getNVMeHostByNQN(nqn) + + targetNQN, err := d.connector().QualifiedName() + if err != nil { + return err + } + + host, err := client.getNVMeHostByNQN(targetNQN) if err != nil { // Skip the deletion if the host doesn't exist anymore. if api.StatusErrorCheck(err, http.StatusNotFound) { @@ -869,7 +852,7 @@ func (d *powerflex) mapVolume(vol Volume) (revert.Hook, error) { var hostID string switch d.config["powerflex.mode"] { - case powerFlexModeNVMe: + case connectors.TypeNVME: unlock, err := locking.Lock(d.state.ShutdownCtx, "storage_powerflex_nvme") if err != nil { return nil, err @@ -884,7 +867,7 @@ func (d *powerflex) mapVolume(vol Volume) (revert.Hook, error) { } reverter.Add(cleanup) - case powerFlexModeSDC: + case connectors.TypeSDC: hostGUID, err := d.getHostGUID() if err != nil { return nil, err @@ -931,19 +914,26 @@ func (d *powerflex) mapVolume(vol Volume) (revert.Hook, error) { reverter.Add(func() { _ = client.deleteHostVolumeMapping(hostID, volumeID) }) } - if d.config["powerflex.mode"] == powerFlexModeNVMe { - // Connect to the NVMe/TCP subsystem. - // We have to connect after the first mapping was established. - // PowerFlex does not offer any discovery log entries until a volume gets mapped to the host. - // This action is idempotent. - cleanup, err := d.connectNVMeSubsys() - if err != nil { - return nil, err - } + pool, err := d.resolvePool() + if err != nil { + return nil, err + } - reverter.Add(cleanup) + domain, err := d.client().getProtectionDomain(pool.ProtectionDomainID) + if err != nil { + return nil, err + } + + targetQN := domain.SystemID + targetAddr := d.config["powerflex.sdt"] + + err = d.connector().Connect(d.state.ShutdownCtx, targetAddr, targetQN) + if err != nil { + return nil, err } + reverter.Add(func() { _ = d.connector().Disconnect(targetQN) }) + cleanup := reverter.Clone().Fail reverter.Success() return cleanup, nil @@ -964,44 +954,6 @@ func (d *powerflex) getMappedDevPath(vol Volume, mapVolume bool) (string, revert revert.Add(cleanup) } - powerFlexVolumes := make(map[string]string) - - // discoverFunc has to be called in a loop with a set timeout to ensure - // all the necessary directories and devices can be discovered. - discoverFunc := func(volumeID string, diskPrefix string) error { - var diskPaths []string - - // If there are no other disks on the system by id, the directory might not even be there. - // Returns ENOENT in case the by-id/ directory does not exist. - diskPaths, err := resources.GetDisksByID(diskPrefix) - if err != nil { - return err - } - - for _, diskPath := range diskPaths { - // Skip the disk if it is only a partition of the actual PowerFlex volume. - if strings.Contains(diskPath, "-part") { - continue - } - - // Skip other volume's that don't match the PowerFlex volume's ID. - if !strings.Contains(diskPath, volumeID) { - continue - } - - // The actual device might not already be created. - // Returns ENOENT in case the device does not exist. - devPath, err := filepath.EvalSymlinks(diskPath) - if err != nil { - return err - } - - powerFlexVolumes[volumeID] = devPath - } - - return nil - } - volumeName, err := d.getVolumeName(vol) if err != nil { return "", nil, err @@ -1012,59 +964,30 @@ func (d *powerflex) getMappedDevPath(vol Volume, mapVolume bool) (string, revert return "", nil, err } - timeout := time.Now().Add(5 * time.Second) - // It might take a while to create the local disk. - // Retry until it can be found. - for { - if time.Now().After(timeout) { - return "", nil, fmt.Errorf("Timeout exceeded for PowerFlex volume discovery: %q", volumeName) - } - - var prefix string - switch d.config["powerflex.mode"] { - case powerFlexModeNVMe: - prefix = "nvme-eui." - case powerFlexModeSDC: - prefix = "emc-vol-" - } - - err := discoverFunc(powerFlexVolumeID, prefix) - if err != nil { - // Try again if on of the directories cannot be found. - if errors.Is(err, unix.ENOENT) { - continue - } - - return "", nil, err - } - - // Exit if the volume got discovered. - _, ok := powerFlexVolumes[powerFlexVolumeID] - if ok { - break - } - - // Exit if the volume wasn't explicitly mapped. - // Doing a retry would run into the timeout when the device isn't mapped. - if !mapVolume { - break - } - - time.Sleep(10 * time.Millisecond) + var prefix string + switch d.config["powerflex.mode"] { + case connectors.TypeNVME: + prefix = "nvme-eui." + case connectors.TypeSDC: + prefix = "emc-vol-" } - if len(powerFlexVolumes) == 0 { - return "", nil, fmt.Errorf("Failed to discover any PowerFlex volume") + var devicePath string + if mapVolume { + // Wait for the device path to appear as the volume has been just mapped to the host. + devicePath, err = connectors.WaitDiskDevicePath(d.state.ShutdownCtx, prefix, powerFlexVolumeID) + } else { + // Get the the device path without waiting. + devicePath, err = connectors.GetDiskDevicePath(prefix, powerFlexVolumeID) } - powerFlexVolumePath, ok := powerFlexVolumes[powerFlexVolumeID] - if !ok { - return "", nil, fmt.Errorf("PowerFlex volume not found: %q", volumeName) + if err != nil { + return "", nil, fmt.Errorf("Failed to locate device for volume %q: %w", vol.name, err) } cleanup := revert.Clone().Fail revert.Success() - return powerFlexVolumePath, cleanup, nil + return devicePath, cleanup, nil } // unmapVolume unmaps the given volume from this host. @@ -1082,9 +1005,13 @@ func (d *powerflex) unmapVolume(vol Volume) error { var host *powerFlexSDC switch d.config["powerflex.mode"] { - case powerFlexModeNVMe: - nqn := d.getHostNQN() - host, err = client.getNVMeHostByNQN(nqn) + case connectors.TypeNVME: + hostNQN, err := d.connector().QualifiedName() + if err != nil { + return err + } + + host, err = client.getNVMeHostByNQN(hostNQN) if err != nil { return err } @@ -1095,7 +1022,7 @@ func (d *powerflex) unmapVolume(vol Volume) error { } defer unlock() - case powerFlexModeSDC: + case connectors.TypeSDC: hostGUID, err := d.getHostGUID() if err != nil { return err @@ -1114,28 +1041,35 @@ func (d *powerflex) unmapVolume(vol Volume) error { // Wait until the volume has disappeared. volumePath, _, _ := d.getMappedDevPath(vol, false) - if volumePath != "" { - ctx, cancel := context.WithTimeout(d.state.ShutdownCtx, 10*time.Second) - defer cancel() - - if !waitGone(ctx, volumePath) { - return fmt.Errorf("Timeout whilst waiting for PowerFlex volume to disappear: %q", vol.name) - } + if volumePath != "" && !connectors.WaitDiskDeviceGone(d.state.ShutdownCtx, volumePath, 10) { + return fmt.Errorf("Timeout whilst waiting for PowerFlex volume to disappear: %q", vol.name) } // In case of SDC the driver doesn't manage the underlying connection to PowerFlex. // Therefore if this was the last volume being unmapped from this system // LXD will not try to cleanup the connection. - if d.config["powerflex.mode"] == powerFlexModeNVMe { + if d.config["powerflex.mode"] == connectors.TypeNVME { mappings, err := client.getHostVolumeMappings(host.ID) if err != nil { return err } if len(mappings) == 0 { + pool, err := d.resolvePool() + if err != nil { + return err + } + + domain, err := d.client().getProtectionDomain(pool.ProtectionDomainID) + if err != nil { + return err + } + + targetQN := domain.SystemID + // Disconnect from the NVMe subsystem. // Do this first before removing the host from PowerFlex. - err := d.disconnectNVMeSubsys() + err = d.connector().Disconnect(targetQN) if err != nil { return err } @@ -1152,77 +1086,6 @@ func (d *powerflex) unmapVolume(vol Volume) error { return nil } -// connectNVMeSubsys connects this host to the NVMe subsystem configured in the storage pool. -// The connection can only be established after the first volume is mapped to this host. -// The operation is idempotent and returns nil if already connected to the subsystem. -func (d *powerflex) connectNVMeSubsys() (revert.Hook, error) { - basePath := "/sys/devices/virtual/nvme-subsystem" - - // Retrieve list of existing NVMe subsystems on this host. - directories, err := os.ReadDir(basePath) - if err != nil { - return nil, fmt.Errorf("Failed getting a list of NVMe subsystems: %w", err) - } - - revert := revert.New() - defer revert.Fail() - - pool, err := d.resolvePool() - if err != nil { - return nil, err - } - - domain, err := d.client().getProtectionDomain(pool.ProtectionDomainID) - if err != nil { - return nil, err - } - - for _, directory := range directories { - subsystemName := directory.Name() - - // Get the subsystem's NQN. - nqnBytes, err := os.ReadFile(filepath.Join(basePath, subsystemName, "subsysnqn")) - if err != nil { - return nil, fmt.Errorf("Failed getting the NQN of subystem %q: %w", subsystemName, err) - } - - if strings.Contains(string(nqnBytes), domain.SystemID) { - cleanup := revert.Clone().Fail - revert.Success() - - // Already connected to the NVMe subsystem for the respective PowerFlex system. - return cleanup, nil - } - } - - nqn := d.getHostNQN() - serverUUID := d.state.ServerUUID - _, stderr, err := shared.RunCommandSplit(d.state.ShutdownCtx, nil, nil, "nvme", "connect-all", "-t", "tcp", "-a", d.config["powerflex.sdt"], "-q", nqn, "-I", serverUUID) - if err != nil { - return nil, fmt.Errorf("Failed nvme connect-all: %w", err) - } - - if stderr != "" { - return nil, fmt.Errorf("Failed connecting to PowerFlex NVMe/TCP subsystem: %s", stderr) - } - - revert.Add(func() { _ = d.disconnectNVMeSubsys() }) - - cleanup := revert.Clone().Fail - revert.Success() - return cleanup, nil -} - -// disconnectNVMeSubsys disconnects this host from the NVMe subsystem. -func (d *powerflex) disconnectNVMeSubsys() error { - _, err := shared.RunCommand("nvme", "disconnect-all") - if err != nil { - return fmt.Errorf("Failed disconnecting from PowerFlex NVMe/TCP subsystem: %w", err) - } - - return nil -} - // resolvePool looks up the selected storage pool. // If only the pool is provided, it's expected to be the ID of the pool. // In case both pool and domain are set, the pool will get looked up