Skip to content

Commit

Permalink
fix: multiple fixes for LVM activation
Browse files Browse the repository at this point in the history
Two fixes were in pkgs/lvm2:

* siderolabs/pkgs#1041
* siderolabs/pkgs#1042

Other fixes in this PR:

* adjust the controller a bit for some interactions
* make Rook test use more complicated, encrypted setup which uses LVM
* adjust LVM test to handle a case when there's more than one worker

Signed-off-by: Andrey Smirnov <[email protected]>
  • Loading branch information
smira committed Oct 3, 2024
1 parent 74c12c2 commit 7486157
Show file tree
Hide file tree
Showing 6 changed files with 56 additions and 42 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ ARTIFACTS := _out
TOOLS ?= ghcr.io/siderolabs/tools:v1.9.0-alpha.0-2-g9f2189b

PKGS_PREFIX ?= ghcr.io/siderolabs
PKGS ?= v1.9.0-alpha.0-11-g232a153
PKGS ?= v1.9.0-alpha.0-13-ge2a561f
EXTRAS ?= v1.9.0-alpha.0

KRES_IMAGE ?= ghcr.io/siderolabs/kres:latest
Expand Down
59 changes: 23 additions & 36 deletions internal/app/machined/pkg/controllers/block/lvm.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,11 @@ import (

"github.com/cosi-project/runtime/pkg/controller"
"github.com/cosi-project/runtime/pkg/safe"
"github.com/cosi-project/runtime/pkg/state"
"github.com/hashicorp/go-multierror"
"github.com/siderolabs/gen/optional"
"github.com/siderolabs/go-cmd/pkg/cmd"
"go.uber.org/zap"

"github.com/siderolabs/talos/pkg/machinery/constants"
"github.com/siderolabs/talos/pkg/machinery/resources/block"
runtimeres "github.com/siderolabs/talos/pkg/machinery/resources/runtime"
"github.com/siderolabs/talos/pkg/machinery/resources/v1alpha1"
)

// LVMActivationController activates LVM volumes when they are discovered by the block.DiscoveryController.
Expand All @@ -37,12 +32,6 @@ func (ctrl *LVMActivationController) Name() string {
// Inputs implements controller.Controller interface.
func (ctrl *LVMActivationController) Inputs() []controller.Input {
return []controller.Input{
{
Namespace: v1alpha1.NamespaceName,
Type: runtimeres.MountStatusType,
ID: optional.Some(constants.EphemeralPartitionLabel),
Kind: controller.InputWeak,
},
{
Namespace: block.NamespaceName,
Type: block.DiscoveredVolumeType,
Expand Down Expand Up @@ -75,15 +64,6 @@ func (ctrl *LVMActivationController) Run(ctx context.Context, r controller.Runti
case <-r.EventCh():
}

if _, err := safe.ReaderGetByID[*runtimeres.MountStatus](ctx, r, constants.EphemeralPartitionLabel); err != nil {
if state.IsNotFoundError(err) {
// wait for the mount status to be available
continue
}

return fmt.Errorf("failed to get mount status: %w", err)
}

discoveredVolumes, err := safe.ReaderListAll[*block.DiscoveredVolume](ctx, r)
if err != nil {
return fmt.Errorf("failed to list discovered volumes: %w", err)
Expand All @@ -92,17 +72,19 @@ func (ctrl *LVMActivationController) Run(ctx context.Context, r controller.Runti
var multiErr error

for iterator := discoveredVolumes.Iterator(); iterator.Next(); {
if _, ok := ctrl.seenVolumes[iterator.Value().Metadata().ID()]; ok {
continue
}

if iterator.Value().TypedSpec().Name != "lvm2-pv" {
// if the volume is not an LVM volume the moment we saw it, we can skip it
// we need to activate the volumes only on reboot, not when they are first formatted
ctrl.seenVolumes[iterator.Value().Metadata().ID()] = struct{}{}

continue
}

logger.Info("checking device for LVM volume activation", zap.String("device", iterator.Value().TypedSpec().DevPath))
if _, ok := ctrl.seenVolumes[iterator.Value().Metadata().ID()]; ok {
continue
}

logger.Debug("checking device for LVM volume activation", zap.String("device", iterator.Value().TypedSpec().DevPath))

vgName, err := ctrl.checkVGNeedsActivation(ctx, iterator.Value().TypedSpec().DevPath)
if err != nil {
Expand All @@ -112,8 +94,6 @@ func (ctrl *LVMActivationController) Run(ctx context.Context, r controller.Runti
}

if vgName == "" {
ctrl.seenVolumes[iterator.Value().Metadata().ID()] = struct{}{}

continue
}

Expand All @@ -132,10 +112,10 @@ func (ctrl *LVMActivationController) Run(ctx context.Context, r controller.Runti
"event",
vgName,
); err != nil {
return fmt.Errorf("failed to activate LVM volume %s: %w", vgName, err)
multiErr = multierror.Append(multiErr, fmt.Errorf("failed to activate LVM volume %s: %w", vgName, err))
} else {
ctrl.activatedVGs[vgName] = struct{}{}
}

ctrl.activatedVGs[vgName] = struct{}{}
}

if multiErr != nil {
Expand All @@ -153,7 +133,6 @@ func (ctrl *LVMActivationController) checkVGNeedsActivation(ctx context.Context,
"/sbin/lvm",
"pvscan",
"--cache",
"--verbose",
"--listvg",
"--checkcomplete",
"--vgonline",
Expand All @@ -166,11 +145,19 @@ func (ctrl *LVMActivationController) checkVGNeedsActivation(ctx context.Context,
return "", fmt.Errorf("failed to check if LVM volume backed by device %s needs activation: %w", devicePath, err)
}

if strings.HasPrefix(stdOut, "LVM_VG_NAME_INCOMPLETE") {
return "", nil
}
// parse the key-value pairs from the udev output
for _, line := range strings.Split(stdOut, "\n") {
key, value, ok := strings.Cut(line, "=")
if !ok {
continue
}

vgName := strings.TrimSuffix(strings.TrimPrefix(strings.TrimSuffix(stdOut, "\n"), "LVM_VG_NAME_COMPLETE='"), "'")
value = strings.Trim(value, "'\"")

if key == "LVM_VG_NAME_COMPLETE" {
return value, nil
}
}

return vgName, nil
return "", nil
}
23 changes: 19 additions & 4 deletions internal/integration/api/volumes.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,13 @@ func (suite *VolumesSuite) TestLVMActivation() {

node := suite.RandomDiscoveredNodeInternalIP(machine.TypeWorker)

k8sNode, err := suite.GetK8sNodeByInternalIP(suite.ctx, node)
suite.Require().NoError(err)

nodeName := k8sNode.Name

suite.T().Logf("creating LVM volume group on node %s/%s", node, nodeName)

userDisks, err := suite.UserDisks(suite.ctx, node)
suite.Require().NoError(err)

Expand All @@ -201,6 +208,8 @@ func (suite *VolumesSuite) TestLVMActivation() {
podDef, err := suite.NewPrivilegedPod("pv-create")
suite.Require().NoError(err)

podDef = podDef.WithNodeName(nodeName)

suite.Require().NoError(podDef.Create(suite.ctx, 5*time.Minute))

defer podDef.Delete(suite.ctx) //nolint:errcheck
Expand Down Expand Up @@ -230,9 +239,13 @@ func (suite *VolumesSuite) TestLVMActivation() {
suite.Require().Contains(stdout, "Logical volume \"lv1\" created.")

defer func() {
suite.T().Logf("removing LVM volumes %s/%s", node, nodeName)

deletePodDef, err := suite.NewPrivilegedPod("pv-destroy")
suite.Require().NoError(err)

deletePodDef = deletePodDef.WithNodeName(nodeName)

suite.Require().NoError(deletePodDef.Create(suite.ctx, 5*time.Minute))

defer deletePodDef.Delete(suite.ctx) //nolint:errcheck
Expand All @@ -252,21 +265,23 @@ func (suite *VolumesSuite) TestLVMActivation() {
}
}()

suite.T().Logf("rebooting node %s/%s", node, nodeName)

// now we want to reboot the node and make sure the array is still mounted
suite.AssertRebooted(
suite.ctx, node, func(nodeCtx context.Context) error {
return base.IgnoreGRPCUnavailable(suite.Client.Reboot(nodeCtx))
}, 5*time.Minute,
)

suite.T().Logf("verifying LVM activation %s/%s", node, nodeName)

suite.Require().Eventually(func() bool {
return suite.lvmVolumeExists()
return suite.lvmVolumeExists(node)
}, 5*time.Second, 1*time.Second, "LVM volume group was not activated after reboot")
}

func (suite *VolumesSuite) lvmVolumeExists() bool {
node := suite.RandomDiscoveredNodeInternalIP(machine.TypeWorker)

func (suite *VolumesSuite) lvmVolumeExists(node string) bool {
ctx := client.WithNode(suite.ctx, node)

disks, err := safe.StateListAll[*block.Disk](ctx, suite.Client.COSI)
Expand Down
7 changes: 7 additions & 0 deletions internal/integration/base/k8s.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ func (k8sSuite *K8sSuite) WaitForEventExists(ctx context.Context, ns string, che

type podInfo interface {
Name() string
WithNodeName(nodeName string) podInfo
Create(ctx context.Context, waitTimeout time.Duration) error
Delete(ctx context.Context) error
Exec(ctx context.Context, command string) (string, string, error)
Expand All @@ -217,6 +218,12 @@ func (p *pod) Name() string {
return p.name
}

func (p *pod) WithNodeName(nodeName string) podInfo {
p.pod.Spec.NodeName = nodeName

return p
}

func (p *pod) Create(ctx context.Context, waitTimeout time.Duration) error {
_, err := p.suite.Clientset.CoreV1().Pods(p.namespace).Create(ctx, p.pod, metav1.CreateOptions{})
if err != nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,8 @@ cephClusterSpec:
requests:
cpu: "500m"
memory: "1Gi"
storage:
useAllNodes: true
useAllDevices: true
config:
encryptedDevice: "true"
2 changes: 1 addition & 1 deletion pkg/machinery/gendata/data/pkgs
Original file line number Diff line number Diff line change
@@ -1 +1 @@
v1.9.0-alpha.0-11-g232a153
v1.9.0-alpha.0-13-ge2a561f

0 comments on commit 7486157

Please sign in to comment.