From c7485af3c70adf84a87d43e8192b0699ba4e82c8 Mon Sep 17 00:00:00 2001
From: Daniel Henkel
Date: Wed, 10 Aug 2022 11:43:49 +0200
Subject: [PATCH 1/5] [local] Support topolvm/openebs storage for scaling decisions

---
 .../processors/datadog/common/common.go       | 49 +++++++++--
 .../processors/datadog/common/common_test.go  | 82 ++++++++++++++++++-
 .../datadog/pods/transform_local_data.go      | 42 +++++++++-
 .../datadog/pods/transform_local_data_test.go | 50 ++++++++++-
 4 files changed, 211 insertions(+), 12 deletions(-)

diff --git a/cluster-autoscaler/processors/datadog/common/common.go b/cluster-autoscaler/processors/datadog/common/common.go
index cf15d10926dd..caed42068f0f 100644
--- a/cluster-autoscaler/processors/datadog/common/common.go
+++ b/cluster-autoscaler/processors/datadog/common/common.go
@@ -19,6 +19,7 @@ package common
 import (
 	apiv1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
+	"k8s.io/klog/v2"
 	schedulerframework "k8s.io/kubernetes/pkg/scheduler/framework"
 )
 
@@ -31,6 +32,17 @@ const (
 	// nodes offering local storage, and currently injected as requests on
 	// Pending pods having a PVC for local-data volumes.
 	DatadogLocalDataResource apiv1.ResourceName = "storageclass/local-data"
+
+	// DatadogLocalStorageProvisionerLabel indicates which technology will be used to provide local storage
+	DatadogLocalStorageProvisionerLabel = "nodegroups.datadoghq.com/local-storage-provisioner"
+	// DatadogInitialStorageCapacityLabel stores the amount of local storage a new node will have initially,
+	// e.g. nodegroups.datadoghq.com/initial-storage-capacity=100Gi
+	DatadogInitialStorageCapacityLabel = "nodegroups.datadoghq.com/initial-storage-capacity"
+
+	// DatadogStorageProvisionerTopoLVM is the storage provisioner label value to use for topolvm implementation
+	DatadogStorageProvisionerTopoLVM = "topolvm"
+	// DatadogStorageProvisionerOpenEBS is the storage provisioner label value to use for openebs implementation
+	DatadogStorageProvisionerOpenEBS = "openebs-lvm"
 )
 
 var (
@@ -38,13 +50,19 @@ var (
 	DatadogLocalDataQuantity = resource.NewQuantity(1, resource.DecimalSI)
 )
 
-// NodeHasLocalData returns true if the node holds a local-storage:true label
+// NodeHasLocalData returns true if the node holds a local-storage:true label or a local-storage-provisioner label
 func NodeHasLocalData(node *apiv1.Node) bool {
 	if node == nil {
 		return false
 	}
-	value, ok := node.GetLabels()[DatadogLocalStorageLabel]
-	return ok && value == "true"
+
+	labels := node.GetLabels()
+
+	_, newStorageOk := labels[DatadogLocalStorageProvisionerLabel]
+	value, ok := labels[DatadogLocalStorageLabel]
+
+	// the node should have either the local-storage or the local-storage-provisioner label
+	return (ok && value == "true") || newStorageOk
 }
 
 // SetNodeLocalDataResource updates a NodeInfo with the DatadogLocalDataResource resource
@@ -61,7 +79,28 @@ func SetNodeLocalDataResource(nodeInfo *schedulerframework.NodeInfo) {
 	if node.Status.Capacity == nil {
 		node.Status.Capacity = apiv1.ResourceList{}
 	}
-	node.Status.Capacity[DatadogLocalDataResource] = DatadogLocalDataQuantity.DeepCopy()
-	node.Status.Allocatable[DatadogLocalDataResource] = DatadogLocalDataQuantity.DeepCopy()
+
+	provisioner, _ := node.Labels[DatadogLocalStorageProvisionerLabel]
+	switch provisioner {
+	case DatadogStorageProvisionerTopoLVM, DatadogStorageProvisionerOpenEBS:
+		capacity, _ := node.Labels[DatadogInitialStorageCapacityLabel]
+		capacityResource, err := resource.ParseQuantity(capacity)
+		if err == nil {
+			node.Status.Capacity[DatadogLocalDataResource] = capacityResource.DeepCopy()
+			node.Status.Allocatable[DatadogLocalDataResource] = capacityResource.DeepCopy()
+		} else {
+			klog.Warningf("failed to attach capacity information (%s) to node (%s): %v", capacity, node.Name, err)
+		}
+	default:
+		// The old local-storage provisioner uses a different label for identification.
+		// So if none of the new options match, check whether the node is using the old system; otherwise print a warning.
+		if val, ok := node.Labels[DatadogLocalStorageLabel]; ok && val == "true" {
+			node.Status.Capacity[DatadogLocalDataResource] = DatadogLocalDataQuantity.DeepCopy()
+			node.Status.Allocatable[DatadogLocalDataResource] = DatadogLocalDataQuantity.DeepCopy()
+		} else {
+			klog.Warningf("this should never be reached. local storage provisioner (%s) is unknown and cannot be used on node: %s", provisioner, node.Name)
+		}
+	}
+
 	nodeInfo.SetNode(node)
 }
diff --git a/cluster-autoscaler/processors/datadog/common/common_test.go b/cluster-autoscaler/processors/datadog/common/common_test.go
index 7d63271bb4aa..51f2333aa124 100644
--- a/cluster-autoscaler/processors/datadog/common/common_test.go
+++ b/cluster-autoscaler/processors/datadog/common/common_test.go
@@ -69,6 +69,15 @@ func TestNodeHasLocalData(t *testing.T) {
 			nil,
 			false,
 		},
+		{
+			"local-storage-provisioner label was set",
+			&corev1.Node{
+				ObjectMeta: metav1.ObjectMeta{
+					Labels: map[string]string{DatadogLocalStorageProvisionerLabel: "topolvm"},
+				},
+			},
+			true,
+		},
 	}
 
 	for _, tt := range tests {
@@ -87,7 +96,11 @@ func TestSetNodeLocalDataResource(t *testing.T) {
 			ObjectMeta: metav1.ObjectMeta{Name: "egg"},
 		},
 	)
-	ni.SetNode(&corev1.Node{})
+	ni.SetNode(&corev1.Node{
+		ObjectMeta: metav1.ObjectMeta{
+			Labels: map[string]string{DatadogLocalStorageLabel: "true"},
+		},
+	})
 
 	SetNodeLocalDataResource(ni)
 
@@ -101,3 +114,70 @@ func TestSetNodeLocalDataResource(t *testing.T) {
 
 	assert.Equal(t, len(ni.Pods), 2)
 }
+
+func TestSetNodeResourceFromTopolvm(t *testing.T) {
+	var hundredGB int64 = 100 * 1024 * 1024 * 1024
+	ni := schedulerframework.NewNodeInfo()
+	ni.SetNode(&corev1.Node{
+		ObjectMeta: metav1.ObjectMeta{
+			Labels: map[string]string{
+				DatadogLocalStorageProvisionerLabel: "topolvm",
+				DatadogInitialStorageCapacityLabel:  "100Gi",
+			},
+		},
+	})
+
+	SetNodeLocalDataResource(ni)
+
+	nodeValue, ok := ni.Node().Status.Allocatable[DatadogLocalDataResource]
+	assert.True(t, ok)
+	assert.Equal(t, nodeValue.String(), resource.NewQuantity(hundredGB, resource.BinarySI).String())
+
+	niValue, ok := ni.Allocatable.ScalarResources[DatadogLocalDataResource]
+	assert.True(t, ok)
+	assert.Equal(t, niValue, hundredGB)
+}
+
+func TestShouldNotSetResourcesWithMissingLabel(t *testing.T) {
+	ni := schedulerframework.NewNodeInfo()
+	ni.SetNode(&corev1.Node{
+		ObjectMeta: metav1.ObjectMeta{
+			Labels: map[string]string{
+				DatadogLocalStorageProvisionerLabel: "topolvm",
+			},
+		},
+	})
+
+	SetNodeLocalDataResource(ni)
+
+	_, ok := ni.Node().Status.Allocatable[DatadogLocalDataResource]
+	assert.False(t, ok)
+	_, ok = ni.Node().Status.Capacity[DatadogLocalDataResource]
+	assert.False(t, ok)
+
+	_, ok = ni.Allocatable.ScalarResources[DatadogLocalDataResource]
+	assert.False(t, ok)
+}
+
+func TestSetNodeResourceFromOpenEBS(t *testing.T) {
+	var hundredGB int64 = 100 * 1024 * 1024 * 1024
+	ni := schedulerframework.NewNodeInfo()
+	ni.SetNode(&corev1.Node{
+		ObjectMeta: metav1.ObjectMeta{
+			Labels: map[string]string{
+				DatadogLocalStorageProvisionerLabel: "openebs-lvm",
+				DatadogInitialStorageCapacityLabel:  "100Gi",
+			},
+		},
+	})
+
+	SetNodeLocalDataResource(ni)
+
+	nodeValue, ok := ni.Node().Status.Allocatable[DatadogLocalDataResource]
+	assert.True(t, ok)
+	assert.Equal(t, nodeValue.String(), resource.NewQuantity(hundredGB, resource.BinarySI).String())
+
+	niValue, ok := ni.Allocatable.ScalarResources[DatadogLocalDataResource]
+	assert.True(t, ok)
+	assert.Equal(t, niValue, hundredGB)
+}
diff --git a/cluster-autoscaler/processors/datadog/pods/transform_local_data.go b/cluster-autoscaler/processors/datadog/pods/transform_local_data.go
index 248c8f833ddb..f35cdae3e759 100644
--- a/cluster-autoscaler/processors/datadog/pods/transform_local_data.go
+++ b/cluster-autoscaler/processors/datadog/pods/transform_local_data.go
@@ -64,6 +64,12 @@ import (
 	klog "k8s.io/klog/v2"
 )
 
+const (
+	storageClassNameLocal   = "local-data"
+	storageClassNameTopolvm = "topolvm-provisioner"
+	storageClassNameOpenEBS = "openebs-lvmpv"
+)
+
 type transformLocalData struct {
 	pvcLister   v1lister.PersistentVolumeClaimLister
 	stopChannel chan struct{}
@@ -102,7 +108,7 @@ func (p *transformLocalData) Process(ctx *context.AutoscalingContext, pods []*ap
 				volumes = append(volumes, vol)
 				continue
 			}
-			if *pvc.Spec.StorageClassName != "local-data" {
+			if !isSpecialPVCStorageClass(*pvc.Spec.StorageClassName) {
 				volumes = append(volumes, vol)
 				continue
 			}
@@ -113,9 +119,26 @@ func (p *transformLocalData) Process(ctx *context.AutoscalingContext, pods []*ap
 			if len(po.Spec.Containers[0].Resources.Limits) == 0 {
 				po.Spec.Containers[0].Resources.Limits = apiv1.ResourceList{}
 			}
+			if len(pvc.Spec.Resources.Requests) == 0 {
+				pvc.Spec.Resources.Requests = apiv1.ResourceList{}
+			}
 
-			po.Spec.Containers[0].Resources.Requests[common.DatadogLocalDataResource] = common.DatadogLocalDataQuantity.DeepCopy()
-			po.Spec.Containers[0].Resources.Limits[common.DatadogLocalDataResource] = common.DatadogLocalDataQuantity.DeepCopy()
+			switch *pvc.Spec.StorageClassName {
+			case storageClassNameTopolvm, storageClassNameOpenEBS:
+				if storage, ok := pvc.Spec.Resources.Requests["storage"]; ok {
+					po.Spec.Containers[0].Resources.Requests[common.DatadogLocalDataResource] = storage.DeepCopy()
+					po.Spec.Containers[0].Resources.Limits[common.DatadogLocalDataResource] = storage.DeepCopy()
+				} else {
+					klog.Warningf("ignoring pvc as it does not have storage request information")
+					volumes = append(volumes, vol)
+				}
+			case storageClassNameLocal:
+				po.Spec.Containers[0].Resources.Requests[common.DatadogLocalDataResource] = common.DatadogLocalDataQuantity.DeepCopy()
+				po.Spec.Containers[0].Resources.Limits[common.DatadogLocalDataResource] = common.DatadogLocalDataQuantity.DeepCopy()
+			default:
+				klog.Warningf("this should never be reached. pvc storage class (%s) cannot be used for scaling on pod: %s", *pvc.Spec.StorageClassName, po.Name)
+				volumes = append(volumes, vol)
+			}
 		}
 		po.Spec.Volumes = volumes
 	}
@@ -123,6 +146,19 @@ func (p *transformLocalData) Process(ctx *context.AutoscalingContext, pods []*ap
 	return pods, nil
 }
 
+func isSpecialPVCStorageClass(className string) bool {
+	switch className {
+	case storageClassNameOpenEBS:
+		return true
+	case storageClassNameTopolvm:
+		return true
+	case storageClassNameLocal:
+		return true
+	default:
+		return false
+	}
+}
+
 // NewPersistentVolumeClaimLister builds a persistentvolumeclaim lister.
 func NewPersistentVolumeClaimLister(kubeClient client.Interface, stopchannel <-chan struct{}) v1lister.PersistentVolumeClaimLister {
 	listWatcher := cache.NewListWatchFromClient(kubeClient.CoreV1().RESTClient(), "persistentvolumeclaims", apiv1.NamespaceAll, fields.Everything())
diff --git a/cluster-autoscaler/processors/datadog/pods/transform_local_data_test.go b/cluster-autoscaler/processors/datadog/pods/transform_local_data_test.go
index fcede6354cdd..4564bc5f3b6e 100644
--- a/cluster-autoscaler/processors/datadog/pods/transform_local_data_test.go
+++ b/cluster-autoscaler/processors/datadog/pods/transform_local_data_test.go
@@ -21,8 +21,10 @@ import (
 	"testing"
 
 	"github.com/stretchr/testify/assert"
+	apiv1 "k8s.io/api/core/v1"
 	corev1 "k8s.io/api/core/v1"
 	apiequality "k8s.io/apimachinery/pkg/api/equality"
+	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/autoscaler/cluster-autoscaler/context"
 	"k8s.io/autoscaler/cluster-autoscaler/processors/datadog/common"
@@ -32,7 +34,6 @@ import (
 
 var (
 	testRemoteClass    = "remote-data"
-	testLocalClass     = "local-data"
 	testNamespace      = "foons"
 	testEmptyResources = corev1.ResourceList{}
 	testLdResources    = corev1.ResourceList{
@@ -41,6 +42,11 @@ var (
 )
 
 func TestTransformLocalDataProcess(t *testing.T) {
+	test100GResource, _ := resource.ParseQuantity("100Gi")
+	testTopolvmResources := corev1.ResourceList{
+		common.DatadogLocalDataResource: test100GResource,
+	}
+
 	tests := []struct {
 		name string
 		pods []*corev1.Pod
@@ -64,7 +70,7 @@ func TestTransformLocalDataProcess(t *testing.T) {
 		{
 			"local-data volumes are removed, and custom resources added",
 			[]*corev1.Pod{buildPod("pod1", testEmptyResources, testEmptyResources, "pvc-1")},
-			[]*corev1.PersistentVolumeClaim{buildPVC("pvc-1", testLocalClass)},
+			[]*corev1.PersistentVolumeClaim{buildPVC("pvc-1", storageClassNameLocal)},
 			[]*corev1.Pod{buildPod("pod1", testLdResources, testLdResources)},
 		},
 
@@ -73,7 +79,7 @@ func TestTransformLocalDataProcess(t *testing.T) {
 			[]*corev1.Pod{buildPod("pod1", testEmptyResources, testEmptyResources, "pvc-1", "pvc-2", "pvc-3")},
 			[]*corev1.PersistentVolumeClaim{
 				buildPVC("pvc-1", testRemoteClass),
-				buildPVC("pvc-2", testLocalClass),
+				buildPVC("pvc-2", storageClassNameLocal),
 				buildPVC("pvc-3", testRemoteClass),
 			},
 			[]*corev1.Pod{buildPod("pod1", testLdResources, testLdResources, "pvc-1", "pvc-3")},
@@ -92,6 +98,36 @@ func TestTransformLocalDataProcess(t *testing.T) {
 			[]*corev1.PersistentVolumeClaim{},
 			[]*corev1.Pod{},
 		},
+
+		{
+			"topolvm provisioner is using proper storage capacity value",
+			[]*corev1.Pod{buildPod("pod1", testEmptyResources, testEmptyResources, "pvc-1", "pvc-2")},
+			[]*corev1.PersistentVolumeClaim{
+				buildPVC("pvc-1", testRemoteClass),
+				buildPVCWithStorage("pvc-2", storageClassNameTopolvm, "100Gi"),
+			},
+			[]*corev1.Pod{buildPod("pod1", testTopolvmResources, testTopolvmResources, "pvc-1")},
+		},
+
+		{
+			"one pvc will override the other",
+			[]*corev1.Pod{buildPod("pod1", testEmptyResources, testEmptyResources, "pvc-1", "pvc-2")},
+			[]*corev1.PersistentVolumeClaim{
+				buildPVCWithStorage("pvc-1", storageClassNameTopolvm, "100Gi"),
+				buildPVC("pvc-2", storageClassNameLocal),
+			},
+			[]*corev1.Pod{buildPod("pod1", testLdResources, testLdResources)},
+		},
+
+		{
+			"openebs provisioner is using proper storage capacity value",
+			[]*corev1.Pod{buildPod("pod1", testEmptyResources, testEmptyResources, "pvc-1", "pvc-2")},
+			[]*corev1.PersistentVolumeClaim{
+				buildPVC("pvc-1", testRemoteClass),
+				buildPVCWithStorage("pvc-2", storageClassNameOpenEBS, "100Gi"),
+			},
+			[]*corev1.Pod{buildPod("pod1", testTopolvmResources, testTopolvmResources, "pvc-1")},
+		},
 	}
 
 	for _, tt := range tests {
@@ -166,3 +202,11 @@ func buildPVC(name string, storageClassName string) *corev1.PersistentVolumeClai
 		},
 	}
 }
+
+func buildPVCWithStorage(name, storageClassName, storageQuantity string) *corev1.PersistentVolumeClaim {
+	pvc := buildPVC(name, storageClassName)
+	quantity, _ := resource.ParseQuantity(storageQuantity)
+	pvc.Spec.Resources.Requests = apiv1.ResourceList{}
+	pvc.Spec.Resources.Requests["storage"] = quantity
+	return pvc
+}

From c22e3f63deec9c69fa365b86460c222a31aef7b6 Mon Sep 17 00:00:00 2001
From: Baptiste Girard-Carrabin
Date: Thu, 6 Jun 2024 09:31:54 +0200
Subject: [PATCH 2/5] [local] Rename topolvm storage class

Rename the storage class from `topolvm-provisioner` to `dynamic-local-data`
in order to provide a better user experience. Users don't need to know that
the storage class is using topolvm underneath, but they are interested in
knowing what kind of volumes will be provisioned.
---
 .../processors/datadog/pods/transform_local_data.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cluster-autoscaler/processors/datadog/pods/transform_local_data.go b/cluster-autoscaler/processors/datadog/pods/transform_local_data.go
index f35cdae3e759..28508831b031 100644
--- a/cluster-autoscaler/processors/datadog/pods/transform_local_data.go
+++ b/cluster-autoscaler/processors/datadog/pods/transform_local_data.go
@@ -66,7 +66,7 @@ import (
 
 const (
 	storageClassNameLocal   = "local-data"
-	storageClassNameTopolvm = "topolvm-provisioner"
+	storageClassNameTopolvm = "dynamic-local-data"
 	storageClassNameOpenEBS = "openebs-lvmpv"
 )
 

From db151177489254e604e464581fb8acc684791bcf Mon Sep 17 00:00:00 2001
From: Baptiste Girard-Carrabin
Date: Thu, 6 Jun 2024 17:15:19 +0200
Subject: [PATCH 3/5] [local] Remove openEBS support

We don't plan on supporting openEBS in our clusters in the foreseeable
future, so let's drop the code to autoscale nodes based on it for now.
---
 .../processors/datadog/common/common.go       |  8 +++----
 .../processors/datadog/common/common_test.go  | 23 -------------------
 .../datadog/pods/transform_local_data.go      |  5 +---
 .../datadog/pods/transform_local_data_test.go | 10 --------
 4 files changed, 4 insertions(+), 42 deletions(-)

diff --git a/cluster-autoscaler/processors/datadog/common/common.go b/cluster-autoscaler/processors/datadog/common/common.go
index caed42068f0f..a1c55547c02c 100644
--- a/cluster-autoscaler/processors/datadog/common/common.go
+++ b/cluster-autoscaler/processors/datadog/common/common.go
@@ -41,8 +41,6 @@ const (
 
 	// DatadogStorageProvisionerTopoLVM is the storage provisioner label value to use for topolvm implementation
 	DatadogStorageProvisionerTopoLVM = "topolvm"
-	// DatadogStorageProvisionerOpenEBS is the storage provisioner label value to use for openebs implementation
-	DatadogStorageProvisionerOpenEBS = "openebs-lvm"
 )
 
 var (
@@ -78,10 +76,10 @@ func SetNodeLocalDataResource(nodeInfo *schedulerframework.NodeInfo) {
 		node.Status.Capacity = apiv1.ResourceList{}
 	}
 
-	provisioner, _ := node.Labels[DatadogLocalStorageProvisionerLabel]
+	provisioner := node.Labels[DatadogLocalStorageProvisionerLabel]
 	switch provisioner {
-	case DatadogStorageProvisionerTopoLVM, DatadogStorageProvisionerOpenEBS:
-		capacity, _ := node.Labels[DatadogInitialStorageCapacityLabel]
+	case DatadogStorageProvisionerTopoLVM:
+		capacity := node.Labels[DatadogInitialStorageCapacityLabel]
 		capacityResource, err := resource.ParseQuantity(capacity)
 		if err == nil {
 			node.Status.Capacity[DatadogLocalDataResource] = capacityResource.DeepCopy()
diff --git a/cluster-autoscaler/processors/datadog/common/common_test.go b/cluster-autoscaler/processors/datadog/common/common_test.go
index 51f2333aa124..e9d6b754a54c 100644
--- a/cluster-autoscaler/processors/datadog/common/common_test.go
+++ b/cluster-autoscaler/processors/datadog/common/common_test.go
@@ -158,26 +158,3 @@ func TestShouldNotSetResourcesWithMissingLabel(t *testing.T) {
 
 	_, ok = ni.Allocatable.ScalarResources[DatadogLocalDataResource]
 	assert.False(t, ok)
 }
-
-func TestSetNodeResourceFromOpenEBS(t *testing.T) {
-	var hundredGB int64 = 100 * 1024 * 1024 * 1024
-	ni := schedulerframework.NewNodeInfo()
-	ni.SetNode(&corev1.Node{
-		ObjectMeta: metav1.ObjectMeta{
-			Labels: map[string]string{
-				DatadogLocalStorageProvisionerLabel: "openebs-lvm",
-				DatadogInitialStorageCapacityLabel:  "100Gi",
-			},
-		},
-	})
-
-	SetNodeLocalDataResource(ni)
-
-	nodeValue, ok := ni.Node().Status.Allocatable[DatadogLocalDataResource]
-	assert.True(t, ok)
-	assert.Equal(t, nodeValue.String(), resource.NewQuantity(hundredGB, resource.BinarySI).String())
-
-	niValue, ok := ni.Allocatable.ScalarResources[DatadogLocalDataResource]
-	assert.True(t, ok)
-	assert.Equal(t, niValue, hundredGB)
-}
diff --git a/cluster-autoscaler/processors/datadog/pods/transform_local_data.go b/cluster-autoscaler/processors/datadog/pods/transform_local_data.go
index 28508831b031..3df801b79892 100644
--- a/cluster-autoscaler/processors/datadog/pods/transform_local_data.go
+++ b/cluster-autoscaler/processors/datadog/pods/transform_local_data.go
@@ -67,7 +67,6 @@ import (
 const (
 	storageClassNameLocal   = "local-data"
 	storageClassNameTopolvm = "dynamic-local-data"
-	storageClassNameOpenEBS = "openebs-lvmpv"
 )
 
 type transformLocalData struct {
@@ -124,7 +123,7 @@ func (p *transformLocalData) Process(ctx *context.AutoscalingContext, pods []*ap
 			}
 
 			switch *pvc.Spec.StorageClassName {
-			case storageClassNameTopolvm, storageClassNameOpenEBS:
+			case storageClassNameTopolvm:
 				if storage, ok := pvc.Spec.Resources.Requests["storage"]; ok {
 					po.Spec.Containers[0].Resources.Requests[common.DatadogLocalDataResource] = storage.DeepCopy()
 					po.Spec.Containers[0].Resources.Limits[common.DatadogLocalDataResource] = storage.DeepCopy()
@@ -148,8 +147,6 @@ func (p *transformLocalData) Process(ctx *context.AutoscalingContext, pods []*ap
 
 func isSpecialPVCStorageClass(className string) bool {
 	switch className {
-	case storageClassNameOpenEBS:
-		return true
 	case storageClassNameTopolvm:
 		return true
 	case storageClassNameLocal:
diff --git a/cluster-autoscaler/processors/datadog/pods/transform_local_data_test.go b/cluster-autoscaler/processors/datadog/pods/transform_local_data_test.go
index 4564bc5f3b6e..67ba58768de9 100644
--- a/cluster-autoscaler/processors/datadog/pods/transform_local_data_test.go
+++ b/cluster-autoscaler/processors/datadog/pods/transform_local_data_test.go
@@ -118,16 +118,6 @@ func TestTransformLocalDataProcess(t *testing.T) {
 			},
 			[]*corev1.Pod{buildPod("pod1", testLdResources, testLdResources)},
 		},
-
-		{
-			"openebs provisioner is using proper storage capacity value",
-			[]*corev1.Pod{buildPod("pod1", testEmptyResources, testEmptyResources, "pvc-1", "pvc-2")},
-			[]*corev1.PersistentVolumeClaim{
-				buildPVC("pvc-1", testRemoteClass),
-				buildPVCWithStorage("pvc-2", storageClassNameOpenEBS, "100Gi"),
-			},
-			[]*corev1.Pod{buildPod("pod1", testTopolvmResources, testTopolvmResources, "pvc-1")},
-		},
 	}
 
 	for _, tt := range tests {

From 411e2cd2836f7310155b126ffede8fb8eceadd5d Mon Sep 17 00:00:00 2001
From: Baptiste Girard-Carrabin
Date: Tue, 18 Jun 2024 17:45:52 +0200
Subject: [PATCH 4/5] [local] Split support for ephemeral and persistent
 volumes

Running TopoLVM for local persistent data like we do with the local volume
provisioner will be a pain in terms of scheduling (how to make sure a pod
using shared local data on a node can always come back to it). As a result,
we will keep the current setup with the local volume provisioner for
persistent storage while we leverage TopoLVM for ephemeral storage on the
node. This way, we won't have to deal with TopoLVM scheduling issues.
---
 .../processors/datadog/common/common.go       |  8 ++-
 .../processors/datadog/common/common_test.go  | 26 +++++++-
 .../datadog/pods/transform_local_data.go      | 41 +++++++-----
 .../datadog/pods/transform_local_data_test.go | 64 +++++++++++++------
 4 files changed, 98 insertions(+), 41 deletions(-)

diff --git a/cluster-autoscaler/processors/datadog/common/common.go b/cluster-autoscaler/processors/datadog/common/common.go
index a1c55547c02c..38d93da268c3 100644
--- a/cluster-autoscaler/processors/datadog/common/common.go
+++ b/cluster-autoscaler/processors/datadog/common/common.go
@@ -32,6 +32,10 @@ const (
 	// nodes offering local storage, and currently injected as requests on
 	// Pending pods having a PVC for local-data volumes.
 	DatadogLocalDataResource apiv1.ResourceName = "storageclass/local-data"
+	// DatadogEphemeralLocalDataResource is a virtual resource placed on new or future
+	// nodes offering ephemeral local storage, and currently injected as requests on
+	// Pending pods having an ephemeral PVC for local-data volumes.
+	DatadogEphemeralLocalDataResource apiv1.ResourceName = "storageclass/ephemeral-local-data"
 
 	// DatadogLocalStorageProvisionerLabel indicates which technology will be used to provide local storage
 	DatadogLocalStorageProvisionerLabel = "nodegroups.datadoghq.com/local-storage-provisioner"
@@ -84,8 +88,8 @@ func SetNodeLocalDataResource(nodeInfo *schedulerframework.NodeInfo) {
 		capacity := node.Labels[DatadogInitialStorageCapacityLabel]
 		capacityResource, err := resource.ParseQuantity(capacity)
 		if err == nil {
-			node.Status.Capacity[DatadogLocalDataResource] = capacityResource.DeepCopy()
-			node.Status.Allocatable[DatadogLocalDataResource] = capacityResource.DeepCopy()
+			node.Status.Capacity[DatadogEphemeralLocalDataResource] = capacityResource.DeepCopy()
+			node.Status.Allocatable[DatadogEphemeralLocalDataResource] = capacityResource.DeepCopy()
 		} else {
 			klog.Warningf("failed to attach capacity information (%s) to node (%s): %v", capacity, node.Name, err)
 		}
diff --git a/cluster-autoscaler/processors/datadog/common/common_test.go b/cluster-autoscaler/processors/datadog/common/common_test.go
index e9d6b754a54c..1fa5e3f3d63d 100644
--- a/cluster-autoscaler/processors/datadog/common/common_test.go
+++ b/cluster-autoscaler/processors/datadog/common/common_test.go
@@ -112,6 +112,13 @@ func TestSetNodeLocalDataResource(t *testing.T) {
 	assert.True(t, ok)
 	assert.Equal(t, niValue, int64(1))
 
+	// Only DatadogLocalDataResource should be set
+	_, ok = ni.Node().Status.Allocatable[DatadogEphemeralLocalDataResource]
+	assert.False(t, ok)
+
+	_, ok = ni.Allocatable.ScalarResources[DatadogEphemeralLocalDataResource]
+	assert.False(t, ok)
+
 	assert.Equal(t, len(ni.Pods), 2)
 }
 
@@ -129,13 +136,20 @@ func TestSetNodeResourceFromTopolvm(t *testing.T) {
 
 	SetNodeLocalDataResource(ni)
 
-	nodeValue, ok := ni.Node().Status.Allocatable[DatadogLocalDataResource]
+	nodeValue, ok := ni.Node().Status.Allocatable[DatadogEphemeralLocalDataResource]
 	assert.True(t, ok)
 	assert.Equal(t, nodeValue.String(), resource.NewQuantity(hundredGB, resource.BinarySI).String())
 
-	niValue, ok := ni.Allocatable.ScalarResources[DatadogLocalDataResource]
+	niValue, ok := ni.Allocatable.ScalarResources[DatadogEphemeralLocalDataResource]
 	assert.True(t, ok)
 	assert.Equal(t, niValue, hundredGB)
+
+	// Only DatadogEphemeralLocalDataResource should be set
+	_, ok = ni.Node().Status.Allocatable[DatadogLocalDataResource]
+	assert.False(t, ok)
+
+	_, ok = ni.Allocatable.ScalarResources[DatadogLocalDataResource]
+	assert.False(t, ok)
 }
 
 func TestShouldNotSetResourcesWithMissingLabel(t *testing.T) {
@@ -157,4 +171,12 @@ func TestShouldNotSetResourcesWithMissingLabel(t *testing.T) {
 
 	_, ok = ni.Allocatable.ScalarResources[DatadogLocalDataResource]
 	assert.False(t, ok)
+
+	_, ok = ni.Node().Status.Allocatable[DatadogEphemeralLocalDataResource]
+	assert.False(t, ok)
+	_, ok = ni.Node().Status.Capacity[DatadogEphemeralLocalDataResource]
+	assert.False(t, ok)
+
+	_, ok = ni.Allocatable.ScalarResources[DatadogEphemeralLocalDataResource]
+	assert.False(t, ok)
 }
diff --git a/cluster-autoscaler/processors/datadog/pods/transform_local_data.go b/cluster-autoscaler/processors/datadog/pods/transform_local_data.go
index 3df801b79892..bf39e1270b7b 100644
--- a/cluster-autoscaler/processors/datadog/pods/transform_local_data.go
+++ b/cluster-autoscaler/processors/datadog/pods/transform_local_data.go
@@ -56,6 +56,7 @@ import (
 	"k8s.io/autoscaler/cluster-autoscaler/processors/datadog/common"
 
 	apiv1 "k8s.io/api/core/v1"
+	corev1 "k8s.io/api/core/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	"k8s.io/apimachinery/pkg/fields"
 	client "k8s.io/client-go/kubernetes"
@@ -66,7 +67,7 @@ import (
 
 const (
 	storageClassNameLocal   = "local-data"
-	storageClassNameTopolvm = "dynamic-local-data"
+	storageClassNameTopolvm = "ephemeral-local-data"
 )
 
 type transformLocalData struct {
@@ -95,19 +96,25 @@ func (p *transformLocalData) Process(ctx *context.AutoscalingContext, pods []*ap
 	for _, po := range pods {
 		var volumes []apiv1.Volume
 		for _, vol := range po.Spec.Volumes {
-			if vol.PersistentVolumeClaim == nil {
-				volumes = append(volumes, vol)
-				continue
-			}
-			pvc, err := p.pvcLister.PersistentVolumeClaims(po.Namespace).Get(vol.PersistentVolumeClaim.ClaimName)
-			if err != nil {
-				if !apierrors.IsNotFound(err) {
-					klog.Warningf("failed to fetch pvc for %s/%s: %v", po.GetNamespace(), po.GetName(), err)
+			var pvcSpec *apiv1.PersistentVolumeClaimSpec
+			if vol.PersistentVolumeClaim != nil {
+				pvc, err := p.pvcLister.PersistentVolumeClaims(po.Namespace).Get(vol.PersistentVolumeClaim.ClaimName)
+				if err != nil {
+					if !apierrors.IsNotFound(err) {
+						klog.Warningf("failed to fetch pvc for %s/%s: %v", po.GetNamespace(), po.GetName(), err)
+					}
+					volumes = append(volumes, vol)
+					continue
 				}
+				pvcSpec = &pvc.Spec
+			} else if vol.Ephemeral != nil {
+				pvcSpec = &vol.Ephemeral.VolumeClaimTemplate.Spec
+			} else {
 				volumes = append(volumes, vol)
 				continue
 			}
-			if !isSpecialPVCStorageClass(*pvc.Spec.StorageClassName) {
+
+			if !isSpecialPVCStorageClass(*pvcSpec.StorageClassName) {
 				volumes = append(volumes, vol)
 				continue
 			}
@@ -118,15 +125,15 @@ func (p *transformLocalData) Process(ctx *context.AutoscalingContext, pods []*ap
 			if len(po.Spec.Containers[0].Resources.Limits) == 0 {
 				po.Spec.Containers[0].Resources.Limits = apiv1.ResourceList{}
 			}
-			if len(pvc.Spec.Resources.Requests) == 0 {
-				pvc.Spec.Resources.Requests = apiv1.ResourceList{}
+			if len(pvcSpec.Resources.Requests) == 0 {
+				pvcSpec.Resources.Requests = apiv1.ResourceList{}
 			}
 
-			switch *pvc.Spec.StorageClassName {
+			switch *pvcSpec.StorageClassName {
 			case storageClassNameTopolvm:
-				if storage, ok := pvc.Spec.Resources.Requests["storage"]; ok {
-					po.Spec.Containers[0].Resources.Requests[common.DatadogLocalDataResource] = storage.DeepCopy()
-					po.Spec.Containers[0].Resources.Limits[common.DatadogLocalDataResource] = storage.DeepCopy()
+				if storage, ok := pvcSpec.Resources.Requests[corev1.ResourceStorage]; ok {
+					po.Spec.Containers[0].Resources.Requests[common.DatadogEphemeralLocalDataResource] = storage.DeepCopy()
+					po.Spec.Containers[0].Resources.Limits[common.DatadogEphemeralLocalDataResource] = storage.DeepCopy()
 				} else {
 					klog.Warningf("ignoring pvc as it does not have storage request information")
 					volumes = append(volumes, vol)
@@ -135,7 +142,7 @@ func (p *transformLocalData) Process(ctx *context.AutoscalingContext, pods []*ap
 				po.Spec.Containers[0].Resources.Requests[common.DatadogLocalDataResource] = common.DatadogLocalDataQuantity.DeepCopy()
 				po.Spec.Containers[0].Resources.Limits[common.DatadogLocalDataResource] = common.DatadogLocalDataQuantity.DeepCopy()
 			default:
-				klog.Warningf("this should never be reached. pvc storage class (%s) cannot be used for scaling on pod: %s", *pvc.Spec.StorageClassName, po.Name)
+				klog.Warningf("this should never be reached. pvc storage class (%s) cannot be used for scaling on pod: %s", *pvcSpec.StorageClassName, po.Name)
 				volumes = append(volumes, vol)
 			}
 		}
diff --git a/cluster-autoscaler/processors/datadog/pods/transform_local_data_test.go b/cluster-autoscaler/processors/datadog/pods/transform_local_data_test.go
index 67ba58768de9..edf7a69f2bec 100644
--- a/cluster-autoscaler/processors/datadog/pods/transform_local_data_test.go
+++ b/cluster-autoscaler/processors/datadog/pods/transform_local_data_test.go
@@ -18,6 +18,7 @@ package pods
 
 import (
 	"fmt"
+	"strings"
 	"testing"
 
 	"github.com/stretchr/testify/assert"
@@ -30,6 +31,7 @@ import (
 	"k8s.io/autoscaler/cluster-autoscaler/processors/datadog/common"
 	v1lister "k8s.io/client-go/listers/core/v1"
 	"k8s.io/client-go/tools/cache"
+	"k8s.io/utils/pointer"
 )
 
 var (
@@ -39,14 +41,17 @@ var (
 	testLdResources    = corev1.ResourceList{
 		common.DatadogLocalDataResource: common.DatadogLocalDataQuantity.DeepCopy(),
 	}
+	test100GResource     = resource.MustParse("100Gi")
+	testTopolvmResources = corev1.ResourceList{
+		common.DatadogEphemeralLocalDataResource: test100GResource,
+	}
+	testMixedResources = corev1.ResourceList{
+		common.DatadogLocalDataResource:          common.DatadogLocalDataQuantity.DeepCopy(),
+		common.DatadogEphemeralLocalDataResource: test100GResource,
+	}
 )
 
 func TestTransformLocalDataProcess(t *testing.T) {
-	test100GResource, _ := resource.ParseQuantity("100Gi")
-	testTopolvmResources := corev1.ResourceList{
-		common.DatadogLocalDataResource: test100GResource,
-	}
-
 	tests := []struct {
 		name string
 		pods []*corev1.Pod
@@ -100,23 +105,21 @@ func TestTransformLocalDataProcess(t *testing.T) {
 		},
 
 		{
-			"topolvm provisioner is using proper storage capacity value",
-			[]*corev1.Pod{buildPod("pod1", testEmptyResources, testEmptyResources, "pvc-1", "pvc-2")},
+			"topolvm handles ephemeral volumes",
+			[]*corev1.Pod{buildPod("pod1", testEmptyResources, testEmptyResources, "pvc-1", "ephemeral-pvc-2")},
 			[]*corev1.PersistentVolumeClaim{
 				buildPVC("pvc-1", testRemoteClass),
-				buildPVCWithStorage("pvc-2", storageClassNameTopolvm, "100Gi"),
 			},
 			[]*corev1.Pod{buildPod("pod1", testTopolvmResources, testTopolvmResources, "pvc-1")},
 		},
 
 		{
-			"one pvc will override the other",
-			[]*corev1.Pod{buildPod("pod1", testEmptyResources, testEmptyResources, "pvc-1", "pvc-2")},
+			"ephemeral and persistent volumes are handled separately",
+			[]*corev1.Pod{buildPod("pod1", testEmptyResources, testEmptyResources, "ephemeral-pvc-1", "pvc-2")},
 			[]*corev1.PersistentVolumeClaim{
-				buildPVCWithStorage("pvc-1", storageClassNameTopolvm, "100Gi"),
 				buildPVC("pvc-2", storageClassNameLocal),
 			},
-			[]*corev1.Pod{buildPod("pod1", testLdResources, testLdResources)},
+			[]*corev1.Pod{buildPod("pod1", testMixedResources, testMixedResources)},
 		},
 	}
 
@@ -167,15 +170,36 @@ func buildPod(name string, requests, limits corev1.ResourceList, claimNames ...s
 	}
 
 	for _, name := range claimNames {
-		pod.Spec.Volumes = append(pod.Spec.Volumes,
-			corev1.Volume{
-				Name: name,
-				VolumeSource: corev1.VolumeSource{
-					PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{
-						ClaimName: name,
-					},
-				},
-			})
+		if strings.Contains(name, "ephemeral") {
+			pod.Spec.Volumes = append(pod.Spec.Volumes,
+				corev1.Volume{
+					Name: name,
+					VolumeSource: corev1.VolumeSource{
+						Ephemeral: &corev1.EphemeralVolumeSource{
+							VolumeClaimTemplate: &corev1.PersistentVolumeClaimTemplate{
+								Spec: corev1.PersistentVolumeClaimSpec{
+									StorageClassName: pointer.String(storageClassNameTopolvm),
+									Resources: corev1.ResourceRequirements{
+										Requests: corev1.ResourceList{
+											corev1.ResourceStorage: resource.MustParse("100Gi"),
+										},
+									},
+								},
+							},
+						},
+					},
+				})
+		} else {
+			pod.Spec.Volumes = append(pod.Spec.Volumes,
+				corev1.Volume{
+					Name: name,
+					VolumeSource: corev1.VolumeSource{
+						PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{
+							ClaimName: name,
+						},
+					},
+				})
+		}
 	}
 
 	return pod

From 09996d5f69c63b4bb5d91ab0e81a4ebee2cc47e5 Mon Sep 17 00:00:00 2001
From: Baptiste Girard-Carrabin
Date: Thu, 27 Jun 2024 19:26:26 +0200
Subject: [PATCH 5/5] [local] Explicitly only support ephemeral volumes for
 topolvm

Explicitly do not trigger a scale up if we request a persistent topolvm
volume (or an ephemeral local-data volume).
---
 .../datadog/pods/transform_local_data.go      | 25 +++++++++++++++++--
 .../datadog/pods/transform_local_data_test.go |  8 ++++++
 2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/cluster-autoscaler/processors/datadog/pods/transform_local_data.go b/cluster-autoscaler/processors/datadog/pods/transform_local_data.go
index bf39e1270b7b..2c88c3502b23 100644
--- a/cluster-autoscaler/processors/datadog/pods/transform_local_data.go
+++ b/cluster-autoscaler/processors/datadog/pods/transform_local_data.go
@@ -131,14 +131,35 @@ func (p *transformLocalData) Process(ctx *context.AutoscalingContext, pods []*ap
 
 			switch *pvcSpec.StorageClassName {
 			case storageClassNameTopolvm:
+				// Only support ephemeral volumes for topolvm
+				if vol.Ephemeral == nil {
+					volumes = append(volumes, vol)
+					continue
+				}
 				if storage, ok := pvcSpec.Resources.Requests[corev1.ResourceStorage]; ok {
-					po.Spec.Containers[0].Resources.Requests[common.DatadogEphemeralLocalDataResource] = storage.DeepCopy()
-					po.Spec.Containers[0].Resources.Limits[common.DatadogEphemeralLocalDataResource] = storage.DeepCopy()
+					if request, ok := po.Spec.Containers[0].Resources.Requests[common.DatadogEphemeralLocalDataResource]; ok {
+						request.Add(storage)
+						po.Spec.Containers[0].Resources.Requests[common.DatadogEphemeralLocalDataResource] = request
+					} else {
+						po.Spec.Containers[0].Resources.Requests[common.DatadogEphemeralLocalDataResource] = storage.DeepCopy()
+					}
+
+					if limit, ok := po.Spec.Containers[0].Resources.Limits[common.DatadogEphemeralLocalDataResource]; ok {
+						limit.Add(storage)
+						po.Spec.Containers[0].Resources.Limits[common.DatadogEphemeralLocalDataResource] = limit
+					} else {
+						po.Spec.Containers[0].Resources.Limits[common.DatadogEphemeralLocalDataResource] = storage.DeepCopy()
+					}
 				} else {
 					klog.Warningf("ignoring pvc as it does not have storage request information")
 					volumes = append(volumes, vol)
 				}
 			case storageClassNameLocal:
+				// Only support persistent volumes for local storage
+				if vol.PersistentVolumeClaim == nil {
+					volumes = append(volumes, vol)
+					continue
+				}
 				po.Spec.Containers[0].Resources.Requests[common.DatadogLocalDataResource] = common.DatadogLocalDataQuantity.DeepCopy()
 				po.Spec.Containers[0].Resources.Limits[common.DatadogLocalDataResource] = common.DatadogLocalDataQuantity.DeepCopy()
 			default:
diff --git a/cluster-autoscaler/processors/datadog/pods/transform_local_data_test.go b/cluster-autoscaler/processors/datadog/pods/transform_local_data_test.go
index edf7a69f2bec..9c4cec020639 100644
--- a/cluster-autoscaler/processors/datadog/pods/transform_local_data_test.go
+++ b/cluster-autoscaler/processors/datadog/pods/transform_local_data_test.go
@@ -121,6 +121,14 @@ func TestTransformLocalDataProcess(t *testing.T) {
 			},
 			[]*corev1.Pod{buildPod("pod1", testMixedResources, testMixedResources)},
 		},
+		{
+			"no persistent volume for topolvm",
+			[]*corev1.Pod{buildPod("pod1", testEmptyResources, testEmptyResources, "pvc-1")},
+			[]*corev1.PersistentVolumeClaim{
+				buildPVC("pvc-1", storageClassNameTopolvm),
+			},
+			[]*corev1.Pod{buildPod("pod1", testEmptyResources, testEmptyResources, "pvc-1")},
+		},
 	}
 
 	for _, tt := range tests {
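
For readers following the series end to end: after patch 5, a pending pod with an
ephemeral volume whose claim template names the "ephemeral-local-data" storage class
gets the claim's storage request injected onto its first container as the virtual
resource "storageclass/ephemeral-local-data", and candidate nodes advertise that
resource from the two nodegroups.datadoghq.com labels. The sketch below restates
that matching logic in isolation so it can be run outside the autoscaler; the
package layout and the ephemeralCapacity helper are illustrative assumptions, while
the label keys, label values, and resource name are taken from the patches above.

    package main

    import (
    	"fmt"

    	corev1 "k8s.io/api/core/v1"
    	"k8s.io/apimachinery/pkg/api/resource"
    )

    const (
    	// Names taken from the patches above.
    	ephemeralLocalDataResource corev1.ResourceName = "storageclass/ephemeral-local-data"
    	provisionerLabel                               = "nodegroups.datadoghq.com/local-storage-provisioner"
    	initialCapacityLabel                           = "nodegroups.datadoghq.com/initial-storage-capacity"
    )

    // ephemeralCapacity mirrors the topolvm branch of SetNodeLocalDataResource
    // (illustrative helper, not the autoscaler's API): a node advertises ephemeral
    // local storage only when it carries the provisioner label with value "topolvm"
    // and a parseable initial-storage-capacity label.
    func ephemeralCapacity(node *corev1.Node) (resource.Quantity, bool) {
    	if node.Labels[provisionerLabel] != "topolvm" {
    		return resource.Quantity{}, false
    	}
    	q, err := resource.ParseQuantity(node.Labels[initialCapacityLabel])
    	if err != nil {
    		return resource.Quantity{}, false
    	}
    	return q, true
    }

    func main() {
    	node := &corev1.Node{}
    	node.Labels = map[string]string{
    		provisionerLabel:     "topolvm",
    		initialCapacityLabel: "100Gi",
    	}

    	// A pod with an ephemeral "ephemeral-local-data" claim can only fit on,
    	// or trigger a scale-up of, nodes advertising enough of this resource.
    	if capacity, ok := ephemeralCapacity(node); ok {
    		fmt.Printf("node advertises %s=%s\n", ephemeralLocalDataResource, capacity.String())
    	}
    }

Under those assumptions, a node labeled as in main() advertises
storageclass/ephemeral-local-data=100Gi, which is exactly the quantity a 100Gi
ephemeral claim injects as a pod request in patch 5 (requests from multiple claims
on the same pod are summed).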