Skip to content

Commit

Permalink
Merge pull request #117 from DataDog/cp-azure-minmax
Browse files Browse the repository at this point in the history
Default min/max sizes for Azure VMSSs
  • Loading branch information
rrangith authored Jul 10, 2024
2 parents bada18d + 1d0d96f commit 0c2f44f
Show file tree
Hide file tree
Showing 6 changed files with 180 additions and 15 deletions.
2 changes: 1 addition & 1 deletion cluster-autoscaler/FAQ.md
Original file line number Diff line number Diff line change
Expand Up @@ -755,7 +755,7 @@ The following startup parameters are supported for cluster autoscaler:
| `ok-total-unready-count` | Number of allowed unready nodes, irrespective of max-total-unready-percentage | 3
| `max-node-provision-time` | Maximum time CA waits for node to be provisioned | 15 minutes
| `nodes` | sets min,max size and other configuration data for a node group in a format accepted by cloud provider. Can be used multiple times. Format: \<min>:\<max>:<other...> | ""
| `node-group-auto-discovery` | One or more definition(s) of node group auto-discovery.<br>A definition is expressed `<name of discoverer>:[<key>[=<value>]]`<br>The `aws`, `gce`, and `azure` cloud providers are currently supported. AWS matches by ASG tags, e.g. `asg:tag=tagKey,anotherTagKey`<br>GCE matches by IG name prefix, and requires you to specify min and max nodes per IG, e.g. `mig:namePrefix=pfx,min=0,max=10`<br> Azure matches by tags on VMSS, e.g. `label:foo=bar`, and will auto-detect `min` and `max` tags on the VMSS to set scaling limits.<br>Can be used multiple times | ""
| `node-group-auto-discovery` | One or more definition(s) of node group auto-discovery.<br>A definition is expressed `<name of discoverer>:[<key>[=<value>]]`<br>The `aws`, `gce`, and `azure` cloud providers are currently supported. AWS matches by ASG tags, e.g. `asg:tag=tagKey,anotherTagKey`<br>GCE matches by IG name prefix, and requires you to specify min and max nodes per IG, e.g. `mig:namePrefix=pfx,min=0,max=10`<br> Azure matches by VMSS tags, similar to AWS. And you can optionally specify a default min and max size for VMSSs, e.g. `label:tag=tagKey,anotherTagKey=bar,min=0,max=600`.<br>Can be used multiple times | ""
| `emit-per-nodegroup-metrics` | If true, emit per node group metrics. | false
| `estimator` | Type of resource estimator to be used in scale up | binpacking
| `expander` | Type of node group expander to be used in scale up. | random
Expand Down
64 changes: 55 additions & 9 deletions cluster-autoscaler/cloudprovider/azure/azure_autodiscovery.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,31 @@ package azure

import (
"fmt"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"strconv"
"strings"

"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
)

const (
autoDiscovererTypeLabel = "label"
autoDiscovererTypeLabel = "label"
vmssAutoDiscovererKeyMinNodes = "min"
vmssAutoDiscovererKeyMaxNodes = "max"
)

// A labelAutoDiscoveryConfig specifies how to auto-discover Azure node groups.
type labelAutoDiscoveryConfig struct {
// Key-values to match on.
Selector map[string]string
// MinSize specifies the minimum size for all VMSSs that match Selector.
MinSize *int
// MazSize specifies the maximum size for all VMSSs that match Selector.
MaxSize *int
}

type autoDiscoveryConfigSizes struct {
Min int
Max int
}

// ParseLabelAutoDiscoverySpecs returns any provided NodeGroupAutoDiscoverySpecs
Expand Down Expand Up @@ -70,34 +83,67 @@ func parseLabelAutoDiscoverySpec(spec string) (labelAutoDiscoveryConfig, error)
if k == "" || v == "" {
return cfg, fmt.Errorf("empty value not allowed in key=value tag pairs")
}
cfg.Selector[k] = v

switch k {
case vmssAutoDiscovererKeyMinNodes:
minSize, err := strconv.Atoi(v)
if err != nil || minSize < 0 {
return cfg, fmt.Errorf("invalid minimum nodes: %s", v)
}
cfg.MinSize = &minSize
case vmssAutoDiscovererKeyMaxNodes:
maxSize, err := strconv.Atoi(v)
if err != nil || maxSize < 0 {
return cfg, fmt.Errorf("invalid maximum nodes: %s", v)
}
cfg.MaxSize = &maxSize
default:
cfg.Selector[k] = v
}
}
if cfg.MaxSize != nil && cfg.MinSize != nil && *cfg.MaxSize < *cfg.MinSize {
return cfg, fmt.Errorf("maximum size %d must be greater than or equal to minimum size %d", *cfg.MaxSize, *cfg.MinSize)
}
return cfg, nil
}

func matchDiscoveryConfig(labels map[string]*string, configs []labelAutoDiscoveryConfig) bool {
// returns an autoDiscoveryConfigSizes struct if the VMSS's tags match the autodiscovery configs
// if the VMSS's tags do not match then return nil
// if there are multiple min/max sizes defined, return the highest min value and the lowest max value
func matchDiscoveryConfig(labels map[string]*string, configs []labelAutoDiscoveryConfig) *autoDiscoveryConfigSizes {
if len(configs) == 0 {
return false
return nil
}
minSize := -1
maxSize := -1

for _, c := range configs {
if len(c.Selector) == 0 {
return false
return nil
}

for k, v := range c.Selector {
value, ok := labels[k]
if !ok {
return false
return nil
}

if len(v) > 0 {
if value == nil || *value != v {
return false
return nil
}
}
}
if c.MinSize != nil && minSize < *c.MinSize {
minSize = *c.MinSize
}
if c.MaxSize != nil && (maxSize == -1 || maxSize > *c.MaxSize) {
maxSize = *c.MaxSize
}
}

return true
return &autoDiscoveryConfigSizes{
Min: minSize,
Max: maxSize,
}
}
71 changes: 69 additions & 2 deletions cluster-autoscaler/cloudprovider/azure/azure_autodiscovery_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,15 @@ limitations under the License.
package azure

import (
"testing"

"github.com/stretchr/testify/assert"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"testing"
)

func TestParseLabelAutoDiscoverySpecs(t *testing.T) {
minVal := 1
maxVal := 2
testCases := []struct {
name string
specs []string
Expand All @@ -46,7 +49,7 @@ func TestParseLabelAutoDiscoverySpecs(t *testing.T) {
expectedErr: true,
},
{
name: "InvalidAutoDiscoerLabel",
name: "InvalidAutoDiscoverLabel",
specs: []string{"invalid:test-tag=test-value,another-test-tag"},
expectedErr: true,
},
Expand All @@ -60,6 +63,70 @@ func TestParseLabelAutoDiscoverySpecs(t *testing.T) {
specs: []string{"label:=test-val"},
expectedErr: true,
},
{
name: "ValidSpecWithSizes",
specs: []string{
"label:cluster-autoscaler-enabled=true,cluster-autoscaler-name=fake-cluster,min=1,max=2",
"label:test-tag=test-value,another-test-tag=another-test-value,min=1,max=2",
},
expected: []labelAutoDiscoveryConfig{
{Selector: map[string]string{"cluster-autoscaler-enabled": "true", "cluster-autoscaler-name": "fake-cluster"}, MinSize: &minVal, MaxSize: &maxVal},
{Selector: map[string]string{"test-tag": "test-value", "another-test-tag": "another-test-value"}, MinSize: &minVal, MaxSize: &maxVal},
},
},
{
name: "ValidSpecWithSizesOnlyMax",
specs: []string{
"label:cluster-autoscaler-enabled=true,max=2",
},
expected: []labelAutoDiscoveryConfig{
{Selector: map[string]string{"cluster-autoscaler-enabled": "true"}, MaxSize: &maxVal},
},
},
{
name: "ValidSpecWithSizesOnlyMin",
specs: []string{
"label:cluster-autoscaler-enabled=true,min=1",
},
expected: []labelAutoDiscoveryConfig{
{Selector: map[string]string{"cluster-autoscaler-enabled": "true"}, MinSize: &minVal},
},
},
{
name: "NonIntegerMin",
specs: []string{
"label:cluster-autoscaler-enabled=true,min=random,max=2",
},
expectedErr: true,
},
{
name: "NegativeMin",
specs: []string{
"label:cluster-autoscaler-enabled=true,min=-5,max=2",
},
expectedErr: true,
},
{
name: "NonIntegerMax",
specs: []string{
"label:cluster-autoscaler-enabled=true,min=1,max=random",
},
expectedErr: true,
},
{
name: "NegativeMax",
specs: []string{
"label:cluster-autoscaler-enabled=true,min=1,max=-5",
},
expectedErr: true,
},
{
name: "LowerMaxThanMin",
specs: []string{
"label:cluster-autoscaler-enabled=true,min=5,max=1",
},
expectedErr: true,
},
}

for _, tc := range testCases {
Expand Down
9 changes: 7 additions & 2 deletions cluster-autoscaler/cloudprovider/azure/azure_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -293,12 +293,13 @@ func (m *AzureManager) getFilteredScaleSets(filter []labelAutoDiscoveryConfig) (

var nodeGroups []cloudprovider.NodeGroup
for _, scaleSet := range vmssList {
var cfgSizes *autoDiscoveryConfigSizes
if len(filter) > 0 {
if scaleSet.Tags == nil || len(scaleSet.Tags) == 0 {
continue
}

if !matchDiscoveryConfig(scaleSet.Tags, filter) {
if cfgSizes = matchDiscoveryConfig(scaleSet.Tags, filter); cfgSizes == nil {
continue
}
}
Expand All @@ -316,6 +317,8 @@ func (m *AzureManager) getFilteredScaleSets(filter []labelAutoDiscoveryConfig) (
klog.Warningf("ignoring vmss %q because of invalid minimum size specified for vmss: %s", *scaleSet.Name, err)
continue
}
} else if cfgSizes.Min >= 0 {
spec.MinSize = cfgSizes.Min
} else {
klog.Warningf("ignoring vmss %q because of no minimum size specified for vmss", *scaleSet.Name)
continue
Expand All @@ -331,12 +334,14 @@ func (m *AzureManager) getFilteredScaleSets(filter []labelAutoDiscoveryConfig) (
klog.Warningf("ignoring vmss %q because of invalid maximum size specified for vmss: %s", *scaleSet.Name, err)
continue
}
} else if cfgSizes.Max >= 0 {
spec.MaxSize = cfgSizes.Max
} else {
klog.Warningf("ignoring vmss %q because of no maximum size specified for vmss", *scaleSet.Name)
continue
}
if spec.MaxSize < spec.MinSize {
klog.Warningf("ignoring vmss %q because of maximum size must be greater than minimum size: max=%d < min=%d", *scaleSet.Name, spec.MaxSize, spec.MinSize)
klog.Warningf("ignoring vmss %q because of maximum size must be greater than or equal to minimum size: max=%d < min=%d", *scaleSet.Name, spec.MaxSize, spec.MinSize)
continue
}

Expand Down
46 changes: 46 additions & 0 deletions cluster-autoscaler/cloudprovider/azure/azure_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -684,6 +684,52 @@ func TestGetFilteredAutoscalingGroupsVmss(t *testing.T) {
assert.True(t, assert.ObjectsAreEqualValues(expectedAsgs, asgs), "expected %#v, but found: %#v", expectedAsgs, asgs)
}

func TestGetFilteredAutoscalingGroupsVmssWithConfiguredSizes(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()

vmssName := "test-vmss"
vmssTag := "fake-tag"
vmssTagValue := "fake-value"
vmssTag2 := "fake-tag2"
vmssTagValue2 := "fake-value2"
minVal := 2
maxVal := 4

ngdo := cloudprovider.NodeGroupDiscoveryOptions{
NodeGroupAutoDiscoverySpecs: []string{
fmt.Sprintf("label:%s=%s,min=2,max=5", vmssTag, vmssTagValue),
fmt.Sprintf("label:%s=%s,min=1,max=4", vmssTag2, vmssTagValue2),
},
}

manager := newTestAzureManager(t)
expectedScaleSets := []compute.VirtualMachineScaleSet{fakeVMSSWithTags(vmssName, map[string]*string{vmssTag: &vmssTagValue, vmssTag2: &vmssTagValue2})}
mockVMSSClient := mockvmssclient.NewMockInterface(ctrl)
mockVMSSClient.EXPECT().List(gomock.Any(), manager.config.ResourceGroup).Return(expectedScaleSets, nil).AnyTimes()
manager.azClient.virtualMachineScaleSetsClient = mockVMSSClient
err := manager.forceRefresh()
assert.NoError(t, err)

specs, err := ParseLabelAutoDiscoverySpecs(ngdo)
assert.NoError(t, err)

asgs, err := manager.getFilteredNodeGroups(specs)
assert.NoError(t, err)
expectedAsgs := []cloudprovider.NodeGroup{&ScaleSet{
azureRef: azureRef{
Name: vmssName,
},
minSize: minVal,
maxSize: maxVal,
manager: manager,
curSize: 3,
sizeRefreshPeriod: manager.azureCache.refreshInterval,
instancesRefreshPeriod: defaultVmssInstancesRefreshPeriod,
}}
assert.True(t, assert.ObjectsAreEqualValues(expectedAsgs, asgs), "expected %#v, but found: %#v", expectedAsgs, asgs)
}

func TestGetFilteredAutoscalingGroupsWithInvalidVMType(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
Expand Down
3 changes: 2 additions & 1 deletion cluster-autoscaler/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,8 +167,9 @@ var (
"node-group-auto-discovery",
"One or more definition(s) of node group auto-discovery. "+
"A definition is expressed `<name of discoverer>:[<key>[=<value>]]`. "+
"The `aws` and `gce` cloud providers are currently supported. AWS matches by ASG tags, e.g. `asg:tag=tagKey,anotherTagKey`. "+
"The `aws`, `gce`, and `azure` cloud providers are currently supported. AWS matches by ASG tags, e.g. `asg:tag=tagKey,anotherTagKey`. "+
"GCE matches by IG name prefix, and requires you to specify min and max nodes per IG, e.g. `mig:namePrefix=pfx,min=0,max=10` "+
"Azure matches by VMSS tags, similar to AWS. And you can optionally specify a default min and max size, e.g. `label:tag=tagKey,anotherTagKey=bar,min=0,max=600`. "+
"Can be used multiple times.")

estimatorFlag = flag.String("estimator", estimator.BinpackingEstimatorName,
Expand Down

0 comments on commit 0c2f44f

Please sign in to comment.