From 47c67ab986361eda19230eadd7d78353a4395f55 Mon Sep 17 00:00:00 2001 From: Cooper Tseng Date: Wed, 3 Jul 2024 17:40:19 +0800 Subject: [PATCH 1/2] Add CPU Manager related config Add new config file: - rke2-99-z00-harvester-reserved-resources.yaml: which includes system-reserved and kube-reserved CPU resources; kube-reserved follows the GKE CPU reservation formula for calculating reserved CPU resources, and system-reserved is 2/3 of the kube-reserved CPU resource. Signed-off-by: Cooper Tseng --- pkg/config/config.go | 47 ++++++++++ pkg/config/config_test.go | 89 +++++++++++++++++++ pkg/config/cos.go | 15 ++++ ...2-99-z00-harvester-reserved-resources.yaml | 3 + 4 files changed, 154 insertions(+) create mode 100644 pkg/config/templates/rke2-99-z00-harvester-reserved-resources.yaml diff --git a/pkg/config/config.go b/pkg/config/config.go index 0fbb7623e..2bb16b3ad 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -3,6 +3,7 @@ package config import ( "fmt" "net" + "runtime" "strings" "github.com/imdario/mergo" @@ -299,6 +300,16 @@ func (c *HarvesterConfig) GetKubeletArgs() ([]string, error) { return args, nil } +// make system:kube cpu reservation ratio 2:3 +func (c *HarvesterConfig) GetSystemReserved() string { + return fmt.Sprintf("system-reserved=cpu=%dm", calculateCPUReservedInMilliCPU(runtime.NumCPU(), MaxPods)*2*2/5) +} + +// make system:kube cpu reservation ratio 2:3 +func (c *HarvesterConfig) GetKubeReserved() string { + return fmt.Sprintf("kube-reserved=cpu=%dm", calculateCPUReservedInMilliCPU(runtime.NumCPU(), MaxPods)*2*3/5) +} + func (c HarvesterConfig) ShouldCreateDataPartitionOnOsDisk() bool { // DataDisk is empty means only using the OS disk, and most of the time we should create data // partition on OS disk, unless when ForceMBR=true then we should not create data partition. 
@@ -443,3 +454,39 @@ func GenerateRancherdConfig(config *HarvesterConfig) (*yipSchema.YipConfig, erro return conf, nil } + +// inspired by GKE CPU reservations https://cloud.google.com/kubernetes-engine/docs/concepts/plan-node-sizes +func calculateCPUReservedInMilliCPU(cores int, maxPods int) int64 { + // this shouldn't happen + if cores <= 0 || maxPods <= 0 { + return 0 + } + + var reserved float64 + + // 6% of the first core + reserved += float64(6) / 100 + + // 1% of the next core (up to 2 cores) + if cores > 1 { + reserved += float64(1) / 100 + } + + // 0.5% of each of the next 2 cores (up to 4 cores); a 3-core node is charged for both + if cores > 2 { + reserved += float64(2) * float64(0.5) / 100 + } + + // 0.25% of any cores above 4 cores + if cores > 4 { + reserved += float64(cores-4) * float64(0.25) / 100 + } + + // if the maximum number of Pods per node is beyond the default of 110, + // reserve an extra 400 mCPU in addition to the preceding reservations. + if maxPods > 110 { + reserved += 0.4 + } + + return int64(reserved * 1000) +} diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 9cf4677d2..e9c4ffa4d 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -1,6 +1,9 @@ package config import ( + "fmt" + "strconv" + "strings" "testing" "github.com/stretchr/testify/assert" @@ -450,3 +453,89 @@ func TestHarvesterConfigMerge_Addons(t *testing.T) { assert.Equal(t, "the value to overwrite original", conf.Addons["rancher-logging"].ValuesContent, "Addons ValuesContent should be merged") assert.Equal(t, false, conf.Addons["rancher-monitoring"].Enabled, "Addons Enabled false should be merged") } + +func TestHarvesterReservedResourcesConfigRendering(t *testing.T) { + conf := &HarvesterConfig{} + content, err := render("rke2-99-z00-harvester-reserved-resources.yaml", conf) + assert.NoError(t, err) + + loadedConf := map[string][]string{} + + err = yaml.Unmarshal([]byte(content), &loadedConf) + assert.NoError(t, err) + assert.Equal(t, 2, len(loadedConf["kubelet-arg+"])) + 
+ systemReserved := loadedConf["kubelet-arg+"][0] + assert.True(t, strings.HasPrefix(systemReserved, "system-reserved=cpu="), + fmt.Sprintf("%s doesn't started with system-reserved=cpu=", systemReserved)) + systemReservedArray := strings.Split(systemReserved, "system-reserved=cpu=") + assert.Equal(t, 2, len(systemReservedArray)) + systemCPUReserved, err := strconv.Atoi(strings.Replace(systemReservedArray[1], "m", "", 1)) + assert.NoError(t, err) + + kubeReserved := loadedConf["kubelet-arg+"][1] + assert.True(t, strings.HasPrefix(kubeReserved, "kube-reserved=cpu="), + fmt.Sprintf("%s doesn't started with kube-reserved=cpu=", kubeReserved)) + kubeReservedArray := strings.Split(kubeReserved, "kube-reserved=cpu=") + assert.Equal(t, 2, len(kubeReservedArray)) + kubeCPUReserved, err := strconv.Atoi(strings.Replace(kubeReservedArray[1], "m", "", 1)) + assert.NoError(t, err) + + assert.Equal(t, systemCPUReserved, kubeCPUReserved*2/3) +} + +func TestCalculateCPUReservedInMilliCPU(t *testing.T) { + testCases := []struct { + name string + coreNum int + maxPods int + reservedMilliCores int64 + }{ + { + name: "invalid core num", + coreNum: -1, + maxPods: MaxPods, + reservedMilliCores: 0, + }, + { + name: "invalid max pods", + coreNum: 1, + maxPods: -1, + reservedMilliCores: 0, + }, + { + name: "core = 1 and max pods = 110", + coreNum: 1, + maxPods: 110, + reservedMilliCores: 60, + }, + { + name: "core = 1", + coreNum: 1, + maxPods: MaxPods, + reservedMilliCores: 60 + 400, + }, + { + name: "core = 2", + coreNum: 2, + maxPods: MaxPods, + reservedMilliCores: 60 + 10 + 400, + }, + { + name: "core = 4", + coreNum: 4, + maxPods: MaxPods, + reservedMilliCores: 60 + 10 + 5*2 + 400, + }, + { + name: "core = 8", + coreNum: 8, + maxPods: MaxPods, + reservedMilliCores: 60 + 10 + 5*2 + 2.5*4 + 400, + }, + } + + for _, tc := range testCases { + assert.Equal(t, tc.reservedMilliCores, calculateCPUReservedInMilliCPU(tc.coreNum, tc.maxPods)) + } +} diff --git a/pkg/config/cos.go 
b/pkg/config/cos.go index efa3dfba4..dc357f848 100644 --- a/pkg/config/cos.go +++ b/pkg/config/cos.go @@ -397,6 +397,21 @@ func initRancherdStage(config *HarvesterConfig, stage *yipSchema.Stage) error { ) } + reservedResourceConfig, err := render("rke2-99-z00-harvester-reserved-resources.yaml", config) + if err != nil { + return err + } + + stage.Files = append(stage.Files, + yipSchema.File{ + Path: "/etc/rancher/rke2/config.yaml.d/99-z00-harvester-reserved-resources.yaml", + Content: reservedResourceConfig, + Permissions: 0600, + Owner: 0, + Group: 0, + }, + ) + return nil } diff --git a/pkg/config/templates/rke2-99-z00-harvester-reserved-resources.yaml b/pkg/config/templates/rke2-99-z00-harvester-reserved-resources.yaml new file mode 100644 index 000000000..6c9ca78f9 --- /dev/null +++ b/pkg/config/templates/rke2-99-z00-harvester-reserved-resources.yaml @@ -0,0 +1,3 @@ +kubelet-arg+: +- {{ printf "%q" .GetSystemReserved }} +- {{ printf "%q" .GetKubeReserved }} \ No newline at end of file From 9207ac227559168522c8c0898c9b39260dbbb1d3 Mon Sep 17 00:00:00 2001 From: Cooper Tseng Date: Wed, 14 Aug 2024 12:18:07 +0800 Subject: [PATCH 2/2] Remove cpu_manager_state in initramfs stage Signed-off-by: Cooper Tseng --- pkg/config/cos.go | 6 ++++++ pkg/config/cos_test.go | 10 ++++++++++ 2 files changed, 16 insertions(+) diff --git a/pkg/config/cos.go b/pkg/config/cos.go index dc357f848..ea1c0466e 100644 --- a/pkg/config/cos.go +++ b/pkg/config/cos.go @@ -149,6 +149,12 @@ func ConvertToCOS(config *HarvesterConfig) (*yipSchema.YipConfig, error) { for _, module := range cfg.OS.Modules { initramfs.Commands = append(initramfs.Commands, "modprobe "+module) } + // Delete the cpu_manager_state file during the initramfs stage. During a reboot, this state file is always reverted + // because it was originally created during the system installation, becoming part of the root filesystem. + // As a result, the policy in cpu_manager_state file is "none" (default policy) after reboot. 
If we've already set + // the cpu-manager-policy to "static" before reboot, this mismatch can prevent kubelet from starting, + // and make the entire node unavailable. + initramfs.Commands = append(initramfs.Commands, "rm -f /var/lib/kubelet/cpu_manager_state") initramfs.Sysctl = cfg.OS.Sysctls initramfs.Environment = cfg.OS.Environment diff --git a/pkg/config/cos_test.go b/pkg/config/cos_test.go index 09057e94e..93c41f870 100644 --- a/pkg/config/cos_test.go +++ b/pkg/config/cos_test.go @@ -126,6 +126,16 @@ func TestConvertToCos_VerifyNetworkInstallMode(t *testing.T) { assert.False(t, containsFile(yipConfig.Stages["initramfs"][0].Files, "/etc/sysconfig/network/ifcfg-ens3")) } +func TestConvertToCos_Remove_CPUManagerState(t *testing.T) { + conf, err := LoadHarvesterConfig(util.LoadFixture(t, "harvester-config.yaml")) + assert.NoError(t, err) + + yipConfig, err := ConvertToCOS(conf) + assert.NoError(t, err) + + assert.Contains(t, yipConfig.Stages["initramfs"][0].Commands, "rm -f /var/lib/kubelet/cpu_manager_state") +} + func containsFile(files []yipSchema.File, fileName string) bool { for _, v := range files { if v.Path == fileName {