From 22f30e5ee8ece40a7bf522b4c2bb52e6bd2b0c43 Mon Sep 17 00:00:00 2001 From: Riley Karson Date: Tue, 19 Sep 2023 15:13:54 -0700 Subject: [PATCH 1/2] Rework taint model in GKE --- .../services/container/node_config.go.erb | 182 ++++++++---------- .../resource_container_cluster.go.erb | 2 +- .../resource_container_node_pool.go.erb | 2 +- .../resource_container_node_pool_test.go.erb | 83 -------- .../docs/r/container_cluster.html.markdown | 17 +- 5 files changed, 95 insertions(+), 191 deletions(-) diff --git a/mmv1/third_party/terraform/services/container/node_config.go.erb b/mmv1/third_party/terraform/services/container/node_config.go.erb index 8db029040fad..ec19e46f784c 100644 --- a/mmv1/third_party/terraform/services/container/node_config.go.erb +++ b/mmv1/third_party/terraform/services/container/node_config.go.erb @@ -411,18 +411,10 @@ func schemaNodeConfig() *schema.Schema { }, "taint": { - Type: schema.TypeList, - Optional: true, - // Computed=true because GKE Sandbox will automatically add taints to nodes that can/cannot run sandboxed pods. - Computed: true, - ForceNew: true, - // Legacy config mode allows explicitly defining an empty taint. - // See https://www.terraform.io/docs/configuration/attr-as-blocks.html - ConfigMode: schema.SchemaConfigModeAttr, + Type: schema.TypeList, + Optional: true, + ForceNew: true, Description: `List of Kubernetes taints to be applied to each node.`, - <% unless version.nil? || version == 'ga' -%> - DiffSuppressFunc: containerNodePoolTaintSuppress, - <% end -%> Elem: &schema.Resource{ Schema: map[string]*schema.Schema{ "key": { @@ -448,6 +440,31 @@ func schemaNodeConfig() *schema.Schema { }, }, + "effective_taints": { + Type: schema.TypeList, + Computed: true, + Description: `List of kubernetes taints applied to each node.`, + Elem: &schema.Resource{ + Schema: map[string]*schema.Schema{ + "key": { + Type: schema.TypeString, + Computed: true, + Description: `Key for taint.`, + }, + "value": { + Type: schema.TypeString, + Computed: true, + Description: `Value for taint.`, + }, + "effect": { + Type: schema.TypeString, + Computed: true, + Description: `Effect for taint.`, + }, + }, + }, + }, + "workload_metadata_config": { Computed: true, Type: schema.TypeList, @@ -897,12 +914,13 @@ func expandNodeConfig(v interface{}) *container.NodeConfig { Value: data["value"].(string), Effect: data["effect"].(string), } + nodeTaints = append(nodeTaints, taint) } + nc.Taints = nodeTaints } - if v, ok := nodeConfig["workload_metadata_config"]; ok { nc.WorkloadMetadataConfig = expandWorkloadMetadataConfig(v) } @@ -1081,11 +1099,11 @@ func expandConfidentialNodes(configured interface{}) *container.ConfidentialNode } func flattenNodeConfigDefaults(c *container.NodeConfigDefaults) []map[string]interface{} { - result := make([]map[string]interface{}, 0, 1) + result := make([]map[string]interface{}, 0, 1) - if c == nil { - return result - } + if c == nil { + return result + } result = append(result, map[string]interface{}{}) @@ -1094,16 +1112,27 @@ func flattenNodeConfigDefaults(c *container.NodeConfigDefaults) []map[string]int <% unless version == 'ga' -%> result[0]["gcfs_config"] = flattenGcfsConfig(c.GcfsConfig) <% end -%> - return result + return result } -func flattenNodeConfig(c *container.NodeConfig) []map[string]interface{} { +// v == old state of `node_config` +func flattenNodeConfig(c *container.NodeConfig, v interface{}) []map[string]interface{} { config := make([]map[string]interface{}, 0, 1) if c == nil { return config } + // default to no prior taint state if there are any issues + oldTaints := []interface{}{} + oldNodeConfigSchemaContainer := v.([]interface{}) + if len(oldNodeConfigSchemaContainer) != 0 { + oldNodeConfigSchema := oldNodeConfigSchemaContainer[0].(map[string]interface{}) + if vt, ok := oldNodeConfigSchema["taint"]; ok && len(vt.([]interface{})) > 0 { + oldTaints = vt.([]interface{}) + } + } + config = append(config, map[string]interface{}{ "machine_type": c.MachineType, "disk_size_gb": c.DiskSizeGb, @@ -1123,26 +1152,27 @@ func flattenNodeConfig(c *container.NodeConfig) []map[string]interface{} { "metadata": c.Metadata, "image_type": c.ImageType, "labels": c.Labels, - "resource_labels": c.ResourceLabels, + "resource_labels": c.ResourceLabels, "tags": c.Tags, "preemptible": c.Preemptible, "spot": c.Spot, "min_cpu_platform": c.MinCpuPlatform, "shielded_instance_config": flattenShieldedInstanceConfig(c.ShieldedInstanceConfig), - "taint": flattenTaints(c.Taints), + "taint": flattenTaints(c.Taints, oldTaints), + "effective_taints": flattenEffectiveTaints(c.Taints), "workload_metadata_config": flattenWorkloadMetadataConfig(c.WorkloadMetadataConfig), <% unless version == 'ga' -%> - "sandbox_config": flattenSandboxConfig(c.SandboxConfig), + "sandbox_config": flattenSandboxConfig(c.SandboxConfig), "host_maintenance_policy": flattenHostMaintenancePolicy(c.HostMaintenancePolicy), <% end -%> "confidential_nodes": flattenConfidentialNodes(c.ConfidentialNodes), - "boot_disk_kms_key": c.BootDiskKmsKey, - "kubelet_config": flattenKubeletConfig(c.KubeletConfig), - "linux_node_config": flattenLinuxNodeConfig(c.LinuxNodeConfig), - "node_group": c.NodeGroup, + "boot_disk_kms_key": c.BootDiskKmsKey, + "kubelet_config": flattenKubeletConfig(c.KubeletConfig), + "linux_node_config": flattenLinuxNodeConfig(c.LinuxNodeConfig), + "node_group": c.NodeGroup, "advanced_machine_features": flattenAdvancedMachineFeaturesConfig(c.AdvancedMachineFeatures), - "sole_tenant_config": flattenSoleTenantConfig(c.SoleTenantConfig), - "fast_socket": flattenFastSocket(c.FastSocket), + "sole_tenant_config": flattenSoleTenantConfig(c.SoleTenantConfig), + "fast_socket": flattenFastSocket(c.FastSocket), }) if len(c.OauthScopes) > 0 { @@ -1273,7 +1303,31 @@ func flattenGKEReservationAffinity(c *container.ReservationAffinity) []map[strin return result } -func flattenTaints(c []*container.NodeTaint) []map[string]interface{} { +// flattenTaints records the set of taints already present in state. +func flattenTaints(c []*container.NodeTaint, oldTaints []interface{}) []map[string]interface{} { + taintKeys := map[string]struct{}{} + for _, raw := range oldTaints { + data := raw.(map[string]interface{}) + taintKey := data["key"].(string) + taintKeys[taintKey] = struct{}{} + } + + result := []map[string]interface{}{} + for _, taint := range c { + if _, ok := taintKeys[taint.Key]; ok { + result = append(result, map[string]interface{}{ + "key": taint.Key, + "value": taint.Value, + "effect": taint.Effect, + }) + } + } + + return result +} + +// flattenEffectiveTaints records the complete set of taints returned from GKE. +func flattenEffectiveTaints(c []*container.NodeTaint) []map[string]interface{} { result := []map[string]interface{}{} for _, taint := range c { result = append(result, map[string]interface{}{ @@ -1282,10 +1336,10 @@ func flattenTaints(c []*container.NodeTaint) []map[string]interface{} { "effect": taint.Effect, }) } + return result } - func flattenWorkloadMetadataConfig(c *container.WorkloadMetadataConfig) []map[string]interface{} { result := []map[string]interface{}{} if c != nil { @@ -1352,74 +1406,6 @@ func containerNodePoolLabelsSuppress(k, old, new string, d *schema.ResourceData) return true } - -func containerNodePoolTaintSuppress(k, old, new string, d *schema.ResourceData) bool { - // Node configs are embedded into multiple resources (container cluster and - // container node pool) so we determine the node config key dynamically. - idx := strings.Index(k, ".taint.") - if idx < 0 { - return false - } - - root := k[:idx] - - // Right now, GKE only applies its own out-of-band labels when you enable - // Sandbox. We only need to perform diff suppression in this case; - // otherwise, the default Terraform behavior is fine. - o, n := d.GetChange(root + ".sandbox_config.0.sandbox_type") - if o == nil || n == nil { - return false - } - - // Pull the entire changeset as a list rather than trying to deal with each - // element individually. - o, n = d.GetChange(root + ".taint") - if o == nil || n == nil { - return false - } - - type taintType struct { - Key, Value, Effect string - } - - taintSet := make(map[taintType]struct{}) - - // Add all new taints to set. - for _, raw := range n.([]interface{}) { - data := raw.(map[string]interface{}) - taint := taintType{ - Key: data["key"].(string), - Value: data["value"].(string), - Effect: data["effect"].(string), - } - taintSet[taint] = struct{}{} - } - - // Remove all current taints, skipping GKE-managed keys if not present in - // the new configuration. - for _, raw := range o.([]interface{}) { - data := raw.(map[string]interface{}) - taint := taintType{ - Key: data["key"].(string), - Value: data["value"].(string), - Effect: data["effect"].(string), - } - if _, ok := taintSet[taint]; ok { - delete(taintSet, taint) - } else if !strings.HasPrefix(taint.Key, "sandbox.gke.io/") && taint.Key != "kubernetes.io/arch" { - // User-provided taint removed in new configuration. - return false - } - } - - // If, at this point, the set still has elements, the new configuration - // added an additional taint. - if len(taintSet) > 0 { - return false - } - - return true -} <% end -%> func flattenKubeletConfig(c *container.NodeKubeletConfig) []map[string]interface{} { @@ -1494,4 +1480,4 @@ func flattenHostMaintenancePolicy(c *container.HostMaintenancePolicy) []map[stri return result } -<% end -%> \ No newline at end of file +<% end -%> diff --git a/mmv1/third_party/terraform/services/container/resource_container_cluster.go.erb b/mmv1/third_party/terraform/services/container/resource_container_cluster.go.erb index a8f50aab16a4..1ed34d7f5b91 100644 --- a/mmv1/third_party/terraform/services/container/resource_container_cluster.go.erb +++ b/mmv1/third_party/terraform/services/container/resource_container_cluster.go.erb @@ -2682,7 +2682,7 @@ func resourceContainerClusterRead(d *schema.ResourceData, meta interface{}) erro return fmt.Errorf("Error setting default_max_pods_per_node: %s", err) } } - if err := d.Set("node_config", flattenNodeConfig(cluster.NodeConfig)); err != nil { + if err := d.Set("node_config", flattenNodeConfig(cluster.NodeConfig, d.Get("node_config"))); err != nil { return err } if err := d.Set("project", project); err != nil { diff --git a/mmv1/third_party/terraform/services/container/resource_container_node_pool.go.erb b/mmv1/third_party/terraform/services/container/resource_container_node_pool.go.erb index 648323e612b7..97b0f6e5e507 100644 --- a/mmv1/third_party/terraform/services/container/resource_container_node_pool.go.erb +++ b/mmv1/third_party/terraform/services/container/resource_container_node_pool.go.erb @@ -1112,7 +1112,7 @@ func flattenNodePool(d *schema.ResourceData, config *transport_tpg.Config, np *c "initial_node_count": np.InitialNodeCount, "node_locations": schema.NewSet(schema.HashString, tpgresource.ConvertStringArrToInterface(np.Locations)), "node_count": nodeCount, - "node_config": flattenNodeConfig(np.Config), + "node_config": flattenNodeConfig(np.Config, d.Get(prefix + "node_config")), "instance_group_urls": igmUrls, "managed_instance_group_urls": managedIgmUrls, "version": np.Version, diff --git a/mmv1/third_party/terraform/services/container/resource_container_node_pool_test.go.erb b/mmv1/third_party/terraform/services/container/resource_container_node_pool_test.go.erb index 0fb6f5ede53a..89bf423ba064 100644 --- a/mmv1/third_party/terraform/services/container/resource_container_node_pool_test.go.erb +++ b/mmv1/third_party/terraform/services/container/resource_container_node_pool_test.go.erb @@ -361,32 +361,6 @@ func TestAccContainerNodePool_withSandboxConfig(t *testing.T) { ImportState: true, ImportStateVerify: true, }, - { - // GKE sets automatic labels and taints on nodes. This makes - // sure we ignore the automatic ones and keep our own. - Config: testAccContainerNodePool_withSandboxConfig(cluster, np), - // When we use PlanOnly without ExpectNonEmptyPlan, we're - // guaranteeing that the computed fields of the resources don't - // force an unintentional change to the plan. That is, we - // expect this part of the test to pass only if the plan - // doesn't change. - PlanOnly: true, - }, - { - // Now we'll modify the taints, which should force a change to - // the plan. We make sure we don't over-suppress and end up - // eliminating the labels or taints we asked for. This will - // destroy and recreate the node pool as taints are immutable. - Config: testAccContainerNodePool_withSandboxConfig_changeTaints(cluster, np), - Check: resource.ComposeTestCheckFunc( - resource.TestCheckResourceAttr("google_container_node_pool.with_sandbox_config", - "node_config.0.labels.test.terraform.io/gke-sandbox", "true"), - resource.TestCheckResourceAttr("google_container_node_pool.with_sandbox_config", - "node_config.0.taint.0.key", "test.terraform.io/gke-sandbox"), - resource.TestCheckResourceAttr("google_container_node_pool.with_sandbox_config", - "node_config.0.taint.1.key", "test.terraform.io/gke-sandbox-amended"), - ), - }, }, }) } @@ -2515,63 +2489,6 @@ resource "google_container_node_pool" "with_sandbox_config" { "test.terraform.io/gke-sandbox" = "true" } - taint { - key = "test.terraform.io/gke-sandbox" - value = "true" - effect = "NO_SCHEDULE" - } - - oauth_scopes = [ - "https://www.googleapis.com/auth/logging.write", - "https://www.googleapis.com/auth/monitoring", - ] - } -} -`, cluster, np) -} - -func testAccContainerNodePool_withSandboxConfig_changeTaints(cluster, np string) string { - return fmt.Sprintf(` -data "google_container_engine_versions" "central1a" { - location = "us-central1-a" -} - -resource "google_container_cluster" "cluster" { - name = "%s" - location = "us-central1-a" - initial_node_count = 1 - min_master_version = data.google_container_engine_versions.central1a.latest_master_version -} - -resource "google_container_node_pool" "with_sandbox_config" { - name = "%s" - location = "us-central1-a" - cluster = google_container_cluster.cluster.name - initial_node_count = 1 - node_config { - machine_type = "n1-standard-1" // can't be e2 because of gvisor - image_type = "COS_CONTAINERD" - - sandbox_config { - sandbox_type = "gvisor" - } - - labels = { - "test.terraform.io/gke-sandbox" = "true" - } - - taint { - key = "test.terraform.io/gke-sandbox" - value = "true" - effect = "NO_SCHEDULE" - } - - taint { - key = "test.terraform.io/gke-sandbox-amended" - value = "also-true" - effect = "NO_SCHEDULE" - } - oauth_scopes = [ "https://www.googleapis.com/auth/logging.write", "https://www.googleapis.com/auth/monitoring", diff --git a/mmv1/third_party/terraform/website/docs/r/container_cluster.html.markdown b/mmv1/third_party/terraform/website/docs/r/container_cluster.html.markdown index d29897479d2a..5a1c9688de2b 100644 --- a/mmv1/third_party/terraform/website/docs/r/container_cluster.html.markdown +++ b/mmv1/third_party/terraform/website/docs/r/container_cluster.html.markdown @@ -888,14 +888,13 @@ gvnic { * `tags` - (Optional) The list of instance tags applied to all nodes. Tags are used to identify valid sources or targets for network firewalls. -* `taint` - (Optional) A list of [Kubernetes taints](https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/) -to apply to nodes. GKE's API can only set this field on cluster creation. -However, GKE will add taints to your nodes if you enable certain features such -as GPUs. If this field is set, any diffs on this field will cause Terraform to -recreate the underlying resource. Taint values can be updated safely in -Kubernetes (eg. through `kubectl`), and it's recommended that you do not use -this field to manage taints. If you do, `lifecycle.ignore_changes` is -recommended. Structure is [documented below](#nested_taint). +* `taint` - (Optional) A list of +[Kubernetes taints](https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/) +to apply to nodes. This field will only report drift on taint keys that are +already managed with Terraform, use `effective_taints` to view the list of +GKE-managed taints on the node pool from all sources. Importing this resource +will not record any taints as being Terraform-managed, and will cause drift with +any configured taints. Structure is [documented below](#nested_taint). * `workload_metadata_config` - (Optional) Metadata configuration to expose to workloads on the node pool. Structure is [documented below](#nested_workload_metadata_config). @@ -1310,6 +1309,8 @@ exported: * `cluster_autoscaling.0.auto_provisioning_defaults.0.management.0.upgrade_options` - Specifies the [Auto Upgrade knobs](https://cloud.google.com/kubernetes-engine/docs/reference/rest/v1beta1/NodeManagement#AutoUpgradeOptions) for the node pool. +* `node_config.0.effective_taints` - List of kubernetes taints applied to each node. Structure is [documented above](#nested_taint). + ## Timeouts This resource provides the following From a2332e67368dcca47dd869e27a8f0395e51bb92e Mon Sep 17 00:00:00 2001 From: Riley Karson Date: Wed, 20 Sep 2023 14:11:09 -0700 Subject: [PATCH 2/2] Add ISVI --- .../resource_container_cluster_test.go.erb | 16 +++++++++------- .../resource_container_node_pool_test.go.erb | 4 ++-- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/mmv1/third_party/terraform/services/container/resource_container_cluster_test.go.erb b/mmv1/third_party/terraform/services/container/resource_container_cluster_test.go.erb index 8e0ef285d4b4..c30d5538b362 100644 --- a/mmv1/third_party/terraform/services/container/resource_container_cluster_test.go.erb +++ b/mmv1/third_party/terraform/services/container/resource_container_cluster_test.go.erb @@ -1235,17 +1235,19 @@ func TestAccContainerCluster_withNodeConfig(t *testing.T) { Config: testAccContainerCluster_withNodeConfig(clusterName), }, { - ResourceName: "google_container_cluster.with_node_config", - ImportState: true, - ImportStateVerify: true, + ResourceName: "google_container_cluster.with_node_config", + ImportState: true, + ImportStateVerify: true, + ImportStateVerifyIgnore: []string{"node_config.0.taint"}, }, { Config: testAccContainerCluster_withNodeConfigUpdate(clusterName), }, { - ResourceName: "google_container_cluster.with_node_config", - ImportState: true, - ImportStateVerify: true, + ResourceName: "google_container_cluster.with_node_config", + ImportState: true, + ImportStateVerify: true, + ImportStateVerifyIgnore: []string{"node_config.0.taint"}, }, }, }) @@ -1541,7 +1543,7 @@ func TestAccContainerCluster_withSandboxConfig(t *testing.T) { ResourceName: "google_container_cluster.with_sandbox_config", ImportState: true, ImportStateVerify: true, - ImportStateVerifyIgnore: []string{"min_master_version"}, + ImportStateVerifyIgnore: []string{"min_master_version", "node_config.0.taint"}, }, { // GKE sets automatic labels and taints on nodes. This makes diff --git a/mmv1/third_party/terraform/services/container/resource_container_node_pool_test.go.erb b/mmv1/third_party/terraform/services/container/resource_container_node_pool_test.go.erb index 89bf423ba064..f32c2a052a46 100644 --- a/mmv1/third_party/terraform/services/container/resource_container_node_pool_test.go.erb +++ b/mmv1/third_party/terraform/services/container/resource_container_node_pool_test.go.erb @@ -215,7 +215,7 @@ func TestAccContainerNodePool_withNodeConfig(t *testing.T) { ImportStateVerify: true, // autoscaling.# = 0 is equivalent to no autoscaling at all, // but will still cause an import diff - ImportStateVerifyIgnore: []string{"autoscaling.#"}, + ImportStateVerifyIgnore: []string{"autoscaling.#", "node_config.0.taint"}, }, resource.TestStep{ Config: testAccContainerNodePool_withNodeConfigUpdate(cluster, nodePool), @@ -226,7 +226,7 @@ func TestAccContainerNodePool_withNodeConfig(t *testing.T) { ImportStateVerify: true, // autoscaling.# = 0 is equivalent to no autoscaling at all, // but will still cause an import diff - ImportStateVerifyIgnore: []string{"autoscaling.#"}, + ImportStateVerifyIgnore: []string{"autoscaling.#", "node_config.0.taint"}, }, }, })