From 682aa85f84c532920479319cd14d7930b3dbd7c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gon=C3=A7alo=20Heleno?= Date: Wed, 3 Apr 2024 11:36:17 +0200 Subject: [PATCH] feat: add variable to set resources with default values Having default values is good practice to prevent our components from eventually starving other workloads on the cluster. However, these should probably be adapted in production clusters and are only a safeguard in case someone forgets to set them. --- aks/main.tf | 2 ++ eks/main.tf | 2 ++ kind/main.tf | 2 ++ locals.tf | 58 +++++++++++++++++++++++++++++---- sks/main.tf | 2 ++ variables.tf | 91 +++++++++++++++++++++++++++++++++++++++++++++++++++- 6 files changed, 150 insertions(+), 7 deletions(-) diff --git a/aks/main.tf b/aks/main.tf index f0598628..35d76771 100644 --- a/aks/main.tf +++ b/aks/main.tf @@ -42,6 +42,8 @@ module "kube-prometheus-stack" { app_autosync = var.app_autosync dependency_ids = var.dependency_ids + resources = var.resources + prometheus = var.prometheus alertmanager = var.alertmanager grafana = var.grafana diff --git a/eks/main.tf b/eks/main.tf index cdfe109e..8655e1f7 100644 --- a/eks/main.tf +++ b/eks/main.tf @@ -13,6 +13,8 @@ module "kube-prometheus-stack" { app_autosync = var.app_autosync dependency_ids = var.dependency_ids + resources = var.resources + prometheus = var.prometheus alertmanager = var.alertmanager grafana = var.grafana diff --git a/kind/main.tf b/kind/main.tf index cdfe109e..8655e1f7 100644 --- a/kind/main.tf +++ b/kind/main.tf @@ -13,6 +13,8 @@ module "kube-prometheus-stack" { app_autosync = var.app_autosync dependency_ids = var.dependency_ids + resources = var.resources + prometheus = var.prometheus alertmanager = var.alertmanager grafana = var.grafana diff --git a/locals.tf b/locals.tf index f87122dc..57abf437 100644 --- a/locals.tf +++ b/locals.tf @@ -1,6 +1,6 @@ locals { oauth2_proxy_image = "quay.io/oauth2-proxy/oauth2-proxy:v7.5.0" - curl_wait_for_oidc_image = "curlimages/curl:8.3.0" + 
curl_wait_for_oidc_image = "curlimages/curl:8.6.0" domain = trimprefix("${var.subdomain}.${var.base_domain}", ".") domain_full = trimprefix("${var.subdomain}.${var.cluster_name}.${var.base_domain}", ".") @@ -10,6 +10,16 @@ locals { "traefik.ingress.kubernetes.io/router.tls" = "true" } + oidc_proxy_resources = { + requests = { + cpu = "20m" + memory = "64M" + } + limits = { + memory = "128M" + } + } + grafana_defaults = { enabled = true additional_data_sources = false @@ -126,8 +136,9 @@ locals { ] containers = [ { - image = local.oauth2_proxy_image - name = "alertmanager-proxy" + image = local.oauth2_proxy_image + name = "alertmanager-proxy" + resources = local.oidc_proxy_resources ports = [ { name = "proxy" @@ -148,6 +159,10 @@ locals { ], local.alertmanager.oidc.oauth2_proxy_extra_args) }, ] + resources = { + requests = { for k, v in var.resources.alertmanager.requests : k => v if v != null } + limits = { for k, v in var.resources.alertmanager.limits : k => v if v != null } + } } ingress = { enabled = true @@ -207,7 +222,7 @@ locals { } server = { domain = "${local.grafana.domain}" - root_url = "https://%(domain)s" # TODO check this + root_url = "https://%(domain)s" } dataproxy = { timeout = var.dataproxy_timeout @@ -252,6 +267,10 @@ locals { }, ] } + resources = { + requests = { for k, v in var.resources.grafana.requests : k => v if v != null } + limits = { for k, v in var.resources.grafana.limits : k => v if v != null } + } } : null, merge((!local.grafana.enabled && local.grafana.additional_data_sources) ? 
{ forceDeployDashboards = true @@ -333,8 +352,9 @@ locals { "--email-domain=*", "--redirect-url=https://${local.prometheus.domain}/oauth2/callback", ], local.prometheus.oidc.oauth2_proxy_extra_args) - image = local.oauth2_proxy_image - name = "prometheus-proxy" + image = local.oauth2_proxy_image + name = "prometheus-proxy" + resources = local.oidc_proxy_resources ports = [ { containerPort = 9091 @@ -353,6 +373,10 @@ locals { externalLabels = { prometheus = "prometheus-${var.cluster_name}" } + resources = { + requests = { for k, v in var.resources.prometheus.requests : k => v if v != null } + limits = { for k, v in var.resources.prometheus.limits : k => v if v != null } + } }, var.metrics_storage_main != null ? { thanos = { objectStorageConfig = { @@ -361,6 +385,10 @@ locals { key = "thanos.yaml" } } + resources = { + requests = { for k, v in var.resources.thanos_sidecar.requests : k => v if v != null } + limits = { for k, v in var.resources.thanos_sidecar.limits : k => v if v != null } + } } } : null) service = { @@ -382,6 +410,24 @@ locals { } } ) + prometheusOperator = { + resources = { + requests = { for k, v in var.resources.prometheus_operator.requests : k => v if v != null } + limits = { for k, v in var.resources.prometheus_operator.limits : k => v if v != null } + } + } + kube-state-metrics = { + resources = { + requests = { for k, v in var.resources.kube_state_metrics.requests : k => v if v != null } + limits = { for k, v in var.resources.kube_state_metrics.limits : k => v if v != null } + } + } + prometheus-node-exporter = { + resources = { + requests = { for k, v in var.resources.node_exporter.requests : k => v if v != null } + limits = { for k, v in var.resources.node_exporter.limits : k => v if v != null } + } + } } }] } diff --git a/sks/main.tf b/sks/main.tf index cdfe109e..8655e1f7 100644 --- a/sks/main.tf +++ b/sks/main.tf @@ -13,6 +13,8 @@ module "kube-prometheus-stack" { app_autosync = var.app_autosync dependency_ids = var.dependency_ids + 
resources = var.resources + prometheus = var.prometheus alertmanager = var.alertmanager grafana = var.grafana diff --git a/variables.tf b/variables.tf index 4b89f5f9..f869b543 100644 --- a/variables.tf +++ b/variables.tf @@ -84,6 +84,95 @@ variable "dependency_ids" { ## Module variables ####################### +variable "resources" { + description = <<-EOT + Resource limits and requests for kube-prometheus-stack's components. Follow the style on https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/[official documentation] to understand the format of the values. + + IMPORTANT: These are not production values. You should always adjust them to your needs. + EOT + type = object({ + + prometheus = optional(object({ + requests = optional(object({ + cpu = optional(string, "250m") + memory = optional(string, "512Mi") + }), {}) + limits = optional(object({ + cpu = optional(string) + memory = optional(string, "1024Mi") + }), {}) + }), {}) + + prometheus_operator = optional(object({ + requests = optional(object({ + cpu = optional(string, "50m") + memory = optional(string, "128Mi") + }), {}) + limits = optional(object({ + cpu = optional(string) + memory = optional(string, "128Mi") + }), {}) + }), {}) + + thanos_sidecar = optional(object({ + requests = optional(object({ + cpu = optional(string, "100m") + memory = optional(string, "256Mi") + }), {}) + limits = optional(object({ + cpu = optional(string) + memory = optional(string, "512Mi") + }), {}) + }), {}) + + alertmanager = optional(object({ + requests = optional(object({ + cpu = optional(string, "50m") + memory = optional(string, "128Mi") + }), {}) + limits = optional(object({ + cpu = optional(string) + memory = optional(string, "256Mi") + }), {}) + }), {}) + + kube_state_metrics = optional(object({ + requests = optional(object({ + cpu = optional(string, "50m") + memory = optional(string, "128Mi") + }), {}) + limits = optional(object({ + cpu = optional(string) + memory = optional(string, "128Mi") + 
}), {}) + }), {}) + + grafana = optional(object({ + requests = optional(object({ + cpu = optional(string, "250m") + memory = optional(string, "512Mi") + }), {}) + limits = optional(object({ + cpu = optional(string) + memory = optional(string, "512Mi") + }), {}) + }), {}) + + node_exporter = optional(object({ + requests = optional(object({ + cpu = optional(string, "50m") + memory = optional(string, "128Mi") + }), {}) + limits = optional(object({ + cpu = optional(string) + memory = optional(string, "128Mi") + }), {}) + }), {}) + + }) + default = {} +} + variable "grafana" { description = "Grafana settings" type = any @@ -116,7 +205,7 @@ variable "alertmanager" { } variable "metrics_storage_main" { - description = "Storage settings for the Thanos sidecar. Needs to be of type `any` because the structure is different depending on the provider used." + description = "Storage settings for the Thanos sidecar. Needs to be of type `any` because the structure is different depending on the variant used." type = any default = {} }