From 65eb719bda167b207ca788e16679d61986f324d2 Mon Sep 17 00:00:00 2001 From: Pongsan Date: Wed, 21 Aug 2024 15:15:40 +0700 Subject: [PATCH 01/24] Add Terraform code to provision AMP, Loki, Tempo and Grafana. --- .../observability/oss/.workshop/cleanup.sh | 29 ++ .../oss/.workshop/terraform/main.tf | 420 ++++++++++++++++++ .../oss/.workshop/terraform/outputs.tf | 7 + .../order-service-metrics-dashboard.yaml | 270 +++++++++++ .../oss/.workshop/terraform/vars.tf | 63 +++ 5 files changed, 789 insertions(+) create mode 100644 manifests/modules/observability/oss/.workshop/cleanup.sh create mode 100644 manifests/modules/observability/oss/.workshop/terraform/main.tf create mode 100644 manifests/modules/observability/oss/.workshop/terraform/outputs.tf create mode 100644 manifests/modules/observability/oss/.workshop/terraform/templates/order-service-metrics-dashboard.yaml create mode 100644 manifests/modules/observability/oss/.workshop/terraform/vars.tf diff --git a/manifests/modules/observability/oss/.workshop/cleanup.sh b/manifests/modules/observability/oss/.workshop/cleanup.sh new file mode 100644 index 000000000..f280018ae --- /dev/null +++ b/manifests/modules/observability/oss/.workshop/cleanup.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +set -e + +logmessage "Deleting OpenTelemetry collectors..." 
+ +delete-all-if-crd-exists certificaterequests.cert-manager.io +delete-all-if-crd-exists certificates.cert-manager.io +delete-all-if-crd-exists challenges.acme.cert-manager.io +delete-all-if-crd-exists clusterissuers.cert-manager.io +delete-all-if-crd-exists issuers.cert-manager.io +delete-all-if-crd-exists orders.acme.cert-manager.io +delete-all-if-crd-exists grafanaalertrulegroups.grafana.integreatly.org +delete-all-if-crd-exists grafanacontactpoints.grafana.integreatly.org +delete-all-if-crd-exists grafanadashboards.grafana.integreatly.org +delete-all-if-crd-exists grafanadatasources.grafana.integreatly.org +delete-all-if-crd-exists grafanafolders.grafana.integreatly.org +delete-all-if-crd-exists grafananotificationpolicies.grafana.integreatly.org +delete-all-if-crd-exists grafanas.grafana.integreatly.org +delete-all-if-crd-exists grafanaagents.monitoring.grafana.com +delete-all-if-crd-exists integrations.monitoring.grafana.com +delete-all-if-crd-exists logsinstances.monitoring.grafana.com +delete-all-if-crd-exists metricsinstances.monitoring.grafana.com +delete-all-if-crd-exists podlogs.monitoring.grafana.com +delete-all-if-crd-exists podmonitors.monitoring.coreos.com +delete-all-if-crd-exists probes.monitoring.coreos.com +delete-all-if-crd-exists servicemonitors.monitoring.coreos.com + +kubectl delete -n other pod load-generator --ignore-not-found \ No newline at end of file diff --git a/manifests/modules/observability/oss/.workshop/terraform/main.tf b/manifests/modules/observability/oss/.workshop/terraform/main.tf new file mode 100644 index 000000000..a7fedb847 --- /dev/null +++ b/manifests/modules/observability/oss/.workshop/terraform/main.tf @@ -0,0 +1,420 @@ +terraform { + required_providers { + kubectl = { + source = "gavinbunney/kubectl" + version = ">= 1.14" + } + } +} + +data "aws_partition" "current" {} + +module "ebs_csi_driver_irsa" { + source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" + version = "5.44.0" + + 
role_name_prefix = "${var.addon_context.eks_cluster_id}-ebs-csi-" + + attach_ebs_csi_policy = true + + oidc_providers = { + main = { + provider_arn = var.addon_context.eks_oidc_provider_arn + namespace_service_accounts = ["kube-system:ebs-csi-controller-sa"] + } + } + + tags = var.tags +} + +module "eks_blueprints_addons" { + source = "aws-ia/eks-blueprints-addons/aws" + version = "1.16.3" + + cluster_name = var.addon_context.eks_cluster_id + cluster_endpoint = var.addon_context.aws_eks_cluster_endpoint + cluster_version = var.eks_cluster_version + oidc_provider_arn = var.addon_context.eks_oidc_provider_arn + + eks_addons = { + aws-ebs-csi-driver = { + most_recent = true + service_account_role_arn = module.ebs_csi_driver_irsa.iam_role_arn + preserve = false + configuration_values = jsonencode({ defaultStorageClass = { enabled = true } }) + } + } + + enable_aws_load_balancer_controller = true + aws_load_balancer_controller = { + wait = true + } +} + +resource "time_sleep" "blueprints_addons_sleep" { + depends_on = [ + module.eks_blueprints_addons + ] + + create_duration = "15s" +} + +module "cert_manager" { + source = "aws-ia/eks-blueprints-addon/aws" + version = "1.1.1" + + depends_on = [ + time_sleep.blueprints_addons_sleep + ] + + name = "cert-manager" + namespace = "cert-manager" + create_namespace = true + wait = true + chart = "cert-manager" + chart_version = "v1.15.3" + repository = "https://charts.jetstack.io" + + set = [ + { + name = "crds.enabled" + value = true + } + ] +} + +module "opentelemetry_operator" { + source = "aws-ia/eks-blueprints-addon/aws" + version = "1.1.1" + + depends_on = [ + module.cert_manager + ] + + name = "opentelemetry-operator" + namespace = "opentelemetry-operator-system" + create_namespace = true + wait = true + chart = "opentelemetry-operator" + chart_version = var.opentelemetry_operator_chart_version + repository = "https://open-telemetry.github.io/opentelemetry-helm-charts" + + set = [{ + name = 
"manager.collectorImage.repository" + value = "otel/opentelemetry-collector-k8s" + }] +} + +module "iam_assumable_role_adot" { + source = "terraform-aws-modules/iam/aws//modules/iam-assumable-role-with-oidc" + version = "5.44.0" + + create_role = true + role_name = "${var.addon_context.eks_cluster_id}-adot-collector" + provider_url = var.addon_context.eks_oidc_issuer_url + role_policy_arns = [ + "arn:${data.aws_partition.current.partition}:iam::aws:policy/AmazonPrometheusRemoteWriteAccess" + ] + oidc_fully_qualified_subjects = ["system:serviceaccount:other:adot-collector"] + + tags = var.tags +} + +resource "aws_prometheus_workspace" "this" { + alias = var.addon_context.eks_cluster_id + + tags = var.tags +} + +module "loki" { + source = "aws-ia/eks-blueprints-addon/aws" + version = "1.1.1" + + name = "loki" + namespace = "loki-system" + create_namespace = true + wait = true + chart = "loki" + chart_version = var.loki_chart_version + repository = "https://grafana.github.io/helm-charts" + + values = [ + <<-EOT + deploymentMode: SingleBinary + loki: + auth_enabled: false + commonConfig: + replication_factor: 1 + storage: + type: 'filesystem' + schemaConfig: + configs: + - from: "2024-01-01" + store: tsdb + index: + prefix: loki_index_ + period: 24h + object_store: filesystem # we're storing on filesystem so there's no real persistence here. 
+            schema: v13
+      singleBinary:
+        replicas: 1
+      read:
+        replicas: 0
+      backend:
+        replicas: 0
+      write:
+        replicas: 0
+      test:
+        enabled: false
+      lokiCanary:
+        enabled: false
+      chunksCache:
+        enabled: false
+      resultsCache:
+        enabled: false
+      gateway:
+        enabled: false
+    EOT
+  ]
+}
+
+module "tempo" {
+  source  = "aws-ia/eks-blueprints-addon/aws"
+  version = "1.1.1"
+
+  name             = "tempo"
+  namespace        = "tempo-system"
+  create_namespace = true
+  wait             = true
+  chart            = "tempo"
+  chart_version    = var.tempo_chart_version
+  repository       = "https://grafana.github.io/helm-charts"
+}
+
+module "grafana_operator" {
+  source  = "aws-ia/eks-blueprints-addon/aws"
+  version = "1.1.1"
+
+  name             = "grafana-operator"
+  namespace        = "grafana-operator-system"
+  create_namespace = true
+  wait             = true
+  chart            = "grafana-operator"
+  chart_version    = var.grafana_operator_chart_version
+  repository       = "oci://ghcr.io/grafana/helm-charts"
+}
+
+resource "kubernetes_namespace" "grafana" {
+  metadata {
+    name = "grafana"
+  }
+}
+
+module "grafana_irsa" {
+  source  = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks"
+  version = "5.44.0"
+
+  role_name_prefix = "${var.addon_context.eks_cluster_id}-grafana-"
+
+  role_policy_arns = {
+    policy = "arn:${data.aws_partition.current.partition}:iam::aws:policy/AmazonPrometheusQueryAccess" # partition resolved via data source so aws-cn / aws-us-gov also work
+  }
+
+  oidc_providers = {
+    main = {
+      provider_arn               = var.addon_context.eks_oidc_provider_arn
+      namespace_service_accounts = ["grafana:grafana-sa"]
+    }
+  }
+
+  tags = var.tags
+}
+
+resource "kubectl_manifest" "grafana" {
+  depends_on = [
+    aws_prometheus_workspace.this,
+    module.loki,
+    module.tempo,
+    module.grafana_operator
+  ]
+
+  yaml_body = <<-YAML
+apiVersion: grafana.integreatly.org/v1beta1
+kind: Grafana
+metadata:
+  name: grafana
+  namespace: grafana
+  labels:
+    dashboards: "grafana"
+spec:
+  config:
+    log:
+      mode: "console"
+    auth:
+      disable_login_form: "false"
+      sigv4_auth_enabled: "true"
+    security:
+      admin_user: root
+      admin_password: secret
+  deployment:
+    spec:
+      template:
+        spec:
+
containers: + - name: grafana + image: grafana/grafana:latest + serviceAccount: + metadata: + annotations: + eks.amazonaws.com/role-arn: ${module.grafana_irsa.iam_role_arn} + ingress: + metadata: + annotations: + alb.ingress.kubernetes.io/scheme: internet-facing + alb.ingress.kubernetes.io/target-type: ip + spec: + ingressClassName: alb + rules: + - http: + paths: + - backend: + service: + name: grafana-service + port: + number: 3000 + path: / + pathType: Prefix + YAML +} + +resource "kubectl_manifest" "grafana_datasource_prometheus" { + depends_on = [ + kubectl_manifest.grafana + ] + + yaml_body = <<-YAML +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDatasource +metadata: + name: prometheus + namespace: grafana +spec: + instanceSelector: + matchLabels: + dashboards: "grafana" + datasource: + uid: prometheus + name: Prometheus + type: prometheus + access: proxy + url: ${aws_prometheus_workspace.this.prometheus_endpoint} + isDefault: true + editable: false + jsonData: + httpMethod: "POST" + sigV4AuthType: "default" + sigV4Auth: true + sigV4Region: ${var.addon_context.aws_region_name} + YAML +} + +resource "kubectl_manifest" "grafana_datasource_loki" { + depends_on = [ + kubectl_manifest.grafana + ] + + yaml_body = <<-YAML +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDatasource +metadata: + name: loki + namespace: grafana +spec: + instanceSelector: + matchLabels: + dashboards: "grafana" + datasource: + uid: loki + name: Loki + type: loki + access: proxy + url: http://loki.loki-system:3100 + isDefault: false + editable: false + YAML +} + +resource "kubectl_manifest" "grafana_datasource_tempo" { + depends_on = [ + kubectl_manifest.grafana + ] + + yaml_body = <<-YAML +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDatasource +metadata: + name: tempo + namespace: grafana +spec: + instanceSelector: + matchLabels: + dashboards: "grafana" + datasource: + uid: tempo + name: Tempo + type: tempo + access: proxy + url: 
http://tempo.tempo-system:3100 + isDefault: false + editable: false + YAML +} + +resource "kubectl_manifest" "grafana_dashboard_kubernetes_cluster" { + depends_on = [ + kubectl_manifest.grafana_datasource_prometheus + ] + + yaml_body = <<-YAML +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: kubernetes-cluster + namespace: grafana +spec: + folder: "Grafana" + instanceSelector: + matchLabels: + dashboards: "grafana" + grafanaCom: + id: 3119 + revision: 2 + YAML +} + +resource "kubectl_manifest" "config_map_order_service_metrics_dashboard" { + yaml_body = templatefile("${path.module}/templates/order-service-metrics-dashboard.yaml", {}) +} + +resource "kubectl_manifest" "grafana_dashboard_order_service_metrics" { + depends_on = [ + kubectl_manifest.grafana_datasource_prometheus, + kubectl_manifest.config_map_order_service_metrics_dashboard + ] + + yaml_body = <<-YAML +apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDashboard +metadata: + name: order-service-metrics + namespace: grafana +spec: + folder: "Grafana" + instanceSelector: + matchLabels: + dashboards: "grafana" + configMapRef: + name: order-service-metrics-dashboard + key: json + YAML +} diff --git a/manifests/modules/observability/oss/.workshop/terraform/outputs.tf b/manifests/modules/observability/oss/.workshop/terraform/outputs.tf new file mode 100644 index 000000000..250fd3986 --- /dev/null +++ b/manifests/modules/observability/oss/.workshop/terraform/outputs.tf @@ -0,0 +1,7 @@ +output "environment_variables" { + description = "Environment variables to be added to the IDE shell" + value = { + AMP_ENDPOINT = aws_prometheus_workspace.this.prometheus_endpoint + ADOT_IAM_ROLE = module.iam_assumable_role_adot.iam_role_arn + } +} \ No newline at end of file diff --git a/manifests/modules/observability/oss/.workshop/terraform/templates/order-service-metrics-dashboard.yaml 
b/manifests/modules/observability/oss/.workshop/terraform/templates/order-service-metrics-dashboard.yaml
new file mode 100644
index 000000000..65ac633d8
--- /dev/null
+++ b/manifests/modules/observability/oss/.workshop/terraform/templates/order-service-metrics-dashboard.yaml
@@ -0,0 +1,270 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: order-service-metrics-dashboard
+  namespace: grafana
+data: # panel datasource uids below must equal the uid of the Prometheus GrafanaDatasource ("prometheus")
+  json: |-
+    {
+      "annotations": {
+        "list": [
+          {
+            "builtIn": 1,
+            "datasource": {
+              "type": "grafana",
+              "uid": "-- Grafana --"
+            },
+            "enable": true,
+            "hide": true,
+            "iconColor": "rgba(0, 211, 255, 1)",
+            "name": "Annotations & Alerts",
+            "target": {
+              "limit": 100,
+              "matchAny": false,
+              "tags": [],
+              "type": "dashboard"
+            },
+            "type": "dashboard"
+          }
+        ]
+      },
+      "editable": true,
+      "fiscalYearStartMonth": 0,
+      "graphTooltip": 0,
+      "links": [],
+      "liveNow": false,
+      "panels": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "prometheus"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                }
+              },
+              "mappings": []
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 9,
+            "x": 0,
+            "y": 0
+          },
+          "id": 2,
+          "options": {
+            "legend": {
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "pieType": "pie",
+            "reduceOptions": {
+              "calcs": [
+                "lastNotNull"
+              ],
+              "fields": "",
+              "values": false
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "prometheus"
+              },
+              "editorMode": "builder",
+              "expr": "sum by(productId) (watch_orders_total{productId!=\"*\"})",
+              "legendFormat": "__auto",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Orders by Product ",
+          "type": "piechart"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "prometheus"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "thresholds"
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green",
+                    "value": null
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 9,
+            "w": 6,
+            "x": 9,
+            "y": 0
+          },
+          "id": 6,
+          "options": {
+            "colorMode": "value",
+            "graphMode": "none",
+            "justifyMode": "auto",
+            "orientation": "auto",
+            "reduceOptions": {
+              "calcs": [
+                "lastNotNull"
+              ],
+              "fields": "",
+              "values": false
+            },
+            "textMode": "auto"
+          },
+          "pluginVersion": "9.2.2",
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "prometheus"
+              },
+              "editorMode": "code",
+              "expr": "sum(watch_orders_total{productId=\"*\"}) by (productId)",
+              "legendFormat": "__auto",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Order Count",
+          "type": "stat"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "prometheus"
+          },
+          "fieldConfig": {
+            "defaults": {
+              "color": {
+                "mode": "palette-classic"
+              },
+              "custom": {
+                "axisCenteredZero": false,
+                "axisColorMode": "text",
+                "axisLabel": "",
+                "axisPlacement": "auto",
+                "barAlignment": 0,
+                "drawStyle": "line",
+                "fillOpacity": 0,
+                "gradientMode": "none",
+                "hideFrom": {
+                  "legend": false,
+                  "tooltip": false,
+                  "viz": false
+                },
+                "lineInterpolation": "linear",
+                "lineWidth": 1,
+                "pointSize": 5,
+                "scaleDistribution": {
+                  "type": "linear"
+                },
+                "showPoints": "auto",
+                "spanNulls": false,
+                "stacking": {
+                  "group": "A",
+                  "mode": "none"
+                },
+                "thresholdsStyle": {
+                  "mode": "off"
+                }
+              },
+              "mappings": [],
+              "thresholds": {
+                "mode": "absolute",
+                "steps": [
+                  {
+                    "color": "green",
+                    "value": null
+                  },
+                  {
+                    "color": "red",
+                    "value": 80
+                  }
+                ]
+              }
+            },
+            "overrides": []
+          },
+          "gridPos": {
+            "h": 8,
+            "w": 15,
+            "x": 0,
+            "y": 9
+          },
+          "id": 4,
+          "options": {
+            "legend": {
+              "calcs": [],
+              "displayMode": "list",
+              "placement": "bottom",
+              "showLegend": true
+            },
+            "tooltip": {
+              "mode": "single",
+              "sort": "none"
+            }
+          },
+          "targets": [
+            {
+              "datasource": {
+                "type": "prometheus",
+                "uid": "prometheus"
+              },
+              "editorMode": "code",
+              "expr": "sum by (productId)(rate(watch_orders_total{productId=\"*\"}[2m]))",
+              "legendFormat": "__auto",
+              "range": true,
+              "refId": "A"
+            }
+          ],
+          "title": "Order Rate",
+          "type": "timeseries"
+        }
+      ],
+      "schemaVersion": 37,
+      "style": "dark",
+      "tags": [],
+      "templating": {
+        "list": []
+      },
+      "time": {
+        "from": "now-3h",
+        "to": "now"
+      },
+      "timepicker": {},
+      "timezone": "",
+      "title": "Order Service Metrics",
+      "uid": "r7QHEZEVz",
+      "version": 1,
+      "weekStart": ""
+    }
diff --git a/manifests/modules/observability/oss/.workshop/terraform/vars.tf b/manifests/modules/observability/oss/.workshop/terraform/vars.tf
new file mode 100644
index 000000000..7c46aceb7
--- /dev/null
+++ b/manifests/modules/observability/oss/.workshop/terraform/vars.tf
@@ -0,0 +1,63 @@
+# tflint-ignore: terraform_unused_declarations
+variable "eks_cluster_id" {
+  description = "EKS cluster name"
+  type        = string
+}
+
+# tflint-ignore: terraform_unused_declarations
+variable "eks_cluster_version" {
+  description = "EKS cluster version"
+  type        = string
+}
+
+# tflint-ignore: terraform_unused_declarations
+variable "cluster_security_group_id" {
+  description = "EKS cluster security group ID"
+  type        = any
+}
+
+# tflint-ignore: terraform_unused_declarations
+variable "addon_context" {
+  description = "Addon context that can be passed directly to blueprints addon modules"
+  type        = any
+}
+
+# tflint-ignore: terraform_unused_declarations
+variable "tags" {
+  description = "Tags to apply to AWS resources"
+  type        = any
+}
+
+# tflint-ignore: terraform_unused_declarations
+variable "resources_precreated" {
+  description = "Have expensive resources been created already"
+  type        = bool
+}
+
+variable "opentelemetry_operator_chart_version" {
+  description = "The chart version of opentelemetry-operator to use"
+  type        = string
+  # renovate-helm: depName=opentelemetry-operator
registryUrl=https://open-telemetry.github.io/opentelemetry-helm-charts + default = "0.68.0" +} + +variable "loki_chart_version" { + description = "The chart version of loki to use" + type = string + # renovate-helm: depName=loki registryUrl=https://grafana.github.io/helm-charts + default = "6.10.0" +} + +variable "tempo_chart_version" { + description = "The chart version of tempo to use" + type = string + # renovate-helm: depName=tempo registryUrl=https://grafana.github.io/helm-charts + default = "1.10.3" +} + +variable "grafana_operator_chart_version" { + description = "The chart version of grafana-operator to use" + type = string + # renovate-helm: depName=grafana-operator registryUrl=oci://ghcr.io/grafana/helm-charts + default = "v5.12.0" +} From 584d7c94cd6b73ff11753d9ebaa7d04e28d60f0a Mon Sep 17 00:00:00 2001 From: Pongsan Date: Wed, 21 Aug 2024 19:23:14 +0700 Subject: [PATCH 02/24] Add ADOT config. --- .../oss/.workshop/terraform/main.tf | 2 + .../observability/oss/adot/clusterrole.yaml | 29 +++ .../oss/adot/clusterrolebinding.yaml | 12 + .../observability/oss/adot/kustomization.yaml | 8 + .../oss/adot/opentelemetrycollector.yaml | 229 ++++++++++++++++++ .../oss/adot/serviceaccount.yaml | 6 + 6 files changed, 286 insertions(+) create mode 100644 manifests/modules/observability/oss/adot/clusterrole.yaml create mode 100644 manifests/modules/observability/oss/adot/clusterrolebinding.yaml create mode 100644 manifests/modules/observability/oss/adot/kustomization.yaml create mode 100644 manifests/modules/observability/oss/adot/opentelemetrycollector.yaml create mode 100644 manifests/modules/observability/oss/adot/serviceaccount.yaml diff --git a/manifests/modules/observability/oss/.workshop/terraform/main.tf b/manifests/modules/observability/oss/.workshop/terraform/main.tf index a7fedb847..c9f613a5b 100644 --- a/manifests/modules/observability/oss/.workshop/terraform/main.tf +++ b/manifests/modules/observability/oss/.workshop/terraform/main.tf @@ -143,6 +143,8 @@ 
module "loki" { deploymentMode: SingleBinary loki: auth_enabled: false + limits_config: + allow_structured_metadata: true commonConfig: replication_factor: 1 storage: diff --git a/manifests/modules/observability/oss/adot/clusterrole.yaml b/manifests/modules/observability/oss/adot/clusterrole.yaml new file mode 100644 index 000000000..4bb1fb762 --- /dev/null +++ b/manifests/modules/observability/oss/adot/clusterrole.yaml @@ -0,0 +1,29 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: otel-prometheus-role +rules: + - apiGroups: + - "" + resources: + - nodes + - nodes/proxy + - services + - endpoints + - pods + verbs: + - get + - list + - watch + - apiGroups: + - extensions + resources: + - ingresses + verbs: + - get + - list + - watch + - nonResourceURLs: + - /metrics + verbs: + - get diff --git a/manifests/modules/observability/oss/adot/clusterrolebinding.yaml b/manifests/modules/observability/oss/adot/clusterrolebinding.yaml new file mode 100644 index 000000000..a5c187a1f --- /dev/null +++ b/manifests/modules/observability/oss/adot/clusterrolebinding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: otel-prometheus-role-binding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: otel-prometheus-role +subjects: + - kind: ServiceAccount + name: adot-collector + namespace: other diff --git a/manifests/modules/observability/oss/adot/kustomization.yaml b/manifests/modules/observability/oss/adot/kustomization.yaml new file mode 100644 index 000000000..c6ae77912 --- /dev/null +++ b/manifests/modules/observability/oss/adot/kustomization.yaml @@ -0,0 +1,8 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: other +resources: + - clusterrole.yaml + - clusterrolebinding.yaml + - serviceaccount.yaml + - opentelemetrycollector.yaml diff --git a/manifests/modules/observability/oss/adot/opentelemetrycollector.yaml 
b/manifests/modules/observability/oss/adot/opentelemetrycollector.yaml new file mode 100644 index 000000000..8db14aa62 --- /dev/null +++ b/manifests/modules/observability/oss/adot/opentelemetrycollector.yaml @@ -0,0 +1,229 @@ +apiVersion: opentelemetry.io/v1beta1 +kind: OpenTelemetryCollector +metadata: + name: adot + namespace: other +spec: + image: public.ecr.aws/aws-observability/aws-otel-collector:v0.40.0 + mode: daemonset + serviceAccount: adot-collector + config: + receivers: + prometheus: + config: + global: + scrape_interval: 60s + scrape_timeout: 15s + external_labels: + cluster: ${EKS_CLUSTER_NAME} + account_id: ${AWS_ACCOUNT_ID} + region: ${AWS_REGION} + scrape_configs: + - job_name: "kubernetes-kubelet" + scrape_interval: 60s + scrape_timeout: 15s + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc.cluster.local:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/$${1}/proxy/metrics + - job_name: "kubelet" + scheme: https + tls_config: + ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt + insecure_skip_verify: true + bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token + kubernetes_sd_configs: + - role: node + relabel_configs: + - action: labelmap + regex: __meta_kubernetes_node_label_(.+) + - target_label: __address__ + replacement: kubernetes.default.svc.cluster.local:443 + - source_labels: [__meta_kubernetes_node_name] + regex: (.+) + target_label: __metrics_path__ + replacement: /api/v1/nodes/$${1}/proxy/metrics/cadvisor + - job_name: "kubernetes-pods" + honor_labels: true + + kubernetes_sd_configs: + - role: pod + 
+ relabel_configs: + - source_labels: + [__meta_kubernetes_pod_annotation_prometheus_io_scrape] + action: keep + regex: true + - source_labels: + [__meta_kubernetes_pod_annotation_prometheus_io_scrape_slow] + action: drop + regex: true + - source_labels: + [__meta_kubernetes_pod_annotation_prometheus_io_scheme] + action: replace + regex: (https?) + target_label: __scheme__ + - source_labels: + [__meta_kubernetes_pod_annotation_prometheus_io_path] + action: replace + target_label: __metrics_path__ + regex: (.+) + - action: labelmap + regex: __meta_kubernetes_pod_annotation_prometheus_io_param_(.+) + replacement: __param_$$1 + - action: labelmap + regex: __meta_kubernetes_pod_label_(.+) + - source_labels: [__meta_kubernetes_namespace] + action: replace + target_label: namespace + - source_labels: [__meta_kubernetes_pod_name] + action: replace + target_label: pod + - source_labels: [__meta_kubernetes_pod_phase] + regex: Pending|Succeeded|Failed|Completed + action: drop + filelog: + include: + - /var/log/pods/*/*/*.log + exclude: + - /var/log/pods/*-system_*/*/*.log + - /var/log/pods/cert-manager_*/*/*.log + - /var/log/pods/grafana_*/*/*.log + - /var/log/pods/other_*/*/*.log + start_at: beginning + include_file_path: true + include_file_name: false + operators: + # Find out which format is used by kubernetes + - type: router + id: get-format + routes: + - output: parser-docker + expr: 'body matches "^\\{"' + - output: parser-crio + expr: 'body matches "^[^ Z]+ "' + - output: parser-containerd + expr: 'body matches "^[^ Z]+Z"' + # Parse CRI-O format + - type: regex_parser + id: parser-crio + regex: + "^(?P