diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 281fadc4..81c0aa83 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,4 +25,5 @@ jobs: run: pip3 install yamllint - name: Lint all the YAMLs. + working-directory: ./ansible run: yamllint . diff --git a/.gitignore b/.gitignore index 6107e18b..56a3f031 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ -roles/ricsanfre.* -ansible_collections -certificates -docs/_site +/ansible/roles/ricsanfre.* +/ansible/ansible_collections +/certbot +/certificates +/docs/_site diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..7e0f0cb7 --- /dev/null +++ b/Makefile @@ -0,0 +1,92 @@ +.EXPORT_ALL_VARIABLES: + +GPG_EMAIL=ricsanfre@gmail.com +GPG_NAME=Ricardo Sanchez + +.PHONY: default +default: clean # NOTE(review): a bare 'make' runs clean, i.e. k3s-reset and external-services-reset - confirm this is the intended default + +.PHONY: prepare-ansible +prepare-ansible: install-ansible-requirements gpg-init ~/.vault/vault_passphrase.gpg ansible-credentials + +.PHONY: clean +clean: k3s-reset external-services-reset + +.PHONY: init +init: os-upgrade gateway-setup nodes-setup external-services configure-os-backup k3s-install k3s-bootstrap configure-monitoring-gateway + +.PHONY: install-ansible-requirements +install-ansible-requirements: # install Ansible requirements + cd ansible && ansible-galaxy install -r requirements.yml + +.PHONY: install-ansible-requirements-force +install-ansible-requirements-force: # install Ansible requirements, passing --force to ansible-galaxy + cd ansible && ansible-galaxy install -r requirements.yml --force + +.PHONY: gpg-init +gpg-init: + scripts/generate_gpg_key.sh + +~/.vault/vault_passphrase.gpg: # Ansible vault passphrase: pwgen output encrypted with gpg for GPG_EMAIL + mkdir -p ~/.vault + pwgen -n 71 -C | head -n1 | gpg --armor --recipient ${GPG_EMAIL} -e -o ~/.vault/vault_passphrase.gpg + +.PHONY: ansible-credentials +ansible-credentials: ~/.vault/vault_passphrase.gpg install-ansible-requirements + cd ansible && ansible-playbook create_vault_credentials.yml + +.PHONY: os-upgrade +os-upgrade: + cd ansible && ansible-playbook update.yml +
+.PHONY: gateway-setup +gateway-setup: + cd ansible && ansible-playbook setup_picluster.yml --tags "gateway" + +.PHONY: nodes-setup +nodes-setup: + cd ansible && ansible-playbook setup_picluster.yml --tags "nodes" + +.PHONY: external-services +external-services: + cd ansible && ansible-playbook external_services.yml + +.PHONY: configure-os-backup +configure-os-backup: + cd ansible && ansible-playbook backup_configuration.yml + +.PHONY: configure-monitoring-gateway +configure-monitoring-gateway: + cd ansible && ansible-playbook deploy_monitoring_agent.yml + +.PHONY: os-backup +os-backup: + cd ansible && ansible -b -m shell -a 'systemctl start restic-backup' raspberrypi + +.PHONY: k3s-install +k3s-install: + cd ansible && ansible-playbook k3s_install.yml + +.PHONY: k3s-bootstrap +k3s-bootstrap: + cd ansible && ansible-playbook k3s_bootstrap.yml + +.PHONY: k3s-reset +k3s-reset: + cd ansible && ansible-playbook k3s_reset.yml + +.PHONY: external-services-reset +external-services-reset: + cd ansible && ansible-playbook reset_external_services.yml + +.PHONY: shutdown-k3s-worker +shutdown-k3s-worker: + cd ansible && ansible -b -m shell -a "shutdown -h 1 min" k3s_worker + +.PHONY: shutdown-k3s-master +shutdown-k3s-master: + cd ansible && ansible -b -m shell -a "shutdown -h 1 min" k3s_master + +.PHONY: shutdown-gateway +shutdown-gateway: + cd ansible && ansible -b -m shell -a "shutdown -h 1 min" gateway diff --git a/README.md b/README.md index ba3208de..79542350 100644 --- a/README.md +++ b/README.md @@ -9,25 +9,206 @@ -## **K3S Kubernetes Cluster using bare metal ARM-based nodes (Raspberry-PIs) automated with Ansible** +**K3S Kubernetes Cluster using bare metal ARM-based nodes (Raspberry-PIs) automated with Ansible and ArgoCD** -This is an educational project to explore kubernetes cluster configurations using an ARM architecture and its automation using Ansible. 
+This is an educational project to explore kubernetes cluster configurations using an ARM architecture and apply IaC (Infrastructure as Code) and GitOps methodologies to automate its provisioning and management. The entire process for creating this cluster at home, from cluster design and architecture to step-by-step manual configuration guides, has been documented and it is published in the project website: https://picluster.ricsanfre.com. -This repository contains the Ansible's source code (playbooks/roles) and Cloud-init's configuration files used for automated all manual tasks described in the documentation. -The cluster can be re-deployed in minutes as many times as needed for testing new cluster configurations, new software versions or just take you out of any mesh you could cause playing with the cluster. +This repository contains all source code used to automate all manual tasks described in the documentation: Cloud-init's configuration files, Ansible's source code (playbooks/roles), and packaged Kubernetes applications (helm and kustomize) to be deployed using ArgoCD. + +Since its deployment is completely automated, the cluster can be re-deployed in minutes as many times as needed for testing new cluster configurations, new software versions or just to get you out of any mess you might cause while playing with the cluster.
## Scope -Automatically deploy and configure a lightweight Kubernetes flavor based on [K3S](https://k3s.io/) and deploy cluster basic services such as: 1) distributed block storage for POD's persistent volumes, [LongHorn](https://longhorn.io/), 2) backup/restore solution for the cluster, [Velero](https://velero.io/) and [Restic](https://restic.net/), 3) service mesh architecture, [Linkerd](https://linkerd.io/), and 4) observability platform based on metrics monitoring solution, [Prometheus](https://prometheus.io/), logging and analytics solution, EFḰ+LG stack ([Elasticsearch](https://www.elastic.co/elasticsearch/)-[Fluentd](https://www.fluentd.org/)/[Fluentbit](https://fluentbit.io/)-[Kibana](https://www.elastic.co/kibana/) + [Loki](https://grafana.com/oss/loki/)-[Grafana](https://grafana.com/oss/grafana/)), and distributed tracing solution, [Tempo](https://grafana.com/oss/tempo/). +The scope of this project is to create a kubernetes cluster at home using **Raspberry Pis** and to automate its deployment and configuration applying **IaC (Infrastructure as Code)** and **GitOps** methodologies with tools like [Ansible](https://docs.ansible.com/), [cloud-init](https://cloudinit.readthedocs.io/en/latest/) and [Argo CD](https://argo-cd.readthedocs.io/en/stable/).
+ +As part of the project, the goal is to use a lightweight Kubernetes flavor based on [K3S](https://k3s.io/) and deploy cluster basic services such as: 1) distributed block storage for POD's persistent volumes, [LongHorn](https://longhorn.io/), 2) backup/restore solution for the cluster, [Velero](https://velero.io/) and [Restic](https://restic.net/), 3) service mesh architecture, [Linkerd](https://linkerd.io/), and 4) observability platform based on metrics monitoring solution, [Prometheus](https://prometheus.io/), logging and analytics solution, EFK+LG stack ([Elasticsearch](https://www.elastic.co/elasticsearch/)-[Fluentd](https://www.fluentd.org/)/[Fluentbit](https://fluentbit.io/)-[Kibana](https://www.elastic.co/kibana/) + [Loki](https://grafana.com/oss/loki/)-[Grafana](https://grafana.com/oss/grafana/)), and distributed tracing solution, [Tempo](https://grafana.com/oss/tempo/). + +## Technology Stack -The following picture shows the set of opensource solutions used so far in the cluster, which installation process has been documented and its deployment has been automated with Ansible: +The following picture shows the set of opensource solutions used so far in the cluster, whose installation process has been documented and whose deployment has been automated with Ansible/ArgoCD:

+
+| Name | Description |
+| --- | --- |
+| Ansible | Automate OS configuration, external services installation and k3s installation and bootstrapping |
+| ArgoCD | GitOps tool for deploying applications to Kubernetes |
+| Cloud-init | Automate OS initial installation |
+| Ubuntu | Cluster nodes OS |
+| K3S | Lightweight distribution of Kubernetes |
+| containerd | Container runtime integrated with K3S |
+| Flannel | Kubernetes Networking (CNI) integrated with K3S |
+| CoreDNS | Kubernetes DNS |
+| Metal LB | Load-balancer implementation for bare metal Kubernetes clusters |
+| Traefik | Kubernetes Ingress Controller |
+| Linkerd | Kubernetes Service Mesh |
+| Longhorn | Kubernetes distributed block storage |
+| Minio | S3 Object Storage solution |
+| Cert-manager | TLS Certificates management |
+| Hashicorp Vault | Secrets Management solution |
+| External Secrets Operator | Sync Kubernetes Secrets from Hashicorp Vault |
+| Velero | Kubernetes Backup and Restore solution |
+| Restic | OS Backup and Restore solution |
+| Prometheus | Metrics monitoring and alerting |
+| Fluentd | Logs forwarding and distribution |
+| Fluentbit | Logs collection |
+| Loki | Logs aggregation |
+| Elasticsearch | Logs analytics |
+| Kibana | Logs analytics Dashboards |
+| Tempo | Distributed tracing monitoring |
+| Grafana | Monitoring Dashboards |
+
+ +## External Resources and Services + +Even when the premise is to deploy all services in the kubernetes cluster, there is still a need for a few external services/resources. Below is a list of external resources/services and why we need them. + +### Cloud external services + + +| |Provider | Resource | Purpose | +| --- | --- | --- | --- | +| | [Letsencrypt](https://letsencrypt.org/) | TLS Certificate Authority | Signed valid TLS certificates | +| |[IONOS](https://www.ionos.es/) | DNS | DNS and [DNS-01 challenge](https://letsencrypt.org/docs/challenge-types/#dns-01-challenge) for certificates | + +> **NOTE:** These resources are optional, the homelab still works without them but it won't have trusted certificates + +**Alternatives:** + +1. Use a private PKI (custom CA to sign certificates). + + Currently supported. Only minor changes are required. See details in [Doc: Quick Start instructions](https://picluster.ricsanfre.com/docs/ansible). + +2. Use another DNS provider. + + Cert-manager / Certbot used to automatically obtain certificates from Let's Encrypt can be used with other DNS providers. This will need further modifications in the way cert-manager application is deployed (new providers and/or webhooks/plugins might be required). + + Currently only acme issuer (letsencrypt) using IONOS as dns-01 challenge provider is configured. Check list of [supported dns01 providers](https://cert-manager.io/docs/configuration/acme/dns01/#supported-dns01-providers). + +### Self-hosted external services + +There is another list of services that I have decided to run outside the kubernetes cluster but not using any cloud service. These services currently are running on the same cluster nodes (gateway and node1), but as bare-metal services.
+ +| |External Service | Resource | Purpose | +| --- | --- | --- | --- | +| |[Minio](https://min.io) | S3 Object Store | Cluster Backup | +| |[Hashicorp Vault](https://www.vaultproject.io/) | Secrets Management | Cluster secrets management | + + ## Cluster architecture and hardware Home lab architecture, showed in the picture below, consist of a Kubernetes cluster of 5 nodes (1 master and 4 workers) and a firewall, built with another Raspberry PI, to isolate cluster network from your home network. @@ -47,7 +228,7 @@ The content of this website and the source code to build it (Jekyll static based ## Usage -Check out the documentation [Quick Start guide](http://picluster.ricsanfre.com/docs/ansible/) to know how to use and tweak cloud-init files (`/cloud-init` folder) and Ansible playbooks contained in this repository. +Check out the documentation [Quick Start guide](http://picluster.ricsanfre.com/docs/ansible/) to know how to use and tweak cloud-init files (`/cloud-init` folder), Ansible playbooks (`/ansible` folder) and packaged Kubernetes applications (`/argocd` folder) contained in this repository, so you can use it for your own homelab.
## About the Project diff --git a/ansible/.vault/vault_pass.sh b/ansible/.vault/vault_pass.sh new file mode 100755 index 00000000..82608241 --- /dev/null +++ b/ansible/.vault/vault_pass.sh @@ -0,0 +1,2 @@ +#!/bin/sh +gpg --batch --use-agent --decrypt "$HOME/.vault/vault_passphrase.gpg" diff --git a/.yamllint b/ansible/.yamllint similarity index 100% rename from .yamllint rename to ansible/.yamllint diff --git a/ansible.cfg b/ansible/ansible.cfg similarity index 85% rename from ansible.cfg rename to ansible/ansible.cfg index d75731c8..5f89d724 100644 --- a/ansible.cfg +++ b/ansible/ansible.cfg @@ -11,3 +11,5 @@ collections_path = ./ host_key_checking = false # SSH key private_key_file = $HOME/.ssh/ansible-ssh-key.pem +# Vault password +vault_password_file=./.vault/vault_pass.sh diff --git a/ansible/backup_configuration.yml b/ansible/backup_configuration.yml new file mode 100644 index 00000000..c5f8edd8 --- /dev/null +++ b/ansible/backup_configuration.yml @@ -0,0 +1,24 @@ +--- + +- name: Configure Pi-cluster nodes backup + hosts: raspberrypi + gather_facts: true + tags: [backup] + become: true + pre_tasks: + - name: Include vault variables + include_vars: "vars/vault.yml" + # Include picluster variables + - name: Include picluster variables + include_vars: "vars/picluster.yml" + - name: Load CA certificate for restic + set_fact: + restic_ca_cert: "{{ lookup('file','../certificates/CA.pem') }}" # same ../certificates location external_services.yml reads CA.pem from + when: not enable_letsencrypt + - name: Do not use CA certificate + set_fact: + restic_use_ca_cert: false + when: enable_letsencrypt + roles: + - role: ricsanfre.backup + tags: [backup] diff --git a/ansible/create_vault_credentials.yml b/ansible/create_vault_credentials.yml new file mode 100644 index 00000000..37a866e0 --- /dev/null +++ b/ansible/create_vault_credentials.yml @@ -0,0 +1,62 @@ +--- + +- name: Generate vault variables file + hosts: localhost + + vars_prompt: + - name: ionos_public_prefix + prompt: Enter IONOS public prefix + private: true + - name: ionos_secret +
prompt: Enter IONOS secret + private: true + + pre_tasks: + - name: Ask for SAN centralized credentials + when: centralized_san + block: + - name: Ask for SAN iscsi credentials 1/2 + pause: + prompt: "Enter iSCSI node password: " + echo: false + register: prompt + - name: Set iSCSI node password variable + set_fact: + san_iscsi_node_pass: "{{ prompt.user_input }}" + no_log: true + - name: Ask for SAN iscsi credentials 2/2 + pause: + prompt: "Enter iSCSI mutual password: " + echo: false + register: prompt + - name: Set iSCSI mutual password variable + set_fact: + san_iscsi_mutual_pass: "{{ prompt.user_input }}" + no_log: true + + tasks: + + - name: Create random passwords + ansible.builtin.set_fact: + "{{ item }}": "{{ lookup('ansible.builtin.password', '/dev/null chars=ascii_letters,digits' ) }}" + with_items: + - k3s_token + - minio_root_password + - minio_restic_password + - minio_longhorn_password + - minio_velero_password + - minio_loki_password + - minio_tempo_password + - traefik_basic_auth_password + - fluentd_shared_key + - grafana_admin_password + - elasticsearch_admin_password + + - name: Generate vault file + ansible.builtin.template: + src: vars/vault.yml.j2 + dest: vars/vault.yml + + - name: Encrypt vault file # script name must match vault_password_file in ansible.cfg (vault_pass.sh) + ansible.builtin.command: + cmd: ansible-vault encrypt --vault-password-file=./.vault/vault_pass.sh vars/vault.yml diff --git a/ansible/deploy_monitoring_agent.yml b/ansible/deploy_monitoring_agent.yml new file mode 100644 index 00000000..972cc0a7 --- /dev/null +++ b/ansible/deploy_monitoring_agent.yml @@ -0,0 +1,19 @@ +--- +# Deploy fluentbit to get logs and prometheus metrics +- name: Deploy fluentbit on control nodes (gateway and pimaster) + hosts: gateway + gather_facts: true + tags: [logging] + become: true + pre_tasks: + # Include vault variables + - name: Include vault variables + include_vars: "vars/vault.yml" + tags: ["always"] + # Include picluster variables + - name: Include picluster variables + include_vars: "vars/picluster.yml" + tags:
["always"] + roles: + - role: logging/external_node + tags: ['logging'] diff --git a/ansible/external_services.yml b/ansible/external_services.yml new file mode 100644 index 00000000..1c51fcb6 --- /dev/null +++ b/ansible/external_services.yml @@ -0,0 +1,290 @@ +--- +## Generate TLS certificates for external services +## Generated using certbot (letsencrypt) or selfsigned certificates +- name: Generate external services certificates + hosts: localhost + gather_facts: true + tags: [certificates] + vars: + certbot_venv: "{{ playbook_dir }}/../certbot" + certbot_email: "{{ acme_issuer_email }}" + dns_ionos_prefix: "{{ vault.certmanager.ionos.public_prefix }}" + dns_ionos_secret: "{{ vault.certmanager.ionos.secret }}" + selfsigned_certificates_path: "../certificates" + pre_tasks: + # Include vault variables + - name: Include vault variables + include_vars: "vars/vault.yml" + # Include picluster variables + - name: Include picluster variables + include_vars: "vars/picluster.yml" + roles: + - role: certbot + vars: + - install_python_packages: false + when: enable_letsencrypt + + tasks: + - name: Create Letsencrytp certificate for external services + command: "{{ certbot_venv }}/bin/certbot-create-cert.sh {{ item }}" + register: certbot_create + changed_when: + - certbot_create.rc==0 + - '"Certificate not yet due for renewal; no action taken." 
not in certbot_create.stdout' + when: enable_letsencrypt + with_items: + - "{{ minio_hostname }}" + - "{{ vault_hostname }}" + + - name: Create customCA-signed TLS certificate for minio + when: not enable_letsencrypt + block: + # Generate self-signed certificates directory + - name: Create certificates directory + file: + path: "{{ selfsigned_certificates_path }}" + state: directory + mode: 0750 + # Include selfsigned certificates variables + - name: Include selfsigned certificates variables + include_vars: "vars/selfsigned-certificates.yml" + # Generate custom CA + - name: Generate custom CA + include_tasks: tasks/generate_custom_ca.yml + args: + apply: + delegate_to: localhost + become: false + # Generate selfsigned TLS certificate + - name: Generate customCA-signed SSL certificates + include_tasks: tasks/generate_ca_signed_cert.yml + args: + apply: + delegate_to: localhost + become: false + loop: + - "{{ minio_hostname }}" + - "{{ vault_hostname }}" + loop_control: + loop_var: server_hostname + +## Install Minio S3 Storage Server + +- name: Install Minio S3 Storage Server + hosts: node1 + gather_facts: true + tags: [s3] + become: true + vars: + server_hostname: "{{ minio_hostname }}" + certbot_venv: "{{ playbook_dir }}/../certbot" + selfsigned_certificates_path: "../certificates" + pre_tasks: + # Include vault variables + - name: Include vault variables + include_vars: "vars/vault.yml" + # Include picluster variables + - name: Include picluster variables + include_vars: "vars/picluster.yml" + + - name: Load customCA-signed TLS certificate for minio + set_fact: + minio_key: "{{ lookup('file',selfsigned_certificates_path + '/' + server_hostname + '.key') }}" + minio_cert: "{{ lookup('file',selfsigned_certificates_path + '/' + server_hostname + '.pem') }}" + when: not enable_letsencrypt + + - name: Get letsencrypt TLS certificate for minio + block: + - name: check TLS certificate for minio exits + command: "{{ certbot_venv }}/bin/certbot-wrapper.sh certificates -d 
{{ minio_hostname }}" + register: certbot_certificates + delegate_to: localhost + become: false + changed_when: false + failed_when: + - '"Certificate Name: " + minio_hostname not in certbot_certificates.stdout' + - name: Get certificate and key paths for minio + set_fact: + cert_path: "{{ certbot_certificates.stdout | regex_search(regexp1,'\\1') }}" + cert_key_path: "{{ certbot_certificates.stdout | regex_search(regexp2,'\\1') }}" + vars: + regexp1: 'Certificate Path: (\S+)' + regexp2: 'Private Key Path: (\S+)' + when: + - certbot_certificates.rc==0 + - '"Certificate Name: " + minio_hostname in certbot_certificates.stdout' + + - name: Load tls key and cert + set_fact: + minio_key: "{{ lookup('file', cert_key_path[0] ) }}" + minio_cert: "{{ lookup('file', cert_path[0] ) }}" + when: enable_letsencrypt + roles: + - role: ricsanfre.minio + +## Install Hashicorp Vault Server + +- name: Install Vault Server + hosts: gateway + gather_facts: true + tags: [vault] + become: true + vars: + server_hostname: "{{ vault_hostname }}" + certbot_venv: "{{ playbook_dir }}/../certbot" + selfsigned_certificates_path: "../certificates" + + pre_tasks: + # Include vault variables + - name: Include vault variables + include_vars: "vars/vault.yml" + + # Include picluster variables + - name: Include picluster variables + include_vars: "vars/picluster.yml" + + - name: Load customCA-signed TLS certificate for vault + set_fact: + vault_key: "{{ lookup('file',selfsigned_certificates_path + '/' + server_hostname + '.key') }}" + vault_cert: "{{ lookup('file',selfsigned_certificates_path + '/' + server_hostname + '.pem') }}" + vault_ca: "{{ lookup('file',selfsigned_certificates_path + '/CA.pem') }}" + when: not enable_letsencrypt + + - name: Get letsencrypt TLS certificate for vault + block: + - name: check TLS certificate for vault exists + command: "{{ certbot_venv }}/bin/certbot-wrapper.sh certificates -d {{ vault_hostname }}" + register: certbot_certificates + delegate_to: localhost + become:
false + changed_when: false + failed_when: + - '"Certificate Name: " + vault_hostname not in certbot_certificates.stdout' + - name: Get certificate and key paths for vault + set_fact: + cert_path: "{{ certbot_certificates.stdout | regex_search(regexp1,'\\1') }}" + cert_key_path: "{{ certbot_certificates.stdout | regex_search(regexp2,'\\1') }}" + vars: + regexp1: 'Certificate Path: (\S+)' + regexp2: 'Private Key Path: (\S+)' + when: + - certbot_certificates.rc==0 + - '"Certificate Name: " + vault_hostname in certbot_certificates.stdout' + + - name: Load tls key and cert + set_fact: + vault_key: "{{ lookup('file', cert_key_path[0] ) }}" + vault_cert: "{{ lookup('file', cert_path[0] ) }}" + when: enable_letsencrypt + roles: + - role: ricsanfre.vault + + tasks: + # Configure ansible user profile with VAULT environment variables + - name: Insert http(s) export in dotfile + become: true + lineinfile: + path: ~{{ ansible_user }}/.bashrc + regexp: "^export VAULT_ADDR=" + line: "export VAULT_ADDR='https://{{ vault_hostname }}:8200'" + owner: "{{ ansible_user }}" + create: true + mode: "0644" + - name: Insert CA cert export in dotfile + become: true + lineinfile: + path: ~{{ ansible_user }}/.bashrc + regexp: "^export VAULT_CACERT=" + line: "export VAULT_CACERT=/etc/vault/tls/vault-ca.crt" + owner: "{{ ansible_user }}" + create: true + mode: "0644" + when: custom_ca # NOTE(review): every other gate in this playbook uses enable_letsencrypt - confirm custom_ca is defined for this play + + - name: Insert VAULT_TOKEN in dotfile + become: true + lineinfile: + path: ~{{ ansible_user }}/.bashrc + regexp: "^export VAULT_TOKEN=" + line: "export VAULT_TOKEN=$(sudo jq -r '.root_token' /etc/vault/unseal.json)" + owner: "{{ ansible_user }}" + create: true + mode: "0644" + +## Load all credentials into Hashicorp Vault Server +- name: Load Vault Credentials + hosts: gateway + gather_facts: true + tags: [vault, credentials] + become: false + pre_tasks: + # Include vault variables + - name: Include vault variables + include_vars: "vars/vault.yml" + + # Include picluster variables + - name: Include picluster
variables + include_vars: "vars/picluster.yml" + + # Install hashicorp vault collection dependencies (hvac python package) using PIP. + - name: Ensure PIP is installed. + package: + name: + - python3-pip + - python3-setuptools + state: present + become: true + - name: Ensure hvac Python library is installed. + pip: + name: hvac + state: present + become: true + tasks: + + # Vault Login using ansible environment variables for creating token file + # token file will be used in next commands + - name: Vault login + ansible.builtin.shell: bash -ilc 'vault login $VAULT_TOKEN' + # Interactive bash so .bashrc is loaded + # 'source ~/.bashrc && ' does not work because + # Ansible shell is not interactive and ~/.bashrc implementation by default ignores non interactive shell. + # See lines at beginning of bashrc: + # + # If not running interactively, don't do anything + # case $- in + # *i*) ;; + # *) return;; + # esac + # The best solution for executing commands as user after its ssh interactive login: + # bash -ilc '<command>' + # '-i' means interactive shell, so .bashrc won't be ignored; '-l' means login shell, which sources the full user profile + become: false + register: vault_login + + # Create write token + - name: Create KV write token + community.hashi_vault.vault_token_create: + url: "https://{{ vault_dns }}:8200" # NOTE(review): the other plays in this file build the URL from vault_hostname - confirm vault_dns is defined + policies: ["write"] + register: token_data + become: false + # Load ansible vault variables into Hashicorp Vault Server + - name: Load vault credentials + include_tasks: + file: tasks/load_vault_credentials.yml + loop: "{{ vault | dict2items }}" + loop_control: + loop_var: secret_group + when: + - vault is defined + no_log: true + + - name: Load http auth_basic credentials + include_tasks: + file: tasks/create_basic_auth_credentials.yml + no_log: true + + - name: Load minio prometheus bearer credentials + include_tasks: + file: tasks/create_minio_bearer_token.yml + no_log: true diff --git a/group_vars/all.yml b/ansible/group_vars/all.yml similarity index 100% rename
from group_vars/all.yml rename to ansible/group_vars/all.yml diff --git a/group_vars/control.yml b/ansible/group_vars/control.yml similarity index 100% rename from group_vars/control.yml rename to ansible/group_vars/control.yml diff --git a/group_vars/k3s_cluster.yml b/ansible/group_vars/k3s_cluster.yml similarity index 100% rename from group_vars/k3s_cluster.yml rename to ansible/group_vars/k3s_cluster.yml diff --git a/group_vars/k3s_master.yml b/ansible/group_vars/k3s_master.yml similarity index 100% rename from group_vars/k3s_master.yml rename to ansible/group_vars/k3s_master.yml diff --git a/host_vars/gateway.yml b/ansible/host_vars/gateway.yml similarity index 94% rename from host_vars/gateway.yml rename to ansible/host_vars/gateway.yml index 52a0b0f7..8dac7185 100644 --- a/host_vars/gateway.yml +++ b/ansible/host_vars/gateway.yml @@ -41,6 +41,10 @@ dnsmasq_additional_dns_hosts: desc: "Fluentd server" hostname: fluentd ip: 10.0.0.101 + vault_server: + desc: "Vault server" + hostname: vault + ip: 10.0.0.1 #################### # ntp role variables @@ -51,7 +55,9 @@ ntp_allow_hosts: [10.0.0.0/24] # firewall role variables ######################### -in_tcp_port: '{ ssh, https, http, iscsi-target, 9100 }' +# tcp 8200, 8201 Vault server +# tcp 9100 Prometheus (fluent-bit) +in_tcp_port: '{ ssh, https, http, iscsi-target, 9100, 8200, 8201 }' in_udp_port: '{ snmp, domain, ntp, bootps }' forward_tcp_port: '{ http, https, ssh }' forward_udp_port: '{ domain, ntp }' diff --git a/inventory.yml b/ansible/inventory.yml similarity index 100% rename from inventory.yml rename to ansible/inventory.yml diff --git a/ansible/k3s_bootstrap.yml b/ansible/k3s_bootstrap.yml new file mode 100644 index 00000000..91a09123 --- /dev/null +++ b/ansible/k3s_bootstrap.yml @@ -0,0 +1,103 @@ +--- + +- name: Bootstrap Cluster + hosts: k3s_master + gather_facts: false + + collections: + - kubernetes.core + + environment: + # The location of the kubeconfig file on the master. 
+ K8S_AUTH_KUBECONFIG: ~/.kube/config + + pre_tasks: + # Install kubernetes core collection dependencies (kubernetes python package) using PIP. + - name: Ensure PIP is installed. + package: + name: + - python3-pip + - python3-setuptools + state: present + become: true + + - name: Ensure kubernetes Python library is installed. + pip: + name: kubernetes + state: present + become: true + + - name: Ensure hashicorp vault python library is installed. + pip: + name: hvac + state: present + become: true + + # Install Helm diff plugin to have a better idempotence check + - name: Intall Helm Plugin + kubernetes.core.helm_plugin: + plugin_path: "https://github.com/databus23/helm-diff" + state: present + + - name: Include vault variables + include_vars: "vars/vault.yml" + + - name: Include picluster variables + include_vars: "vars/picluster.yml" + + tasks: + - name: Create namespaces. + kubernetes.core.k8s: + name: "{{ item }}" + api_version: v1 + kind: Namespace + state: present + with_items: + - "argocd" + - "vault" + + - name: Configure Vault integration + include_tasks: tasks/configure_vault_integration.yml + + - name: Copy argocd chart files + ansible.builtin.copy: + src: "../argocd/{{ item }}" + dest: /tmp/charts + with_items: + - "bootstrap/argocd" + + - name: Update argo-cd helm dependency. + ansible.builtin.command: + cmd: "helm dependency update /tmp/charts/argocd" + + - name: Deploy Argo CD Helm chart. 
+ shell: | + set -o pipefail + helm template \ + --dependency-update \ + --include-crds \ + --namespace argocd \ + argocd /tmp/charts/argocd \ + | kubectl apply -n argocd -f - + args: + executable: /bin/bash + + - name: Wait for CRDs to be ready + command: + cmd: "kubectl wait --for condition=Established crd/applications.argoproj.io crd/applicationsets.argoproj.io --timeout=600s" + changed_when: false + + - name: Deploy root application + kubernetes.core.k8s: + definition: "{{ lookup('template', 'templates/' + item ) }}" + state: present + with_items: + - argocd_root_app.yml.j2 + + - name: Recursively remove tmp directory + ansible.builtin.file: + path: /tmp/charts + state: absent + + - name: Install cli utils. + include_tasks: tasks/install_cli_utils.yml diff --git a/k3s_install.yml b/ansible/k3s_install.yml similarity index 100% rename from k3s_install.yml rename to ansible/k3s_install.yml diff --git a/k3s_reset.yml b/ansible/k3s_reset.yml similarity index 100% rename from k3s_reset.yml rename to ansible/k3s_reset.yml diff --git a/k3s_start.yml b/ansible/k3s_start.yml similarity index 100% rename from k3s_start.yml rename to ansible/k3s_start.yml diff --git a/k3s_stop.yml b/ansible/k3s_stop.yml similarity index 100% rename from k3s_stop.yml rename to ansible/k3s_stop.yml diff --git a/ansible/patch_grafana_dashboards.yml b/ansible/patch_grafana_dashboards.yml new file mode 100644 index 00000000..a594825f --- /dev/null +++ b/ansible/patch_grafana_dashboards.yml @@ -0,0 +1,13 @@ +--- +- name: Patch Grafana Dashboards + hosts: localhost + + tasks: + - name: Patch Grafana Dashboards + include_tasks: tasks/patch_grafana_dashboard.yml + loop_control: + loop_var: dashboard_file + with_fileglob: + - "roles/prometheus/dashboards/*" + - "roles/prometheus/dashboards/linkerd/*" + - "roles/prometheus/dashboards/k3s/*" diff --git a/requirements.yml b/ansible/requirements.yml similarity index 80% rename from requirements.yml rename to ansible/requirements.yml index 
8a0899fe..a8c86a0d 100644 --- a/requirements.yml +++ b/ansible/requirements.yml @@ -19,9 +19,13 @@ roles: - name: ricsanfre.fluentbit version: v1.0.4 - name: ricsanfre.minio - version: v1.0.9 + version: v1.0.10 - name: ricsanfre.backup - version: v1.1.2 + version: v1.1.3 + - name: ricsanfre.vault + version: v1.0.4 collections: - name: kubernetes.core version: 2.3.2 + - name: community.hashi_vault + version: 4.0.0 diff --git a/ansible/reset_external_services.yml b/ansible/reset_external_services.yml new file mode 100644 index 00000000..a0806b73 --- /dev/null +++ b/ansible/reset_external_services.yml @@ -0,0 +1,109 @@ +--- +- name: Clean Minio Installation + hosts: node1 + become: true + gather_facts: false + tags: [s3] + tasks: + - name: Stop and disable Minio Server + systemd: + name: minio + state: stopped + enabled: false + become: true + - name: Delete directories and files + become: true + file: + state: absent + path: "{{ item }}" + with_items: + - /storage/minio/ + - /etc/minio/ + - /usr/local/bin/minio + - /usr/local/bin/mc + - /etc/systemd/system/minio.service + - name: Reload systemd daemon + systemd: + daemon_reload: true + +- name: Clean Vault Installation + hosts: gateway + become: true + gather_facts: false + tags: [vault] + tasks: + - name: Stop and disable Vault Server + systemd: + name: "{{ item }}" + state: stopped + enabled: false + become: true + with_items: + - vault + - vault-unseal + - name: Delete directories and files + become: true + file: + state: absent + path: "{{ item }}" + with_items: + - /var/lib/vault/ + - /etc/vault/ + - /var/log/vault/ + - /usr/local/bin/vault + - /etc/systemd/system/vault.service + - /etc/systemd/system/vault-unseal.service + - name: Reload systemd daemon + systemd: + daemon_reload: true + +- name: Clean Restic Installation + hosts: raspberrypi + become: true + gather_facts: false + tags: [restic] + tasks: + - name: Stop and disable restic backup service + systemd: + name: "{{ item }}" + state: stopped + enabled: 
false + become: true + with_items: + - restic-backup.timer + + - name: Stop and disable restic clean service + systemd: + name: "{{ item }}" + state: stopped + enabled: false + become: true + when: restic_clean_service + with_items: + - restic-clean.timer + + - name: Delete directories and files + become: true + file: + state: absent + path: "{{ item }}" + with_items: + - /etc/restic/ + - /var/log/restic.log + - /usr/local/bin/restic + - /etc/systemd/system/restic-backup.service + - /etc/systemd/system/restic-backup.timer + + - name: Delete clean service file + become: true + file: + state: absent + path: "{{ item }}" + with_items: + - /etc/systemd/system/restic-clean.timer + - /etc/systemd/system/restic-clean.service + when: restic_clean_service + + - name: Reload systemd daemon + systemd: + daemon_reload: true diff --git a/roles/basic_setup/defaults/main.yaml b/ansible/roles/basic_setup/defaults/main.yaml similarity index 100% rename from roles/basic_setup/defaults/main.yaml rename to ansible/roles/basic_setup/defaults/main.yaml diff --git a/roles/basic_setup/handlers/main.yml b/ansible/roles/basic_setup/handlers/main.yml similarity index 100% rename from roles/basic_setup/handlers/main.yml rename to ansible/roles/basic_setup/handlers/main.yml diff --git a/roles/basic_setup/scripts/pi_temp b/ansible/roles/basic_setup/scripts/pi_temp similarity index 100% rename from roles/basic_setup/scripts/pi_temp rename to ansible/roles/basic_setup/scripts/pi_temp diff --git a/roles/basic_setup/scripts/pi_throttling b/ansible/roles/basic_setup/scripts/pi_throttling similarity index 100% rename from roles/basic_setup/scripts/pi_throttling rename to ansible/roles/basic_setup/scripts/pi_throttling diff --git a/roles/basic_setup/tasks/files/multipath.conf b/ansible/roles/basic_setup/tasks/files/multipath.conf similarity index 100% rename from roles/basic_setup/tasks/files/multipath.conf rename to ansible/roles/basic_setup/tasks/files/multipath.conf diff --git 
a/roles/basic_setup/tasks/main.yaml b/ansible/roles/basic_setup/tasks/main.yaml similarity index 100% rename from roles/basic_setup/tasks/main.yaml rename to ansible/roles/basic_setup/tasks/main.yaml diff --git a/roles/basic_setup/tasks/multipath_blacklist.yml b/ansible/roles/basic_setup/tasks/multipath_blacklist.yml similarity index 100% rename from roles/basic_setup/tasks/multipath_blacklist.yml rename to ansible/roles/basic_setup/tasks/multipath_blacklist.yml diff --git a/roles/basic_setup/tasks/remove_snap.yaml b/ansible/roles/basic_setup/tasks/remove_snap.yaml similarity index 100% rename from roles/basic_setup/tasks/remove_snap.yaml rename to ansible/roles/basic_setup/tasks/remove_snap.yaml diff --git a/roles/basic_setup/tasks/remove_snap_packages.yml b/ansible/roles/basic_setup/tasks/remove_snap_packages.yml similarity index 100% rename from roles/basic_setup/tasks/remove_snap_packages.yml rename to ansible/roles/basic_setup/tasks/remove_snap_packages.yml diff --git a/roles/certbot/defaults/main.yml b/ansible/roles/certbot/defaults/main.yml similarity index 85% rename from roles/certbot/defaults/main.yml rename to ansible/roles/certbot/defaults/main.yml index c3966771..ee3427f7 100644 --- a/roles/certbot/defaults/main.yml +++ b/ansible/roles/certbot/defaults/main.yml @@ -12,3 +12,6 @@ dns_ionos_secret: ionos-secret dns_ionos_api_endpoint: https://api.hosting.ionos.com certbot_email: myemail@mydomain.es + +# dns propagation in seconds +propagation_seconds: 300 diff --git a/roles/certbot/tasks/main.yml b/ansible/roles/certbot/tasks/main.yml similarity index 88% rename from roles/certbot/tasks/main.yml rename to ansible/roles/certbot/tasks/main.yml index cf689662..b8087a2e 100644 --- a/roles/certbot/tasks/main.yml +++ b/ansible/roles/certbot/tasks/main.yml @@ -7,7 +7,6 @@ - python3-venv state: present become: true - tags: ['always'] when: install_python_packages - name: Install certbot and ionos plugin in venvironment @@ -18,7 +17,6 @@ virtualenv: "{{ 
certbot_venv }}" virtualenv_command: "python3 -m venv" state: present - tags: ['always'] - name: Create working directories file: @@ -53,6 +51,6 @@ mode: 0755 with_items: - template: certbot.sh.j2 - dest: "{{ certbot_venv }}/certbot-create-cert.sh" + dest: "{{ certbot_venv }}/bin/certbot-create-cert.sh" - template: certbot-wrapper.sh.j2 - dest: "{{ certbot_venv }}/certbot-wrapper.sh" + dest: "{{ certbot_venv }}/bin/certbot-wrapper.sh" diff --git a/ansible/roles/certbot/templates/certbot-wrapper.sh.j2 b/ansible/roles/certbot/templates/certbot-wrapper.sh.j2 new file mode 100644 index 00000000..fed7edca --- /dev/null +++ b/ansible/roles/certbot/templates/certbot-wrapper.sh.j2 @@ -0,0 +1,12 @@ +#!/bin/bash + +# certbot-wrapper script: runs the virtualenv certbot with config/work/logs dirs relative to this script and forwards all arguments unchanged +# Must be copied to the bin directory of the certbot virtualenv: every path is resolved relative to $0 + +BASEDIR=$(dirname "$0") + +"$BASEDIR/certbot" \ + --config-dir "$BASEDIR/../config" \ + --work-dir "$BASEDIR/.." \ + --logs-dir "$BASEDIR/../logs" \ + "$@" \ No newline at end of file diff --git a/ansible/roles/certbot/templates/certbot.sh.j2 b/ansible/roles/certbot/templates/certbot.sh.j2 new file mode 100644 index 00000000..3826b16a --- /dev/null +++ b/ansible/roles/certbot/templates/certbot.sh.j2 @@ -0,0 +1,20 @@ +#!/bin/bash + +# Executing certbot within virtualenv: requests a certificate (dns-ionos authenticator) for the domain passed as $1 +# Must be copied to the bin directory of the certbot virtualenv: every path is resolved relative to $0 + +BASEDIR=$(dirname "$0") + +"$BASEDIR/certbot" certonly \ + --config-dir "$BASEDIR/../config" \ + --work-dir "$BASEDIR/.." 
\ + --logs-dir "$BASEDIR/../logs" \ + --authenticator dns-ionos \ + --dns-ionos-credentials "$BASEDIR/../.secrets/ionos-credentials.ini" \ + --dns-ionos-propagation-seconds {{ propagation_seconds }} \ + --server https://acme-v02.api.letsencrypt.org/directory \ + --agree-tos \ + --non-interactive \ + --rsa-key-size 4096 \ + -m {{ certbot_email }} \ + -d "$1" diff --git a/roles/certbot/templates/ionos-credentials.ini.j2 b/ansible/roles/certbot/templates/ionos-credentials.ini.j2 similarity index 100% rename from roles/certbot/templates/ionos-credentials.ini.j2 rename to ansible/roles/certbot/templates/ionos-credentials.ini.j2 diff --git a/roles/certbot/tests/install_certbot.yml b/ansible/roles/certbot/tests/install_certbot.yml similarity index 100% rename from roles/certbot/tests/install_certbot.yml rename to ansible/roles/certbot/tests/install_certbot.yml diff --git a/roles/dns/handlers/main.yml b/ansible/roles/dns/handlers/main.yml similarity index 100% rename from roles/dns/handlers/main.yml rename to ansible/roles/dns/handlers/main.yml diff --git a/roles/dns/tasks/main.yml b/ansible/roles/dns/tasks/main.yml similarity index 100% rename from roles/dns/tasks/main.yml rename to ansible/roles/dns/tasks/main.yml diff --git a/roles/k3s/master/defaults/main.yml b/ansible/roles/k3s/master/defaults/main.yml similarity index 100% rename from roles/k3s/master/defaults/main.yml rename to ansible/roles/k3s/master/defaults/main.yml diff --git a/roles/k3s/master/tasks/main.yml b/ansible/roles/k3s/master/tasks/main.yml similarity index 100% rename from roles/k3s/master/tasks/main.yml rename to ansible/roles/k3s/master/tasks/main.yml diff --git a/roles/k3s/prereq/handlers/main.yml b/ansible/roles/k3s/prereq/handlers/main.yml similarity index 100% rename from roles/k3s/prereq/handlers/main.yml rename to ansible/roles/k3s/prereq/handlers/main.yml diff --git a/roles/k3s/prereq/tasks/main.yml b/ansible/roles/k3s/prereq/tasks/main.yml similarity index 100% rename from 
roles/k3s/prereq/tasks/main.yml rename to ansible/roles/k3s/prereq/tasks/main.yml diff --git a/roles/k3s/worker/defaults/main.yml b/ansible/roles/k3s/worker/defaults/main.yml similarity index 100% rename from roles/k3s/worker/defaults/main.yml rename to ansible/roles/k3s/worker/defaults/main.yml diff --git a/roles/k3s/worker/tasks/main.yml b/ansible/roles/k3s/worker/tasks/main.yml similarity index 100% rename from roles/k3s/worker/tasks/main.yml rename to ansible/roles/k3s/worker/tasks/main.yml diff --git a/roles/linkerd/control-plane/defaults/main.yml b/ansible/roles/linkerd-cli/defaults/main.yml similarity index 78% rename from roles/linkerd/control-plane/defaults/main.yml rename to ansible/roles/linkerd-cli/defaults/main.yml index 3540db7c..99e439a2 100644 --- a/roles/linkerd/control-plane/defaults/main.yml +++ b/ansible/roles/linkerd-cli/defaults/main.yml @@ -1,8 +1,6 @@ --- - # Version linkerd_version: "stable-2.12.2" -linkerd_chart_version: 1.9.4 # Architecture linkerd_arch: "arm64" @@ -15,7 +13,3 @@ linkerd_checksum: "sha256:{{ linkerd_package_url }}.sha256" # linkerd install location linkerd_install_dir: "/usr/local/bin" linkerd_bin: "{{ linkerd_install_dir }}/linkerd" - -# linkerd identity issuer certificate -issuer_certificate_duration: "48h" -issuer_certificate_renewbefore: "23h" diff --git a/roles/linkerd/control-plane/tasks/install_linkerd_cli.yml b/ansible/roles/linkerd-cli/tasks/install_linkerd_cli.yml similarity index 100% rename from roles/linkerd/control-plane/tasks/install_linkerd_cli.yml rename to ansible/roles/linkerd-cli/tasks/install_linkerd_cli.yml diff --git a/ansible/roles/linkerd-cli/tasks/main.yml b/ansible/roles/linkerd-cli/tasks/main.yml new file mode 100644 index 00000000..f9dff85e --- /dev/null +++ b/ansible/roles/linkerd-cli/tasks/main.yml @@ -0,0 +1,14 @@ +--- + +- name: Check Linkerd CLI installation status + stat: + path: "{{ linkerd_bin }}" + register: _linkerd_bin + +- name: Install linkerd CLI + include_tasks: 
install_linkerd_cli.yml + args: + apply: + become: true + when: + - not _linkerd_bin.stat.exists diff --git a/roles/logging/external_node/tasks/main.yml b/ansible/roles/logging/external_node/tasks/main.yml similarity index 100% rename from roles/logging/external_node/tasks/main.yml rename to ansible/roles/logging/external_node/tasks/main.yml diff --git a/roles/logging/external_node/templates/adjust_ts.lua b/ansible/roles/logging/external_node/templates/adjust_ts.lua similarity index 100% rename from roles/logging/external_node/templates/adjust_ts.lua rename to ansible/roles/logging/external_node/templates/adjust_ts.lua diff --git a/ansible/roles/longhorn-util/defaults/main.yml b/ansible/roles/longhorn-util/defaults/main.yml new file mode 100644 index 00000000..1c3b5fc3 --- /dev/null +++ b/ansible/roles/longhorn-util/defaults/main.yml @@ -0,0 +1,6 @@ +--- +# Namespace for Longhorn +k3s_longhorn_namespace: longhorn-system + +# Enable service mesh +enable_linkerd: false diff --git a/roles/longhorn/files/check_lh.sh b/ansible/roles/longhorn-util/files/check_lh.sh similarity index 100% rename from roles/longhorn/files/check_lh.sh rename to ansible/roles/longhorn-util/files/check_lh.sh diff --git a/roles/longhorn/tasks/configure_linkerd_mesh.yml b/ansible/roles/longhorn-util/tasks/configure_linkerd_mesh.yml similarity index 86% rename from roles/longhorn/tasks/configure_linkerd_mesh.yml rename to ansible/roles/longhorn-util/tasks/configure_linkerd_mesh.yml index 991c10e2..3df0e43c 100644 --- a/roles/longhorn/tasks/configure_linkerd_mesh.yml +++ b/ansible/roles/longhorn-util/tasks/configure_linkerd_mesh.yml @@ -1,6 +1,11 @@ --- -# Make longhorn-manager container listen on localhost +# Check longhorn is running +- name: Check longhorn status. 
Wait for all components to start + command: + cmd: "/usr/local/bin/check_lh.sh" + changed_when: false +# Make longhorn-manager container listen on localhost - name: Change longhorn-manager POD_IP env variable command: cmd: "kubectl set env daemonset/longhorn-manager -n {{ k3s_longhorn_namespace }} POD_IP=0.0.0.0" @@ -22,7 +27,6 @@ linkerd.io/inject: enabled state: patched - - name: Annotate longhorn-ui kubernetes.core.k8s: definition: diff --git a/ansible/roles/longhorn-util/tasks/main.yml b/ansible/roles/longhorn-util/tasks/main.yml new file mode 100644 index 00000000..a6cf9b8b --- /dev/null +++ b/ansible/roles/longhorn-util/tasks/main.yml @@ -0,0 +1,15 @@ +--- +- name: Copy longhorn health check script + copy: + src: "files/{{ item }}" + dest: "/usr/local/bin/{{ item }}" + owner: "root" + group: "root" + mode: "u=rwx,g=rx,o=rx" + become: true + with_items: + - check_lh.sh + +- name: Enable linkerd integration + include_tasks: configure_linkerd_mesh.yml + when: enable_linkerd diff --git a/roles/longhorn/tests/cleanup_test_longhorn.yml b/ansible/roles/longhorn-util/tests/cleanup_test_longhorn.yml similarity index 100% rename from roles/longhorn/tests/cleanup_test_longhorn.yml rename to ansible/roles/longhorn-util/tests/cleanup_test_longhorn.yml diff --git a/roles/longhorn/tests/templates/testing_longhorn_manifest.yml b/ansible/roles/longhorn-util/tests/templates/testing_longhorn_manifest.yml similarity index 100% rename from roles/longhorn/tests/templates/testing_longhorn_manifest.yml rename to ansible/roles/longhorn-util/tests/templates/testing_longhorn_manifest.yml diff --git a/roles/longhorn/tests/test_longhorn.yml b/ansible/roles/longhorn-util/tests/test_longhorn.yml similarity index 100% rename from roles/longhorn/tests/test_longhorn.yml rename to ansible/roles/longhorn-util/tests/test_longhorn.yml diff --git a/ansible/roles/velero-cli/defaults/main.yml b/ansible/roles/velero-cli/defaults/main.yml new file mode 100644 index 00000000..6dd0adcd --- /dev/null +++ 
b/ansible/roles/velero-cli/defaults/main.yml @@ -0,0 +1,5 @@ +--- +velero_version: v1.9.2 +velero_arch: arm64 + +velero_namespace: velero diff --git a/roles/backup/velero/tasks/configure_velero_cli.yml b/ansible/roles/velero-cli/tasks/configure_velero_cli.yml similarity index 84% rename from roles/backup/velero/tasks/configure_velero_cli.yml rename to ansible/roles/velero-cli/tasks/configure_velero_cli.yml index 98096d2d..486e9f7f 100644 --- a/roles/backup/velero/tasks/configure_velero_cli.yml +++ b/ansible/roles/velero-cli/tasks/configure_velero_cli.yml @@ -8,10 +8,10 @@ - name: Configure velero CLI namespace command: - cmd: "velero client config set namespace={{ k3s_velero_namespace }}" + cmd: "velero client config set namespace={{ velero_namespace }}" when: - get_velero_namespace.rc==0 - - '"namespace: " in get_velero_namespace.stdout or "namespace: " + k3s_velero_namespace not in get_velero_namespace.stdout' + - '"namespace: " in get_velero_namespace.stdout or "namespace: " + velero_namespace not in get_velero_namespace.stdout' changed_when: true - name: Get CLI configured colored diff --git a/roles/backup/velero/tasks/install_velero_cli.yml b/ansible/roles/velero-cli/tasks/install_velero_cli.yml similarity index 100% rename from roles/backup/velero/tasks/install_velero_cli.yml rename to ansible/roles/velero-cli/tasks/install_velero_cli.yml diff --git a/ansible/roles/velero-cli/tasks/main.yml b/ansible/roles/velero-cli/tasks/main.yml new file mode 100644 index 00000000..45631b56 --- /dev/null +++ b/ansible/roles/velero-cli/tasks/main.yml @@ -0,0 +1,10 @@ +--- + +- name: Install velero client + include_tasks: install_velero_cli.yml + args: + apply: + become: true + +- name: Configure velero CLI + include_tasks: configure_velero_cli.yml diff --git a/roles/backup/velero/tests/cleaning_testing.yml b/ansible/roles/velero-cli/tests/cleaning_testing.yml similarity index 100% rename from roles/backup/velero/tests/cleaning_testing.yml rename to 
ansible/roles/velero-cli/tests/cleaning_testing.yml diff --git a/roles/backup/velero/tests/files/nginx_test_application.yml b/ansible/roles/velero-cli/tests/files/nginx_test_application.yml similarity index 100% rename from roles/backup/velero/tests/files/nginx_test_application.yml rename to ansible/roles/velero-cli/tests/files/nginx_test_application.yml diff --git a/roles/backup/velero/tests/testing_velero.yml b/ansible/roles/velero-cli/tests/testing_velero.yml similarity index 100% rename from roles/backup/velero/tests/testing_velero.yml rename to ansible/roles/velero-cli/tests/testing_velero.yml diff --git a/setup_picluster.yml b/ansible/setup_picluster.yml similarity index 100% rename from setup_picluster.yml rename to ansible/setup_picluster.yml diff --git a/shutdown.yml b/ansible/shutdown.yml similarity index 100% rename from shutdown.yml rename to ansible/shutdown.yml diff --git a/tasks/cleaning.yml b/ansible/tasks/cleaning.yml similarity index 100% rename from tasks/cleaning.yml rename to ansible/tasks/cleaning.yml diff --git a/ansible/tasks/configure_vault_integration.yml b/ansible/tasks/configure_vault_integration.yml new file mode 100644 index 00000000..b946af01 --- /dev/null +++ b/ansible/tasks/configure_vault_integration.yml @@ -0,0 +1,39 @@ +--- + +- name: Configure vault service account and create token + kubernetes.core.k8s: + definition: "{{ lookup('ansible.builtin.file', '../argocd/bootstrap/vault/' + item ) }}" + state: present + with_items: + - vault-auth-serviceaccount.yaml + +- name: Get Token review + shell: | + KUBERNETES_SA_SECRET_NAME=$(kubectl get secrets --output=json -n vault | jq -r '.items[].metadata | select(.name|startswith("vault-auth")).name') + TOKEN_REVIEW_JWT=$(kubectl get secret $KUBERNETES_SA_SECRET_NAME -n vault -o jsonpath='{.data.token}' | base64 --decode) + echo $TOKEN_REVIEW_JWT + register: get_reviewer_token + changed_when: false + +- name: Set reviewer token + set_fact: + vault_reviewer_token: "{{ 
get_reviewer_token.stdout }}" + +- name: Get Kubernetes CA cert + shell: | + KUBERNETES_CA_CERT=$(kubectl config view --raw --minify --flatten --output='jsonpath={.clusters[].cluster.certificate-authority-data}' \ + | base64 --decode | awk 'NF {sub(/\r/, ""); printf "%s\\n",$0;}') + echo $KUBERNETES_CA_CERT + register: get_kubernetes_ca + changed_when: false + +- name: Set CA cert + set_fact: + kubernetes_ca_cert: "{{ get_kubernetes_ca.stdout }}" + +- name: Set kubernetes_host + set_fact: + kubernetes_host: "https://{{ k3s_master_ip }}:6443" + +- name: Configure vault-kubernetes-auth + include_tasks: tasks/vault_kubernetes_auth_method_config.yml diff --git a/roles/traefik/tasks/create_basic_auth_credentials.yml b/ansible/tasks/create_basic_auth_credentials.yml similarity index 50% rename from roles/traefik/tasks/create_basic_auth_credentials.yml rename to ansible/tasks/create_basic_auth_credentials.yml index 1b5b2915..cb16ee5e 100644 --- a/roles/traefik/tasks/create_basic_auth_credentials.yml +++ b/ansible/tasks/create_basic_auth_credentials.yml @@ -10,10 +10,22 @@ - name: htpasswd utility shell: cmd: >- - htpasswd -nb {{ traefik_basic_auth_user }} {{ traefik_basic_auth_passwd }} | base64 + htpasswd -nb {{ traefik_basic_auth_user }} {{ traefik_basic_auth_passwd }} register: htpasswd changed_when: false - name: Set htpasswd pair set_fact: traefik_auth_htpasswd_pair: "{{ htpasswd.stdout }}" + + +- name: Create/update traefik/basic_auth credentials + ansible.builtin.uri: + url: "https://{{ vault_dns }}:8200/v1/secret/data/traefik/basic_auth" + method: POST + headers: + X-Vault-Token: "{{ token_data | community.hashi_vault.vault_login_token }}" + body: + data: + htpasswd-pair: "{{ traefik_auth_htpasswd_pair }}" + body_format: json diff --git a/ansible/tasks/create_minio_bearer_token.yml b/ansible/tasks/create_minio_bearer_token.yml new file mode 100644 index 00000000..500b2b74 --- /dev/null +++ b/ansible/tasks/create_minio_bearer_token.yml @@ -0,0 +1,24 @@ +--- +# 
Minio prometheus bearer token was created and stored in filesystem +- name: Load prometheus bearer token from file in node1 + command: "jq -r '.bearerToken' /etc/minio/prometheus_bearer.json" + register: root_token + become: true + changed_when: false + when: minio_prom_bearer_token is not defined + delegate_to: node1 + +- name: Get bearer token + set_fact: + minio_prom_bearer_token: "{{ minio_prom_bearer_token | default(root_token.stdout) }}" # keep a pre-supplied token; root_token has no stdout when the load task above was skipped + +- name: Create/update minio/prometheus credentials + ansible.builtin.uri: + url: "https://{{ vault_dns }}:8200/v1/secret/data/minio/prometheus" + method: POST + headers: + X-Vault-Token: "{{ token_data | community.hashi_vault.vault_login_token }}" + body: + data: + bearer-token: "{{ minio_prom_bearer_token }}" + body_format: json diff --git a/ansible/tasks/generate_ca_signed_cert.yml b/ansible/tasks/generate_ca_signed_cert.yml new file mode 100644 index 00000000..4afb210f --- /dev/null +++ b/ansible/tasks/generate_ca_signed_cert.yml @@ -0,0 +1,25 @@ +--- +- name: Create private key + openssl_privatekey: + path: "{{ selfsigned_certificates_path }}/{{ server_hostname }}.key" + size: "{{ ssl_key_size | int }}" + type: "{{ key_type }}" + mode: "0644" + +- name: Create CSR + openssl_csr: + path: "{{ selfsigned_certificates_path }}/{{ server_hostname }}.csr" + privatekey_path: "{{ selfsigned_certificates_path }}/{{ server_hostname }}.key" + country_name: "{{ country_name }}" + organization_name: "{{ organization_name }}" + email_address: "{{ email_address }}" + common_name: "{{ server_hostname }}" + subject_alt_name: "DNS:{{ server_hostname }},IP:{{ ansible_default_ipv4.address }}" + +- name: CA signed CSR + openssl_certificate: + csr_path: "{{ selfsigned_certificates_path }}/{{ server_hostname }}.csr" + path: "{{ selfsigned_certificates_path }}/{{ server_hostname }}.pem" + provider: ownca + ownca_path: "{{ selfsigned_certificates_path }}/CA.pem" + ownca_privatekey_path: "{{ selfsigned_certificates_path }}/CA.key" diff --git a/tasks/generate_custom_ca.yml 
b/ansible/tasks/generate_custom_ca.yml similarity index 60% rename from tasks/generate_custom_ca.yml rename to ansible/tasks/generate_custom_ca.yml index d111b5f6..b769a411 100644 --- a/tasks/generate_custom_ca.yml +++ b/ansible/tasks/generate_custom_ca.yml @@ -1,14 +1,14 @@ --- - name: Create CA key openssl_privatekey: - path: certificates/CA.key + path: "{{ selfsigned_certificates_path }}/CA.key" size: "{{ ssl_key_size | int }}" mode: 0644 register: ca_key - name: create the CA CSR openssl_csr: - privatekey_path: certificates/CA.key + privatekey_path: "{{ selfsigned_certificates_path }}/CA.key" common_name: Ricsanfre CA use_common_name_for_san: false # since we do not specify SANs, don't use CN as a SAN basic_constraints: @@ -17,13 +17,13 @@ key_usage: - keyCertSign key_usage_critical: true - path: certificates/CA.csr + path: "{{ selfsigned_certificates_path }}/CA.csr" register: ca_csr - name: sign the CA CSR openssl_certificate: - path: certificates/CA.pem - csr_path: certificates/CA.csr - privatekey_path: certificates/CA.key + path: "{{ selfsigned_certificates_path }}/CA.pem" + csr_path: "{{ selfsigned_certificates_path }}/CA.csr" + privatekey_path: "{{ selfsigned_certificates_path }}/CA.key" provider: selfsigned register: ca_crt diff --git a/tasks/generate_selfsigned_cert.yml b/ansible/tasks/generate_selfsigned_cert.yml similarity index 100% rename from tasks/generate_selfsigned_cert.yml rename to ansible/tasks/generate_selfsigned_cert.yml diff --git a/ansible/tasks/install_cli_utils.yml b/ansible/tasks/install_cli_utils.yml new file mode 100644 index 00000000..69f1e0de --- /dev/null +++ b/ansible/tasks/install_cli_utils.yml @@ -0,0 +1,11 @@ +--- +# Execute roles to install cli and utils +- name: Use role in loop + ansible.builtin.include_role: + name: '{{ role }}' + loop_control: + loop_var: role + loop: + - longhorn-util + - velero-cli + - linkerd-cli diff --git a/ansible/tasks/load_vault_credentials.yml b/ansible/tasks/load_vault_credentials.yml new file 
mode 100644 index 00000000..74d0b8ce --- /dev/null +++ b/ansible/tasks/load_vault_credentials.yml @@ -0,0 +1,29 @@ +--- + +# hashi_vault.vault_write module is not working +# +# - name: Create {{ secret_group.key }} credentials +# community.hashi_vault.vault_write: +# url: "https://{{ vault_dns }}:8200" +# path: "secret/{{ secret_group.key }}/{{ secret.key }}" +# data: "{{ secret.value }}" +# auth_method: token +# token: '{{ token_data | community.hashi_vault.vault_login_token }}' +# loop: "{{ secret_group.value | dict2items }}" +# loop_control: +# loop_var: secret + +# https://developer.hashicorp.com/vault/api-docs/secret/kv/kv-v2#create-update-secret +- name: Create/update {{ secret_group.key }} credentials + ansible.builtin.uri: + url: "https://{{ vault_dns }}:8200/v1/secret/data/{{ secret_group.key }}/{{ secret.key }}" + method: POST + headers: + X-Vault-Token: "{{ token_data | community.hashi_vault.vault_login_token }}" + body: + data: "{{ secret.value }}" + body_format: json + loop: "{{ secret_group.value | dict2items }}" + loop_control: + loop_var: secret + no_log: true diff --git a/roles/prometheus/tasks/configure_grafana_dashboards.yml b/ansible/tasks/patch_grafana_dashboard.yml similarity index 63% rename from roles/prometheus/tasks/configure_grafana_dashboards.yml rename to ansible/tasks/patch_grafana_dashboard.yml index 4aeab8c0..d2d47844 100644 --- a/roles/prometheus/tasks/configure_grafana_dashboards.yml +++ b/ansible/tasks/patch_grafana_dashboard.yml @@ -4,7 +4,7 @@ # Check if json file contains DS_PROMETHEUS variable defined and patch json file # See issue #18 -- name: Provisioninig dashboard {{ dashboard_name }} | Initialize loop variables +- name: Patching dashboard {{ dashboard_name }} | Initialize loop variables set_fact: dashboard_name: "{{ dashboard_file | basename | splitext | first }}" dashboard_file_name: "{{ dashboard_file | basename }}" @@ -12,17 +12,17 @@ input_detected: false input_variable: false -- name: Provisioninig dashboard {{ 
dashboard_name }} | Check if __inputs key exits within json dashboard +- name: Patching dashboard {{ dashboard_name }} | Check if __inputs key exits within json dashboard set_fact: input_detected: true when: dashboard_content.__inputs is defined -- name: Provisioninig dashboard {{ dashboard_name }} | Detect if variable DS_PROMETHEUS exits +- name: Patching dashboard {{ dashboard_name }} | Detect if variable DS_PROMETHEUS exits set_fact: input_variable: "{{ dashboard_content.__inputs | selectattr('name','==', 'DS_PROMETHEUS') | length > 0 }}" when: input_detected -- name: Provisioninig dashboard {{ dashboard_name }} | Generating patch templating.list code block to add DS_PROMETHEUS variable +- name: Patching dashboard {{ dashboard_name }} | Generating patch templating.list code block to add DS_PROMETHEUS variable set_fact: patch: "{{ [{ 'hide': 0, 'label': 'datasource', @@ -34,15 +34,15 @@ 'type': 'datasource' }] + dashboard_content.templating.list }}" when: input_variable -- name: Provisioninig dashboard {{ dashboard_name }} | Patch json dashboard file +- name: Patching dashboard {{ dashboard_name }} | Patch json dashboard file set_fact: dashboard_content: "{{ dashboard_content | combine(new_item, recursive=true) }}" vars: new_item: "{{ { 'templating': { 'list': patch } } }}" when: input_variable -# Create ConfigMap -- name: "Provisioninig dashboard {{ dashboard_name }} | Kubernetes apply" - kubernetes.core.k8s: - definition: "{{ lookup('template', 'templates/grafana_dashboard.yml.j2' ) }}" - state: present +- name: "Patching dashboard {{ dashboard_name }} | Copying to patching directory" + copy: + dest: "temp/{{ dashboard_file_name }}" + content: "{{ dashboard_content | to_nice_json(indent=2) }}" + when: input_variable diff --git a/ansible/tasks/vault_kubernetes_auth_method_config.yml b/ansible/tasks/vault_kubernetes_auth_method_config.yml new file mode 100644 index 00000000..c5520163 --- /dev/null +++ b/ansible/tasks/vault_kubernetes_auth_method_config.yml @@ 
-0,0 +1,83 @@ +--- + +- name: Vault login + ansible.builtin.shell: bash -ilc 'vault login -format=json $VAULT_TOKEN' + # Interactive bash so .bashrc is loaded + # 'source ~/.bashrc && ' does not work because + # Ansible shell is not interactive and ~/.bashrc implementation by default ignores non interactive shell. + # See lines at beginning of bashrc: + # + # If not running interactively, don't do anything + # case $- in + # *i*) ;; + # *) return;; + # esac + # The best solution for executing commands as user after its ssh interactive login: + # bash -ilc '' + # '-i' means interactive shell, so .bashrc won't be ignored '-l' means login shell which sources full user profile + become: false + register: vault_login + changed_when: false + delegate_to: gateway + +- name: Get vault token + set_fact: + vault_token: "{{ vault_login.stdout | from_json | community.hashi_vault.vault_login_token }}" + +- name: Get status of kubernetes auth method + ansible.builtin.uri: + url: "https://{{ vault_dns }}:8200/v1/sys/auth" + method: GET + headers: + X-Vault-Token: "{{ vault_token }}" + failed_when: + - false + register: vault_status_kubernetes_auth_method + + # Enable kubernetes auth method + # vault auth enable kubernetes +- name: Enable kubernetes auth method + ansible.builtin.uri: + url: "https://{{ vault_dns }}:8200/v1/sys/auth/kubernetes" + method: POST + headers: + X-Vault-Token: "{{ vault_token }}" + body: + type: "kubernetes" + description: "kubernetes auth" + body_format: json + status_code: + - 200 + - 204 + when: + - "'kubernetes/' not in vault_status_kubernetes_auth_method.json.data" + +- name: Configure kubernetes auth method + ansible.builtin.uri: + url: "https://{{ vault_dns }}:8200/v1/auth/kubernetes/config" + method: POST + headers: + X-Vault-Token: "{{ vault_token }}" + body: + kubernetes_host: "{{ kubernetes_host }}" + kubernetes_ca_cert: "{{ kubernetes_ca_cert }}" + token_reviewer_jwt: "{{ vault_reviewer_token }}" + body_format: json + status_code: + - 200 + - 
204 + +- name: Create External Secrets role + ansible.builtin.uri: + url: "https://{{ vault_dns }}:8200/v1/auth/kubernetes/role/external-secrets" + method: POST + headers: + X-Vault-Token: "{{ vault_token }}" + body: + bound_service_account_names: external-secrets + bound_service_account_namespaces: external-secrets + policies: ["read"] + body_format: json + status_code: + - 200 + - 204 diff --git a/ansible/templates/argocd_root_app.yml.j2 b/ansible/templates/argocd_root_app.yml.j2 new file mode 100644 index 00000000..f5e57799 --- /dev/null +++ b/ansible/templates/argocd_root_app.yml.j2 @@ -0,0 +1,26 @@ +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: root + namespace: argocd +spec: + destination: + namespace: argocd + server: https://kubernetes.default.svc + project: default + source: + path: argocd/bootstrap/root + repoURL: https://github.com/ricsanfre/pi-cluster + targetRevision: master + syncPolicy: + automated: + prune: true + selfHeal: true + retry: + limit: 10 + backoff: + duration: 1m + maxDuration: 16m + factor: 2 + syncOptions: + - CreateNamespace=true \ No newline at end of file diff --git a/update.yml b/ansible/update.yml similarity index 100% rename from update.yml rename to ansible/update.yml diff --git a/vars/centralized_san/centralized_san_initiator.yml b/ansible/vars/centralized_san/centralized_san_initiator.yml similarity index 100% rename from vars/centralized_san/centralized_san_initiator.yml rename to ansible/vars/centralized_san/centralized_san_initiator.yml diff --git a/vars/centralized_san/centralized_san_target.yml b/ansible/vars/centralized_san/centralized_san_target.yml similarity index 100% rename from vars/centralized_san/centralized_san_target.yml rename to ansible/vars/centralized_san/centralized_san_target.yml diff --git a/vars/dedicated_disks/local_storage.yml b/ansible/vars/dedicated_disks/local_storage.yml similarity index 100% rename from vars/dedicated_disks/local_storage.yml rename to 
ansible/vars/dedicated_disks/local_storage.yml diff --git a/ansible/vars/picluster.yml b/ansible/vars/picluster.yml new file mode 100644 index 00000000..676d0a8e --- /dev/null +++ b/ansible/vars/picluster.yml @@ -0,0 +1,279 @@ +--- +# Pi Cluster variables + +####### +# K3S # +####### + +# k3s version +k3s_version: v1.24.7+k3s1 + +# k3s master node IP +k3s_master_ip: 10.0.0.11 + +# k3s shared token +k3s_token: "{{ vault.cluster.k3s.token }}" + +# k3s config directory +k3s_config_directory: /etc/rancher/k3s + +# kubelet configuration +k3s_kubelet_config: | + apiVersion: kubelet.config.k8s.io/v1beta1 + kind: KubeletConfiguration + shutdownGracePeriod: 30s + shutdownGracePeriodCriticalPods: 10s + +# Extra arguments for k3s server installation +k3s_server_extra_args: >- + --write-kubeconfig-mode '0644' + --disable 'servicelb' + --disable 'traefik' + --disable 'local-storage' + --node-taint 'node-role.kubernetes.io/master=true:NoSchedule' + --kube-controller-manager-arg 'bind-address=0.0.0.0' + --kube-proxy-arg 'metrics-bind-address=0.0.0.0' + --kube-scheduler-arg 'bind-address=0.0.0.0' + --kubelet-arg 'config=/etc/rancher/k3s/kubelet.config' + --kube-controller-manager-arg 'terminated-pod-gc-threshold=10' + +# Extra arguments for k3s agent installation +k3s_worker_extra_args: >- + --node-label 'node_type=worker' + --kubelet-arg 'config=/etc/rancher/k3s/kubelet.config' + --kube-proxy-arg 'metrics-bind-address=0.0.0.0' + + +########### +# Traefik # +########### + +# HTTP Basic auth credentials +traefik_basic_auth_user: "{{ vault.traefik.admin.user }}" +traefik_basic_auth_passwd: "{{ vault.traefik.admin.password }}" + +# DNS cluster service end-points +traefik_dashboard_dns: "traefik.{{ dns_domain }}" +longhorn_dashboard_dns: "storage.{{ dns_domain }}" +kibana_dashboard_dns: "kibana.{{ dns_domain }}" +elasticsearch_dns: "elasticsearch.{{ dns_domain }}" +fluentd_dns: "fluentd.{{ dns_domain }}" +monitoring_dns: "monitoring.{{ dns_domain }}" +linkerd_dashboard_dns: 
"linkerd.{{ dns_domain }}" + +################################# +# TLS Certificates: LetsEncrypt # +################################# + +# Enable letsencrypt certificates +enable_letsencrypt: true + +# IONOS API credentials +ionos_public_prefix: "{{ vault.certmanager.ionos.public_prefix }}" +ionos_secret: "{{ vault.certmanager.ionos.secret }}" + +# issuer email +acme_issuer_email: admin@ricsanfre.com + +########################## +# Minio S3 configuration # +########################## + +# Minio S3 Server +minio_hostname: "s3.{{ dns_domain }}" +minio_endpoint: "{{ minio_hostname }}:9091" +minio_url: "https://{{ minio_hostname }}:9091" + +# Minio data dirs +minio_server_make_datadirs: true +minio_server_datadirs: + - /storage/minio + +# Minio admin credentials +minio_root_user: "minioadmin" +minio_root_password: "{{ vault.minio.root.key }}" + +# Minio site region configuration +minio_site_region: "eu-west-1" + +# Enabling TLS +minio_enable_tls: true +minio_validate_certificate: false + +# Create Prometheus bearer token +minio_prometheus_bearer_token: true + +# Minio Buckets +minio_buckets: + - name: restic + policy: read-write + - name: k3s-longhorn + policy: read-write + - name: k3s-velero + policy: read-write + - name: k3s-loki + policy: read-write + - name: k3s-tempo + policy: read-write + +# Minio users and ACLs +minio_users: + - name: "{{ vault.minio.restic.user }}" + password: "{{ vault.minio.restic.key }}" + buckets_acl: + - name: restic + policy: read-write + - name: "{{ vault.minio.longhorn.user }}" + password: "{{ vault.minio.longhorn.key }}" + buckets_acl: + - name: k3s-longhorn + policy: read-write + - name: "{{ vault.minio.velero.user }}" + password: "{{ vault.minio.velero.key }}" + buckets_acl: + - name: k3s-velero + policy: custom + custom: + - rule: | + "Effect": "Allow", + "Action": [ + "s3:GetObject", + "s3:DeleteObject", + "s3:PutObject", + "s3:AbortMultipartUpload", + "s3:ListMultipartUploadParts" + ], + "Resource": [ + 
"arn:aws:s3:::k3s-velero/*" + ] + - rule: | + "Effect": "Allow", + "Action": [ + "s3:ListBucket" + ], + "Resource": [ + "arn:aws:s3:::k3s-velero" + ] + + - name: "{{ vault.minio.loki.user }}" + password: "{{ vault.minio.loki.key }}" + buckets_acl: + - name: k3s-loki + policy: read-write + + - name: "{{ vault.minio.tempo.user }}" + password: "{{ vault.minio.tempo.key }}" + buckets_acl: + - name: k3s-tempo + policy: custom + custom: + - rule: | + "Effect": "Allow", + "Action": [ + "s3:PutObject", + "s3:GetObject", + "s3:ListBucket", + "s3:DeleteObject", + "s3:GetObjectTagging", + "s3:PutObjectTagging" + ], + "Resource": [ + "arn:aws:s3:::k3s-tempo/*", + "arn:aws:s3:::k3s-tempo" + ] + +######################## +# Restic configuration # +######################## + +# Restic S3 repository configuration +restic_repository: "s3:{{ minio_url }}/restic" +restic_use_ca_cert: true +restic_environment: + - name: AWS_ACCESS_KEY_ID + value: "{{ vault.minio.restic.user }}" + - name: AWS_SECRET_ACCESS_KEY + value: "{{ vault.minio.restic.key }}" + +####################### +# Vault configuration +####################### + +vault_hostname: "vault.{{ dns_domain }}" +vault_address: 10.0.0.1 +vault_dns: "{{ vault_hostname }}" +vault_enable_tls: true +custom_ca: false +vault_init: true +vault_unseal: true +vault_unseal_service: true +tls_skip_verify: false + +# Configure KV +vault_kv_secrets: + path: secret + +# Policies +policies: + - name: write + hcl: | + path "secret/*" { + capabilities = [ "create", "read", "update", "delete", "list", "patch" ] + } + - name: read + hcl: | + path "secret/*" { + capabilities = [ "read" ] + } + +################### +# Velero Secrets # +################### + +# Minio user, key and bucket +minio_velero_user: "{{ vault.minio.velero.user }}" +minio_velero_key: "{{ vault.minio.velero.key }}" + +velero_secret_content: | + [default] + aws_access_key_id: "{{ minio_velero_user }}" + aws_secret_access_key: "{{ minio_velero_key }}" + +################### +# 
Longhorn Secrets# +################### + +# Minio user, key and bucket +minio_longhorn_user: "{{ vault.minio.longhorn.user }}" +minio_longhorn_key: "{{ vault.minio.longhorn.key }}" + +################### +# Logging Secrets # +################### + +# Fluentd-fluentbit shared key +fluentd_shared_key: "{{ vault.logging.fluentd.shared_key }}" + +# Elasticsearch 'elastic' user password +efk_elasticsearch_passwd: "{{ vault.logging.elasticsearch.password }}" + +# Loki minio user, key and bucket +minio_loki_user: "{{ vault.minio.loki.user }}" +minio_loki_key: "{{ vault.minio.loki.key }}" + +###################### +# Monitoring Secrets # +###################### + +# Grafana admin user password +prometheus_grafana_password: "{{ vault.grafana.admin.password}}" + + +####################### +# Tracing Secrets # +####################### + +# Tempo minio user, key and bucket +minio_tempo_user: "{{ vault.minio.tempo.user }}" +minio_tempo_key: "{{ vault.minio.tempo.key }}" +minio_tempo_bucket: k3s-tempo diff --git a/ansible/vars/selfsigned-certificates.yml b/ansible/vars/selfsigned-certificates.yml new file mode 100644 index 00000000..742355ce --- /dev/null +++ b/ansible/vars/selfsigned-certificates.yml @@ -0,0 +1,8 @@ +--- +# Self-signed certificate generation variables +ssl_key_size: 4096 +ssl_certificate_provider: selfsigned +key_type: RSA +country_name: ES +email_address: admin@ricsanfre.com +organization_name: Ricsanfre diff --git a/ansible/vars/vault.yml b/ansible/vars/vault.yml new file mode 100644 index 00000000..71e91b2c --- /dev/null +++ b/ansible/vars/vault.yml @@ -0,0 +1,55 @@ +--- +# Encrypted variables - Ansible Vault +vault: + # SAN + san: + iscsi: + node_pass: s1cret0 + password_mutual: 0tr0s1cret0 + # K3s secrets + k3s: + k3s_token: s1cret0 + # traefik secrets + traefik: + basic_auth: + user: admin + passwd: s1cret0 + # Minio S3 secrets + minio: + root: + user: root + key: supers1cret0 + restic: + user: restic + key: supers1cret0 + longhorn: + user: longhorn + 
key: supers1cret0 + velero: + user: velero + key: supers1cret0 + loki: + user: loki + key: supers1cret0 + tempo: + user: tempo + key: supers1cret0 + # elastic search + elasticsearch: + elastic: + user: elastic + password: s1cret0 + # Fluentd + fluentd: + shared_key: s1cret0 + # Grafana + grafana: + admin: + user: admin + password: s1cret0 + + # Certmanager + certmanager: + ionos: + public_prefix: your-public-prefix + secret: your-key diff --git a/ansible/vars/vault.yml.j2 b/ansible/vars/vault.yml.j2 new file mode 100644 index 00000000..d88f9e09 --- /dev/null +++ b/ansible/vars/vault.yml.j2 @@ -0,0 +1,54 @@ +--- +# Encrypted variables - Ansible Vault +vault: + # SAN + san: + iscsi: + node_pass: {{ san_iscsi_node_pass | default("") }} + password_mutual: {{ san_iscsi_mutual_pass | default("") }} + # K3s secrets + cluster: + k3s: + token: {{ k3s_token }} + # Traefik secrets + traefik: + admin: + user: admin + password: {{ traefik_basic_auth_password }} + # Minio S3 secrets + minio: + root: + user: root + key: {{ minio_root_password }} + restic: + user: restic + key: {{ minio_restic_password }} + longhorn: + user: longhorn + key: {{ minio_longhorn_password }} + velero: + user: velero + key: {{ minio_velero_password }} + loki: + user: loki + key: {{ minio_loki_password }} + tempo: + user: tempo + key: {{ minio_tempo_password }} + # elasticsearch and fluentd + logging: + elastic: + user: elastic + password: {{ elasticsearch_admin_password }} + fluentd: + shared_key: {{ fluentd_shared_key }} + # Grafana + grafana: + admin: + user: admin + password: {{ grafana_admin_password }} + # Certmanager + certmanager: + ionos: + public_prefix: {{ ionos_public_prefix }} + secret: {{ ionos_secret }} diff --git a/argocd/bootstrap/argocd/Chart.yaml b/argocd/bootstrap/argocd/Chart.yaml new file mode 100644 index 00000000..8e465fe9 --- /dev/null +++ b/argocd/bootstrap/argocd/Chart.yaml @@ -0,0 +1,7 @@ +apiVersion: v2 +name: argocd +version: 0.0.0 +dependencies: + - name: argo-cd + version: 
5.17.1 + repository: https://argoproj.github.io/argo-helm diff --git a/argocd/bootstrap/argocd/templates/ingress.yaml b/argocd/bootstrap/argocd/templates/ingress.yaml new file mode 100644 index 00000000..8abfc9c3 --- /dev/null +++ b/argocd/bootstrap/argocd/templates/ingress.yaml @@ -0,0 +1,31 @@ +--- +# HTTPS Ingress +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: argocd-ingress + namespace: {{ .Release.Namespace }} + annotations: + # HTTPS as entry point + traefik.ingress.kubernetes.io/router.entrypoints: websecure + # Enable TLS + traefik.ingress.kubernetes.io/router.tls: "true" + # Enable cert-manager to create automatically the SSL certificate and store in Secret + cert-manager.io/cluster-issuer: {{ .Values.ingress.certmanager.tlsIssuer }}-issuer + cert-manager.io/common-name: {{ .Values.ingress.host }} +spec: + tls: + - hosts: + - {{ .Values.ingress.host }} + secretName: argocd-tls + rules: + - host: {{ .Values.ingress.host }} + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: argocd-server + port: + number: 80 diff --git a/argocd/bootstrap/argocd/values.yaml b/argocd/bootstrap/argocd/values.yaml new file mode 100644 index 00000000..58b9b713 --- /dev/null +++ b/argocd/bootstrap/argocd/values.yaml @@ -0,0 +1,29 @@ + +# Ingress configuration +ingress: + host: argocd.picluster.ricsanfre.com + # configure cert-manager issuer + certmanager: + tlsIssuer: letsencrypt + +argo-cd: + configs: + params: + # Run server without TLS + # Traefik finishes TLS connections + server.insecure: true + cm: + statusbadge.enabled: 'true' + resource.customizations.health.argoproj.io_Application: | + hs = {} + hs.status = "Progressing" + hs.message = "" + if obj.status ~= nil then + if obj.status.health ~= nil then + hs.status = obj.status.health.status + if obj.status.health.message ~= nil then + hs.message = obj.status.health.message + end + end + end + return hs diff --git a/argocd/bootstrap/crds/cert-manager/kustomization.yaml 
b/argocd/bootstrap/crds/cert-manager/kustomization.yaml new file mode 100644 index 00000000..610dcf70 --- /dev/null +++ b/argocd/bootstrap/crds/cert-manager/kustomization.yaml @@ -0,0 +1,7 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: +# Cert-manager helm installation https://cert-manager.io/docs/installation/helm/ +# CDRs can be installed manually. +# cert-manager helm chart to be installed with value installCDRs=false +- https://github.com/cert-manager/cert-manager/releases/download/v1.10.0/cert-manager.crds.yaml \ No newline at end of file diff --git a/argocd/bootstrap/crds/external-secrets/kustomization.yaml b/argocd/bootstrap/crds/external-secrets/kustomization.yaml new file mode 100644 index 00000000..2b8c9756 --- /dev/null +++ b/argocd/bootstrap/crds/external-secrets/kustomization.yaml @@ -0,0 +1,7 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: +# external-secrets https://github.com/external-secrets/external-secrets/tree/main/deploy/charts/external-secrets +# helm chart version 0.7.1 +# external-secrets helm chart to be installed with value installCDRs=false +- https://raw.githubusercontent.com/external-secrets/external-secrets/v0.7.1/deploy/crds/bundle.yaml \ No newline at end of file diff --git a/argocd/bootstrap/crds/kube-prometheus-stack/kustomization.yaml b/argocd/bootstrap/crds/kube-prometheus-stack/kustomization.yaml new file mode 100644 index 00000000..2f929277 --- /dev/null +++ b/argocd/bootstrap/crds/kube-prometheus-stack/kustomization.yaml @@ -0,0 +1,15 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: +# Kube-prometheus-stack cdrs. 
+# https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack +# Helm chart version 43.3.1 +# kube-prometheus-helm chart to be installed with helm param --skip-cdrs +- https://raw.githubusercontent.com/prometheus-community/helm-charts/kube-prometheus-stack-43.3.1/charts/kube-prometheus-stack/crds/crd-alertmanagerconfigs.yaml +- https://raw.githubusercontent.com/prometheus-community/helm-charts/kube-prometheus-stack-43.3.1/charts/kube-prometheus-stack/crds/crd-alertmanagers.yaml +- https://raw.githubusercontent.com/prometheus-community/helm-charts/kube-prometheus-stack-43.3.1/charts/kube-prometheus-stack/crds/crd-podmonitors.yaml +- https://raw.githubusercontent.com/prometheus-community/helm-charts/kube-prometheus-stack-43.3.1/charts/kube-prometheus-stack/crds/crd-probes.yaml +- https://raw.githubusercontent.com/prometheus-community/helm-charts/kube-prometheus-stack-43.3.1/charts/kube-prometheus-stack/crds/crd-prometheuses.yaml +- https://raw.githubusercontent.com/prometheus-community/helm-charts/kube-prometheus-stack-43.3.1/charts/kube-prometheus-stack/crds/crd-prometheusrules.yaml +- https://raw.githubusercontent.com/prometheus-community/helm-charts/kube-prometheus-stack-43.3.1/charts/kube-prometheus-stack/crds/crd-servicemonitors.yaml +- https://raw.githubusercontent.com/prometheus-community/helm-charts/kube-prometheus-stack-43.3.1/charts/kube-prometheus-stack/crds/crd-thanosrulers.yaml diff --git a/argocd/bootstrap/crds/kustomization.yaml b/argocd/bootstrap/crds/kustomization.yaml new file mode 100644 index 00000000..c046d5d4 --- /dev/null +++ b/argocd/bootstrap/crds/kustomization.yaml @@ -0,0 +1,8 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ./cert-manager + - ./external-secrets + - ./traefik + - ./kube-prometheus-stack diff --git a/argocd/bootstrap/crds/traefik/kustomization.yaml b/argocd/bootstrap/crds/traefik/kustomization.yaml new file mode 100644 index 00000000..6916f7b6 --- 
/dev/null +++ b/argocd/bootstrap/crds/traefik/kustomization.yaml @@ -0,0 +1,9 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: +# Traefik helm chart v20.6.0 CDRs. +# Traefik provides CDRs upgrade process (https://github.com/traefik/traefik-helm-chart#upgrading-crds) +# Issue #581(https://github.com/traefik/traefik-helm-chart/issues/581) +# Within CDRs direcory in github repository there is a kustomization.yaml. +# Traefik helm chart to be installed with helm param --skip-cdrs +- https://github.com/traefik/traefik-helm-chart/traefik/crds/?ref=v20.6.0 \ No newline at end of file diff --git a/argocd/bootstrap/root/Chart.yaml b/argocd/bootstrap/root/Chart.yaml new file mode 100644 index 00000000..ec654b80 --- /dev/null +++ b/argocd/bootstrap/root/Chart.yaml @@ -0,0 +1,3 @@ +apiVersion: v2 +name: bootstrap +version: 0.0.0 \ No newline at end of file diff --git a/argocd/bootstrap/root/templates/app-set.yaml b/argocd/bootstrap/root/templates/app-set.yaml new file mode 100644 index 00000000..fdb35ce7 --- /dev/null +++ b/argocd/bootstrap/root/templates/app-set.yaml @@ -0,0 +1,37 @@ +{{- range $index, $app := .Values.apps }} +--- +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: {{ $app.name }} + namespace: {{ $.Release.Namespace }} + annotations: + argocd.argoproj.io/sync-wave: '{{ default 0 $app.syncWave }}' +spec: + destination: + namespace: {{ $app.namespace }} + server: https://kubernetes.default.svc + project: default + source: + path: {{ $app.path }} + repoURL: {{ $.Values.gitops.repo }} + targetRevision: {{ $.Values.gitops.revision }} +{{- if $app.helm }} + helm: +{{ toYaml $app.helm | indent 6 }} +{{- end }} + syncPolicy: + automated: + prune: true + selfHeal: true + retry: + limit: 10 + backoff: + duration: 1m + maxDuration: 16m + factor: 2 + syncOptions: + - CreateNamespace=true + - ServerSideApply=true + - ApplyOutOfSyncOnly=true +{{- end }} \ No newline at end of file diff --git 
a/argocd/bootstrap/root/templates/namespaces.yaml b/argocd/bootstrap/root/templates/namespaces.yaml new file mode 100644 index 00000000..e69029e8 --- /dev/null +++ b/argocd/bootstrap/root/templates/namespaces.yaml @@ -0,0 +1,39 @@ +# Create annotated namespaces +# Adding Linkerd annotations + +# linkerd namespace +kind: Namespace +apiVersion: v1 +metadata: + name: linkerd + annotations: + linkerd.io/inject: disabled + labels: + linkerd.io/is-control-plane: "true" + config.linkerd.io/admission-webhooks: disabled + linkerd.io/control-plane-ns: linkerd +--- + +# logging namespace +kind: Namespace +apiVersion: v1 +metadata: + name: logging + annotations: + linkerd.io/inject: enabled +--- +# monitoring +kind: Namespace +apiVersion: v1 +metadata: + name: monitoring + annotations: + linkerd.io/inject: enabled +--- +# tracing +kind: Namespace +apiVersion: v1 +metadata: + name: tracing + annotations: + linkerd.io/inject: enabled diff --git a/argocd/bootstrap/root/values.yaml b/argocd/bootstrap/root/values.yaml new file mode 100644 index 00000000..9aabc112 --- /dev/null +++ b/argocd/bootstrap/root/values.yaml @@ -0,0 +1,87 @@ +gitops: + repo: https://github.com/ricsanfre/pi-cluster + revision: master + +# List of application corresponding to different sync waves +apps: + # CDRs App + - name: crds + namespace: default + path: argocd/bootstrap/crds + syncWave: 0 + # External Secrets Operator + - name: external-secrets + namespace: external-secrets + path: argocd/system/external-secrets + syncWave: 1 + # Metal LB + - name: metallb + namespace: metallb + path: argocd/system/metallb + syncWave: 1 + # Cert-Manager and Trust Manager + - name: cert-manager + namespace: cert-manager + path: argocd/system/cert-manager + syncWave: 2 + # Linkerd service mesh + - name: linkerd + namespace: linkerd + path: argocd/system/linkerd + syncWave: 3 + # Traefik Ingress Controller + - name: traefik + namespace: traefik + path: argocd/system/traefik + syncWave: 4 + helm: + # skip installation 
traefik CDRs + skipCrds: true + # CSI External Snapshotter + - name: csi-external-snapshotter + namespace: kube-system + path: argocd/system/csi-external-snapshotter + syncWave: 4 + # Longhorn Distributed Block Storage + - name: longhorn + namespace: longhorn-system + path: argocd/system/longhorn-system + syncWave: 5 + # Velero Backup + - name: velero + namespace: velero + path: argocd/system/velero + syncWave: 5 + # Logging: Loki and EFK stack + - name: logging + namespace: logging + path: argocd/system/logging + syncWave: 6 + # Kube-prometheus-stack + - name: monitoring + namespace: monitoring + path: argocd/system/monitoring + syncWave: 6 + helm: + # skip installation kube-prometheus-stack CDRs + skipCrds: true + # Linkerd-viz + - name: linkerd-viz + namespace: linkerd-viz + path: argocd/system/linkerd-viz + syncWave: 6 + # Tracing: Tempo + - name: tracing + namespace: tracing + path: argocd/system/tracing + syncWave: 6 + # Linkerd-jaeger + - name: linkerd-jaeger + namespace: linkerd-jaeger + path: argocd/system/linkerd-jaeger + syncWave: 6 + # Argo CD App + - name: argocd + namespace: argocd + path: argocd/bootstrap/argocd + syncWave: 6 diff --git a/argocd/bootstrap/vault/vault-auth-serviceaccount.yaml b/argocd/bootstrap/vault/vault-auth-serviceaccount.yaml new file mode 100644 index 00000000..a94a92c5 --- /dev/null +++ b/argocd/bootstrap/vault/vault-auth-serviceaccount.yaml @@ -0,0 +1,51 @@ +# Create service account to be used by Vault kuberentes authentication +# +# Kubernetes Auth Doc: +# https://developer.hashicorp.com/vault/docs/auth/kubernetes +# External Vault config: +# https://developer.hashicorp.com/vault/tutorials/kubernetes/kubernetes-external-vault + +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: vault-auth + namespace: vault + + +# Vault kubernetes authentication +# auth method accesses the Kubernetes TokenReview API to validate the provided JWT is still valid. 
+# Service Accounts used in this auth method will need to have access to the TokenReview API. +# If Kubernetes is configured to use RBAC roles, the Service Account should be granted permissions to access this API. +# https://developer.hashicorp.com/vault/docs/auth/kubernetes#configuring-kubernetes + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: role-tokenreview-binding + namespace: vault +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: system:auth-delegator +subjects: + - kind: ServiceAccount + name: vault-auth + namespace: vault + +--- +# Long-lived token for vault-auth service account. +# From Kubernetes v1.24, secrets contained long-lived tokens associated to service accounts +# are not longer created. +# See how to create it in Kubernetes documentation: +# https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/#manually-create-a-long-lived-api-token-for-a-serviceaccount + +apiVersion: v1 +kind: Secret +type: kubernetes.io/service-account-token +metadata: + name: vault-auth-secret + namespace: vault + annotations: + kubernetes.io/service-account.name: vault-auth diff --git a/argocd/system/cert-manager/Chart.yaml b/argocd/system/cert-manager/Chart.yaml new file mode 100644 index 00000000..137b6feb --- /dev/null +++ b/argocd/system/cert-manager/Chart.yaml @@ -0,0 +1,14 @@ +apiVersion: v2 +name: certmanager +version: 0.0.0 +dependencies: + - name: cert-manager + version: v1.10.0 + repository: https://charts.jetstack.io + - name: trust-manager + version: v0.3.0 + repository: https://charts.jetstack.io + - name: cert-manager-webhook-ionos + version: 1.0.2 + repository: https://fabmade.github.io/cert-manager-webhook-ionos + condition: acme.dns01.ionos.enabled \ No newline at end of file diff --git a/roles/certmanager/templates/ca_issuer.yml.j2 b/argocd/system/cert-manager/templates/ca-issuer.yaml similarity index 73% rename from 
roles/certmanager/templates/ca_issuer.yml.j2 rename to argocd/system/cert-manager/templates/ca-issuer.yaml index b596eda0..38d64d1c 100644 --- a/roles/certmanager/templates/ca_issuer.yml.j2 +++ b/argocd/system/cert-manager/templates/ca-issuer.yaml @@ -1,9 +1,10 @@ +{{- if .Values.ca.enabled }} --- apiVersion: cert-manager.io/v1 kind: Certificate metadata: name: picluster-ca - namespace: {{ k3s_certmanager_namespace }} + namespace: {{ .Release.Namespace }} spec: isCA: true commonName: picluster-ca @@ -20,7 +21,8 @@ apiVersion: cert-manager.io/v1 kind: ClusterIssuer metadata: name: ca-issuer - namespace: {{ k3s_certmanager_namespace }} + namespace: {{ .Release.Namespace }} spec: ca: - secretName: root-secret \ No newline at end of file + secretName: root-secret +{{- end }} \ No newline at end of file diff --git a/argocd/system/cert-manager/templates/ionos-externalsecret.yaml b/argocd/system/cert-manager/templates/ionos-externalsecret.yaml new file mode 100644 index 00000000..c34e4a57 --- /dev/null +++ b/argocd/system/cert-manager/templates/ionos-externalsecret.yaml @@ -0,0 +1,26 @@ +{{- if .Values.acme.dns01.ionos.enabled }} +apiVersion: external-secrets.io/v1beta1 +kind: ExternalSecret +metadata: + name: ionos-externalsecret + namespace: {{ .Release.Namespace }} +spec: + secretStoreRef: + name: vault-backend + kind: ClusterSecretStore + target: + name: ionos-secret + data: + - secretKey: IONOS_PUBLIC_PREFIX + remoteRef: + key: certmanager/ionos + property: public_prefix + conversionStrategy: Default # ArgoCD sync issue + decodingStrategy: None # ArgoCD sync issue + - secretKey: IONOS_SECRET + remoteRef: + key: certmanager/ionos + property: secret + conversionStrategy: Default # ArgoCD sync issue + decodingStrategy: None # ArgoCD sync issue +{{- end }} \ No newline at end of file diff --git a/roles/certmanager/templates/ionos_issuer.yml.j2 b/argocd/system/cert-manager/templates/ionos-issuer.yaml similarity index 70% rename from 
roles/certmanager/templates/ionos_issuer.yml.j2 rename to argocd/system/cert-manager/templates/ionos-issuer.yaml index 8880c4ec..0ee468c1 100644 --- a/roles/certmanager/templates/ionos_issuer.yml.j2 +++ b/argocd/system/cert-manager/templates/ionos-issuer.yaml @@ -1,16 +1,17 @@ ---- - +{{- if .Values.acme.dns01.ionos.enabled }} +{{- if index .Values "cert-manager-webhook-ionos" -}} + {{- $webhook:= index .Values "cert-manager-webhook-ionos" }} apiVersion: cert-manager.io/v1 kind: ClusterIssuer metadata: name: letsencrypt-issuer - namespace: {{ k3s_certmanager_namespace }} + namespace: {{ .Release.Namespace }} spec: acme: # The ACME server URL server: https://acme-v02.api.letsencrypt.org/directory # Email address used for ACME registration - email: {{ acme_issuer_email }} + email: {{ .Values.acme.acme_issuer_email }} # Name of a secret used to store the ACME account private key privateKeySecretRef: name: letsencrypt-ionos-prod @@ -18,7 +19,7 @@ spec: solvers: - dns01: webhook: - groupName: {{ certmanager_ionos_webhook_groupName }} + groupName: {{ $webhook.groupName }} solverName: ionos config: apiUrl: https://api.hosting.ionos.com/dns/v1 @@ -28,3 +29,5 @@ spec: secretKeySecretRef: key: IONOS_SECRET name: ionos-secret +{{- end -}} +{{- end -}} \ No newline at end of file diff --git a/argocd/system/cert-manager/templates/selfsigned-issuer.yaml b/argocd/system/cert-manager/templates/selfsigned-issuer.yaml new file mode 100644 index 00000000..931e3b49 --- /dev/null +++ b/argocd/system/cert-manager/templates/selfsigned-issuer.yaml @@ -0,0 +1,11 @@ +{{- if .Values.ca.enabled }} +apiVersion: cert-manager.io/v1 +kind: ClusterIssuer +metadata: + name: self-signed-issuer + namespace: {{ .Release.Namespace }} + annotations: + argocd.argoproj.io/sync-options: SkipDryRunOnMissingResource=true +spec: + selfSigned: {} +{{- end }} \ No newline at end of file diff --git a/argocd/system/cert-manager/values.yaml b/argocd/system/cert-manager/values.yaml new file mode 100644 index 
00000000..57f70bef --- /dev/null +++ b/argocd/system/cert-manager/values.yaml @@ -0,0 +1,30 @@ +# Custom CA issuer configuration +# Enable private PKI using a custom CA +# It creates a ClusterIssuer resource `ca-issuer` +ca: + enabled: true + +# ACME (Letsencrypt) issuer configuration +# https://cert-manager.io/docs/configuration/acme/ +acme: + dns01: + # Enable ionos dns01 solver + # It creates ClusterIssuer resource `letsencrypt-issuer` + ionos: + enabled: true + acme_issuer_email: admin@ricsanfre.com + +######################## +# cert-manager subchart +######################## + +# CRDs installed as part of CRD application +cert-manager: + installCRDs: false + +##################################### +# cert-manager-webhook-ionos subchart +##################################### +cert-manager-webhook-ionos: + groupName: acme.ricsanfre.com + diff --git a/argocd/system/csi-external-snapshotter/crd/kustomization.yaml b/argocd/system/csi-external-snapshotter/crd/kustomization.yaml new file mode 100644 index 00000000..5ca90915 --- /dev/null +++ b/argocd/system/csi-external-snapshotter/crd/kustomization.yaml @@ -0,0 +1,7 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/release-4.0/client/config/crd/snapshot.storage.k8s.io_volumesnapshotclasses.yaml + - https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/release-4.0/client/config/crd/snapshot.storage.k8s.io_volumesnapshotcontents.yaml + - https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/release-4.0/client/config/crd/snapshot.storage.k8s.io_volumesnapshots.yaml diff --git a/argocd/system/csi-external-snapshotter/kustomization.yaml b/argocd/system/csi-external-snapshotter/kustomization.yaml new file mode 100644 index 00000000..0bbb1339 --- /dev/null +++ b/argocd/system/csi-external-snapshotter/kustomization.yaml @@ -0,0 +1,6 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 
+kind: Kustomization +resources: + - ./crd + - ./snapshot-controller diff --git a/argocd/system/csi-external-snapshotter/snapshot-controller/kustomization.yaml b/argocd/system/csi-external-snapshotter/snapshot-controller/kustomization.yaml new file mode 100644 index 00000000..86914e3a --- /dev/null +++ b/argocd/system/csi-external-snapshotter/snapshot-controller/kustomization.yaml @@ -0,0 +1,7 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: kube-system +resources: + - https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/release-4.0/deploy/kubernetes/snapshot-controller/rbac-snapshot-controller.yaml + - https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/release-4.0/deploy/kubernetes/snapshot-controller/setup-snapshot-controller.yaml diff --git a/argocd/system/external-secrets/Chart.yaml b/argocd/system/external-secrets/Chart.yaml new file mode 100644 index 00000000..8a8f18bd --- /dev/null +++ b/argocd/system/external-secrets/Chart.yaml @@ -0,0 +1,7 @@ +apiVersion: v2 +name: external-secrets +version: 0.0.0 +dependencies: + - name: external-secrets + version: 0.7.1 + repository: https://charts.external-secrets.io \ No newline at end of file diff --git a/argocd/system/external-secrets/templates/vault-secretstore.yaml b/argocd/system/external-secrets/templates/vault-secretstore.yaml new file mode 100644 index 00000000..40ac8211 --- /dev/null +++ b/argocd/system/external-secrets/templates/vault-secretstore.yaml @@ -0,0 +1,18 @@ +apiVersion: external-secrets.io/v1beta1 +kind: ClusterSecretStore +metadata: + name: vault-backend + namespace: {{ .Release.Namespace }} +spec: + provider: + vault: + server: {{ .Values.vault.vaultUrl }} +{{- if .Values.vault.caBundle }} + caBundle: {{ .Values.vault.caBundle }} +{{- end }} + path: {{ .Values.vault.kv.path }} + version: {{ .Values.vault.kv.version }} +{{- if .Values.vault.auth }} + auth: +{{ toYaml .Values.vault.auth | indent 8 }} +{{- end }} \ No newline 
at end of file diff --git a/argocd/system/external-secrets/values.yaml b/argocd/system/external-secrets/values.yaml new file mode 100644 index 00000000..19af8315 --- /dev/null +++ b/argocd/system/external-secrets/values.yaml @@ -0,0 +1,31 @@ + +# Vault secret store +vault: + # Vault server URL + vaultUrl: "https://vault.picluster.ricsanfre.com:8200" + + # Vault CA cert + # caBundle needed if vault TLS is signed using a custom CA. + # If Vault TLS is valid signed by Letsencrypt this is not needed? + # ca cert base64 encoded with '\n' characters removed + # =`cat vault-ca.pem | base64 | tr -d "\n"` + # caBundle: + + # KV path and version + kv: + path: secret + version: v2 + + # auth method used + auth: + kubernetes: + mountPath: "kubernetes" + role: "external-secrets" + +############################ +# external-secrets subchart +############################ + +# CRDs installed as part of CRD application +external-secrets: + installCRDs: false diff --git a/argocd/system/linkerd-jaeger/Chart.yaml b/argocd/system/linkerd-jaeger/Chart.yaml new file mode 100644 index 00000000..6ccf4678 --- /dev/null +++ b/argocd/system/linkerd-jaeger/Chart.yaml @@ -0,0 +1,7 @@ +apiVersion: v2 +name: linkerd-jaeger +version: 0.0.0 +dependencies: + - name: linkerd-jaeger + version: 30.4.4 + repository: https://helm.linkerd.io/stable diff --git a/argocd/system/linkerd-jaeger/values.yaml b/argocd/system/linkerd-jaeger/values.yaml new file mode 100644 index 00000000..ba5bf083 --- /dev/null +++ b/argocd/system/linkerd-jaeger/values.yaml @@ -0,0 +1,15 @@ +########################### +# linkerd-jaeger subchart +########################### + +linkerd-jaeger: + # Disable OTel collector installation + collector: + enabled: false + # Disable Jaeger installation + jaeger: + enabled: false + # Configure jaeger-injector to use tempo embedded OTel collector + webhook: + collectorSvcAddr: tracing-tempo-distributor.tracing:55678 + collectorSvcAccount: tracing-tempo diff --git 
a/argocd/system/linkerd-viz/Chart.yaml b/argocd/system/linkerd-viz/Chart.yaml new file mode 100644 index 00000000..e3d3ea56 --- /dev/null +++ b/argocd/system/linkerd-viz/Chart.yaml @@ -0,0 +1,7 @@ +apiVersion: v2 +name: linkerd-viz +version: 0.0.0 +dependencies: + - name: linkerd-viz + version: 30.3.4 + repository: https://helm.linkerd.io/stable diff --git a/roles/prometheus/dashboards/linkerd/linkerd-authority.json b/argocd/system/linkerd-viz/dashboards/linkerd-authority.json similarity index 98% rename from roles/prometheus/dashboards/linkerd/linkerd-authority.json rename to argocd/system/linkerd-viz/dashboards/linkerd-authority.json index 9d719f11..d4e77796 100644 --- a/roles/prometheus/dashboards/linkerd/linkerd-authority.json +++ b/argocd/system/linkerd-viz/dashboards/linkerd-authority.json @@ -1,56 +1,56 @@ { + "__elements": [], "__inputs": [ { - "name": "DS_PROMETHEUS", - "label": "prometheus", "description": "", - "type": "datasource", + "label": "prometheus", + "name": "DS_PROMETHEUS", "pluginId": "prometheus", - "pluginName": "Prometheus" + "pluginName": "Prometheus", + "type": "datasource" } ], - "__elements": [], "__requires": [ { - "type": "panel", "id": "gauge", "name": "Gauge", + "type": "panel", "version": "" }, { - "type": "grafana", "id": "grafana", "name": "Grafana", + "type": "grafana", "version": "8.3.3" }, { - "type": "panel", "id": "graph", "name": "Graph (old)", + "type": "panel", "version": "" }, { - "type": "panel", "id": "heatmap", "name": "Heatmap", + "type": "panel", "version": "" }, { - "type": "datasource", "id": "prometheus", "name": "Prometheus", + "type": "datasource", "version": "1.0.0" }, { - "type": "panel", "id": "stat", "name": "Stat", + "type": "panel", "version": "" }, { - "type": "panel", "id": "text", "name": "Text", + "type": "panel", "version": "" } ], @@ -489,7 +489,7 @@ "expr": "sum(irate(request_total{namespace=\"$namespace\", authority=\"$authority\", direction=\"inbound\", tls=\"true\"}[30s])) by (authority)", 
"format": "time_series", "intervalFactor": 1, - "legendFormat": "🔒au/{{authority}}", + "legendFormat": "\ud83d\udd12au/{{authority}}", "refId": "A" }, { @@ -788,7 +788,7 @@ "expr": "sum(irate(request_total{namespace=\"$namespace\", authority=\"$authority\", direction=\"outbound\", tls=\"true\"}[30s])) by (deployment)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "🔒deploy/{{deployment}}", + "legendFormat": "\ud83d\udd12deploy/{{deployment}}", "refId": "A" }, { @@ -1070,7 +1070,7 @@ "expr": "sum(irate(request_total{namespace=\"$namespace\", authority=\"$authority\", direction=\"outbound\", tls=\"true\"}[30s])) by (pod)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "🔒po/{{pod}}", + "legendFormat": "\ud83d\udd12po/{{pod}}", "refId": "A" }, { @@ -1234,6 +1234,16 @@ ], "templating": { "list": [ + { + "hide": 0, + "label": "datasource", + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, { "allValue": ".*", "current": {}, @@ -1313,4 +1323,4 @@ "title": "Linkerd Authority", "uid": "linkerd-authority", "version": 1 -} +} \ No newline at end of file diff --git a/argocd/system/linkerd-viz/dashboards/linkerd-cronjob.json b/argocd/system/linkerd-viz/dashboards/linkerd-cronjob.json new file mode 100644 index 00000000..bde974cf --- /dev/null +++ b/argocd/system/linkerd-viz/dashboards/linkerd-cronjob.json @@ -0,0 +1,2364 @@ +{ + "__elements": [], + "__inputs": [ + { + "description": "", + "label": "prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], + "__requires": [ + { + "id": "gauge", + "name": "Gauge", + "type": "panel", + "version": "" + }, + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.3.3" + }, + { + "id": "graph", + "name": "Graph (old)", + "type": "panel", + "version": "" + }, + { + "id": "heatmap", + "name": "Heatmap", + "type": "panel", + 
"version": "" + }, + { + "id": "prometheus", + "name": "Prometheus", + "type": "datasource", + "version": "1.0.0" + }, + { + "id": "stat", + "name": "Stat", + "type": "panel", + "version": "" + }, + { + "id": "text", + "name": "Text", + "type": "panel", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "id": null, + "iteration": 1531763681685, + "links": [], + "panels": [ + { + "content": "
\n  \n cj/$cronjob\n
", + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 20, + "links": [], + "mode": "html", + "options": {}, + "title": "", + "transparent": true, + "type": "text" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#d44a3a", + "rgba(237, 129, 40, 0.89)", + "#299c46" + ], + "datasource": "${DS_PROMETHEUS}", + "decimals": null, + "format": "percentunit", + "gauge": { + "maxValue": 1, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 8, + "x": 0, + "y": 2 + }, + "id": 5, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(response_total{classification=\"success\", namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"inbound\"}[30s])) / sum(irate(response_total{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"inbound\"}[30s]))", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "0.9,.99", + "title": "SUCCESS RATE", + "transparent": true, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": 
"${DS_PROMETHEUS}", + "decimals": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 8, + "x": 8, + "y": 2 + }, + "id": 4, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": " RPS", + "postfixFontSize": "100%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(request_total{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"inbound\"}[30s]))", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "REQUEST RATE", + "transparent": true, + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "decimals": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 16, + "y": 2 + }, + "id": 11, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + 
"nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "100%", + "prefix": "", + "prefixFontSize": "100%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(count(request_total{dst_namespace=\"$namespace\", cronjob!=\"\", dst_cronjob!=\"\", dst_cronjob=\"$cronjob\", direction=\"outbound\"}) by (namespace, cronjob))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "INBOUND CRONJOBS", + "transparent": true, + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 20, + "y": 2 + }, + "id": 15, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "100%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(count(request_total{namespace=\"$namespace\", cronjob=\"$cronjob\", 
direction=\"outbound\"}) by (namespace, dst_cronjob))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "OUTBOUND CRONJOBS", + "transparent": true, + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "content": "
\n INBOUND TRAFFIC\n
", + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 17, + "links": [], + "mode": "html", + "options": {}, + "title": "", + "transparent": true, + "type": "text" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 8 + }, + "id": 67, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(response_total{classification=\"success\", namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"inbound\"}[30s])) by (cronjob) / sum(irate(response_total{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"inbound\"}[30s])) by (cronjob)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "cj/{{cronjob}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "SUCCESS RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": "", + "logBase": 1, + "max": "1", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + 
"y": 8 + }, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(request_total{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"inbound\", tls=\"true\"}[30s])) by (cronjob)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "\ud83d\udd12cj/{{cronjob}}", + "refId": "A" + }, + { + "expr": "sum(irate(request_total{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"inbound\", tls!=\"true\"}[30s])) by (cronjob)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "cj/{{cronjob}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "REQUEST RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "rps", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 8 + }, + "id": 68, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": 
"null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.5, sum(irate(response_latency_ms_bucket{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"inbound\"}[30s])) by (le, cronjob))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "p50 cj/{{cronjob}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.95, sum(irate(response_latency_ms_bucket{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"inbound\"}[30s])) by (le, cronjob))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "p95 cj/{{cronjob}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.99, sum(irate(response_latency_ms_bucket{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"inbound\"}[30s])) by (le, cronjob))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "p99 cj/{{cronjob}}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "LATENCY", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "ms", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 15 + }, + "id": 148, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 16 
+ }, + "id": 167, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "tcp_close_total{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"inbound\",errno!=\"\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{peer}} {{errno}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "TCP CONNECTION FAILURES", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "none", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 16 + }, + "id": 168, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "tcp_open_connections{namespace=\"$namespace\", 
cronjob=\"$cronjob\", direction=\"inbound\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{peer}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "TCP CONNECTIONS OPEN", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateOranges", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "timeseries", + "datasource": "${DS_PROMETHEUS}", + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 16 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 169, + "legend": { + "show": false + }, + "links": [], + "options": {}, + "reverseYBuckets": false, + "targets": [ + { + "expr": "tcp_connection_duration_ms_bucket{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"inbound\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "title": "TCP CONNECTION DURATION", + "tooltip": { + "show": true, + "showHistogram": true + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": null, + "format": "dtdurationms", + "logBase": 1, + "max": null, + "min": "0", + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + } + ], + "title": "Inbound TCP Metrics", + "type": "row" + }, + { + "collapsed": 
false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 152, + "panels": [], + "title": "", + "type": "row" + }, + { + "content": "
\n INBOUND CRONJOBS\n
", + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 17 + }, + "id": 76, + "links": [], + "mode": "html", + "options": {}, + "title": "", + "transparent": true, + "type": "text" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 59, + "panels": [ + { + "content": "
\n  \n cj/$inbound\n
", + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 22.2 + }, + "id": 39, + "links": [], + "mode": "html", + "options": {}, + "title": "", + "transparent": true, + "type": "text" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 24.2 + }, + "id": 36, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(response_total{classification=\"success\", cronjob!=\"\", cronjob=\"$inbound\", dst_namespace=\"$namespace\", dst_cronjob=\"$cronjob\", direction=\"outbound\"}[30s])) by (cronjob, pod) / sum(irate(response_total{cronjob!=\"\", cronjob=\"$inbound\", dst_namespace=\"$namespace\", dst_cronjob=\"$cronjob\", direction=\"outbound\"}[30s])) by (cronjob, pod)", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "legendFormat": "po/{{pod}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "SUCCESS RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "1", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + 
"dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 24.2 + }, + "id": 22, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(request_total{cronjob!=\"\", cronjob=\"$inbound\", dst_namespace=\"$namespace\", dst_cronjob=\"$cronjob\", direction=\"outbound\", tls=\"true\"}[30s])) by (cronjob, pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "\ud83d\udd12po/{{pod}}", + "refId": "A" + }, + { + "expr": "sum(irate(request_total{cronjob!=\"\", cronjob=\"$inbound\", dst_namespace=\"$namespace\", dst_cronjob=\"$cronjob\", direction=\"outbound\", tls!=\"true\"}[30s])) by (cronjob, pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "po/{{pod}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "REQUEST RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "rps", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 
24.2 + }, + "id": 29, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.5, sum(rate(response_latency_ms_bucket{cronjob!=\"\", cronjob=\"$inbound\", dst_namespace=\"$namespace\", dst_cronjob=\"$cronjob\", direction=\"outbound\"}[30s])) by (le, cronjob))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "P50 cj/{{cronjob}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.95, sum(rate(response_latency_ms_bucket{cronjob!=\"\", cronjob=\"$inbound\", dst_namespace=\"$namespace\", dst_cronjob=\"$cronjob\", direction=\"outbound\"}[30s])) by (le, cronjob))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "P95 cj/{{cronjob}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.99, sum(rate(response_latency_ms_bucket{cronjob!=\"\", cronjob=\"$inbound\", dst_namespace=\"$namespace\", dst_cronjob=\"$cronjob\", direction=\"outbound\"}[30s])) by (le, cronjob))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "P99 cj/{{cronjob}}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "LATENCY", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": 
false, + "alignLevel": null + } + } + ], + "repeat": "inbound", + "title": "cj/$inbound", + "type": "row" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 34, + "panels": [], + "repeat": null, + "title": "", + "type": "row" + }, + { + "content": "
\n OUTBOUND TRAFFIC\n
", + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 32, + "links": [], + "mode": "html", + "options": {}, + "title": "", + "transparent": true, + "type": "text" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 23 + }, + "id": 77, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(response_total{classification=\"success\", namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"outbound\"}[30s])) by (dst_cronjob) / sum(irate(response_total{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"outbound\"}[30s])) by (dst_cronjob)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "cj/{{dst_cronjob}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "SUCCESS RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": "", + "logBase": 1, + "max": "1", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 0, + "gridPos": { + "h": 7, + 
"w": 8, + "x": 8, + "y": 23 + }, + "id": 78, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(request_total{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_cronjob)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "\ud83d\udd12cj/{{dst_cronjob}}", + "refId": "A" + }, + { + "expr": "sum(irate(request_total{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"outbound\", tls!=\"true\"}[30s])) by (dst_cronjob)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "cj/{{dst_cronjob}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "REQUEST RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "rps", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 23 + }, + "id": 79, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + 
"nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.95, sum(rate(response_latency_ms_bucket{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"outbound\"}[30s])) by (le, dst_cronjob))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "P95 cj/{{dst_cronjob}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "P95 LATENCY", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 30 + }, + "id": 154, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 29 + }, + "id": 157, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "tcp_close_total{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"outbound\",errno!=\"\"}", + "format": 
"time_series", + "intervalFactor": 1, + "legendFormat": "{{peer}} {{errno}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "TCP CONNECTION FAILURES", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "none", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 29 + }, + "id": 166, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "tcp_open_connections{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"outbound\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{peer}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "TCP CONNECTIONS OPEN", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": 
true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateOranges", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "timeseries", + "datasource": "${DS_PROMETHEUS}", + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 29 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 160, + "legend": { + "show": false + }, + "links": [], + "options": {}, + "reverseYBuckets": false, + "targets": [ + { + "expr": "tcp_connection_duration_ms_bucket{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"outbound\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "title": "TCP CONNECTION DURATION", + "tooltip": { + "show": true, + "showHistogram": true + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": null, + "format": "dtdurationms", + "logBase": 1, + "max": null, + "min": "0", + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + } + ], + "title": "Outbound TCP Metrics", + "type": "row" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 31 + }, + "id": 156, + "panels": [], + "title": "", + "type": "row" + }, + { + "content": "
\n OUTBOUND CRONJOBS\n
", + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 80, + "links": [], + "mode": "html", + "options": {}, + "title": "", + "transparent": true, + "type": "text" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 34 + }, + "id": 27, + "panels": [ + { + "content": "
\n  \n cj/$outbound\n
", + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 36 + }, + "id": 40, + "links": [], + "mode": "html", + "options": {}, + "title": "", + "transparent": true, + "type": "text" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 38 + }, + "id": 28, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(response_total{classification=\"success\", namespace=\"$namespace\", cronjob=\"$cronjob\", dst_cronjob=\"$outbound\", direction=\"outbound\"}[30s])) by (dst_cronjob) / sum(irate(response_total{namespace=\"$namespace\", cronjob=\"$cronjob\", dst_cronjob=\"$outbound\", direction=\"outbound\"}[30s])) by (dst_cronjob)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "cj/{{dst_cronjob}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "SUCCESS RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "1", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": 
"${DS_PROMETHEUS}", + "fill": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 38 + }, + "id": 35, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(request_total{namespace=\"$namespace\", cronjob=\"$cronjob\", dst_cronjob=\"$outbound\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_cronjob)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "\ud83d\udd12cj/{{dst_cronjob}}", + "refId": "A" + }, + { + "expr": "sum(irate(request_total{namespace=\"$namespace\", cronjob=\"$cronjob\", dst_cronjob=\"$outbound\", direction=\"outbound\", tls!=\"true\"}[30s])) by (dst_cronjob)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "cj/{{dst_cronjob}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "REQUEST RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "rps", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 38 + }, + "id": 41, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": 
false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.5, sum(rate(response_latency_ms_bucket{namespace=\"$namespace\", cronjob=\"$cronjob\", dst_cronjob=\"$outbound\", direction=\"outbound\"}[30s])) by (le, dst_cronjob))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "P50 cj/{{dst_cronjob}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.95, sum(rate(response_latency_ms_bucket{namespace=\"$namespace\", cronjob=\"$cronjob\", dst_cronjob=\"$outbound\", direction=\"outbound\"}[30s])) by (le, dst_cronjob))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "P95 cj/{{dst_cronjob}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.99, sum(rate(response_latency_ms_bucket{namespace=\"$namespace\", cronjob=\"$cronjob\", dst_cronjob=\"$outbound\", direction=\"outbound\"}[30s])) by (le, dst_cronjob))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "P99 cj/{{dst_cronjob}}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "LATENCY", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "repeat": "outbound", + "title": "cj/$outbound", + "type": "row" + }, + { + "content": "
\n
\n \n
\n
\n
\n
\n
\n\n
", + "gridPos": { + "h": 3, + "w": 24, + "x": 0, + "y": 35 + }, + "height": "1px", + "id": 171, + "links": [], + "mode": "html", + "options": {}, + "title": "", + "transparent": true, + "type": "text" + } + ], + "refresh": "1m", + "schemaVersion": 18, + "style": "dark", + "tags": [ + "linkerd" + ], + "templating": { + "list": [ + { + "hide": 0, + "label": "datasource", + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": "label_values(process_start_time_seconds{cronjob!=\"\"}, namespace)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".*", + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "", + "hide": 0, + "includeAll": false, + "label": "Deployment", + "multi": false, + "name": "cronjob", + "options": [], + "query": "label_values(process_start_time_seconds{namespace=\"$namespace\"}, cronjob)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".*", + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "", + "hide": 2, + "includeAll": true, + "label": null, + "multi": false, + "name": "inbound", + "options": [], + "query": "label_values(request_total{dst_namespace=\"$namespace\", dst_cronjob=\"$cronjob\"}, cronjob)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".*", + "current": {}, + "datasource": "${DS_PROMETHEUS}", + 
"definition": "", + "hide": 2, + "includeAll": true, + "label": null, + "multi": false, + "name": "outbound", + "options": [], + "query": "label_values(request_total{namespace=\"$namespace\", cronjob=\"$cronjob\"}, dst_cronjob)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Linkerd CronJob", + "uid": "linkerd-cronjob", + "version": 1 +} \ No newline at end of file diff --git a/roles/prometheus/dashboards/linkerd/linkerd-daemonset.json b/argocd/system/linkerd-viz/dashboards/linkerd-daemonset.json similarity index 99% rename from roles/prometheus/dashboards/linkerd/linkerd-daemonset.json rename to argocd/system/linkerd-viz/dashboards/linkerd-daemonset.json index 3f4eab3c..f1685d90 100644 --- a/roles/prometheus/dashboards/linkerd/linkerd-daemonset.json +++ b/argocd/system/linkerd-viz/dashboards/linkerd-daemonset.json @@ -1,56 +1,56 @@ { + "__elements": [], "__inputs": [ { - "name": "DS_PROMETHEUS", - "label": "prometheus", "description": "", - "type": "datasource", + "label": "prometheus", + "name": "DS_PROMETHEUS", "pluginId": "prometheus", - "pluginName": "Prometheus" + "pluginName": "Prometheus", + "type": "datasource" } ], - "__elements": [], "__requires": [ { - "type": "panel", "id": "gauge", "name": "Gauge", + "type": "panel", "version": "" }, { - "type": "grafana", "id": "grafana", "name": "Grafana", + "type": "grafana", "version": "8.3.3" }, { - "type": "panel", "id": "graph", "name": "Graph (old)", + "type": "panel", "version": "" }, { - "type": "panel", "id": "heatmap", "name": "Heatmap", + "type": "panel", "version": 
"" }, { - "type": "datasource", "id": "prometheus", "name": "Prometheus", + "type": "datasource", "version": "1.0.0" }, { - "type": "panel", "id": "stat", "name": "Stat", + "type": "panel", "version": "" }, { - "type": "panel", "id": "text", "name": "Text", + "type": "panel", "version": "" } ], @@ -571,7 +571,7 @@ "expr": "sum(irate(request_total{namespace=\"$namespace\", daemonset=\"$daemonset\", direction=\"inbound\", tls=\"true\"}[30s])) by (daemonset)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "🔒ds/{{daemonset}}", + "legendFormat": "\ud83d\udd12ds/{{daemonset}}", "refId": "A" }, { @@ -1159,7 +1159,7 @@ "expr": "sum(irate(request_total{daemonset!=\"\", daemonset=\"$inbound\", dst_namespace=\"$namespace\", dst_daemonset=\"$daemonset\", direction=\"outbound\", tls=\"true\"}[30s])) by (daemonset, pod)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "🔒po/{{pod}}", + "legendFormat": "\ud83d\udd12po/{{pod}}", "refId": "A" }, { @@ -1475,7 +1475,7 @@ "expr": "sum(irate(request_total{namespace=\"$namespace\", daemonset=\"$daemonset\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_daemonset)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "🔒ds/{{dst_daemonset}}", + "legendFormat": "\ud83d\udd12ds/{{dst_daemonset}}", "refId": "A" }, { @@ -2045,7 +2045,7 @@ "expr": "sum(irate(request_total{namespace=\"$namespace\", daemonset=\"$daemonset\", dst_daemonset=\"$outbound\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_daemonset)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "🔒ds/{{dst_daemonset}}", + "legendFormat": "\ud83d\udd12ds/{{dst_daemonset}}", "refId": "A" }, { @@ -2228,6 +2228,16 @@ ], "templating": { "list": [ + { + "hide": 0, + "label": "datasource", + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, { "allValue": ".*", "current": {}, @@ -2351,4 +2361,4 @@ "title": "Linkerd DaemonSet", "uid": 
"linkerd-daemonset", "version": 1 -} +} \ No newline at end of file diff --git a/roles/prometheus/dashboards/linkerd/linkerd-deployment.json b/argocd/system/linkerd-viz/dashboards/linkerd-deployment.json similarity index 99% rename from roles/prometheus/dashboards/linkerd/linkerd-deployment.json rename to argocd/system/linkerd-viz/dashboards/linkerd-deployment.json index 4d8cb63c..aa940b86 100644 --- a/roles/prometheus/dashboards/linkerd/linkerd-deployment.json +++ b/argocd/system/linkerd-viz/dashboards/linkerd-deployment.json @@ -1,56 +1,56 @@ { + "__elements": [], "__inputs": [ { - "name": "DS_PROMETHEUS", - "label": "prometheus", "description": "", - "type": "datasource", + "label": "prometheus", + "name": "DS_PROMETHEUS", "pluginId": "prometheus", - "pluginName": "Prometheus" + "pluginName": "Prometheus", + "type": "datasource" } ], - "__elements": [], "__requires": [ { - "type": "panel", "id": "gauge", "name": "Gauge", + "type": "panel", "version": "" }, { - "type": "grafana", "id": "grafana", "name": "Grafana", + "type": "grafana", "version": "8.3.3" }, { - "type": "panel", "id": "graph", "name": "Graph (old)", + "type": "panel", "version": "" }, { - "type": "panel", "id": "heatmap", "name": "Heatmap", + "type": "panel", "version": "" }, { - "type": "datasource", "id": "prometheus", "name": "Prometheus", + "type": "datasource", "version": "1.0.0" }, { - "type": "panel", "id": "stat", "name": "Stat", + "type": "panel", "version": "" }, { - "type": "panel", "id": "text", "name": "Text", + "type": "panel", "version": "" } ], @@ -571,7 +571,7 @@ "expr": "sum(irate(request_total{namespace=\"$namespace\", deployment=\"$deployment\", direction=\"inbound\", tls=\"true\"}[30s])) by (deployment)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "🔒deploy/{{deployment}}", + "legendFormat": "\ud83d\udd12deploy/{{deployment}}", "refId": "A" }, { @@ -1159,7 +1159,7 @@ "expr": "sum(irate(request_total{deployment!=\"\", deployment=\"$inbound\", 
dst_namespace=\"$namespace\", dst_deployment=\"$deployment\", direction=\"outbound\", tls=\"true\"}[30s])) by (deployment, pod)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "🔒po/{{pod}}", + "legendFormat": "\ud83d\udd12po/{{pod}}", "refId": "A" }, { @@ -1475,7 +1475,7 @@ "expr": "sum(irate(request_total{namespace=\"$namespace\", deployment=\"$deployment\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_deployment)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "🔒deploy/{{dst_deployment}}", + "legendFormat": "\ud83d\udd12deploy/{{dst_deployment}}", "refId": "A" }, { @@ -2045,7 +2045,7 @@ "expr": "sum(irate(request_total{namespace=\"$namespace\", deployment=\"$deployment\", dst_deployment=\"$outbound\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_deployment)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "🔒deploy/{{dst_deployment}}", + "legendFormat": "\ud83d\udd12deploy/{{dst_deployment}}", "refId": "A" }, { @@ -2228,6 +2228,16 @@ ], "templating": { "list": [ + { + "hide": 0, + "label": "datasource", + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, { "allValue": ".*", "current": {}, @@ -2351,4 +2361,4 @@ "title": "Linkerd Deployment", "uid": "linkerd-deployment", "version": 1 -} +} \ No newline at end of file diff --git a/roles/prometheus/dashboards/linkerd/linkerd-health.json b/argocd/system/linkerd-viz/dashboards/linkerd-health.json similarity index 99% rename from roles/prometheus/dashboards/linkerd/linkerd-health.json rename to argocd/system/linkerd-viz/dashboards/linkerd-health.json index 1d520de7..f2bae782 100644 --- a/roles/prometheus/dashboards/linkerd/linkerd-health.json +++ b/argocd/system/linkerd-viz/dashboards/linkerd-health.json @@ -1,56 +1,56 @@ { + "__elements": [], "__inputs": [ { - "name": "DS_PROMETHEUS", - "label": "prometheus", "description": "", - "type": "datasource", + "label": "prometheus", + 
"name": "DS_PROMETHEUS", "pluginId": "prometheus", - "pluginName": "Prometheus" + "pluginName": "Prometheus", + "type": "datasource" } ], - "__elements": [], "__requires": [ { - "type": "panel", "id": "gauge", "name": "Gauge", + "type": "panel", "version": "" }, { - "type": "grafana", "id": "grafana", "name": "Grafana", + "type": "grafana", "version": "8.3.3" }, { - "type": "panel", "id": "graph", "name": "Graph (old)", + "type": "panel", "version": "" }, { - "type": "panel", "id": "heatmap", "name": "Heatmap", + "type": "panel", "version": "" }, { - "type": "datasource", "id": "prometheus", "name": "Prometheus", + "type": "datasource", "version": "1.0.0" }, { - "type": "panel", "id": "stat", "name": "Stat", + "type": "panel", "version": "" }, { - "type": "panel", "id": "text", "name": "Text", + "type": "panel", "version": "" } ], @@ -2287,6 +2287,16 @@ ], "templating": { "list": [ + { + "hide": 0, + "label": "datasource", + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, { "allValue": ".*", "current": { @@ -2416,4 +2426,4 @@ "title": "Linkerd Health", "uid": "linkerd-health", "version": 1 -} +} \ No newline at end of file diff --git a/roles/prometheus/dashboards/linkerd/linkerd-job.json b/argocd/system/linkerd-viz/dashboards/linkerd-job.json similarity index 99% rename from roles/prometheus/dashboards/linkerd/linkerd-job.json rename to argocd/system/linkerd-viz/dashboards/linkerd-job.json index 375e39ba..833a2174 100644 --- a/roles/prometheus/dashboards/linkerd/linkerd-job.json +++ b/argocd/system/linkerd-viz/dashboards/linkerd-job.json @@ -1,56 +1,56 @@ { + "__elements": [], "__inputs": [ { - "name": "DS_PROMETHEUS", - "label": "prometheus", "description": "", - "type": "datasource", + "label": "prometheus", + "name": "DS_PROMETHEUS", "pluginId": "prometheus", - "pluginName": "Prometheus" + "pluginName": "Prometheus", + "type": "datasource" } ], - "__elements": [], "__requires": [ { - 
"type": "panel", "id": "gauge", "name": "Gauge", + "type": "panel", "version": "" }, { - "type": "grafana", "id": "grafana", "name": "Grafana", + "type": "grafana", "version": "8.3.3" }, { - "type": "panel", "id": "graph", "name": "Graph (old)", + "type": "panel", "version": "" }, { - "type": "panel", "id": "heatmap", "name": "Heatmap", + "type": "panel", "version": "" }, { - "type": "datasource", "id": "prometheus", "name": "Prometheus", + "type": "datasource", "version": "1.0.0" }, { - "type": "panel", "id": "stat", "name": "Stat", + "type": "panel", "version": "" }, { - "type": "panel", "id": "text", "name": "Text", + "type": "panel", "version": "" } ], @@ -571,7 +571,7 @@ "expr": "sum(irate(request_total{namespace=\"$namespace\", k8s_job=\"$job\", direction=\"inbound\", tls=\"true\"}[30s])) by (k8s_job)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "🔒job/{{k8s_job}}", + "legendFormat": "\ud83d\udd12job/{{k8s_job}}", "refId": "A" }, { @@ -1159,7 +1159,7 @@ "expr": "sum(irate(request_total{k8s_job!=\"\", k8s_job=\"$inbound\", dst_namespace=\"$namespace\", dst_k8s_job=\"$job\", direction=\"outbound\", tls=\"true\"}[30s])) by (k8s_job, pod)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "🔒po/{{pod}}", + "legendFormat": "\ud83d\udd12po/{{pod}}", "refId": "A" }, { @@ -1475,7 +1475,7 @@ "expr": "sum(irate(request_total{namespace=\"$namespace\", k8s_job=\"$job\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_k8s_job)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "🔒job/{{dst_k8s_job}}", + "legendFormat": "\ud83d\udd12job/{{dst_k8s_job}}", "refId": "A" }, { @@ -2045,7 +2045,7 @@ "expr": "sum(irate(request_total{namespace=\"$namespace\", k8s_job=\"$job\", dst_k8s_job=\"$outbound\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_k8s_job)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "🔒job/{{dst_k8s_job}}", + "legendFormat": "\ud83d\udd12job/{{dst_k8s_job}}", "refId": "A" }, { @@ 
-2228,6 +2228,16 @@ ], "templating": { "list": [ + { + "hide": 0, + "label": "datasource", + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, { "allValue": ".*", "current": {}, @@ -2351,4 +2361,4 @@ "title": "Linkerd Job", "uid": "linkerd-job", "version": 1 -} +} \ No newline at end of file diff --git a/roles/prometheus/dashboards/linkerd/linkerd-kubernetes.json b/argocd/system/linkerd-viz/dashboards/linkerd-kubernetes.json similarity index 99% rename from roles/prometheus/dashboards/linkerd/linkerd-kubernetes.json rename to argocd/system/linkerd-viz/dashboards/linkerd-kubernetes.json index c6f6b317..d141aaba 100644 --- a/roles/prometheus/dashboards/linkerd/linkerd-kubernetes.json +++ b/argocd/system/linkerd-viz/dashboards/linkerd-kubernetes.json @@ -1,56 +1,56 @@ { + "__elements": [], "__inputs": [ { - "name": "DS_PROMETHEUS", - "label": "prometheus", "description": "", - "type": "datasource", + "label": "prometheus", + "name": "DS_PROMETHEUS", "pluginId": "prometheus", - "pluginName": "Prometheus" + "pluginName": "Prometheus", + "type": "datasource" } ], - "__elements": [], "__requires": [ { - "type": "panel", "id": "gauge", "name": "Gauge", + "type": "panel", "version": "" }, { - "type": "grafana", "id": "grafana", "name": "Grafana", + "type": "grafana", "version": "8.3.3" }, { - "type": "panel", "id": "graph", "name": "Graph (old)", + "type": "panel", "version": "" }, { - "type": "panel", "id": "heatmap", "name": "Heatmap", + "type": "panel", "version": "" }, { - "type": "datasource", "id": "prometheus", "name": "Prometheus", + "type": "datasource", "version": "1.0.0" }, { - "type": "panel", "id": "stat", "name": "Stat", + "type": "panel", "version": "" }, { - "type": "panel", "id": "text", "name": "Text", + "type": "panel", "version": "" } ], @@ -2239,6 +2239,16 @@ ], "templating": { "list": [ + { + "hide": 0, + "label": "datasource", + "name": "DS_PROMETHEUS", + "options": [], + 
"query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, { "allValue": ".*", "current": { @@ -2299,4 +2309,4 @@ "title": "Kubernetes cluster monitoring (via Prometheus)", "uid": "k8s", "version": 1 -} +} \ No newline at end of file diff --git a/argocd/system/linkerd-viz/dashboards/linkerd-multicluster.json b/argocd/system/linkerd-viz/dashboards/linkerd-multicluster.json new file mode 100644 index 00000000..62762627 --- /dev/null +++ b/argocd/system/linkerd-viz/dashboards/linkerd-multicluster.json @@ -0,0 +1,1008 @@ +{ + "__elements": [], + "__inputs": [ + { + "description": "", + "label": "prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], + "__requires": [ + { + "id": "gauge", + "name": "Gauge", + "type": "panel", + "version": "" + }, + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.3.3" + }, + { + "id": "graph", + "name": "Graph (old)", + "type": "panel", + "version": "" + }, + { + "id": "heatmap", + "name": "Heatmap", + "type": "panel", + "version": "" + }, + { + "id": "prometheus", + "name": "Prometheus", + "type": "datasource", + "version": "1.0.0" + }, + { + "id": "stat", + "name": "Stat", + "type": "panel", + "version": "" + }, + { + "id": "text", + "name": "Text", + "type": "panel", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "id": null, + "iteration": 1531434867463, + "links": [], + "panels": [ + { + "content": "
\n  \n Cluster: $cluster\n
", + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 20, + "links": [], + "mode": "html", + "options": {}, + "title": "", + "transparent": true, + "type": "text" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#d44a3a", + "rgba(237, 129, 40, 0.89)", + "#299c46" + ], + "datasource": "${DS_PROMETHEUS}", + "decimals": null, + "format": "percentunit", + "gauge": { + "maxValue": 1, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 8, + "x": 0, + "y": 2 + }, + "id": 5, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(response_total{classification=\"success\", dst_target_cluster=\"$cluster\", dst_target_cluster!=\"\", direction=\"outbound\"}[30s])) / sum(irate(response_total{dst_target_cluster=\"$cluster\", dst_target_cluster!=\"\", direction=\"outbound\"}[30s]))", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "0.9,.99", + "title": "SUCCESS RATE", + "transparent": true, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + 
], + "datasource": "${DS_PROMETHEUS}", + "decimals": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 8, + "x": 8, + "y": 2 + }, + "id": 4, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": " RPS", + "postfixFontSize": "100%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(request_total{dst_target_cluster=\"$cluster\", dst_target_cluster!=\"\", direction=\"outbound\"}[30s]))", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "REQUEST RATE", + "transparent": true, + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "decimals": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 8, + "x": 16, + "y": 2 + }, + "id": 81, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + 
"nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": " ms", + "postfixFontSize": "100%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "histogram_quantile(0.95, sum(irate(response_latency_ms_bucket{dst_target_cluster=\"$cluster\", dst_target_cluster!=\"\", direction=\"outbound\"}[30s])) by (le))", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "P95 LATENCY", + "transparent": true, + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "content": "
\n TOP-LINE TRAFFIC\n
", + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 17, + "links": [], + "mode": "html", + "options": {}, + "title": "", + "transparent": true, + "type": "text" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 8 + }, + "id": 67, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(response_total{classification=\"success\",dst_target_cluster=\"$cluster\", dst_target_cluster!=\"\", direction=\"outbound\"}[30s])) / sum(irate(response_total{dst_target_cluster=\"$cluster\", dst_target_cluster!=\"\", direction=\"outbound\"}[30s]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "SUCCESS RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": "", + "logBase": 1, + "max": "1", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 8 + }, + 
"id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(request_total{dst_target_cluster=\"$cluster\", dst_target_cluster!=\"\", direction=\"outbound\", tls=\"true\"}[30s]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + }, + { + "expr": "sum(irate(request_total{dst_target_cluster=\"$cluster\", dst_target_cluster!=\"\", tls!=\"true\"}[30s]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "REQUEST RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "rps", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 8 + }, + "id": 68, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": 
false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.5, sum(irate(response_latency_ms_bucket{dst_target_cluster=\"$cluster\", dst_target_cluster!=\"\", direction=\"outbound\"}[30s])) by (le))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "p50 gateway", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.95, sum(irate(response_latency_ms_bucket{dst_target_cluster=\"$cluster\", dst_target_cluster!=\"\", direction=\"outbound\"}[30s])) by (le))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "p95 gateway", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.99, sum(irate(response_latency_ms_bucket{dst_target_cluster=\"$cluster\", dst_target_cluster!=\"\", direction=\"outbound\"}[30s])) by (le))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "p99 gateway", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "LATENCY", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "ms", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "content": "
\n TRAFFIC BY TARGET SERVICE\n
", + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 15 + }, + "id": 32, + "links": [], + "mode": "html", + "options": {}, + "title": "", + "transparent": true, + "type": "text" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 17 + }, + "id": 77, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(response_total{classification=\"success\", dst_target_cluster=\"$cluster\", dst_target_cluster!=\"\", direction=\"outbound\"}[30s])) by (dst_target_service) / sum(irate(response_total{dst_target_cluster=\"$cluster\", dst_target_cluster!=\"\", direction=\"outbound\"}[30s])) by (dst_target_service)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "target-svc/{{dst_target_service}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "SUCCESS RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": "", + "logBase": 1, + "max": "1", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": 
"${DS_PROMETHEUS}", + "fill": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 17 + }, + "id": 78, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(request_total{dst_target_cluster=\"$cluster\", dst_target_cluster!=\"\", tls=\"true\"}[30s])) by (dst_target_service)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "\ud83d\udd12target-svc/{{dst_target_service}}", + "refId": "A" + }, + { + "expr": "sum(irate(request_total{dst_target_cluster=\"$cluster\", dst_target_cluster!=\"\", tls!=\"true\"}[30s])) by (dst_target_service)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "target-svc/{{dst_target_service}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "REQUEST RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "rps", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 17 + }, + "id": 79, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": 
false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.95, sum(rate(response_latency_ms_bucket{dst_target_cluster=\"$cluster\", dst_target_cluster!=\"\", direction=\"outbound\"}[30s])) by (le, dst_target_service))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "P95 target-svc/{{dst_target_service}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "P95 LATENCY", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": "1m", + "schemaVersion": 18, + "style": "dark", + "tags": [ + "linkerd" + ], + "templating": { + "list": [ + { + "hide": 0, + "label": "datasource", + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": "${DS_PROMETHEUS}", + "definition": "", + "hide": 0, + "includeAll": false, + "label": "Cluster", + "multi": false, + "name": "cluster", + "options": [], + "query": "label_values(request_total, dst_target_cluster)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + 
"useTags": false + } + ] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Linkerd Multicluster", + "uid": "linkerd-multicluster", + "version": 1 +} \ No newline at end of file diff --git a/roles/prometheus/dashboards/linkerd/linkerd-namespace.json b/argocd/system/linkerd-viz/dashboards/linkerd-namespace.json similarity index 98% rename from roles/prometheus/dashboards/linkerd/linkerd-namespace.json rename to argocd/system/linkerd-viz/dashboards/linkerd-namespace.json index c4261dbc..b178bb99 100644 --- a/roles/prometheus/dashboards/linkerd/linkerd-namespace.json +++ b/argocd/system/linkerd-viz/dashboards/linkerd-namespace.json @@ -1,56 +1,56 @@ { + "__elements": [], "__inputs": [ { - "name": "DS_PROMETHEUS", - "label": "prometheus", "description": "", - "type": "datasource", + "label": "prometheus", + "name": "DS_PROMETHEUS", "pluginId": "prometheus", - "pluginName": "Prometheus" + "pluginName": "Prometheus", + "type": "datasource" } ], - "__elements": [], "__requires": [ { - "type": "panel", "id": "gauge", "name": "Gauge", + "type": "panel", "version": "" }, { - "type": "grafana", "id": "grafana", "name": "Grafana", + "type": "grafana", "version": "8.3.3" }, { - "type": "panel", "id": "graph", "name": "Graph (old)", + "type": "panel", "version": "" }, { - "type": "panel", "id": "heatmap", "name": "Heatmap", + "type": "panel", "version": "" }, { - "type": "datasource", "id": "prometheus", "name": "Prometheus", + "type": "datasource", "version": "1.0.0" }, { - "type": "panel", "id": "stat", "name": "Stat", + "type": "panel", "version": "" }, { - "type": "panel", "id": "text", "name": "Text", + "type": "panel", "version": "" } ], @@ -484,7 +484,7 @@ "expr": 
"sum(irate(request_total{namespace=~\"$namespace\", deployment=~\"$deployment\", direction=\"inbound\", tls=\"true\"}[30s])) by (deployment)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "🔒deploy/{{deployment}}", + "legendFormat": "\ud83d\udd12deploy/{{deployment}}", "refId": "A" }, { @@ -798,7 +798,7 @@ "format": "time_series", "hide": false, "intervalFactor": 1, - "legendFormat": "🔒deploy/{{deployment}}", + "legendFormat": "\ud83d\udd12deploy/{{deployment}}", "refId": "A" }, { @@ -947,6 +947,16 @@ ], "templating": { "list": [ + { + "hide": 0, + "label": "datasource", + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, { "allValue": ".*", "current": {}, @@ -1026,4 +1036,4 @@ "title": "Linkerd Namespace", "uid": "linkerd-namespace", "version": 15 -} +} \ No newline at end of file diff --git a/roles/prometheus/dashboards/linkerd/linkerd-pod.json b/argocd/system/linkerd-viz/dashboards/linkerd-pod.json similarity index 99% rename from roles/prometheus/dashboards/linkerd/linkerd-pod.json rename to argocd/system/linkerd-viz/dashboards/linkerd-pod.json index f57e9ea9..c539d52b 100644 --- a/roles/prometheus/dashboards/linkerd/linkerd-pod.json +++ b/argocd/system/linkerd-viz/dashboards/linkerd-pod.json @@ -1,56 +1,56 @@ { + "__elements": [], "__inputs": [ { - "name": "DS_PROMETHEUS", - "label": "prometheus", "description": "", - "type": "datasource", + "label": "prometheus", + "name": "DS_PROMETHEUS", "pluginId": "prometheus", - "pluginName": "Prometheus" + "pluginName": "Prometheus", + "type": "datasource" } ], - "__elements": [], "__requires": [ { - "type": "panel", "id": "gauge", "name": "Gauge", + "type": "panel", "version": "" }, { - "type": "grafana", "id": "grafana", "name": "Grafana", + "type": "grafana", "version": "8.3.3" }, { - "type": "panel", "id": "graph", "name": "Graph (old)", + "type": "panel", "version": "" }, { - "type": "panel", "id": "heatmap", "name": 
"Heatmap", + "type": "panel", "version": "" }, { - "type": "datasource", "id": "prometheus", "name": "Prometheus", + "type": "datasource", "version": "1.0.0" }, { - "type": "panel", "id": "stat", "name": "Stat", + "type": "panel", "version": "" }, { - "type": "panel", "id": "text", "name": "Text", + "type": "panel", "version": "" } ], @@ -572,7 +572,7 @@ "expr": "sum(irate(request_total{namespace=\"$namespace\", pod=\"$pod\", direction=\"inbound\", tls=\"true\"}[30s])) by (pod)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "🔒po/{{pod}}", + "legendFormat": "\ud83d\udd12po/{{pod}}", "refId": "A" }, { @@ -1133,7 +1133,7 @@ "expr": "sum(irate(request_total{dst_namespace=\"$namespace\", dst_pod!=\"\", dst_pod=\"$pod\", direction=\"outbound\", tls=\"true\"}[30s])) by (pod)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "🔒po/{{pod}}", + "legendFormat": "\ud83d\udd12po/{{pod}}", "refId": "A" }, { @@ -1433,7 +1433,7 @@ "expr": "sum(irate(request_total{namespace=\"$namespace\", pod=\"$pod\", direction=\"outbound\", tls=\"true\"}[30s])) by (pod)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "🔒po/{{pod}}", + "legendFormat": "\ud83d\udd12po/{{pod}}", "refId": "A" }, { @@ -1994,7 +1994,7 @@ "expr": "sum(irate(request_total{namespace=\"$namespace\", pod=\"$pod\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_pod)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "🔒po/{{dst_pod}}", + "legendFormat": "\ud83d\udd12po/{{dst_pod}}", "refId": "A" }, { @@ -2175,6 +2175,16 @@ ], "templating": { "list": [ + { + "hide": 0, + "label": "datasource", + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, { "allValue": ".*", "current": {}, @@ -2326,4 +2336,4 @@ "title": "Linkerd Pod", "uid": "linkerd-pod", "version": 1 -} +} \ No newline at end of file diff --git a/roles/prometheus/dashboards/linkerd/linkerd-prometheus-benchmark.json 
b/argocd/system/linkerd-viz/dashboards/linkerd-prometheus-benchmark.json similarity index 99% rename from roles/prometheus/dashboards/linkerd/linkerd-prometheus-benchmark.json rename to argocd/system/linkerd-viz/dashboards/linkerd-prometheus-benchmark.json index 4e2025ff..a4aa530e 100644 --- a/roles/prometheus/dashboards/linkerd/linkerd-prometheus-benchmark.json +++ b/argocd/system/linkerd-viz/dashboards/linkerd-prometheus-benchmark.json @@ -1,56 +1,56 @@ { + "__elements": [], "__inputs": [ { - "name": "DS_PROMETHEUS", - "label": "prometheus", "description": "", - "type": "datasource", + "label": "prometheus", + "name": "DS_PROMETHEUS", "pluginId": "prometheus", - "pluginName": "Prometheus" + "pluginName": "Prometheus", + "type": "datasource" } ], - "__elements": [], "__requires": [ { - "type": "panel", "id": "gauge", "name": "Gauge", + "type": "panel", "version": "" }, { - "type": "grafana", "id": "grafana", "name": "Grafana", + "type": "grafana", "version": "8.3.3" }, { - "type": "panel", "id": "graph", "name": "Graph (old)", + "type": "panel", "version": "" }, { - "type": "panel", "id": "heatmap", "name": "Heatmap", + "type": "panel", "version": "" }, { - "type": "datasource", "id": "prometheus", "name": "Prometheus", + "type": "datasource", "version": "1.0.0" }, { - "type": "panel", "id": "stat", "name": "Stat", + "type": "panel", "version": "" }, { - "type": "panel", "id": "text", "name": "Text", + "type": "panel", "version": "" } ], @@ -3695,6 +3695,16 @@ ], "templating": { "list": [ + { + "hide": 0, + "label": "datasource", + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, { "allValue": ".*", "current": {}, @@ -3774,4 +3784,4 @@ "title": "Prometheus Benchmark - 2.7.x", "uid": "prometheus-benchmark", "version": 10 -} +} \ No newline at end of file diff --git a/roles/prometheus/dashboards/linkerd/linkerd-prometheus.json b/argocd/system/linkerd-viz/dashboards/linkerd-prometheus.json 
similarity index 99% rename from roles/prometheus/dashboards/linkerd/linkerd-prometheus.json rename to argocd/system/linkerd-viz/dashboards/linkerd-prometheus.json index d2939f37..6d1e4d25 100644 --- a/roles/prometheus/dashboards/linkerd/linkerd-prometheus.json +++ b/argocd/system/linkerd-viz/dashboards/linkerd-prometheus.json @@ -1,56 +1,56 @@ { + "__elements": [], "__inputs": [ { - "name": "DS_PROMETHEUS", - "label": "prometheus", "description": "", - "type": "datasource", + "label": "prometheus", + "name": "DS_PROMETHEUS", "pluginId": "prometheus", - "pluginName": "Prometheus" + "pluginName": "Prometheus", + "type": "datasource" } ], - "__elements": [], "__requires": [ { - "type": "panel", "id": "gauge", "name": "Gauge", + "type": "panel", "version": "" }, { - "type": "grafana", "id": "grafana", "name": "Grafana", + "type": "grafana", "version": "8.3.3" }, { - "type": "panel", "id": "graph", "name": "Graph (old)", + "type": "panel", "version": "" }, { - "type": "panel", "id": "heatmap", "name": "Heatmap", + "type": "panel", "version": "" }, { - "type": "datasource", "id": "prometheus", "name": "Prometheus", + "type": "datasource", "version": "1.0.0" }, { - "type": "panel", "id": "stat", "name": "Stat", + "type": "panel", "version": "" }, { - "type": "panel", "id": "text", "name": "Text", + "type": "panel", "version": "" } ], @@ -1353,7 +1353,18 @@ "prometheus" ], "templating": { - "list": [] + "list": [ + { + "hide": 0, + "label": "datasource", + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + } + ] }, "time": { "from": "now-1h", @@ -1389,4 +1400,4 @@ "title": "Prometheus 2.0 Stats", "uid": "prometheus", "version": 1 -} +} \ No newline at end of file diff --git a/argocd/system/linkerd-viz/dashboards/linkerd-replicaset.json b/argocd/system/linkerd-viz/dashboards/linkerd-replicaset.json new file mode 100644 index 00000000..96d502df --- /dev/null +++ 
b/argocd/system/linkerd-viz/dashboards/linkerd-replicaset.json @@ -0,0 +1,2410 @@ +{ + "__elements": [], + "__inputs": [ + { + "description": "", + "label": "prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], + "__requires": [ + { + "id": "gauge", + "name": "Gauge", + "type": "panel", + "version": "" + }, + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.3.3" + }, + { + "id": "graph", + "name": "Graph (old)", + "type": "panel", + "version": "" + }, + { + "id": "heatmap", + "name": "Heatmap", + "type": "panel", + "version": "" + }, + { + "id": "prometheus", + "name": "Prometheus", + "type": "datasource", + "version": "1.0.0" + }, + { + "id": "stat", + "name": "Stat", + "type": "panel", + "version": "" + }, + { + "id": "text", + "name": "Text", + "type": "panel", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "id": 16, + "iteration": 1573121539385, + "links": [], + "panels": [ + { + "content": "
\n  \n replicaset/$replicaset\n
", + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 20, + "links": [], + "mode": "html", + "options": {}, + "title": "", + "transparent": true, + "type": "text" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#d44a3a", + "rgba(237, 129, 40, 0.89)", + "#299c46" + ], + "datasource": "${DS_PROMETHEUS}", + "decimals": null, + "format": "percentunit", + "gauge": { + "maxValue": 1, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 8, + "x": 0, + "y": 2 + }, + "id": 5, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(response_total{classification=\"success\", namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"inbound\"}[30s])) / sum(irate(response_total{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"inbound\"}[30s]))", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "0.9,.99", + "title": "SUCCESS RATE", + "transparent": true, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + 
"datasource": "${DS_PROMETHEUS}", + "decimals": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 8, + "x": 8, + "y": 2 + }, + "id": 4, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": " RPS", + "postfixFontSize": "100%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(request_total{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"inbound\"}[30s]))", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "REQUEST RATE", + "transparent": true, + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "decimals": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 16, + "y": 2 + }, + "id": 11, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": 
"connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "100%", + "prefix": "", + "prefixFontSize": "100%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(count(request_total{dst_namespace=\"$namespace\", replicaset!=\"\", dst_replicaset!=\"\", dst_replicaset=\"$replicaset\", direction=\"outbound\"}) by (namespace, replicaset))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "INBOUND REPLICASETS", + "transparent": true, + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 20, + "y": 2 + }, + "id": 15, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "100%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": 
"count(count(request_total{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"outbound\"}) by (namespace, dst_replicaset))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "OUTBOUND REPLICASETS", + "transparent": true, + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "content": "
\n INBOUND TRAFFIC\n
", + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 17, + "links": [], + "mode": "html", + "options": {}, + "title": "", + "transparent": true, + "type": "text" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 8 + }, + "id": 67, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(response_total{classification=\"success\", namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"inbound\"}[30s])) by (replicaset) / sum(irate(response_total{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"inbound\"}[30s])) by (replicaset)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "rs/{{replicaset}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "SUCCESS RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": "", + "logBase": 1, + "max": "1", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 0, + "gridPos": { + "h": 7, 
+ "w": 8, + "x": 8, + "y": 8 + }, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(request_total{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"inbound\", tls=\"true\"}[30s])) by (replicaset)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "\ud83d\udd12rs/{{replicaset}}", + "refId": "A" + }, + { + "expr": "sum(irate(request_total{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"inbound\", tls!=\"true\"}[30s])) by (replicaset)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "rs/{{replicaset}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "REQUEST RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "rps", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 8 + }, + "id": 68, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + 
"linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.5, sum(irate(response_latency_ms_bucket{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"inbound\"}[30s])) by (le, replicaset))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "p50 rs/{{replicaset}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.95, sum(irate(response_latency_ms_bucket{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"inbound\"}[30s])) by (le, replicaset))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "p95 rs/{{replicaset}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.99, sum(irate(response_latency_ms_bucket{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"inbound\"}[30s])) by (le, replicaset))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "p99 rs/{{replicaset}}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "LATENCY", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "ms", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 15 + }, + "id": 148, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": 
"${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 16 + }, + "id": 167, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "tcp_close_total{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"inbound\",errno!=\"\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{peer}} {{errno}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "TCP CONNECTION FAILURES", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "none", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 16 + }, + "id": 168, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": 
false, + "targets": [ + { + "expr": "tcp_open_connections{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"inbound\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{peer}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "TCP CONNECTIONS OPEN", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateOranges", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "timeseries", + "datasource": "${DS_PROMETHEUS}", + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 16 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 169, + "legend": { + "show": false + }, + "links": [], + "options": {}, + "reverseYBuckets": false, + "targets": [ + { + "expr": "tcp_connection_duration_ms_bucket{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"inbound\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "title": "TCP CONNECTION DURATION", + "tooltip": { + "show": true, + "showHistogram": true + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": null, + "format": "dtdurationms", + "logBase": 1, + "max": null, + "min": "0", + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + 
"yBucketSize": null + } + ], + "title": "Inbound TCP Metrics", + "type": "row" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 152, + "panels": [], + "title": "", + "type": "row" + }, + { + "content": "
\n INBOUND REPLICASETS\n
", + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 17 + }, + "id": 76, + "links": [], + "mode": "html", + "options": {}, + "title": "", + "transparent": true, + "type": "text" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 59, + "panels": [], + "repeat": "inbound", + "scopedVars": { + "inbound": { + "selected": false, + "text": "web", + "value": "web" + } + }, + "title": "rs/$inbound", + "type": "row" + }, + { + "content": "
\n  \n rs/$inbound\n
", + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 39, + "links": [], + "mode": "html", + "options": {}, + "scopedVars": { + "inbound": { + "selected": false, + "text": "web", + "value": "web" + } + }, + "title": "", + "transparent": true, + "type": "text" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 22 + }, + "id": 36, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "inbound": { + "selected": false, + "text": "web", + "value": "web" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(response_total{classification=\"success\", replicaset!=\"\", replicaset=\"$inbound\", dst_namespace=\"$namespace\", dst_replicaset=\"$replicaset\", direction=\"outbound\"}[30s])) by (replicaset, pod) / sum(irate(response_total{replicaset!=\"\", replicaset=\"$inbound\", dst_namespace=\"$namespace\", dst_replicaset=\"$replicaset\", direction=\"outbound\"}[30s])) by (replicaset, pod)", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "legendFormat": "po/{{pod}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "SUCCESS RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "1", + "min": null, + "show": true + }, + { 
+ "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 22 + }, + "id": 22, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "inbound": { + "selected": false, + "text": "web", + "value": "web" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(request_total{replicaset!=\"\", replicaset=\"$inbound\", dst_namespace=\"$namespace\", dst_replicaset=\"$replicaset\", direction=\"outbound\", tls=\"true\"}[30s])) by (replicaset, pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "\ud83d\udd12po/{{pod}}", + "refId": "A" + }, + { + "expr": "sum(irate(request_total{replicaset!=\"\", replicaset=\"$inbound\", dst_namespace=\"$namespace\", dst_replicaset=\"$replicaset\", direction=\"outbound\", tls!=\"true\"}[30s])) by (replicaset, pod)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "po/{{pod}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "REQUEST RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "rps", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": 
"short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 22 + }, + "id": 29, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "scopedVars": { + "inbound": { + "selected": false, + "text": "web", + "value": "web" + } + }, + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.5, sum(rate(response_latency_ms_bucket{replicaset!=\"\", replicaset=\"$inbound\", dst_namespace=\"$namespace\", dst_replicaset=\"$replicaset\", direction=\"outbound\"}[30s])) by (le, replicaset))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "P50 rs/{{replicaset}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.95, sum(rate(response_latency_ms_bucket{replicaset!=\"\", replicaset=\"$inbound\", dst_namespace=\"$namespace\", dst_replicaset=\"$replicaset\", direction=\"outbound\"}[30s])) by (le, replicaset))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "P95 rs/{{replicaset}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.99, sum(rate(response_latency_ms_bucket{replicaset!=\"\", replicaset=\"$inbound\", dst_namespace=\"$namespace\", dst_replicaset=\"$replicaset\", direction=\"outbound\"}[30s])) by (le, replicaset))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "P99 rs/{{replicaset}}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + 
"timeShift": null, + "title": "LATENCY", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 29 + }, + "id": 34, + "panels": [], + "repeat": null, + "title": "", + "type": "row" + }, + { + "content": "
\n OUTBOUND TRAFFIC\n
", + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 30 + }, + "id": 32, + "links": [], + "mode": "html", + "options": {}, + "title": "", + "transparent": true, + "type": "text" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 32 + }, + "id": 77, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(response_total{classification=\"success\", namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"outbound\"}[30s])) by (dst_replicaset) / sum(irate(response_total{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"outbound\"}[30s])) by (dst_replicaset)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "rs/{{dst_replicaset}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "SUCCESS RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": "", + "logBase": 1, + "max": "1", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 0, + 
"gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 32 + }, + "id": 78, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(request_total{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_replicaset)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "\ud83d\udd12rs/{{dst_replicaset}}", + "refId": "A" + }, + { + "expr": "sum(irate(request_total{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"outbound\", tls!=\"true\"}[30s])) by (dst_replicaset)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "rs/{{dst_replicaset}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "REQUEST RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "rps", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 32 + }, + "id": 79, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + 
"lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.95, sum(rate(response_latency_ms_bucket{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"outbound\"}[30s])) by (le, dst_replicaset))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "P95 rs/{{dst_replicaset}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "P95 LATENCY", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 39 + }, + "id": 154, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 29 + }, + "id": 157, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "tcp_close_total{namespace=\"$namespace\", 
replicaset=\"$replicaset\", direction=\"outbound\",errno!=\"\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{peer}} {{errno}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "TCP CONNECTION FAILURES", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "none", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 29 + }, + "id": 166, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "tcp_open_connections{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"outbound\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{peer}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "TCP CONNECTIONS OPEN", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + 
"format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "cards": { + "cardPadding": null, + "cardRound": null + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateOranges", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "timeseries", + "datasource": "${DS_PROMETHEUS}", + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 29 + }, + "heatmap": {}, + "hideZeroBuckets": false, + "highlightCards": true, + "id": 160, + "legend": { + "show": false + }, + "links": [], + "options": {}, + "reverseYBuckets": false, + "targets": [ + { + "expr": "tcp_connection_duration_ms_bucket{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"outbound\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "title": "TCP CONNECTION DURATION", + "tooltip": { + "show": true, + "showHistogram": true + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": null, + "yAxis": { + "decimals": null, + "format": "dtdurationms", + "logBase": 1, + "max": null, + "min": "0", + "show": true, + "splitFactor": null + }, + "yBucketBound": "auto", + "yBucketNumber": null, + "yBucketSize": null + } + ], + "title": "Outbound TCP Metrics", + "type": "row" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 40 + }, + "id": 156, + "panels": [], + "title": "", + "type": "row" + }, + { + "content": "
\n OUTBOUND REPLICASETS\n
", + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 41 + }, + "id": 80, + "links": [], + "mode": "html", + "options": {}, + "title": "", + "transparent": true, + "type": "text" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 43 + }, + "id": 27, + "panels": [ + { + "content": "
\n  \n rs/$outbound\n
", + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 36 + }, + "id": 40, + "links": [], + "mode": "html", + "options": {}, + "title": "", + "transparent": true, + "type": "text" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 38 + }, + "id": 28, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(response_total{classification=\"success\", namespace=\"$namespace\", replicaset=\"$replicaset\", dst_replicaset=\"$outbound\", direction=\"outbound\"}[30s])) by (dst_replicaset) / sum(irate(response_total{namespace=\"$namespace\", replicaset=\"$replicaset\", dst_replicaset=\"$outbound\", direction=\"outbound\"}[30s])) by (dst_replicaset)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "rs/{{dst_replicaset}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "SUCCESS RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "1", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": 
false, + "datasource": "${DS_PROMETHEUS}", + "fill": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 38 + }, + "id": 35, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(request_total{namespace=\"$namespace\", replicaset=\"$replicaset\", dst_replicaset=\"$outbound\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_replicaset)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "\ud83d\udd12rs/{{dst_replicaset}}", + "refId": "A" + }, + { + "expr": "sum(irate(request_total{namespace=\"$namespace\", replicaset=\"$replicaset\", dst_replicaset=\"$outbound\", direction=\"outbound\", tls!=\"true\"}[30s])) by (dst_replicaset)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "rs/{{dst_replicaset}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "REQUEST RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "rps", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 38 + }, + "id": 41, + "legend": { + "avg": 
false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.5, sum(rate(response_latency_ms_bucket{namespace=\"$namespace\", replicaset=\"$replicaset\", dst_replicaset=\"$outbound\", direction=\"outbound\"}[30s])) by (le, dst_replicaset))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "P50 rs/{{dst_replicaset}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.95, sum(rate(response_latency_ms_bucket{namespace=\"$namespace\", replicaset=\"$replicaset\", dst_replicaset=\"$outbound\", direction=\"outbound\"}[30s])) by (le, dst_replicaset))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "P95 rs/{{dst_replicaset}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.99, sum(rate(response_latency_ms_bucket{namespace=\"$namespace\", replicaset=\"$replicaset\", dst_replicaset=\"$outbound\", direction=\"outbound\"}[30s])) by (le, dst_replicaset))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "P99 rs/{{dst_replicaset}}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "LATENCY", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + 
} + ], + "repeat": "outbound", + "title": "rs/$outbound", + "type": "row" + }, + { + "content": "
\n
\n \n
\n
\n
\n
\n
\n\n
", + "gridPos": { + "h": 3, + "w": 24, + "x": 0, + "y": 44 + }, + "height": "1px", + "id": 171, + "links": [], + "mode": "html", + "options": {}, + "title": "", + "transparent": true, + "type": "text" + } + ], + "refresh": "1m", + "schemaVersion": 18, + "style": "dark", + "tags": [ + "linkerd" + ], + "templating": { + "list": [ + { + "hide": 0, + "label": "datasource", + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".*", + "current": { + "text": "default", + "value": "default" + }, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(process_start_time_seconds{replicaset!=\"\"}, namespace)", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": "label_values(process_start_time_seconds{replicaset!=\"\"}, namespace)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".*", + "current": { + "text": "rs1", + "value": "rs1" + }, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(process_start_time_seconds{namespace=\"$namespace\"}, replicaset)", + "hide": 0, + "includeAll": false, + "label": "ReplicaSet", + "multi": false, + "name": "replicaset", + "options": [], + "query": "label_values(process_start_time_seconds{namespace=\"$namespace\"}, replicaset)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".*", + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(request_total{dst_namespace=\"$namespace\", dst_replicaset=\"$replicaset\"}, replicaset)", + "hide": 2, + "includeAll": true, + "label": null, + "multi": false, + "name": 
"inbound", + "options": [], + "query": "label_values(request_total{dst_namespace=\"$namespace\", dst_replicaset=\"$replicaset\"}, replicaset)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".*", + "current": { + "text": "All", + "value": "$__all" + }, + "datasource": "${DS_PROMETHEUS}", + "definition": "label_values(request_total{namespace=\"$namespace\", replicaset=\"$replicaset\"}, dst_replicaset)", + "hide": 2, + "includeAll": true, + "label": null, + "multi": false, + "name": "outbound", + "options": [], + "query": "label_values(request_total{namespace=\"$namespace\", replicaset=\"$replicaset\"}, dst_replicaset)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Linkerd ReplicaSet", + "uid": "linkerd-replicaset", + "version": 1 +} \ No newline at end of file diff --git a/roles/prometheus/dashboards/linkerd/linkerd-replicationcontroller.json b/argocd/system/linkerd-viz/dashboards/linkerd-replicationcontroller.json similarity index 99% rename from roles/prometheus/dashboards/linkerd/linkerd-replicationcontroller.json rename to argocd/system/linkerd-viz/dashboards/linkerd-replicationcontroller.json index c552d749..bde22817 100644 --- a/roles/prometheus/dashboards/linkerd/linkerd-replicationcontroller.json +++ b/argocd/system/linkerd-viz/dashboards/linkerd-replicationcontroller.json @@ -1,56 +1,56 @@ { + "__elements": [], "__inputs": [ { - "name": "DS_PROMETHEUS", - "label": "prometheus", 
"description": "", - "type": "datasource", + "label": "prometheus", + "name": "DS_PROMETHEUS", "pluginId": "prometheus", - "pluginName": "Prometheus" + "pluginName": "Prometheus", + "type": "datasource" } ], - "__elements": [], "__requires": [ { - "type": "panel", "id": "gauge", "name": "Gauge", + "type": "panel", "version": "" }, { - "type": "grafana", "id": "grafana", "name": "Grafana", + "type": "grafana", "version": "8.3.3" }, { - "type": "panel", "id": "graph", "name": "Graph (old)", + "type": "panel", "version": "" }, { - "type": "panel", "id": "heatmap", "name": "Heatmap", + "type": "panel", "version": "" }, { - "type": "datasource", "id": "prometheus", "name": "Prometheus", + "type": "datasource", "version": "1.0.0" }, { - "type": "panel", "id": "stat", "name": "Stat", + "type": "panel", "version": "" }, { - "type": "panel", "id": "text", "name": "Text", + "type": "panel", "version": "" } ], @@ -571,7 +571,7 @@ "expr": "sum(irate(request_total{namespace=\"$namespace\", replicationcontroller=\"$replicationcontroller\", direction=\"inbound\", tls=\"true\"}[30s])) by (replicationcontroller)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "🔒rc/{{replicationcontroller}}", + "legendFormat": "\ud83d\udd12rc/{{replicationcontroller}}", "refId": "A" }, { @@ -1159,7 +1159,7 @@ "expr": "sum(irate(request_total{replicationcontroller!=\"\", replicationcontroller=\"$inbound\", dst_namespace=\"$namespace\", dst_replicationcontroller=\"$replicationcontroller\", direction=\"outbound\", tls=\"true\"}[30s])) by (replicationcontroller, pod)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "🔒po/{{pod}}", + "legendFormat": "\ud83d\udd12po/{{pod}}", "refId": "A" }, { @@ -1475,7 +1475,7 @@ "expr": "sum(irate(request_total{namespace=\"$namespace\", replicationcontroller=\"$replicationcontroller\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_replicationcontroller)", "format": "time_series", "intervalFactor": 1, - "legendFormat": 
"🔒rc/{{dst_replicationcontroller}}", + "legendFormat": "\ud83d\udd12rc/{{dst_replicationcontroller}}", "refId": "A" }, { @@ -2045,7 +2045,7 @@ "expr": "sum(irate(request_total{namespace=\"$namespace\", replicationcontroller=\"$replicationcontroller\", dst_replicationcontroller=\"$outbound\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_replicationcontroller)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "🔒rc/{{dst_replicationcontroller}}", + "legendFormat": "\ud83d\udd12rc/{{dst_replicationcontroller}}", "refId": "A" }, { @@ -2228,6 +2228,16 @@ ], "templating": { "list": [ + { + "hide": 0, + "label": "datasource", + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, { "allValue": ".*", "current": {}, @@ -2351,4 +2361,4 @@ "title": "Linkerd ReplicationController", "uid": "linkerd-replicationcontroller", "version": 1 -} +} \ No newline at end of file diff --git a/argocd/system/linkerd-viz/dashboards/linkerd-route.json b/argocd/system/linkerd-viz/dashboards/linkerd-route.json new file mode 100644 index 00000000..97d17cca --- /dev/null +++ b/argocd/system/linkerd-viz/dashboards/linkerd-route.json @@ -0,0 +1,1326 @@ +{ + "__elements": [], + "__inputs": [ + { + "description": "", + "label": "prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], + "__requires": [ + { + "id": "gauge", + "name": "Gauge", + "type": "panel", + "version": "" + }, + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "8.3.3" + }, + { + "id": "graph", + "name": "Graph (old)", + "type": "panel", + "version": "" + }, + { + "id": "heatmap", + "name": "Heatmap", + "type": "panel", + "version": "" + }, + { + "id": "prometheus", + "name": "Prometheus", + "type": "datasource", + "version": "1.0.0" + }, + { + "id": "stat", + "name": "Stat", + "type": "panel", + "version": "" + }, + { + "id": "text", + 
"name": "Text", + "type": "panel", + "version": "" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 1, + "id": null, + "iteration": 1539806914987, + "links": [], + "panels": [ + { + "content": "
\n  \n route/$rt_route\n
", + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "links": [], + "mode": "html", + "options": {}, + "title": "", + "transparent": true, + "type": "text" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#d44a3a", + "rgba(237, 129, 40, 0.89)", + "#299c46" + ], + "datasource": "${DS_PROMETHEUS}", + "decimals": null, + "format": "percentunit", + "gauge": { + "maxValue": 1, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 8, + "x": 0, + "y": 2 + }, + "id": 4, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(route_response_total{classification=\"success\", namespace=\"$namespace\", direction=\"inbound\", rt_route=\"$rt_route\"}[30s])) / sum(irate(route_response_total{namespace=\"$namespace\", direction=\"inbound\", rt_route=\"$rt_route\"}[30s]))", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "0.9,.99", + "title": "SUCCESS RATE", + "transparent": true, + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + 
"datasource": "${DS_PROMETHEUS}", + "decimals": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 8, + "x": 8, + "y": 2 + }, + "id": 6, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": " RPS", + "postfixFontSize": "100%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(irate(route_request_total{namespace=\"$namespace\", direction=\"inbound\", rt_route=\"$rt_route\"}[30s]))", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "REQUEST RATE", + "transparent": true, + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "${DS_PROMETHEUS}", + "decimals": null, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 8, + "x": 16, + "y": 2 + }, + "id": 8, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": 
"connected", + "nullText": null, + "options": {}, + "postfix": " ms", + "postfixFontSize": "100%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": true, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "histogram_quantile(0.95, sum(irate(route_response_latency_ms_bucket{namespace=\"$namespace\", direction=\"inbound\", rt_route=\"$rt_route\"}[30s])) by (le, rt_route))", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "P95 LATENCY", + "transparent": true, + "type": "singlestat", + "valueFontSize": "100%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "content": "
\n TOP-LINE TRAFFIC\n
", + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 10, + "links": [], + "mode": "html", + "options": {}, + "title": "", + "transparent": true, + "type": "text" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 8 + }, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(route_response_total{classification=\"success\", namespace=\"$namespace\", direction=\"inbound\", rt_route=\"$rt_route\"}[30s])) by (rt_route) / sum(irate(route_response_total{namespace=\"$namespace\", direction=\"inbound\", rt_route=\"$rt_route\"}[30s])) by (rt_route)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "route/{{rt_route}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "SUCCESS RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": "", + "logBase": 1, + "max": "1", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 0, + "gridPos": { + "h": 7, 
+ "w": 8, + "x": 8, + "y": 8 + }, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(route_request_total{namespace=\"$namespace\", direction=\"inbound\", rt_route=\"$rt_route\", tls=\"true\"}[30s])) by (rt_route)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "\ud83d\udd12route/{{rt_route}}", + "refId": "A" + }, + { + "expr": "sum(irate(route_request_total{namespace=\"$namespace\", direction=\"inbound\", rt_route=\"$rt_route\", tls!=\"true\"}[30s])) by (rt_route)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "route/{{rt_route}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "REQUEST RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "rps", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 8 + }, + "id": 16, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + 
"linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.5, sum(irate(route_response_latency_ms_bucket{namespace=\"$namespace\", direction=\"inbound\", rt_route=\"$rt_route\"}[30s])) by (le, rt_route))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "p50 route/{{rt_route}}", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.95, sum(irate(route_response_latency_ms_bucket{namespace=\"$namespace\", direction=\"inbound\", rt_route=\"$rt_route\"}[30s])) by (le, rt_route))", + "format": "time_series", + "hide": false, + "intervalFactor": 1, + "legendFormat": "p95 route/{{rt_route}}", + "refId": "B" + }, + { + "expr": "histogram_quantile(0.99, sum(irate(route_response_latency_ms_bucket{namespace=\"$namespace\", direction=\"inbound\", rt_route=\"$rt_route\"}[30s])) by (le, rt_route))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "p99 route/{{rt_route}}", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "LATENCY", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "ms", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "content": "
\n INBOUND TRAFFIC BY DEPLOYMENT\n
", + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 15 + }, + "id": 18, + "links": [], + "mode": "html", + "options": {}, + "title": "", + "transparent": true, + "type": "text" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 17 + }, + "id": 20, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(route_response_total{classification=\"success\", namespace=\"$namespace\", direction=\"outbound\", rt_route=\"$rt_route\"}[30s])) by (deployment, rt_route) / sum(irate(route_response_total{namespace=\"$namespace\", direction=\"outbound\", rt_route=\"$rt_route\"}[30s])) by (deployment, rt_route)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "deploy/{{deployment}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "SUCCESS RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": "", + "logBase": 1, + "max": "1", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + 
"fill": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 17 + }, + "id": 22, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(route_request_total{namespace=\"$namespace\", direction=\"outbound\", tls=\"true\", rt_route=\"$rt_route\"}[30s])) by (deployment, rt_route)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "\ud83d\udd12deploy/{{deployment}}", + "refId": "A" + }, + { + "expr": "sum(irate(route_request_total{namespace=\"$namespace\", direction=\"outbound\", tls!=\"true\", rt_route=\"$rt_route\"}[30s])) by (deployment, rt_route)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "deploy/{{deployment}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "REQUEST RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "rps", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 17 + }, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": 
false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.95, sum(rate(route_response_latency_ms_bucket{namespace=\"$namespace\", direction=\"outbound\", rt_route=\"$rt_route\"}[30s])) by (le, deployment, rt_route))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "P95 deploy/{{deployment}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "P95 LATENCY", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "content": "
\n INBOUND TRAFFIC BY POD\n
", + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 24 + }, + "id": 26, + "links": [], + "mode": "html", + "options": {}, + "title": "", + "transparent": true, + "type": "text" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 26 + }, + "id": 28, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(route_response_total{classification=\"success\", namespace=\"$namespace\", direction=\"outbound\", rt_route=\"$rt_route\"}[30s])) by (pod, rt_route) / sum(irate(route_response_total{namespace=\"$namespace\", direction=\"outbound\", rt_route=\"$rt_route\"}[30s])) by (pod, rt_route)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "po/{{pod}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "SUCCESS RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "percentunit", + "label": "", + "logBase": 1, + "max": "1", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 0, + "gridPos": { + 
"h": 7, + "w": 8, + "x": 8, + "y": 26 + }, + "id": 30, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(route_request_total{namespace=\"$namespace\", direction=\"outbound\", tls=\"true\", rt_route=\"$rt_route\"}[30s])) by (pod, rt_route)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "\ud83d\udd12po/{{pod}}", + "refId": "A" + }, + { + "expr": "sum(irate(route_request_total{namespace=\"$namespace\", direction=\"outbound\", tls!=\"true\", rt_route=\"$rt_route\"}[30s])) by (pod, rt_route)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "po/{{pod}}", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "REQUEST RATE", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "rps", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 26 + }, + "id": 32, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + 
"links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "histogram_quantile(0.95, sum(rate(route_response_latency_ms_bucket{namespace=\"$namespace\", direction=\"outbound\", rt_route=\"$rt_route\"}[30s])) by (le, pod))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "P95 po/{{pod, rt_route}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "P95 LATENCY", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "content": "
\n
\n \n
\n
\n
\n
\n
\n\n
", + "gridPos": { + "h": 3, + "w": 24, + "x": 0, + "y": 33 + }, + "height": "1px", + "id": 34, + "links": [], + "mode": "html", + "options": {}, + "title": "", + "transparent": true, + "type": "text" + } + ], + "refresh": "1m", + "schemaVersion": 18, + "style": "dark", + "tags": [ + "linkerd" + ], + "templating": { + "list": [ + { + "hide": 0, + "label": "datasource", + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "namespace", + "options": [], + "query": "label_values(route_request_total, namespace)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "${DS_PROMETHEUS}", + "definition": "", + "hide": 0, + "includeAll": false, + "label": "Route", + "multi": false, + "name": "rt_route", + "options": [], + "query": "label_values(route_request_total{namespace=\"$namespace\"}, rt_route)", + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Linkerd Route", + "uid": "route", + "version": 1 +} \ No newline at end of file diff --git a/roles/prometheus/dashboards/linkerd/linkerd-service.json b/argocd/system/linkerd-viz/dashboards/linkerd-service.json similarity index 98% rename from 
roles/prometheus/dashboards/linkerd/linkerd-service.json rename to argocd/system/linkerd-viz/dashboards/linkerd-service.json index 71fe14bb..c14d698e 100644 --- a/roles/prometheus/dashboards/linkerd/linkerd-service.json +++ b/argocd/system/linkerd-viz/dashboards/linkerd-service.json @@ -1,56 +1,56 @@ { + "__elements": [], "__inputs": [ { - "name": "DS_PROMETHEUS", - "label": "prometheus", "description": "", - "type": "datasource", + "label": "prometheus", + "name": "DS_PROMETHEUS", "pluginId": "prometheus", - "pluginName": "Prometheus" + "pluginName": "Prometheus", + "type": "datasource" } ], - "__elements": [], "__requires": [ { - "type": "panel", "id": "gauge", "name": "Gauge", + "type": "panel", "version": "" }, { - "type": "grafana", "id": "grafana", "name": "Grafana", + "type": "grafana", "version": "8.3.3" }, { - "type": "panel", "id": "graph", "name": "Graph (old)", + "type": "panel", "version": "" }, { - "type": "panel", "id": "heatmap", "name": "Heatmap", + "type": "panel", "version": "" }, { - "type": "datasource", "id": "prometheus", "name": "Prometheus", + "type": "datasource", "version": "1.0.0" }, { - "type": "panel", "id": "stat", "name": "Stat", + "type": "panel", "version": "" }, { - "type": "panel", "id": "text", "name": "Text", + "type": "panel", "version": "" } ], @@ -489,7 +489,7 @@ "expr": "sum(irate(request_total{namespace=\"$namespace\", dst_service=\"$service\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_service)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "🔒svc/{{dst_service}}", + "legendFormat": "\ud83d\udd12svc/{{dst_service}}", "refId": "A" }, { @@ -788,7 +788,7 @@ "expr": "sum(irate(request_total{namespace=\"$namespace\", dst_service=\"$service\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_service, deployment)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "🔒deploy/{{deployment}}", + "legendFormat": "\ud83d\udd12deploy/{{deployment}}", "refId": "A" }, { @@ -1070,7 +1070,7 @@ 
"expr": "sum(irate(request_total{namespace=\"$namespace\", dst_service=\"$service\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_service, pod)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "🔒po/{{pod}}", + "legendFormat": "\ud83d\udd12po/{{pod}}", "refId": "A" }, { @@ -1234,6 +1234,16 @@ ], "templating": { "list": [ + { + "hide": 0, + "label": "datasource", + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, { "allValue": ".*", "current": {}, @@ -1363,4 +1373,4 @@ "title": "Linkerd Service", "uid": "linkerd-service", "version": 1 -} +} \ No newline at end of file diff --git a/roles/prometheus/dashboards/linkerd/linkerd-statefulset.json b/argocd/system/linkerd-viz/dashboards/linkerd-statefulset.json similarity index 99% rename from roles/prometheus/dashboards/linkerd/linkerd-statefulset.json rename to argocd/system/linkerd-viz/dashboards/linkerd-statefulset.json index 03405f9f..d223c2fc 100644 --- a/roles/prometheus/dashboards/linkerd/linkerd-statefulset.json +++ b/argocd/system/linkerd-viz/dashboards/linkerd-statefulset.json @@ -1,56 +1,56 @@ { + "__elements": [], "__inputs": [ { - "name": "DS_PROMETHEUS", - "label": "prometheus", "description": "", - "type": "datasource", + "label": "prometheus", + "name": "DS_PROMETHEUS", "pluginId": "prometheus", - "pluginName": "Prometheus" + "pluginName": "Prometheus", + "type": "datasource" } ], - "__elements": [], "__requires": [ { - "type": "panel", "id": "gauge", "name": "Gauge", + "type": "panel", "version": "" }, { - "type": "grafana", "id": "grafana", "name": "Grafana", + "type": "grafana", "version": "8.3.3" }, { - "type": "panel", "id": "graph", "name": "Graph (old)", + "type": "panel", "version": "" }, { - "type": "panel", "id": "heatmap", "name": "Heatmap", + "type": "panel", "version": "" }, { - "type": "datasource", "id": "prometheus", "name": "Prometheus", + "type": "datasource", "version": "1.0.0" }, { - 
"type": "panel", "id": "stat", "name": "Stat", + "type": "panel", "version": "" }, { - "type": "panel", "id": "text", "name": "Text", + "type": "panel", "version": "" } ], @@ -571,7 +571,7 @@ "expr": "sum(irate(request_total{namespace=\"$namespace\", statefulset=\"$statefulset\", direction=\"inbound\", tls=\"true\"}[30s])) by (statefulset)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "🔒sts/{{statefulset}}", + "legendFormat": "\ud83d\udd12sts/{{statefulset}}", "refId": "A" }, { @@ -1159,7 +1159,7 @@ "expr": "sum(irate(request_total{statefulset!=\"\", statefulset=\"$inbound\", dst_namespace=\"$namespace\", dst_statefulset=\"$statefulset\", direction=\"outbound\", tls=\"true\"}[30s])) by (statefulset, pod)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "🔒po/{{pod}}", + "legendFormat": "\ud83d\udd12po/{{pod}}", "refId": "A" }, { @@ -1475,7 +1475,7 @@ "expr": "sum(irate(request_total{namespace=\"$namespace\", statefulset=\"$statefulset\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_statefulset)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "🔒sts/{{dst_statefulset}}", + "legendFormat": "\ud83d\udd12sts/{{dst_statefulset}}", "refId": "A" }, { @@ -2045,7 +2045,7 @@ "expr": "sum(irate(request_total{namespace=\"$namespace\", statefulset=\"$statefulset\", dst_statefulset=\"$outbound\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_statefulset)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "🔒sts/{{dst_statefulset}}", + "legendFormat": "\ud83d\udd12sts/{{dst_statefulset}}", "refId": "A" }, { @@ -2228,6 +2228,16 @@ ], "templating": { "list": [ + { + "hide": 0, + "label": "datasource", + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, { "allValue": ".*", "current": {}, @@ -2351,4 +2361,4 @@ "title": "Linkerd StatefulSet", "uid": "linkerd-statefulset", "version": 1 -} +} \ No newline at end of file diff --git 
a/roles/prometheus/dashboards/linkerd/linkerd-top-line.json b/argocd/system/linkerd-viz/dashboards/linkerd-top-line.json similarity index 98% rename from roles/prometheus/dashboards/linkerd/linkerd-top-line.json rename to argocd/system/linkerd-viz/dashboards/linkerd-top-line.json index b449d334..748e52e9 100644 --- a/roles/prometheus/dashboards/linkerd/linkerd-top-line.json +++ b/argocd/system/linkerd-viz/dashboards/linkerd-top-line.json @@ -1,56 +1,56 @@ { + "__elements": [], "__inputs": [ { - "name": "DS_PROMETHEUS", - "label": "prometheus", "description": "", - "type": "datasource", + "label": "prometheus", + "name": "DS_PROMETHEUS", "pluginId": "prometheus", - "pluginName": "Prometheus" + "pluginName": "Prometheus", + "type": "datasource" } ], - "__elements": [], "__requires": [ { - "type": "panel", "id": "gauge", "name": "Gauge", + "type": "panel", "version": "" }, { - "type": "grafana", "id": "grafana", "name": "Grafana", + "type": "grafana", "version": "8.3.3" }, { - "type": "panel", "id": "graph", "name": "Graph (old)", + "type": "panel", "version": "" }, { - "type": "panel", "id": "heatmap", "name": "Heatmap", + "type": "panel", "version": "" }, { - "type": "datasource", "id": "prometheus", "name": "Prometheus", + "type": "datasource", "version": "1.0.0" }, { - "type": "panel", "id": "stat", "name": "Stat", + "type": "panel", "version": "" }, { - "type": "panel", "id": "text", "name": "Text", + "type": "panel", "version": "" } ], @@ -567,7 +567,7 @@ "expr": "sum(irate(request_total{namespace=~\"$namespace\", direction=\"inbound\", tls=\"true\"}[30s])) by (namespace)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "🔒ns/{{namespace}}", + "legendFormat": "\ud83d\udd12ns/{{namespace}}", "refId": "A" }, { @@ -900,7 +900,7 @@ "expr": "sum(irate(request_total{namespace=\"$namespace\", direction=\"inbound\", tls=\"true\"}[30s])) by (deployment)", "format": "time_series", "intervalFactor": 1, - "legendFormat": "🔒deploy/{{deployment}}", + 
"legendFormat": "\ud83d\udd12deploy/{{deployment}}", "refId": "A" }, { @@ -1053,6 +1053,16 @@ ], "templating": { "list": [ + { + "hide": 0, + "label": "datasource", + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, { "allValue": ".*", "current": { @@ -1138,4 +1148,4 @@ "title": "Linkerd Top Line", "uid": "linkerd-top-line", "version": 1 -} +} \ No newline at end of file diff --git a/argocd/system/linkerd-viz/templates/grafana-dashboards.yaml b/argocd/system/linkerd-viz/templates/grafana-dashboards.yaml new file mode 100644 index 00000000..55a1ad77 --- /dev/null +++ b/argocd/system/linkerd-viz/templates/grafana-dashboards.yaml @@ -0,0 +1,17 @@ +{{- $files := .Files.Glob "dashboards/*.json" }} +{{- if $files }} +{{- range $path, $fileContents := $files }} +{{- $dashboardName := regexReplaceAll "(^.*/)(.*)\\.json$" $path "${2}" }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ $dashboardName }} + namespace: {{ $.Release.Namespace }} + labels: + grafana_dashboard: "1" +data: + {{ $dashboardName }}.json: |- + {{- $.Files.Get $path | nindent 4 }} +{{- end }} +{{- end }} diff --git a/argocd/system/linkerd-viz/templates/ingress.yaml b/argocd/system/linkerd-viz/templates/ingress.yaml new file mode 100644 index 00000000..9513a83b --- /dev/null +++ b/argocd/system/linkerd-viz/templates/ingress.yaml @@ -0,0 +1,48 @@ +# Linkerd Middlewares +--- +apiVersion: traefik.containo.us/v1alpha1 +kind: Middleware +metadata: + name: l5d-header-middleware-linkerd-viz + namespace: {{ .Release.Namespace }} +spec: + headers: + customRequestHeaders: + l5d-dst-override: "web.{{ .Release.Namespace }}.svc.cluster.local:8084" + +--- +# HTTPS Ingress +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: linkerd-viz-ingress + namespace: {{ .Release.Namespace }} + annotations: + # HTTPS as entry point + traefik.ingress.kubernetes.io/router.entrypoints: websecure + # Enable TLS + 
traefik.ingress.kubernetes.io/router.tls: "true" +{{- if .Values.ingress.basicAuth.enable }} + # Use Basic Auth Midleware configured + traefik.ingress.kubernetes.io/router.middlewares: + {{ .Values.ingress.basicAuth.middlewareNamespace }}-{{ .Values.ingress.basicAuth.middlewareName }}@kubernetescrd +{{- end }} + # Enable cert-manager to create automatically the SSL certificate and store in Secret + cert-manager.io/cluster-issuer: {{ .Values.ingress.certmanager.tlsIssuer }}-issuer + cert-manager.io/common-name: {{ .Values.ingress.host }} +spec: + tls: + - hosts: + - {{ .Values.ingress.host }} + secretName: linkerd-viz-tls + rules: + - host: {{ .Values.ingress.host }} + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: web + port: + number: 8084 diff --git a/roles/linkerd/linkerd-viz/templates/linkerd_viz_prometheus.yml.j2 b/argocd/system/linkerd-viz/templates/podmonitor.yaml similarity index 87% rename from roles/linkerd/linkerd-viz/templates/linkerd_viz_prometheus.yml.j2 rename to argocd/system/linkerd-viz/templates/podmonitor.yaml index 0f019a49..7ff059d7 100644 --- a/roles/linkerd/linkerd-viz/templates/linkerd_viz_prometheus.yml.j2 +++ b/argocd/system/linkerd-viz/templates/podmonitor.yaml @@ -1,17 +1,18 @@ +{{- if and .Values.serviceMonitor.enable (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/PodMonitor") }} --- apiVersion: monitoring.coreos.com/v1 kind: PodMonitor metadata: labels: app: linkerd - release: kube-prometheus-stack + release: {{ .Values.serviceMonitor.release }} name: linkerd-controller - namespace: {{ k3s_monitoring_namespace }} + namespace: {{ .Release.Namespace }} spec: namespaceSelector: matchNames: - - linkerd-viz - - linkerd + - {{ .Release.Namespace }}-viz + - {{ .Release.Namespace }} selector: matchLabels: {} podMetricsEndpoints: @@ -37,9 +38,9 @@ kind: PodMonitor metadata: labels: app: linkerd - release: kube-prometheus-stack + release: {{ .Values.serviceMonitor.release }} name: linkerd-service-mirror - 
namespace: {{ k3s_monitoring_namespace }} + namespace: {{ .Release.Namespace }} spec: namespaceSelector: any: true @@ -68,9 +69,9 @@ kind: PodMonitor metadata: labels: app: linkerd - release: kube-prometheus-stack + release: {{ .Values.serviceMonitor.release }} name: linkerd-proxy - namespace: {{ k3s_monitoring_namespace }} + namespace: {{ .Release.Namespace }} spec: namespaceSelector: any: true @@ -119,3 +120,5 @@ spec: action: replace targetLabel: job replacement: linkerd-proxy + +{{- end }} \ No newline at end of file diff --git a/argocd/system/linkerd-viz/values.yaml b/argocd/system/linkerd-viz/values.yaml new file mode 100644 index 00000000..77df545f --- /dev/null +++ b/argocd/system/linkerd-viz/values.yaml @@ -0,0 +1,38 @@ + +# Prometheus monitoring +serviceMonitor: + enable: true + release: monitoring + +# Ingress configuration +ingress: + host: linkerd.picluster.ricsanfre.com + # configure cert-manager issuer + certmanager: + # tlsIssuer=letsecrypt to generate valid TLS certficiate using IONOS API + # tlsIssuer=ca to generate a CA-signed certificate (not valid) + tlsIssuer: letsencrypt + # tlsIssuer: ca + # Enabling traefik basic authorization, reusing global middleware created for Traefik + basicAuth: + enable: true + middlewareName: basic-auth + middlewareNamespace: traefik + +######################## +# linkerd-viz subchart +######################## + +linkerd-viz: + # Skip namespace creation + installNamespace: false + # External Prometheus + prometheusUrl: http://monitoring-prometheus.monitoring.svc.cluster.local:9090 + prometheus: + enabled: false + # External Grafana + grafana: + url: monitoring-grafana.monitoring.svc.cluster.local + # Disabling DNS rebinding protection + dashboard: + enforcedHostRegexp: .* diff --git a/argocd/system/linkerd/Chart.yaml b/argocd/system/linkerd/Chart.yaml new file mode 100644 index 00000000..99d6e576 --- /dev/null +++ b/argocd/system/linkerd/Chart.yaml @@ -0,0 +1,10 @@ +apiVersion: v2 +name: linkerd +version: 0.0.0 
+dependencies: + - name: linkerd-crds + version: 1.4.0 + repository: https://helm.linkerd.io/stable + - name: linkerd-control-plane + version: 1.9.4 + repository: https://helm.linkerd.io/stable diff --git a/roles/linkerd/control-plane/templates/linkerd_issuer.yml.j2 b/argocd/system/linkerd/templates/linkerd-identity-issuer.yaml similarity index 63% rename from roles/linkerd/control-plane/templates/linkerd_issuer.yml.j2 rename to argocd/system/linkerd/templates/linkerd-identity-issuer.yaml index a788d8fe..cc6e428e 100644 --- a/roles/linkerd/control-plane/templates/linkerd_issuer.yml.j2 +++ b/argocd/system/linkerd/templates/linkerd-identity-issuer.yaml @@ -1,15 +1,14 @@ ---- apiVersion: cert-manager.io/v1 kind: Certificate metadata: name: linkerd-identity-issuer - namespace: {{ k3s_linkerd_namespace }} + namespace: linkerd spec: secretName: linkerd-identity-issuer - duration: {{ issuer_certificate_duration }} - renewBefore: {{ issuer_certificate_renewbefore }} + duration: {{ .Values.identityIssuer.certificate.duration }} + renewBefore: {{ .Values.identityIssuer.certificate.renewBefore }} issuerRef: - name: ca-issuer + name: {{ .Values.identityIssuer.certificate.issuer }} kind: ClusterIssuer group: cert-manager.io commonName: identity.linkerd.cluster.local @@ -22,4 +21,4 @@ spec: - cert sign - crl sign - server auth - - client auth + - client auth \ No newline at end of file diff --git a/argocd/system/linkerd/templates/trust-bundle.yaml b/argocd/system/linkerd/templates/trust-bundle.yaml new file mode 100644 index 00000000..7b1576f8 --- /dev/null +++ b/argocd/system/linkerd/templates/trust-bundle.yaml @@ -0,0 +1,18 @@ +# Cert-manager TrustBundle +# Share CA root certificate as configMap in linkerd namespace + +apiVersion: trust.cert-manager.io/v1alpha1 +kind: Bundle +metadata: + name: linkerd-identity-trust-roots +spec: + sources: + - secret: + name: "root-secret" + key: "ca.crt" + target: + configMap: + key: "ca-bundle.crt" + namespaceSelector: + matchLabels: + 
kubernetes.io/metadata.name: {{ .Release.Namespace }} diff --git a/argocd/system/linkerd/values.yaml b/argocd/system/linkerd/values.yaml new file mode 100644 index 00000000..25e7eb73 --- /dev/null +++ b/argocd/system/linkerd/values.yaml @@ -0,0 +1,17 @@ + +# Identity Issuer certificate +identityIssuer: + certificate: + issuer: ca-issuer + renewBefore: 360h0m0s # 15d + duration: 2160h0m0s # 90d + +################################ +# linkerd-control-plane subchart +################################ + +linkerd-control-plane: + identity: + externalCA: true + issuer: + scheme: kubernetes.io/tls diff --git a/argocd/system/logging/Chart.yaml b/argocd/system/logging/Chart.yaml new file mode 100644 index 00000000..3f4b6f7d --- /dev/null +++ b/argocd/system/logging/Chart.yaml @@ -0,0 +1,19 @@ +apiVersion: v2 +name: logging +version: 0.0.0 +dependencies: + - name: eck-operator + version: 2.4.0 + repository: https://helm.elastic.co + - name: fluentd + version: 0.3.9 + repository: https://fluent.github.io/helm-charts + - name: fluent-bit + version: 0.21.0 + repository: https://fluent.github.io/helm-charts + - name: loki + version: 3.8.0 + repository: https://grafana.github.io/helm-charts + - name: prometheus-elasticsearch-exporter + version: 4.15.1 + repository: https://prometheus-community.github.io/helm-charts \ No newline at end of file diff --git a/argocd/system/logging/templates/elasticsearch-ingress.yaml b/argocd/system/logging/templates/elasticsearch-ingress.yaml new file mode 100644 index 00000000..40c3c444 --- /dev/null +++ b/argocd/system/logging/templates/elasticsearch-ingress.yaml @@ -0,0 +1,45 @@ +--- +apiVersion: traefik.containo.us/v1alpha1 +kind: Middleware +metadata: + name: l5d-header-middleware-elasticsearch + namespace: {{ .Release.Namespace }} +spec: + headers: + customRequestHeaders: + l5d-dst-override: {{ .Values.eck.clusterName }}-es-http.{{ .Release.Namespace }}.svc.cluster.local:9200 + +--- +# HTTPS Ingress +apiVersion: networking.k8s.io/v1 +kind: 
Ingress +metadata: + name: elasticsearch-ingress + namespace: {{ .Release.Namespace }} + annotations: + # HTTPS as entry point + traefik.ingress.kubernetes.io/router.entrypoints: websecure + # Enable TLS + traefik.ingress.kubernetes.io/router.tls: "true" + # Enable cert-manager to create automatically the SSL certificate and store in Secret + cert-manager.io/cluster-issuer: {{ .Values.elasticsearch.ingress.certmanager.tlsIssuer }}-issuer + cert-manager.io/common-name: {{ .Values.elasticsearch.ingress.host }} + # Linkerd header + traefik.ingress.kubernetes.io/router.middlewares: + {{ .Release.Namespace }}-l5d-header-middleware-elasticsearch@kubernetescrd +spec: + tls: + - hosts: + - {{ .Values.elasticsearch.ingress.host }} + secretName: elasticsearch-tls + rules: + - host: {{ .Values.elasticsearch.ingress.host }} + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: {{ .Values.eck.clusterName }}-es-http + port: + number: 9200 diff --git a/roles/logging/k3s/templates/elasticsearch.yml.j2 b/argocd/system/logging/templates/elasticsearch.yaml similarity index 55% rename from roles/logging/k3s/templates/elasticsearch.yml.j2 rename to argocd/system/logging/templates/elasticsearch.yaml index ba7cc93a..eb6a7e14 100644 --- a/roles/logging/k3s/templates/elasticsearch.yml.j2 +++ b/argocd/system/logging/templates/elasticsearch.yaml @@ -1,20 +1,19 @@ ---- apiVersion: elasticsearch.k8s.elastic.co/v1 kind: Elasticsearch metadata: - name: "{{ efk_cluster_name }}" - namespace: "{{ k3s_logging_namespace }}" + name: {{ .Values.eck.clusterName }} + namespace: {{ .Release.Namespace }} spec: - version: {{ efk_elasticsearch_version }} + version: {{ .Values.elasticsearch.version }} http: # Making elasticsearch service available from outisde the cluster tls: selfSignedCertificate: disabled: true nodeSets: - name: default - count: {{ efk_elasticsearch_nodes }} + count: {{ .Values.elasticsearch.clusterNodes }} config: - node.store.allow_mmap: {{ 
efk_elasticsearch_enable_mmap }} + node.store.allow_mmap: {{ .Values.elasticsearch.enableMmap }} volumeClaimTemplates: - metadata: name: elasticsearch-data @@ -23,20 +22,16 @@ spec: - ReadWriteOnce resources: requests: - storage: "{{ efk_elasticsearch_storage_size }}" - storageClassName: "{{ efk_elasticsearch_storage_class }}" - + storage: {{ .Values.elasticsearch.storage.size }} + storageClassName: {{ .Values.elasticsearch.storage.class }} podTemplate: spec: -{% if enable_linkerd is sameas true %} # Enabling service account token. linkerd requirement automountServiceAccountToken: true -{% endif %} +{{- if .Values.elasticsearch.podSpecResources }} # Limiting Resources consumption containers: - name: elasticsearch resources: - requests: - memory: {{ efk_elasticsearch_mem_limit }} - limits: - memory: {{ efk_elasticsearch_mem_limit }} +{{ toYaml .Values.elasticsearch.podSpecResources | indent 14 }} +{{- end }} \ No newline at end of file diff --git a/roles/logging/k3s/templates/fluentd_certificate.yml.j2 b/argocd/system/logging/templates/fluentd-certificate.yaml similarity index 51% rename from roles/logging/k3s/templates/fluentd_certificate.yml.j2 rename to argocd/system/logging/templates/fluentd-certificate.yaml index d12d0a2d..78173082 100644 --- a/roles/logging/k3s/templates/fluentd_certificate.yml.j2 +++ b/argocd/system/logging/templates/fluentd-certificate.yaml @@ -1,16 +1,14 @@ ---- apiVersion: cert-manager.io/v1 kind: Certificate metadata: name: fluentd-tls - namespace: "{{ k3s_logging_namespace }}" + namespace: {{ .Release.Namespace }} spec: # Secret names are always required. secretName: fluentd-tls - duration: 2160h # 90d - renewBefore: 360h # 15d - commonName: "{{ fluentd_dns }}" - isCA: false + duration: 2160h0m0s # 90d + renewBefore: 360h0m0s # 15d + commonName: {{ .Values.external.fluentd.dns }} privateKey: algorithm: ECDSA size: 256 @@ -19,18 +17,9 @@ spec: - client auth # At least one of a DNS Name, URI, or IP address is required. 
dnsNames: - - "{{ fluentd_dns }}" + - {{ .Values.external.fluentd.dns}} # ClusterIssuer: ca-issuer. issuerRef: name: ca-issuer kind: ClusterIssuer - group: cert-manager.io ---- -apiVersion: v1 -kind: Secret -metadata: - name: fluentd-shared-key - namespace: "{{ k3s_logging_namespace }}" -type: Opaque -data: - fluentd-shared-key: "{{ fluentd_shared_key | b64encode }}" + group: cert-manager.io \ No newline at end of file diff --git a/argocd/system/logging/templates/fluentd-externalsecret.yaml b/argocd/system/logging/templates/fluentd-externalsecret.yaml new file mode 100644 index 00000000..443006f5 --- /dev/null +++ b/argocd/system/logging/templates/fluentd-externalsecret.yaml @@ -0,0 +1,18 @@ +apiVersion: external-secrets.io/v1beta1 +kind: ExternalSecret +metadata: + name: fluentd-externalsecret + namespace: {{ .Release.Namespace }} +spec: + secretStoreRef: + name: vault-backend + kind: ClusterSecretStore + target: + name: fluentd-shared-key + data: + - secretKey: fluentd-shared-key + remoteRef: + key: logging/fluentd + property: shared_key + conversionStrategy: Default # ArgoCD sync issue + decodingStrategy: None # ArgoCD sync issue diff --git a/roles/logging/k3s/templates/fluentd_service_ext.yml.j2 b/argocd/system/logging/templates/fluentd-extservice.yaml similarity index 73% rename from roles/logging/k3s/templates/fluentd_service_ext.yml.j2 rename to argocd/system/logging/templates/fluentd-extservice.yaml index a1ee225a..9a2dc3d8 100644 --- a/roles/logging/k3s/templates/fluentd_service_ext.yml.j2 +++ b/argocd/system/logging/templates/fluentd-extservice.yaml @@ -1,12 +1,10 @@ ---- -#External Service apiVersion: v1 kind: Service metadata: labels: app: fluentd name: fluentd-ext - namespace: "{{ k3s_logging_namespace }}" + namespace: {{ .Release.Namespace }} spec: ports: - name: forward-ext @@ -18,4 +16,4 @@ spec: app.kubernetes.io/name: fluentd sessionAffinity: None type: LoadBalancer - loadBalancerIP: {{ k3s_fluentd_external_ip }} + loadBalancerIP: {{ 
.Values.external.fluentd.loadBalancerIp }} diff --git a/argocd/system/logging/templates/fluentd-hpa.yaml b/argocd/system/logging/templates/fluentd-hpa.yaml new file mode 100644 index 00000000..2ef9a1d0 --- /dev/null +++ b/argocd/system/logging/templates/fluentd-hpa.yaml @@ -0,0 +1,22 @@ +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + labels: + app.kubernetes.io/instance: logging + app.kubernetes.io/name: fluentd + name: logging-fluentd + namespace: {{ .Release.Namespace }} +spec: + maxReplicas: 100 + metrics: + - resource: + name: cpu + target: + averageUtilization: 80 + type: Utilization + type: Resource + minReplicas: 1 + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: logging-fluentd diff --git a/argocd/system/logging/templates/kibana-ingress.yaml b/argocd/system/logging/templates/kibana-ingress.yaml new file mode 100644 index 00000000..7fa8c7fd --- /dev/null +++ b/argocd/system/logging/templates/kibana-ingress.yaml @@ -0,0 +1,45 @@ +--- +# Traefik Middleware adding linkerd custom header +apiVersion: traefik.containo.us/v1alpha1 +kind: Middleware +metadata: + name: l5d-header-middleware-kibana + namespace: {{ .Release.Namespace }} +spec: + headers: + customRequestHeaders: + l5d-dst-override: {{ .Values.eck.clusterName }}-kb-http.{{ .Release.Namespace }}.svc.cluster.local:5601 +--- +# HTTPS Ingress +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: kibana-ingress + namespace: {{ .Release.Namespace }} + annotations: + # HTTPS as entry point + traefik.ingress.kubernetes.io/router.entrypoints: websecure + # Enable TLS + traefik.ingress.kubernetes.io/router.tls: "true" + # Enable cert-manager to create automatically the SSL certificate and store in Secret + cert-manager.io/cluster-issuer: {{ .Values.kibana.ingress.certmanager.tlsIssuer }}-issuer + cert-manager.io/common-name: {{ .Values.kibana.ingress.host }} + # Linkerd header + traefik.ingress.kubernetes.io/router.middlewares: + {{ .Release.Namespace 
}}-l5d-header-middleware-kibana@kubernetescrd +spec: + tls: + - hosts: + - {{ .Values.kibana.ingress.host }} + secretName: kibana-tls + rules: + - host: {{ .Values.kibana.ingress.host }} + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: {{ .Values.eck.clusterName }}-kb-http + port: + number: 5601 diff --git a/roles/logging/k3s/templates/kibana.yml.j2 b/argocd/system/logging/templates/kibana.yaml similarity index 56% rename from roles/logging/k3s/templates/kibana.yml.j2 rename to argocd/system/logging/templates/kibana.yaml index d26e614c..c89f99c2 100644 --- a/roles/logging/k3s/templates/kibana.yml.j2 +++ b/argocd/system/logging/templates/kibana.yaml @@ -1,30 +1,26 @@ ---- apiVersion: kibana.k8s.elastic.co/v1 kind: Kibana metadata: - name: "{{ efk_cluster_name }}" - namespace: "{{ k3s_logging_namespace }}" + name: {{ .Values.eck.clusterName }} + namespace: {{ .Release.Namespace }} spec: - version: {{ efk_elasticsearch_version }} + version: {{ .Values.elasticsearch.version }} count: 1 elasticsearchRef: - name: "{{ efk_cluster_name }}" + name: {{ .Values.eck.clusterName }} http: tls: selfSignedCertificate: disabled: true podTemplate: spec: -{% if enable_linkerd is sameas true %} # Enabling service account token. 
linkerd requirement automountServiceAccountToken: true -{% endif %} +{{- if .Values.kibana.podSpecResources }} # Limiting Resources consumption containers: - name: kibana resources: - requests: - memory: 1Gi - limits: - memory: 1Gi +{{ toYaml .Values.kibana.podSpecResources | indent 10 }} +{{- end }} diff --git a/argocd/system/logging/templates/logging-cm.yaml b/argocd/system/logging/templates/logging-cm.yaml new file mode 100644 index 00000000..3cbe0d7d --- /dev/null +++ b/argocd/system/logging/templates/logging-cm.yaml @@ -0,0 +1,10 @@ +# Logging internal services endpoints +apiVersion: v1 +kind: ConfigMap +metadata: + name: logging-stack-cm + namespace: {{ .Release.Namespace }} +data: + lokiURL: "http://loki-gateway" + elasticsearchHost: {{ .Values.eck.clusterName }}-es-http + fluentdHost: {{ .Release.Name }}-fluentd diff --git a/argocd/system/logging/templates/loki-externalsecret.yaml b/argocd/system/logging/templates/loki-externalsecret.yaml new file mode 100644 index 00000000..137adb76 --- /dev/null +++ b/argocd/system/logging/templates/loki-externalsecret.yaml @@ -0,0 +1,29 @@ +apiVersion: external-secrets.io/v1beta1 +kind: ExternalSecret +metadata: + name: minio-externalsecret + namespace: {{ .Release.Namespace }} +spec: + secretStoreRef: + name: vault-backend + kind: ClusterSecretStore + target: + name: loki-minio-secret + template: + engineVersion: v2 + data: + MINIO_ACCESS_KEY_ID: '{{ printf "{{ .user }}" }}' + MINIO_SECRET_ACCESS_KEY: '{{ printf "{{ .key }}" }}' + data: + - secretKey: user + remoteRef: + key: minio/loki + property: user + conversionStrategy: Default # ArgoCD sync issue + decodingStrategy: None # ArgoCD sync issue + - secretKey: key + remoteRef: + key: minio/loki + property: key + conversionStrategy: Default # ArgoCD sync issue + decodingStrategy: None # ArgoCD sync issue diff --git a/argocd/system/logging/templates/servicemonitor.yaml b/argocd/system/logging/templates/servicemonitor.yaml new file mode 100644 index 00000000..5bc090e9 --- 
/dev/null +++ b/argocd/system/logging/templates/servicemonitor.yaml @@ -0,0 +1,74 @@ +{{- if and .Values.serviceMonitor.enable (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/ServiceMonitor") }} +--- +# Elasticsearch ServiceMonitor +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + app: prometheus-elasticsearch-exporter + release: {{ .Values.serviceMonitor.release }} + name: elasticsearch-prometheus-servicemonitor + namespace: {{ .Release.Namespace }} +spec: + endpoints: + - port: http + path: /metrics + namespaceSelector: + matchNames: + - {{ .Release.Namespace }} + selector: + matchLabels: + app: prometheus-elasticsearch-exporter + +--- +# Fluentd ServiceMonitor +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + app: fluentd + release: {{ .Values.serviceMonitor.release }} + name: fluentd-prometheus-servicemonitor + namespace: {{ .Release.Namespace }} +spec: + jobLabel: app.kubernetes.io/name + endpoints: + - port: metrics + path: /metrics + namespaceSelector: + matchNames: + - {{ .Release.Namespace }} + selector: + matchLabels: + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/name: fluentd + +--- +# Fluent-bit ServiceMonitor +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + app: fluent-bit + release: {{ .Values.serviceMonitor.release }} + name: fluentbit-prometheus-servicemonitor + namespace: {{ .Release.Namespace }} +spec: + jobLabel: app.kubernetes.io/name + endpoints: + - path: /api/v1/metrics/prometheus + targetPort: 2020 + - params: + target: + - http://127.0.0.1:2020/api/v1/storage + path: /probe + targetPort: 7979 + namespaceSelector: + matchNames: + - {{ .Release.Namespace }} + selector: + matchLabels: + app.kubernetes.io/instance: {{ .Release.Name }} + app.kubernetes.io/name: fluent-bit + +{{- end }} \ No newline at end of file diff --git a/argocd/system/logging/values.yaml b/argocd/system/logging/values.yaml new file mode 100644 
index 00000000..ea5d7e55 --- /dev/null +++ b/argocd/system/logging/values.yaml @@ -0,0 +1,678 @@ + +# Elasticsearch configuration +eck: + # elasticsearch cluster name + clusterName: efk + +elasticsearch: + # elastic search version + version: 8.1.2 + # Number of Elastic Search nodes + clusterNodes: 1 + # Enable/Disable memory map + enableMmap: false + # Storage Settings + storage: + size: "5Gi" + class: "longhorn" + # Limiting resources of elasticsearch pod + podSpecResources: + requests: + memory: "1Gi" + limits: + memory: "1Gi" + + # Ingress configuration + ingress: + host: elasticsearch.picluster.ricsanfre.com + # configure cert-manager issuer + certmanager: + # tlsIssuer=letsecrypt to generate valid TLS certficiate using IONOS API + # tlsIssuer=ca to generate a CA-signed certificate (not valid) + tlsIssuer: letsencrypt + +# Kibana configuration +kibana: + # Limiting resources of kibana pod + # podSpecResources: + # requests: + # memory: "512Mi" + # limits: + # memory: "512Mi" + + # Ingress configuration + ingress: + host: kibana.picluster.ricsanfre.com + # configure cert-manager issuer + certmanager: + # tlsIssuer=letsecrypt to generate valid TLS certficiate using IONOS API + # tlsIssuer=ca to generate a CA-signed certificate (not valid) + tlsIssuer: letsencrypt + +# LoadBalancer and DNS config +external: + fluentd: + loadBalancerIp: 10.0.0.101 + dns: fluentd.picluster.ricsanfre.com + +# Prometheus monitoring +serviceMonitor: + enable: true + release: monitoring + + +################## +# Loki subchart +################## + +loki: + loki: + # Disable multi-tenant support + auth_enabled: false + + # S3 backend storage configuration + storage: + bucketNames: + chunks: k3s-loki + ruler: k3s-loki + type: s3 + s3: + endpoint: s3.picluster.ricsanfre.com:9091 + region: eu-west-1 + secretAccessKey: ${MINIO_SECRET_ACCESS_KEY} + accessKeyId: ${MINIO_ACCESS_KEY_ID} + s3ForcePathStyle: true + insecure: false + http_config: + idle_conn_timeout: 90s + response_header_timeout: 
0s + insecure_skip_verify: false + + # Configuration for the write + write: + # Number of replicas for the write + replicas: 2 + persistence: + # -- Size of persistent disk + size: 10Gi + # -- Storage class to be used. + storageClass: longhorn + + # Enable environment variables in config file + # https://grafana.com/docs/loki/latest/configuration/#use-environment-variables-in-the-configuration + extraArgs: + - '-config.expand-env=true' + extraEnv: + - name: MINIO_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: loki-minio-secret + key: MINIO_ACCESS_KEY_ID + - name: MINIO_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: loki-minio-secret + key: MINIO_SECRET_ACCESS_KEY + + # Configuration for the read + read: + # Number of replicas for the read + replicas: 2 + persistence: + # -- Size of persistent disk + size: 10Gi + # -- Storage class to be used. + storageClass: longhorn + + # Enable environment variables in config file + # https://grafana.com/docs/loki/latest/configuration/#use-environment-variables-in-the-configuration + extraArgs: + - '-config.expand-env=true' + extraEnv: + - name: MINIO_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: loki-minio-secret + key: MINIO_ACCESS_KEY_ID + - name: MINIO_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: loki-minio-secret + key: MINIO_SECRET_ACCESS_KEY + + # Configuration for the gateway + gateway: + # -- Specifies whether the gateway should be enabled + enabled: true + # -- Number of replicas for the gateway + replicas: 1 + + # Disable minio installation + minio: + enabled: false + + # Disable self-monitoring + monitoring: + selfMonitoring: + enabled: false + grafanaAgent: + installOperator: false + lokiCanary: + enabled: false + + # Disable helm-test + test: + enabled: false + +######################### +# Fluentd Subchart +######################### + +fluentd: + + # Fluentd image + image: + repository: "ricsanfre/fluentd-aggregator" + pullPolicy: "IfNotPresent" + tag: "v1.15.2-debian-1.0" + + # Deploy 
fluentd as deployment + kind: "Deployment" + # Number of replicas + replicaCount: 1 + # Enabling HPA + autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 100 + targetCPUUtilizationPercentage: 80 + + # Do not create serviceAccount and RBAC. Fluentd does not need to access the Kubernetes API. + serviceAccount: + create: false + rbac: + create: false + + ## Additional environment variables to set for fluentd pods + env: + # Path to fluentd conf file + - name: "FLUENTD_CONF" + value: "../../../etc/fluent/fluent.conf" + # Elastic operator creates elastic service name with format cluster_name-es-http + - name: FLUENT_ELASTICSEARCH_HOST + valueFrom: + configMapKeyRef: + name: "logging-stack-cm" + key: elasticsearchHost + # value: efk-es-http + # Default elasticsearch port + - name: FLUENT_ELASTICSEARCH_PORT + value: "9200" + # Elasticsearch user + - name: FLUENT_ELASTICSEARCH_USER + value: "elastic" + # Elastic operator stores elastic user password in a secret + - name: FLUENT_ELASTICSEARCH_PASSWORD + valueFrom: + secretKeyRef: + name: "efk-es-elastic-user" + key: elastic + # Setting an index-prefix for fluentd. 
By default index is logstash + - name: FLUENT_ELASTICSEARCH_LOGSTASH_PREFIX + value: fluentd + - name: FLUENT_ELASTICSEARCH_LOG_ES_400_REASON + value: "true" + # Fluentd forward security: shared key taken from the fluentd-shared-key secret + - name: FLUENTD_FORWARD_SEC_SHARED_KEY + valueFrom: + secretKeyRef: + name: fluentd-shared-key + key: fluentd-shared-key + # Loki URL, read from the logging-stack-cm ConfigMap + - name: LOKI_URL + valueFrom: + configMapKeyRef: + name: "logging-stack-cm" + key: lokiURL + # value: "http://loki-gateway" + # Loki username (empty string here) + - name: LOKI_USERNAME + value: "" + # Loki password (empty string here) + - name: LOKI_PASSWORD + value: "" + + # Volumes and VolumeMounts (only configuration files and certificates). NOTE(review): defaultMode 0777 is octal only under YAML 1.1 parsing - confirm the consuming parser + volumes: + - name: etcfluentd-main + configMap: + name: fluentd-main + defaultMode: 0777 + - name: etcfluentd-config + configMap: + name: fluentd-config + defaultMode: 0777 + - name: fluentd-tls + secret: + secretName: fluentd-tls + + volumeMounts: + - name: etcfluentd-main + mountPath: /etc/fluent + - name: etcfluentd-config + mountPath: /etc/fluent/config.d/ + - mountPath: /etc/fluent/certs + name: fluentd-tls + readOnly: true + + # Service. 
Exporting forwarder port (Metric already exposed by chart) + service: + type: "ClusterIP" + annotations: {} + ports: + - name: forwarder + protocol: TCP + containerPort: 24224 + + ## Fluentd list of plugins to install + ## + plugins: [] + # - fluent-plugin-out-http + + ## Do not create additional config maps + ## + configMapConfigs: [] + + ## Fluentd configurations: + ## + fileConfigs: + 01_sources.conf: |- + ## logs from fluentbit forwarders + + @type forward + @label @FORWARD + bind "#{ENV['FLUENTD_FORWARD_BIND'] || '0.0.0.0'}" + port "#{ENV['FLUENTD_FORWARD_PORT'] || '24224'}" + # Enabling TLS + + cert_path /etc/fluent/certs/tls.crt + private_key_path /etc/fluent/certs/tls.key + + # Enabling access security + + self_hostname "#{ENV['FLUENTD_FORWARD_SEC_SELFHOSTNAME'] || 'fluentd-aggregator'}" + shared_key "#{ENV['FLUENTD_FORWARD_SEC_SHARED_KEY'] || 'sharedkey'}" + + + ## Enable Prometheus end point + + @type prometheus + @id in_prometheus + bind "0.0.0.0" + port 24231 + metrics_path "/metrics" + + + @type prometheus_monitor + @id in_prometheus_monitor + + + @type prometheus_output_monitor + @id in_prometheus_output_monitor + + 02_filters.conf: |- + + 03_dispatch.conf: |- + + 04_outputs.conf: |- + + + +######################### +# Fluent-bit configuration +######################### + +fluent-bit: + + # fluentbit-container environment variables: + env: + # Fluentd deployment service + - name: FLUENT_AGGREGATOR_HOST + # value: "fluentd" + valueFrom: + configMapKeyRef: + name: "logging-stack-cm" + key: fluentdHost + # Default fluentd forward port + - name: FLUENT_AGGREGATOR_PORT + value: "24224" + - name: FLUENT_AGGREGATOR_SHARED_KEY + valueFrom: + secretKeyRef: + name: fluentd-shared-key + key: fluentd-shared-key + - name: FLUENT_SELFHOSTNAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + # Specify TZ + - name: TZ + value: "Europe/Madrid" + # Fluentbit config + config: + # Helm chart combines service, inputs, outputs, custom_parsers and filters sections + # 
fluent-bit.config SERVICE + service: | + + [SERVICE] + Daemon Off + Flush 1 + Log_Level info + Parsers_File parsers.conf + Parsers_File custom_parsers.conf + HTTP_Server On + HTTP_Listen 0.0.0.0 + HTTP_Port 2020 + Health_Check On + storage.path /var/log/fluentbit/storage + storage.sync normal + storage.checksum off + storage.backlog.mem_limit 5M + storage.metrics on + + # fluent-bit.config INPUT: + inputs: | + + [INPUT] + Name tail + Alias input.kube + Path /var/log/containers/*.log + Path_Key filename + multiline.parser docker, cri + DB /var/log/fluentbit/flb_kube.db + Tag kube.* + Mem_Buf_Limit 5MB + storage.type filesystem + Skip_Long_Lines On + + [INPUT] + Name tail + Alias input.host + Tag host.* + DB /var/log/fluentbit/flb_host.db + Path /var/log/auth.log,/var/log/syslog + Path_Key filename + Mem_Buf_Limit 5MB + storage.type filesystem + Parser syslog-rfc3164-nopri + + # fluent-bit.config OUTPUT + outputs: | + + [OUTPUT] + Name forward + Alias output.aggregator + match * + Host ${FLUENT_AGGREGATOR_HOST} + Port ${FLUENT_AGGREGATOR_PORT} + Self_Hostname ${FLUENT_SELFHOSTNAME} + Shared_Key ${FLUENT_AGGREGATOR_SHARED_KEY} + tls On + tls.verify Off + + # fluent-bit.config PARSERS: + customParsers: | + + [PARSER] + Name syslog-rfc3164-nopri + Format regex + Regex /^(?