diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 281fadc4..81c0aa83 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -25,4 +25,5 @@ jobs:
run: pip3 install yamllint
- name: Lint all the YAMLs.
+ working-directory: ./ansible
run: yamllint .
diff --git a/.gitignore b/.gitignore
index 6107e18b..56a3f031 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
-roles/ricsanfre.*
-ansible_collections
-certificates
-docs/_site
+/ansible/roles/ricsanfre.*
+/ansible/ansible_collections
+/certbot
+/certificates
+/docs/_site
diff --git a/Makefile b/Makefile
new file mode 100644
index 00000000..7e0f0cb7
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,92 @@
+.EXPORT_ALL_VARIABLES:
+
+GPG_EMAIL=ricsanfre@gmail.com
+GPG_NAME=Ricardo Sanchez
+
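+# Typical workflow (a sketch; set GPG_EMAIL/GPG_NAME and adjust the Ansible inventory first):
+#   make prepare-ansible   # Ansible requirements, GPG key, vault passphrase and credentials
+#   make init              # OS setup, external services, K3S install and cluster bootstrap
+#   make clean             # reset K3S and external services
+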
+.PHONY: default
+default: clean
+
+.PHONY: prepare-ansible
+prepare-ansible: install-ansible-requirements gpg-init ~/.vault/vault_passphrase.gpg ansible-credentials
+
+.PHONY: clean
+clean: k3s-reset external-services-reset
+
+.PHONY: init
+init: os-upgrade gateway-setup nodes-setup external-services configure-os-backup k3s-install k3s-bootstrap configure-monitoring-gateway
+
+.PHONY: install-ansible-requirements
+install-ansible-requirements: # install Ansible requirements
+ cd ansible && ansible-galaxy install -r requirements.yml
+
+.PHONY: install-ansible-requirements-force
+install-ansible-requirements-force: # force reinstall of Ansible requirements
+ cd ansible && ansible-galaxy install -r requirements.yml --force
+
+.PHONY: gpg-init
+gpg-init:
+ scripts/generate_gpg_key.sh
+
+~/.vault/vault_passphrase.gpg: # Ansible vault gpg password
+ mkdir -p ~/.vault
+ pwgen -n 71 -C | head -n1 | gpg --armor --recipient ${GPG_EMAIL} -e -o ~/.vault/vault_passphrase.gpg
+
+.PHONY: ansible-credentials
+ansible-credentials: ~/.vault/vault_passphrase.gpg install-ansible-requirements
+ cd ansible && ansible-playbook create_vault_credentials.yml
+
+.PHONY: os-upgrade
+os-upgrade:
+ cd ansible && ansible-playbook update.yml
+
+.PHONY: gateway-setup
+gateway-setup:
+ cd ansible && ansible-playbook setup_picluster.yml --tags "gateway"
+
+.PHONY: nodes-setup
+nodes-setup:
+ cd ansible && ansible-playbook setup_picluster.yml --tags "nodes"
+
+.PHONY: external-services
+external-services:
+ cd ansible && ansible-playbook external_services.yml
+
+.PHONY: configure-os-backup
+configure-os-backup:
+ cd ansible && ansible-playbook backup_configuration.yml
+
+.PHONY: configure-monitoring-gateway
+configure-monitoring-gateway:
+ cd ansible && ansible-playbook deploy_monitoring_agent.yml
+
+.PHONY: os-backup
+os-backup:
+ cd ansible && ansible -b -m shell -a 'systemctl start restic-backup' raspberrypi
+
+.PHONY: k3s-install
+k3s-install:
+ cd ansible && ansible-playbook k3s_install.yml
+
+.PHONY: k3s-bootstrap
+k3s-bootstrap:
+ cd ansible && ansible-playbook k3s_bootstrap.yml
+
+.PHONY: k3s-reset
+k3s-reset:
+ cd ansible && ansible-playbook k3s_reset.yml
+
+.PHONY: external-services-reset
+external-services-reset:
+ cd ansible && ansible-playbook reset_external_services.yml
+
+.PHONY: shutdown-k3s-worker
+shutdown-k3s-worker:
+ cd ansible && ansible -b -m shell -a "shutdown -h 1 min" k3s_worker
+
+.PHONY: shutdown-k3s-master
+shutdown-k3s-master:
+ cd ansible && ansible -b -m shell -a "shutdown -h 1 min" k3s_master
+
+.PHONY: shutdown-gateway
+shutdown-gateway:
+ cd ansible && ansible -b -m shell -a "shutdown -h 1 min" gateway
diff --git a/README.md b/README.md
index ba3208de..79542350 100644
--- a/README.md
+++ b/README.md
@@ -9,25 +9,206 @@
-## **K3S Kubernetes Cluster using bare metal ARM-based nodes (Raspberry-PIs) automated with Ansible**
+**K3S Kubernetes Cluster using bare metal ARM-based nodes (Raspberry-PIs) automated with Ansible and ArgoCD**
-This is an educational project to explore kubernetes cluster configurations using an ARM architecture and its automation using Ansible.
+This is an educational project to explore Kubernetes cluster configurations using an ARM architecture and apply IaC (Infrastructure as Code) and GitOps methodologies to automate its provisioning and management.
The entire process for creating this cluster at home, from cluster design and architecture to step-by-step manual configuration guides, has been documented and it is published in the project website: https://picluster.ricsanfre.com.
-This repository contains the Ansible's source code (playbooks/roles) and Cloud-init's configuration files used for automated all manual tasks described in the documentation.
-The cluster can be re-deployed in minutes as many times as needed for testing new cluster configurations, new software versions or just take you out of any mesh you could cause playing with the cluster.
+This repository contains all source code used to automate all manual tasks described in the documentation: Cloud-init's configuration files, Ansible's source code (playbooks/roles), and packaged Kubernetes applications (helm and kustomize) to be deployed using ArgoCD.
+
+Since its deployment is completely automated, the cluster can be re-deployed in minutes as many times as needed: to test new cluster configurations or new software versions, or simply to recover from any mess you might cause while playing with the cluster.
## Scope
-Automatically deploy and configure a lightweight Kubernetes flavor based on [K3S](https://k3s.io/) and deploy cluster basic services such as: 1) distributed block storage for POD's persistent volumes, [LongHorn](https://longhorn.io/), 2) backup/restore solution for the cluster, [Velero](https://velero.io/) and [Restic](https://restic.net/), 3) service mesh architecture, [Linkerd](https://linkerd.io/), and 4) observability platform based on metrics monitoring solution, [Prometheus](https://prometheus.io/), logging and analytics solution, EFḰ+LG stack ([Elasticsearch](https://www.elastic.co/elasticsearch/)-[Fluentd](https://www.fluentd.org/)/[Fluentbit](https://fluentbit.io/)-[Kibana](https://www.elastic.co/kibana/) + [Loki](https://grafana.com/oss/loki/)-[Grafana](https://grafana.com/oss/grafana/)), and distributed tracing solution, [Tempo](https://grafana.com/oss/tempo/).
+The scope of this project is to create a Kubernetes cluster at home using **Raspberry Pis** and to automate its deployment and configuration applying **IaC (Infrastructure as Code)** and **GitOps** methodologies with tools like [Ansible](https://docs.ansible.com/), [cloud-init](https://cloudinit.readthedocs.io/en/latest/) and [Argo CD](https://argo-cd.readthedocs.io/en/stable/).
+
+As part of the project, the goal is to use a lightweight Kubernetes flavor based on [K3S](https://k3s.io/) and deploy cluster basic services such as: 1) distributed block storage for POD's persistent volumes, [LongHorn](https://longhorn.io/), 2) backup/restore solution for the cluster, [Velero](https://velero.io/) and [Restic](https://restic.net/), 3) service mesh architecture, [Linkerd](https://linkerd.io/), and 4) observability platform based on metrics monitoring solution, [Prometheus](https://prometheus.io/), logging and analytics solution, EFK+LG stack ([Elasticsearch](https://www.elastic.co/elasticsearch/)-[Fluentd](https://www.fluentd.org/)/[Fluentbit](https://fluentbit.io/)-[Kibana](https://www.elastic.co/kibana/) + [Loki](https://grafana.com/oss/loki/)-[Grafana](https://grafana.com/oss/grafana/)), and distributed tracing solution, [Tempo](https://grafana.com/oss/tempo/).
+
+## Technology Stack
-The following picture shows the set of opensource solutions used so far in the cluster, which installation process has been documented and its deployment has been automated with Ansible:
+The following picture shows the set of open-source solutions used so far in the cluster, whose installation has been documented and whose deployment has been automated with Ansible/ArgoCD:
+
+
+## External Resources and Services
+
+Even when the premise is to deploy all services in the Kubernetes cluster, there is still a need for a few external services/resources. Below is a list of those external resources/services and why we need them.
+
+### Cloud external services
+
+
+| |Provider | Resource | Purpose |
+| --- | --- | --- | --- |
+| | [Letsencrypt](https://letsencrypt.org/) | TLS CA Authority | Signed valid TLS certificates |
+| |[IONOS](https://www.ionos.es/) | DNS | DNS and [DNS-01 challenge](https://letsencrypt.org/docs/challenge-types/#dns-01-challenge) for certificates |
+
+> **NOTE:** These resources are optional; the homelab still works without them, but it won't have trusted TLS certificates.
+
+**Alternatives:**
+
+1. Use a private PKI (custom CA to sign certificates).
+
+ Currently supported. Only minor changes are required. See details in [Doc: Quick Start instructions](https://picluster.ricsanfre.com/docs/ansible).
+
+2. Use other DNS provider.
+
+ Cert-manager / Certbot, which are used to automatically obtain certificates from Let's Encrypt, can work with other DNS providers. This will require further modifications to the way the cert-manager application is deployed (new providers and/or webhooks/plugins might be required).
+
+ Currently only the ACME issuer (Let's Encrypt) using IONOS as DNS-01 challenge provider is configured. Check the list of [supported dns01 providers](https://cert-manager.io/docs/configuration/acme/dns01/#supported-dns01-providers).
+
+### Self-hosted external services
+
+There is another set of services that I have decided to run outside the Kubernetes cluster, but without using any cloud service. These services currently run on the same cluster nodes (gateway and node1), but as bare-metal services.
+
+| |External Service | Resource | Purpose |
+| --- | --- | --- | --- |
+| |[Minio](https://min.io) | S3 Object Store | Cluster Backup |
+| |[Hashicorp Vault](https://www.vaultproject.io/) | Secrets Management | Cluster secrets management |
+
+
## Cluster architecture and hardware
Home lab architecture, showed in the picture below, consist of a Kubernetes cluster of 5 nodes (1 master and 4 workers) and a firewall, built with another Raspberry PI, to isolate cluster network from your home network.
@@ -47,7 +228,7 @@ The content of this website and the source code to build it (Jekyll static based
## Usage
-Check out the documentation [Quick Start guide](http://picluster.ricsanfre.com/docs/ansible/) to know how to use and tweak cloud-init files (`/cloud-init` folder) and Ansible playbooks contained in this repository.
+Check out the documentation [Quick Start guide](http://picluster.ricsanfre.com/docs/ansible/) to know how to use and tweak cloud-init files (`/cloud-init` folder), Ansible playbooks (`/ansible` folder) and packaged Kubernetes applications (`/argocd` folder) contained in this repository, so you can use it for your own homelab.
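+
+As a reference, the `Makefile` in the repository root wraps the most common commands. A minimal sketch of a full deployment, assuming the inventory, cloud-init files and vault variables have already been adapted to your environment:
+
+```shell
+# Prepare the Ansible control node: requirements, GPG key and encrypted vault credentials
+make prepare-ansible
+# Configure gateway and nodes, external services (Minio, Vault), install K3S and bootstrap Argo CD
+make init
+# Reset the K3S cluster and external services to start over
+make clean
+```
+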
## About the Project
diff --git a/ansible/.vault/vault_pass.sh b/ansible/.vault/vault_pass.sh
new file mode 100755
index 00000000..82608241
--- /dev/null
+++ b/ansible/.vault/vault_pass.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
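+# Used by ansible.cfg (vault_password_file) to print the Ansible Vault passphrase,
+# decrypting the GPG-encrypted file created via 'make prepare-ansible'.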
+gpg --batch --use-agent --decrypt $HOME/.vault/vault_passphrase.gpg
diff --git a/.yamllint b/ansible/.yamllint
similarity index 100%
rename from .yamllint
rename to ansible/.yamllint
diff --git a/ansible.cfg b/ansible/ansible.cfg
similarity index 85%
rename from ansible.cfg
rename to ansible/ansible.cfg
index d75731c8..5f89d724 100644
--- a/ansible.cfg
+++ b/ansible/ansible.cfg
@@ -11,3 +11,5 @@ collections_path = ./
host_key_checking = false
# SSH key
private_key_file = $HOME/.ssh/ansible-ssh-key.pem
+# Vault password
+vault_password_file=./.vault/vault_pass.sh
diff --git a/ansible/backup_configuration.yml b/ansible/backup_configuration.yml
new file mode 100644
index 00000000..c5f8edd8
--- /dev/null
+++ b/ansible/backup_configuration.yml
@@ -0,0 +1,24 @@
+---
+
+- name: Configure Pi-cluster nodes backup
+ hosts: raspberrypi
+ gather_facts: true
+ tags: [backup]
+ become: true
+ pre_tasks:
+ - name: Include vault variables
+ include_vars: "vars/vault.yml"
+ # Include picluster variables
+ - name: Include picluster variables
+ include_vars: "vars/picluster.yml"
+ - name: Load CA certificate for restic
+ set_fact:
+ restic_ca_cert: "{{ lookup('file','certificates/CA.pem') }}"
+ when: not enable_letsencrypt
+ - name: Do not use CA certificate
+ set_fact:
+ restic_use_ca_cert: false
+ when: enable_letsencrypt
+ roles:
+ - role: ricsanfre.backup
+ tags: [backup]
diff --git a/ansible/create_vault_credentials.yml b/ansible/create_vault_credentials.yml
new file mode 100644
index 00000000..37a866e0
--- /dev/null
+++ b/ansible/create_vault_credentials.yml
@@ -0,0 +1,62 @@
+---
+
+- name: Generate vault variables file
+ hosts: localhost
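+ # Run from the ansible/ folder (e.g. via 'make ansible-credentials'): it renders vars/vault.yml.j2
+ # with the prompted and randomly generated secrets and encrypts the result with Ansible Vault.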
+
+ vars_prompt:
+ - name: ionos_public_prefix
+ prompt: Enter IONOS public prefix
+ private: true
+ - name: ionos_secret
+ prompt: Enter IONOS secret
+ private: true
+
+ pre_tasks:
+ - name: Ask for SAN centralized credentials
+ when: centralized_san
+ block:
+ - name: Ask for SAN iscsi credentials 1/2
+ pause:
+ prompt: "Enter iSCSI node password: "
+ echo: false
+ register: prompt
+ - name: Set iSCSI node password variable
+ set_fact:
+ san_iscsi_node_pass: "{{ prompt.user_input }}"
+ no_log: true
+ - name: Ask for SAN iscsi credentials 2/2
+ pause:
+ prompt: "Enter iSCSI mutual password: "
+ echo: false
+ register: prompt
+ - name: Set iSCSI mutual password variable
+ set_fact:
+ san_iscsi_mutual_pass: "{{ prompt.user_input }}"
+ no_log: true
+
+ tasks:
+
+ - name: Create random passwords
+ ansible.builtin.set_fact:
+ "{{ item }}": "{{ lookup('ansible.builtin.password', '/dev/null chars=ascii_letters,digits' ) }}"
+ with_items:
+ - k3s_token
+ - minio_root_password
+ - minio_restic_password
+ - minio_longhorn_password
+ - minio_velero_password
+ - minio_loki_password
+ - minio_tempo_password
+ - traefik_basic_auth_password
+ - fluentd_shared_key
+ - grafana_admin_password
+ - elasticsearch_admin_password
+
+ - name: Generate vault file
+ ansible.builtin.template:
+ src: vars/vault.yml.j2
+ dest: vars/vault.yml
+
+ - name: Encrypt file
+ ansible.builtin.command:
+ cmd: ansible-vault encrypt --vault-password-file=./.vault/vault_pass.sh vars/vault.yml
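+ # The encrypted file can later be edited with 'ansible-vault edit vars/vault.yml'
+ # (the vault password script is already configured in ansible.cfg).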
diff --git a/ansible/deploy_monitoring_agent.yml b/ansible/deploy_monitoring_agent.yml
new file mode 100644
index 00000000..972cc0a7
--- /dev/null
+++ b/ansible/deploy_monitoring_agent.yml
@@ -0,0 +1,19 @@
+---
+# Deploy fluentbit to get logs and prometheus metrics
+- name: Deploy fluentbit on control nodes (gateway and pimaster)
+ hosts: gateway
+ gather_facts: true
+ tags: [logging]
+ become: true
+ pre_tasks:
+ # Include vault variables
+ - name: Include vault variables
+ include_vars: "vars/vault.yml"
+ tags: ["always"]
+ # Include picluster variables
+ - name: Include picluster variables
+ include_vars: "vars/picluster.yml"
+ tags: ["always"]
+ roles:
+ - role: logging/external_node
+ tags: ['logging']
diff --git a/ansible/external_services.yml b/ansible/external_services.yml
new file mode 100644
index 00000000..1c51fcb6
--- /dev/null
+++ b/ansible/external_services.yml
@@ -0,0 +1,290 @@
+---
+## Generate TLS certificates for external services
+## Generated using certbot (letsencrypt) or selfsigned certificates
+- name: Generate external services certificates
+ hosts: localhost
+ gather_facts: true
+ tags: [certificates]
+ vars:
+ certbot_venv: "{{ playbook_dir }}/../certbot"
+ certbot_email: "{{ acme_issuer_email }}"
+ dns_ionos_prefix: "{{ vault.certmanager.ionos.public_prefix }}"
+ dns_ionos_secret: "{{ vault.certmanager.ionos.secret }}"
+ selfsigned_certificates_path: "../certificates"
+ pre_tasks:
+ # Include vault variables
+ - name: Include vault variables
+ include_vars: "vars/vault.yml"
+ # Include picluster variables
+ - name: Include picluster variables
+ include_vars: "vars/picluster.yml"
+ roles:
+ - role: certbot
+ vars:
+ - install_python_packages: false
+ when: enable_letsencrypt
+
+ tasks:
+ - name: Create Let's Encrypt certificate for external services
+ command: "{{ certbot_venv }}/bin/certbot-create-cert.sh {{ item }}"
+ register: certbot_create
+ changed_when:
+ - certbot_create.rc==0
+ - '"Certificate not yet due for renewal; no action taken." not in certbot_create.stdout'
+ when: enable_letsencrypt
+ with_items:
+ - "{{ minio_hostname }}"
+ - "{{ vault_hostname }}"
+
+ - name: Create customCA-signed TLS certificates for external services
+ when: not enable_letsencrypt
+ block:
+ # Generate self-signed certificates directory
+ - name: Create certificates directory
+ file:
+ path: "{{ selfsigned_certificates_path }}"
+ state: directory
+ mode: 0750
+ # Include selfsigned certificates variables
+ - name: Include selfsigned certificates variables
+ include_vars: "vars/selfsigned-certificates.yml"
+ # Generate custom CA
+ - name: Generate custom CA
+ include_tasks: tasks/generate_custom_ca.yml
+ args:
+ apply:
+ delegate_to: localhost
+ become: false
+ # Generate selfsigned TLS certificate
+ - name: Generate customCA-signed SSL certificates
+ include_tasks: tasks/generate_ca_signed_cert.yml
+ args:
+ apply:
+ delegate_to: localhost
+ become: false
+ loop:
+ - "{{ minio_hostname }}"
+ - "{{ vault_hostname }}"
+ loop_control:
+ loop_var: server_hostname
+
+## Install Minio S3 Storage Server
+
+- name: Install Minio S3 Storage Server
+ hosts: node1
+ gather_facts: true
+ tags: [s3]
+ become: true
+ vars:
+ server_hostname: "{{ minio_hostname }}"
+ certbot_venv: "{{ playbook_dir }}/../certbot"
+ selfsigned_certificates_path: "../certificates"
+ pre_tasks:
+ # Include vault variables
+ - name: Include vault variables
+ include_vars: "vars/vault.yml"
+ # Include picluster variables
+ - name: Include picluster variables
+ include_vars: "vars/picluster.yml"
+
+ - name: Load customCA-signed TLS certificate for minio
+ set_fact:
+ minio_key: "{{ lookup('file',selfsigned_certificates_path + '/' + server_hostname + '.key') }}"
+ minio_cert: "{{ lookup('file',selfsigned_certificates_path + '/' + server_hostname + '.pem') }}"
+ when: not enable_letsencrypt
+
+ - name: Get letsencrypt TLS certificate for minio
+ block:
+ - name: Check TLS certificate for minio exists
+ command: "{{ certbot_venv }}/bin/certbot-wrapper.sh certificates -d {{ minio_hostname }}"
+ register: certbot_certificates
+ delegate_to: localhost
+ become: false
+ changed_when: false
+ failed_when:
+ - '"Certificate Name: " + minio_hostname not in certbot_certificates.stdout'
+ - name: Get certificate and key paths for minio
+ set_fact:
+ cert_path: "{{ certbot_certificates.stdout | regex_search(regexp1,'\\1') }}"
+ cert_key_path: "{{ certbot_certificates.stdout | regex_search(regexp2,'\\1') }}"
+ vars:
+ regexp1: 'Certificate Path: (\S+)'
+ regexp2: 'Private Key Path: (\S+)'
+ when:
+ - certbot_certificates.rc==0
+ - '"Certificate Name: " + minio_hostname in certbot_certificates.stdout'
+
+ - name: Load tls key and cert
+ set_fact:
+ minio_key: "{{ lookup('file', cert_key_path[0] ) }}"
+ minio_cert: "{{ lookup('file', cert_path[0] ) }}"
+ when: enable_letsencrypt
+ roles:
+ - role: ricsanfre.minio
+
+## Install Hashicorp Vault Server
+
+- name: Install Vault Server
+ hosts: gateway
+ gather_facts: true
+ tags: [vault]
+ become: true
+ vars:
+ server_hostname: "{{ vault_hostname }}"
+ certbot_venv: "{{ playbook_dir }}/../certbot"
+ selfsigned_certificates_path: "../certificates"
+
+ pre_tasks:
+ # Include vault variables
+ - name: Include vault variables
+ include_vars: "vars/vault.yml"
+
+ # Include picluster variables
+ - name: Include picluster variables
+ include_vars: "vars/picluster.yml"
+
+ - name: Load customCA-signed TLS certificate for vault
+ set_fact:
+ vault_key: "{{ lookup('file',selfsigned_certificates_path + '/' + server_hostname + '.key') }}"
+ vault_cert: "{{ lookup('file',selfsigned_certificates_path + '/' + server_hostname + '.pem') }}"
+ vault_ca: "{{ lookup('file',selfsigned_certificates_path + '/CA.pem') }}"
+ when: not enable_letsencrypt
+
+ - name: Get letsencrypt TLS certificate for vault
+ block:
+ - name: Check TLS certificate for vault exists
+ command: "{{ certbot_venv }}/bin/certbot-wrapper.sh certificates -d {{ vault_hostname }}"
+ register: certbot_certificates
+ delegate_to: localhost
+ become: false
+ changed_when: false
+ failed_when:
+ - '"Certificate Name: " + vault_hostname not in certbot_certificates.stdout'
+ - name: Get certificate and key paths for vault
+ set_fact:
+ cert_path: "{{ certbot_certificates.stdout | regex_search(regexp1,'\\1') }}"
+ cert_key_path: "{{ certbot_certificates.stdout | regex_search(regexp2,'\\1') }}"
+ vars:
+ regexp1: 'Certificate Path: (\S+)'
+ regexp2: 'Private Key Path: (\S+)'
+ when:
+ - certbot_certificates.rc==0
+ - '"Certificate Name: " + vault_hostname in certbot_certificates.stdout'
+
+ - name: Load tls key and cert
+ set_fact:
+ vault_key: "{{ lookup('file', cert_key_path[0] ) }}"
+ vault_cert: "{{ lookup('file', cert_path[0] ) }}"
+ when: enable_letsencrypt
+ roles:
+ - role: ricsanfre.vault
+
+ tasks:
+ # Configure ansible user profile with VAULT environment variables
+ - name: Insert http(s) export in dotfile
+ become: true
+ lineinfile:
+ path: ~{{ ansible_user }}/.bashrc
+ regexp: "^export VAULT_ADDR="
+ line: "export VAULT_ADDR='https://{{ vault_hostname }}:8200'"
+ owner: "{{ ansible_user }}"
+ create: true
+ mode: 0644
+ - name: Insert CA cert export in dotfile
+ become: true
+ lineinfile:
+ path: ~{{ ansible_user }}/.bashrc
+ regexp: "^export VAULT_CACERT="
+ line: "export VAULT_CACERT=/etc/vault/tls/vault-ca.crt"
+ owner: "{{ ansible_user }}"
+ create: true
+ mode: 0644
+ when: custom_ca
+
+ - name: Insert VAULT_TOKEN in dotfile
+ become: true
+ lineinfile:
+ path: ~{{ ansible_user }}/.bashrc
+ regexp: "^export VAULT_TOKEN="
+ line: "export VAULT_TOKEN=$(sudo jq -r '.root_token' /etc/vault/unseal.json)"
+ owner: "{{ ansible_user }}"
+ create: true
+ mode: 0644
+
+## Load all credentials into Hashicorp Vault Server
+- name: Load Vault Credentials
+ hosts: gateway
+ gather_facts: true
+ tags: [vault, credentials]
+ become: false
+ pre_tasks:
+ # Include vault variables
+ - name: Include vault variables
+ include_vars: "vars/vault.yml"
+
+ # Include picluster variables
+ - name: Include picluster variables
+ include_vars: "vars/picluster.yml"
+
+ # Install hashicorp vault collection dependencies (hvac python package) using PIP.
+ - name: Ensure PIP is installed.
+ package:
+ name:
+ - python3-pip
+ - python3-setuptools
+ state: present
+ become: true
+ - name: Ensure hvac Python library is installed.
+ pip:
+ name: hvac
+ state: present
+ become: true
+ tasks:
+
+ # Vault login using Ansible environment variables to create the token file.
+ # The token file will be used in the next commands.
+ - name: Vault login
+ ansible.builtin.shell: bash -ilc 'vault login $VAULT_TOKEN'
+ # Interactive bash so .bashrc is loaded
+ # 'source ~/.bashrc && ' does not work because
+ # Ansible's shell is not interactive and the default ~/.bashrc implementation ignores non-interactive shells.
+ # See lines at beginning of bashrc:
+ #
+ # If not running interactively, don't do anything
+ # case $- in
+ # *i*) ;;
+ # *) return;;
+ # esac
+ # The best solution for executing commands as the user after an interactive SSH login is:
+ # bash -ilc ''
+ # '-i' means interactive shell, so .bashrc won't be ignored; '-l' means login shell, which sources the full user profile
+ become: false
+ register: vault_login
+
+ # Create write token
+ - name: Create KV write token
+ community.hashi_vault.vault_token_create:
+ url: "https://{{ vault_dns }}:8200"
+ policies: ["write"]
+ register: token_data
+ become: false
+ # Load ansible vault variables into Hashicorp Vault Server
+ - name: Load vault credentials
+ include_tasks:
+ file: tasks/load_vault_credentials.yml
+ loop: "{{ vault | dict2items }}"
+ loop_control:
+ loop_var: secret_group
+ when:
+ - vault is defined
+ no_log: true
+
+ - name: Load http auth_basic credentials
+ include_tasks:
+ file: tasks/create_basic_auth_credentials.yml
+ no_log: true
+
+ - name: Load minio prometheus bearer credentials
+ include_tasks:
+ file: tasks/create_minio_bearer_token.yml
+ no_log: true
diff --git a/group_vars/all.yml b/ansible/group_vars/all.yml
similarity index 100%
rename from group_vars/all.yml
rename to ansible/group_vars/all.yml
diff --git a/group_vars/control.yml b/ansible/group_vars/control.yml
similarity index 100%
rename from group_vars/control.yml
rename to ansible/group_vars/control.yml
diff --git a/group_vars/k3s_cluster.yml b/ansible/group_vars/k3s_cluster.yml
similarity index 100%
rename from group_vars/k3s_cluster.yml
rename to ansible/group_vars/k3s_cluster.yml
diff --git a/group_vars/k3s_master.yml b/ansible/group_vars/k3s_master.yml
similarity index 100%
rename from group_vars/k3s_master.yml
rename to ansible/group_vars/k3s_master.yml
diff --git a/host_vars/gateway.yml b/ansible/host_vars/gateway.yml
similarity index 94%
rename from host_vars/gateway.yml
rename to ansible/host_vars/gateway.yml
index 52a0b0f7..8dac7185 100644
--- a/host_vars/gateway.yml
+++ b/ansible/host_vars/gateway.yml
@@ -41,6 +41,10 @@ dnsmasq_additional_dns_hosts:
desc: "Fluentd server"
hostname: fluentd
ip: 10.0.0.101
+ vault_server:
+ desc: "Vault server"
+ hostname: vault
+ ip: 10.0.0.1
####################
# ntp role variables
@@ -51,7 +55,9 @@ ntp_allow_hosts: [10.0.0.0/24]
# firewall role variables
#########################
-in_tcp_port: '{ ssh, https, http, iscsi-target, 9100 }'
+# tcp 8200, 8201 Vault server
+# tcp 9100 Prometheus (fluent-bit)
+in_tcp_port: '{ ssh, https, http, iscsi-target, 9100, 8200, 8201 }'
in_udp_port: '{ snmp, domain, ntp, bootps }'
forward_tcp_port: '{ http, https, ssh }'
forward_udp_port: '{ domain, ntp }'
diff --git a/inventory.yml b/ansible/inventory.yml
similarity index 100%
rename from inventory.yml
rename to ansible/inventory.yml
diff --git a/ansible/k3s_bootstrap.yml b/ansible/k3s_bootstrap.yml
new file mode 100644
index 00000000..91a09123
--- /dev/null
+++ b/ansible/k3s_bootstrap.yml
@@ -0,0 +1,103 @@
+---
+
+- name: Bootstrap Cluster
+ hosts: k3s_master
+ gather_facts: false
+
+ collections:
+ - kubernetes.core
+
+ environment:
+ # The location of the kubeconfig file on the master.
+ K8S_AUTH_KUBECONFIG: ~/.kube/config
+
+ pre_tasks:
+ # Install kubernetes core collection dependencies (kubernetes python package) using PIP.
+ - name: Ensure PIP is installed.
+ package:
+ name:
+ - python3-pip
+ - python3-setuptools
+ state: present
+ become: true
+
+ - name: Ensure kubernetes Python library is installed.
+ pip:
+ name: kubernetes
+ state: present
+ become: true
+
+ - name: Ensure hashicorp vault python library is installed.
+ pip:
+ name: hvac
+ state: present
+ become: true
+
+ # Install Helm diff plugin to have a better idempotence check
+ - name: Install Helm diff plugin
+ kubernetes.core.helm_plugin:
+ plugin_path: "https://github.com/databus23/helm-diff"
+ state: present
+
+ - name: Include vault variables
+ include_vars: "vars/vault.yml"
+
+ - name: Include picluster variables
+ include_vars: "vars/picluster.yml"
+
+ tasks:
+ - name: Create namespaces.
+ kubernetes.core.k8s:
+ name: "{{ item }}"
+ api_version: v1
+ kind: Namespace
+ state: present
+ with_items:
+ - "argocd"
+ - "vault"
+
+ - name: Configure Vault integration
+ include_tasks: tasks/configure_vault_integration.yml
+
+ - name: Copy argocd chart files
+ ansible.builtin.copy:
+ src: "../argocd/{{ item }}"
+ dest: /tmp/charts
+ with_items:
+ - "bootstrap/argocd"
+
+ - name: Update argo-cd helm dependency.
+ ansible.builtin.command:
+ cmd: "helm dependency update /tmp/charts/argocd"
+
+ - name: Deploy Argo CD Helm chart.
+ shell: |
+ set -o pipefail
+ helm template \
+ --dependency-update \
+ --include-crds \
+ --namespace argocd \
+ argocd /tmp/charts/argocd \
+ | kubectl apply -n argocd -f -
+ args:
+ executable: /bin/bash
+
+ - name: Wait for CRDs to be ready
+ command:
+ cmd: "kubectl wait --for condition=Established crd/applications.argoproj.io crd/applicationsets.argoproj.io --timeout=600s"
+ changed_when: false
+
+ - name: Deploy root application
+ kubernetes.core.k8s:
+ definition: "{{ lookup('template', 'templates/' + item ) }}"
+ state: present
+ with_items:
+ - argocd_root_app.yml.j2
+
+ - name: Recursively remove tmp directory
+ ansible.builtin.file:
+ path: /tmp/charts
+ state: absent
+
+ - name: Install cli utils.
+ include_tasks: tasks/install_cli_utils.yml
diff --git a/k3s_install.yml b/ansible/k3s_install.yml
similarity index 100%
rename from k3s_install.yml
rename to ansible/k3s_install.yml
diff --git a/k3s_reset.yml b/ansible/k3s_reset.yml
similarity index 100%
rename from k3s_reset.yml
rename to ansible/k3s_reset.yml
diff --git a/k3s_start.yml b/ansible/k3s_start.yml
similarity index 100%
rename from k3s_start.yml
rename to ansible/k3s_start.yml
diff --git a/k3s_stop.yml b/ansible/k3s_stop.yml
similarity index 100%
rename from k3s_stop.yml
rename to ansible/k3s_stop.yml
diff --git a/ansible/patch_grafana_dashboards.yml b/ansible/patch_grafana_dashboards.yml
new file mode 100644
index 00000000..a594825f
--- /dev/null
+++ b/ansible/patch_grafana_dashboards.yml
@@ -0,0 +1,13 @@
+---
+- name: Patch Grafana Dashboards
+ hosts: localhost
+
+ tasks:
+ - name: Patch Grafana Dashboards
+ include_tasks: tasks/patch_grafana_dashboard.yml
+ loop_control:
+ loop_var: dashboard_file
+ with_fileglob:
+ - "roles/prometheus/dashboards/*"
+ - "roles/prometheus/dashboards/linkerd/*"
+ - "roles/prometheus/dashboards/k3s/*"
diff --git a/requirements.yml b/ansible/requirements.yml
similarity index 80%
rename from requirements.yml
rename to ansible/requirements.yml
index 8a0899fe..a8c86a0d 100644
--- a/requirements.yml
+++ b/ansible/requirements.yml
@@ -19,9 +19,13 @@ roles:
- name: ricsanfre.fluentbit
version: v1.0.4
- name: ricsanfre.minio
- version: v1.0.9
+ version: v1.0.10
- name: ricsanfre.backup
- version: v1.1.2
+ version: v1.1.3
+ - name: ricsanfre.vault
+ version: v1.0.4
collections:
- name: kubernetes.core
version: 2.3.2
+ - name: community.hashi_vault
+ version: 4.0.0
diff --git a/ansible/reset_external_services.yml b/ansible/reset_external_services.yml
new file mode 100644
index 00000000..a0806b73
--- /dev/null
+++ b/ansible/reset_external_services.yml
@@ -0,0 +1,109 @@
+---
+- name: Clean Minio Installation
+ hosts: node1
+ become: true
+ gather_facts: false
+ tags: [s3]
+ tasks:
+ - name: Stop and disable Minio Server
+ systemd:
+ name: minio
+ state: stopped
+ enabled: false
+ become: true
+ - name: Delete directories and files
+ become: true
+ file:
+ state: absent
+ path: "{{ item }}"
+ with_items:
+ - /storage/minio/
+ - /etc/minio/
+ - /usr/local/bin/minio
+ - /usr/local/bin/mc
+ - /etc/systemd/system/minio.service
+ - name: Reload systemd daemon
+ systemd:
+ daemon_reload: true
+
+- name: Clean Vault Installation
+ hosts: gateway
+ become: true
+ gather_facts: false
+ tags: [vault]
+ tasks:
+ - name: Stop and disable Vault Server
+ systemd:
+ name: "{{ item }}"
+ state: stopped
+ enabled: false
+ become: true
+ with_items:
+ - vault
+ - vault-unseal
+ - name: Delete directories and files
+ become: true
+ file:
+ state: absent
+ path: "{{ item }}"
+ with_items:
+ - /var/lib/vault/
+ - /etc/vault/
+ - /var/log/vault/
+ - /usr/local/bin/vault
+ - /etc/systemd/system/vault.service
+ - /etc/systemd/system/vault-unseal.service
+ - name: Reload systemd daemon
+ systemd:
+ daemon_reload: true
+
+- name: Clean Restic Installation
+ hosts: raspberrypi
+ become: true
+ gather_facts: false
+ tags: [restic]
+ tasks:
+ - name: Stop and disable restic backup service
+ systemd:
+ name: "{{ item }}"
+ state: stopped
+ enabled: false
+ become: true
+ with_items:
+ - restic-backup.timer
+
+ - name: Stop and disable restic clean service
+ systemd:
+ name: "{{ item }}"
+ state: stopped
+ enabled: false
+ become: true
+ when: restic_clean_service
+ with_items:
+ - restic-clean.timer
+
+ - name: Delete directories and files
+ become: true
+ file:
+ state: absent
+ path: "{{ item }}"
+ with_items:
+ - /etc/restic/
+ - /var/log/restic.log
+ - /usr/local/bin/restic
+ - /etc/systemd/system/restic-backup.service
+ - /etc/systemd/system/restic-backup.timer
+
+ - name: Delete clean service file
+ become: true
+ file:
+ state: absent
+ path: "{{ item }}"
+ with_items:
+ - /etc/systemd/system/restic-clean.timer
+ - /etc/systemd/system/restic-clean.service
+ when: restic_clean_service
+
+ - name: Reload systemd daemon
+ systemd:
+ daemon_reload: true
diff --git a/roles/basic_setup/defaults/main.yaml b/ansible/roles/basic_setup/defaults/main.yaml
similarity index 100%
rename from roles/basic_setup/defaults/main.yaml
rename to ansible/roles/basic_setup/defaults/main.yaml
diff --git a/roles/basic_setup/handlers/main.yml b/ansible/roles/basic_setup/handlers/main.yml
similarity index 100%
rename from roles/basic_setup/handlers/main.yml
rename to ansible/roles/basic_setup/handlers/main.yml
diff --git a/roles/basic_setup/scripts/pi_temp b/ansible/roles/basic_setup/scripts/pi_temp
similarity index 100%
rename from roles/basic_setup/scripts/pi_temp
rename to ansible/roles/basic_setup/scripts/pi_temp
diff --git a/roles/basic_setup/scripts/pi_throttling b/ansible/roles/basic_setup/scripts/pi_throttling
similarity index 100%
rename from roles/basic_setup/scripts/pi_throttling
rename to ansible/roles/basic_setup/scripts/pi_throttling
diff --git a/roles/basic_setup/tasks/files/multipath.conf b/ansible/roles/basic_setup/tasks/files/multipath.conf
similarity index 100%
rename from roles/basic_setup/tasks/files/multipath.conf
rename to ansible/roles/basic_setup/tasks/files/multipath.conf
diff --git a/roles/basic_setup/tasks/main.yaml b/ansible/roles/basic_setup/tasks/main.yaml
similarity index 100%
rename from roles/basic_setup/tasks/main.yaml
rename to ansible/roles/basic_setup/tasks/main.yaml
diff --git a/roles/basic_setup/tasks/multipath_blacklist.yml b/ansible/roles/basic_setup/tasks/multipath_blacklist.yml
similarity index 100%
rename from roles/basic_setup/tasks/multipath_blacklist.yml
rename to ansible/roles/basic_setup/tasks/multipath_blacklist.yml
diff --git a/roles/basic_setup/tasks/remove_snap.yaml b/ansible/roles/basic_setup/tasks/remove_snap.yaml
similarity index 100%
rename from roles/basic_setup/tasks/remove_snap.yaml
rename to ansible/roles/basic_setup/tasks/remove_snap.yaml
diff --git a/roles/basic_setup/tasks/remove_snap_packages.yml b/ansible/roles/basic_setup/tasks/remove_snap_packages.yml
similarity index 100%
rename from roles/basic_setup/tasks/remove_snap_packages.yml
rename to ansible/roles/basic_setup/tasks/remove_snap_packages.yml
diff --git a/roles/certbot/defaults/main.yml b/ansible/roles/certbot/defaults/main.yml
similarity index 85%
rename from roles/certbot/defaults/main.yml
rename to ansible/roles/certbot/defaults/main.yml
index c3966771..ee3427f7 100644
--- a/roles/certbot/defaults/main.yml
+++ b/ansible/roles/certbot/defaults/main.yml
@@ -12,3 +12,6 @@ dns_ionos_secret: ionos-secret
dns_ionos_api_endpoint: https://api.hosting.ionos.com
certbot_email: myemail@mydomain.es
+
+# dns propagation in seconds
+propagation_seconds: 300
diff --git a/roles/certbot/tasks/main.yml b/ansible/roles/certbot/tasks/main.yml
similarity index 88%
rename from roles/certbot/tasks/main.yml
rename to ansible/roles/certbot/tasks/main.yml
index cf689662..b8087a2e 100644
--- a/roles/certbot/tasks/main.yml
+++ b/ansible/roles/certbot/tasks/main.yml
@@ -7,7 +7,6 @@
- python3-venv
state: present
become: true
- tags: ['always']
when: install_python_packages
- name: Install certbot and ionos plugin in venvironment
@@ -18,7 +17,6 @@
virtualenv: "{{ certbot_venv }}"
virtualenv_command: "python3 -m venv"
state: present
- tags: ['always']
- name: Create working directories
file:
@@ -53,6 +51,6 @@
mode: 0755
with_items:
- template: certbot.sh.j2
- dest: "{{ certbot_venv }}/certbot-create-cert.sh"
+ dest: "{{ certbot_venv }}/bin/certbot-create-cert.sh"
- template: certbot-wrapper.sh.j2
- dest: "{{ certbot_venv }}/certbot-wrapper.sh"
+ dest: "{{ certbot_venv }}/bin/certbot-wrapper.sh"
diff --git a/ansible/roles/certbot/templates/certbot-wrapper.sh.j2 b/ansible/roles/certbot/templates/certbot-wrapper.sh.j2
new file mode 100644
index 00000000..fed7edca
--- /dev/null
+++ b/ansible/roles/certbot/templates/certbot-wrapper.sh.j2
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+# certbot-wrapper script
+# Needs to be copied to the certbot venv bin directory
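+# Usage: certbot-wrapper.sh <certbot arguments>
+# e.g. external_services.yml runs 'certbot-wrapper.sh certificates -d <hostname>' to check for an existing certificate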
+
+BASEDIR=$(dirname "$0")
+
+$BASEDIR/certbot \
+ --config-dir $BASEDIR/../config \
+ --work-dir $BASEDIR/.. \
+ --logs-dir $BASEDIR/../logs \
+ "$@"
\ No newline at end of file
diff --git a/ansible/roles/certbot/templates/certbot.sh.j2 b/ansible/roles/certbot/templates/certbot.sh.j2
new file mode 100644
index 00000000..3826b16a
--- /dev/null
+++ b/ansible/roles/certbot/templates/certbot.sh.j2
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+# Executing certbot within virtualenv
+# Needs to be copied to the certbot venv bin directory
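+# Usage: certbot-create-cert.sh <domain>
+# external_services.yml invokes it for the minio and vault hostnames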
+
+BASEDIR=$(dirname "$0")
+
+$BASEDIR/certbot certonly \
+ --config-dir $BASEDIR/../config \
+ --work-dir $BASEDIR/.. \
+ --logs-dir $BASEDIR/../logs \
+ --authenticator dns-ionos \
+ --dns-ionos-credentials $BASEDIR/../.secrets/ionos-credentials.ini \
+ --dns-ionos-propagation-seconds {{ propagation_seconds }} \
+ --server https://acme-v02.api.letsencrypt.org/directory \
+ --agree-tos \
+ --non-interactive \
+ --rsa-key-size 4096 \
+ -m {{ certbot_email }} \
+ -d $1
diff --git a/roles/certbot/templates/ionos-credentials.ini.j2 b/ansible/roles/certbot/templates/ionos-credentials.ini.j2
similarity index 100%
rename from roles/certbot/templates/ionos-credentials.ini.j2
rename to ansible/roles/certbot/templates/ionos-credentials.ini.j2
diff --git a/roles/certbot/tests/install_certbot.yml b/ansible/roles/certbot/tests/install_certbot.yml
similarity index 100%
rename from roles/certbot/tests/install_certbot.yml
rename to ansible/roles/certbot/tests/install_certbot.yml
diff --git a/roles/dns/handlers/main.yml b/ansible/roles/dns/handlers/main.yml
similarity index 100%
rename from roles/dns/handlers/main.yml
rename to ansible/roles/dns/handlers/main.yml
diff --git a/roles/dns/tasks/main.yml b/ansible/roles/dns/tasks/main.yml
similarity index 100%
rename from roles/dns/tasks/main.yml
rename to ansible/roles/dns/tasks/main.yml
diff --git a/roles/k3s/master/defaults/main.yml b/ansible/roles/k3s/master/defaults/main.yml
similarity index 100%
rename from roles/k3s/master/defaults/main.yml
rename to ansible/roles/k3s/master/defaults/main.yml
diff --git a/roles/k3s/master/tasks/main.yml b/ansible/roles/k3s/master/tasks/main.yml
similarity index 100%
rename from roles/k3s/master/tasks/main.yml
rename to ansible/roles/k3s/master/tasks/main.yml
diff --git a/roles/k3s/prereq/handlers/main.yml b/ansible/roles/k3s/prereq/handlers/main.yml
similarity index 100%
rename from roles/k3s/prereq/handlers/main.yml
rename to ansible/roles/k3s/prereq/handlers/main.yml
diff --git a/roles/k3s/prereq/tasks/main.yml b/ansible/roles/k3s/prereq/tasks/main.yml
similarity index 100%
rename from roles/k3s/prereq/tasks/main.yml
rename to ansible/roles/k3s/prereq/tasks/main.yml
diff --git a/roles/k3s/worker/defaults/main.yml b/ansible/roles/k3s/worker/defaults/main.yml
similarity index 100%
rename from roles/k3s/worker/defaults/main.yml
rename to ansible/roles/k3s/worker/defaults/main.yml
diff --git a/roles/k3s/worker/tasks/main.yml b/ansible/roles/k3s/worker/tasks/main.yml
similarity index 100%
rename from roles/k3s/worker/tasks/main.yml
rename to ansible/roles/k3s/worker/tasks/main.yml
diff --git a/roles/linkerd/control-plane/defaults/main.yml b/ansible/roles/linkerd-cli/defaults/main.yml
similarity index 78%
rename from roles/linkerd/control-plane/defaults/main.yml
rename to ansible/roles/linkerd-cli/defaults/main.yml
index 3540db7c..99e439a2 100644
--- a/roles/linkerd/control-plane/defaults/main.yml
+++ b/ansible/roles/linkerd-cli/defaults/main.yml
@@ -1,8 +1,6 @@
---
-
# Version
linkerd_version: "stable-2.12.2"
-linkerd_chart_version: 1.9.4
# Architecture
linkerd_arch: "arm64"
@@ -15,7 +13,3 @@ linkerd_checksum: "sha256:{{ linkerd_package_url }}.sha256"
# linkerd install location
linkerd_install_dir: "/usr/local/bin"
linkerd_bin: "{{ linkerd_install_dir }}/linkerd"
-
-# linkerd identity issuer certificate
-issuer_certificate_duration: "48h"
-issuer_certificate_renewbefore: "23h"
diff --git a/roles/linkerd/control-plane/tasks/install_linkerd_cli.yml b/ansible/roles/linkerd-cli/tasks/install_linkerd_cli.yml
similarity index 100%
rename from roles/linkerd/control-plane/tasks/install_linkerd_cli.yml
rename to ansible/roles/linkerd-cli/tasks/install_linkerd_cli.yml
diff --git a/ansible/roles/linkerd-cli/tasks/main.yml b/ansible/roles/linkerd-cli/tasks/main.yml
new file mode 100644
index 00000000..f9dff85e
--- /dev/null
+++ b/ansible/roles/linkerd-cli/tasks/main.yml
@@ -0,0 +1,14 @@
+---
+
+- name: Check Linkerd CLI installation status
+ stat:
+ path: "{{ linkerd_bin }}"
+ register: _linkerd_bin
+
+- name: Install linkerd CLI
+ include_tasks: install_linkerd_cli.yml
+ args:
+ apply:
+ become: true
+ when:
+ - not _linkerd_bin.stat.exists
diff --git a/roles/logging/external_node/tasks/main.yml b/ansible/roles/logging/external_node/tasks/main.yml
similarity index 100%
rename from roles/logging/external_node/tasks/main.yml
rename to ansible/roles/logging/external_node/tasks/main.yml
diff --git a/roles/logging/external_node/templates/adjust_ts.lua b/ansible/roles/logging/external_node/templates/adjust_ts.lua
similarity index 100%
rename from roles/logging/external_node/templates/adjust_ts.lua
rename to ansible/roles/logging/external_node/templates/adjust_ts.lua
diff --git a/ansible/roles/longhorn-util/defaults/main.yml b/ansible/roles/longhorn-util/defaults/main.yml
new file mode 100644
index 00000000..1c3b5fc3
--- /dev/null
+++ b/ansible/roles/longhorn-util/defaults/main.yml
@@ -0,0 +1,6 @@
+---
+# Namespace for Longhorn
+k3s_longhorn_namespace: longhorn-system
+
+# Enable service mesh
+enable_linkerd: false
diff --git a/roles/longhorn/files/check_lh.sh b/ansible/roles/longhorn-util/files/check_lh.sh
similarity index 100%
rename from roles/longhorn/files/check_lh.sh
rename to ansible/roles/longhorn-util/files/check_lh.sh
diff --git a/roles/longhorn/tasks/configure_linkerd_mesh.yml b/ansible/roles/longhorn-util/tasks/configure_linkerd_mesh.yml
similarity index 86%
rename from roles/longhorn/tasks/configure_linkerd_mesh.yml
rename to ansible/roles/longhorn-util/tasks/configure_linkerd_mesh.yml
index 991c10e2..3df0e43c 100644
--- a/roles/longhorn/tasks/configure_linkerd_mesh.yml
+++ b/ansible/roles/longhorn-util/tasks/configure_linkerd_mesh.yml
@@ -1,6 +1,11 @@
---
-# Make longhorn-manager container listen on localhost
+# Check longhorn is running
+- name: Check longhorn status. Wait for all components to start
+ command:
+ cmd: "/usr/local/bin/check_lh.sh"
+ changed_when: false
+# Make longhorn-manager container listen on localhost
- name: Change longhorn-manager POD_IP env variable
command:
cmd: "kubectl set env daemonset/longhorn-manager -n {{ k3s_longhorn_namespace }} POD_IP=0.0.0.0"
@@ -22,7 +27,6 @@
linkerd.io/inject: enabled
state: patched
-
- name: Annotate longhorn-ui
kubernetes.core.k8s:
definition:
diff --git a/ansible/roles/longhorn-util/tasks/main.yml b/ansible/roles/longhorn-util/tasks/main.yml
new file mode 100644
index 00000000..a6cf9b8b
--- /dev/null
+++ b/ansible/roles/longhorn-util/tasks/main.yml
@@ -0,0 +1,15 @@
+---
+- name: Copy longhorn health check script
+ copy:
+ src: "files/{{ item }}"
+ dest: "/usr/local/bin/{{ item }}"
+ owner: "root"
+ group: "root"
+ mode: "u=rwx,g=rx,o=rx"
+ become: true
+ with_items:
+ - check_lh.sh
+
+- name: Enable linkerd integration
+ include_tasks: configure_linkerd_mesh.yml
+ when: enable_linkerd
diff --git a/roles/longhorn/tests/cleanup_test_longhorn.yml b/ansible/roles/longhorn-util/tests/cleanup_test_longhorn.yml
similarity index 100%
rename from roles/longhorn/tests/cleanup_test_longhorn.yml
rename to ansible/roles/longhorn-util/tests/cleanup_test_longhorn.yml
diff --git a/roles/longhorn/tests/templates/testing_longhorn_manifest.yml b/ansible/roles/longhorn-util/tests/templates/testing_longhorn_manifest.yml
similarity index 100%
rename from roles/longhorn/tests/templates/testing_longhorn_manifest.yml
rename to ansible/roles/longhorn-util/tests/templates/testing_longhorn_manifest.yml
diff --git a/roles/longhorn/tests/test_longhorn.yml b/ansible/roles/longhorn-util/tests/test_longhorn.yml
similarity index 100%
rename from roles/longhorn/tests/test_longhorn.yml
rename to ansible/roles/longhorn-util/tests/test_longhorn.yml
diff --git a/ansible/roles/velero-cli/defaults/main.yml b/ansible/roles/velero-cli/defaults/main.yml
new file mode 100644
index 00000000..6dd0adcd
--- /dev/null
+++ b/ansible/roles/velero-cli/defaults/main.yml
@@ -0,0 +1,5 @@
+---
+velero_version: v1.9.2
+velero_arch: arm64
+
+velero_namespace: velero
diff --git a/roles/backup/velero/tasks/configure_velero_cli.yml b/ansible/roles/velero-cli/tasks/configure_velero_cli.yml
similarity index 84%
rename from roles/backup/velero/tasks/configure_velero_cli.yml
rename to ansible/roles/velero-cli/tasks/configure_velero_cli.yml
index 98096d2d..486e9f7f 100644
--- a/roles/backup/velero/tasks/configure_velero_cli.yml
+++ b/ansible/roles/velero-cli/tasks/configure_velero_cli.yml
@@ -8,10 +8,10 @@
- name: Configure velero CLI namespace
command:
- cmd: "velero client config set namespace={{ k3s_velero_namespace }}"
+ cmd: "velero client config set namespace={{ velero_namespace }}"
when:
- get_velero_namespace.rc==0
- - '"namespace: " in get_velero_namespace.stdout or "namespace: " + k3s_velero_namespace not in get_velero_namespace.stdout'
+ - '"namespace: " in get_velero_namespace.stdout or "namespace: " + velero_namespace not in get_velero_namespace.stdout'
changed_when: true
- name: Get CLI configured colored
diff --git a/roles/backup/velero/tasks/install_velero_cli.yml b/ansible/roles/velero-cli/tasks/install_velero_cli.yml
similarity index 100%
rename from roles/backup/velero/tasks/install_velero_cli.yml
rename to ansible/roles/velero-cli/tasks/install_velero_cli.yml
diff --git a/ansible/roles/velero-cli/tasks/main.yml b/ansible/roles/velero-cli/tasks/main.yml
new file mode 100644
index 00000000..45631b56
--- /dev/null
+++ b/ansible/roles/velero-cli/tasks/main.yml
@@ -0,0 +1,10 @@
+---
+
+- name: Install velero client
+ include_tasks: install_velero_cli.yml
+ args:
+ apply:
+ become: true
+
+- name: Configure velero CLI
+ include_tasks: configure_velero_cli.yml
diff --git a/roles/backup/velero/tests/cleaning_testing.yml b/ansible/roles/velero-cli/tests/cleaning_testing.yml
similarity index 100%
rename from roles/backup/velero/tests/cleaning_testing.yml
rename to ansible/roles/velero-cli/tests/cleaning_testing.yml
diff --git a/roles/backup/velero/tests/files/nginx_test_application.yml b/ansible/roles/velero-cli/tests/files/nginx_test_application.yml
similarity index 100%
rename from roles/backup/velero/tests/files/nginx_test_application.yml
rename to ansible/roles/velero-cli/tests/files/nginx_test_application.yml
diff --git a/roles/backup/velero/tests/testing_velero.yml b/ansible/roles/velero-cli/tests/testing_velero.yml
similarity index 100%
rename from roles/backup/velero/tests/testing_velero.yml
rename to ansible/roles/velero-cli/tests/testing_velero.yml
diff --git a/setup_picluster.yml b/ansible/setup_picluster.yml
similarity index 100%
rename from setup_picluster.yml
rename to ansible/setup_picluster.yml
diff --git a/shutdown.yml b/ansible/shutdown.yml
similarity index 100%
rename from shutdown.yml
rename to ansible/shutdown.yml
diff --git a/tasks/cleaning.yml b/ansible/tasks/cleaning.yml
similarity index 100%
rename from tasks/cleaning.yml
rename to ansible/tasks/cleaning.yml
diff --git a/ansible/tasks/configure_vault_integration.yml b/ansible/tasks/configure_vault_integration.yml
new file mode 100644
index 00000000..b946af01
--- /dev/null
+++ b/ansible/tasks/configure_vault_integration.yml
@@ -0,0 +1,39 @@
+---
+
+- name: Configure vault service account and create token
+ kubernetes.core.k8s:
+ definition: "{{ lookup('ansible.builtin.file', '../argocd/bootstrap/vault/' + item ) }}"
+ state: present
+ with_items:
+ - vault-auth-serviceaccount.yaml
+
+- name: Get token reviewer JWT
+ shell: |
+ KUBERNETES_SA_SECRET_NAME=$(kubectl get secrets --output=json -n vault | jq -r '.items[].metadata | select(.name|startswith("vault-auth")).name')
+ TOKEN_REVIEW_JWT=$(kubectl get secret $KUBERNETES_SA_SECRET_NAME -n vault -o jsonpath='{.data.token}' | base64 --decode)
+ echo $TOKEN_REVIEW_JWT
+ register: get_reviewer_token
+ changed_when: false
+
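+# The reviewer JWT lets Vault call the Kubernetes TokenReview API to validate the service
+# account tokens presented by workloads logging in through the kubernetes auth method.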
+- name: Set reviewer token
+ set_fact:
+ vault_reviewer_token: "{{ get_reviewer_token.stdout }}"
+
+- name: Get Kubernetes CA cert
+ shell: |
+ KUBERNETES_CA_CERT=$(kubectl config view --raw --minify --flatten --output='jsonpath={.clusters[].cluster.certificate-authority-data}' \
+ | base64 --decode | awk 'NF {sub(/\r/, ""); printf "%s\\n",$0;}')
+ echo $KUBERNETES_CA_CERT
+ register: get_kubernetes_ca
+ changed_when: false
+
+- name: Set CA cert
+ set_fact:
+ kubernetes_ca_cert: "{{ get_kubernetes_ca.stdout }}"
+
+- name: Set kubernetes_host
+ set_fact:
+ kubernetes_host: "https://{{ k3s_master_ip }}:6443"
+
+- name: Configure vault-kubernetes-auth
+ include_tasks: tasks/vault_kubernetes_auth_method_config.yml
diff --git a/roles/traefik/tasks/create_basic_auth_credentials.yml b/ansible/tasks/create_basic_auth_credentials.yml
similarity index 50%
rename from roles/traefik/tasks/create_basic_auth_credentials.yml
rename to ansible/tasks/create_basic_auth_credentials.yml
index 1b5b2915..cb16ee5e 100644
--- a/roles/traefik/tasks/create_basic_auth_credentials.yml
+++ b/ansible/tasks/create_basic_auth_credentials.yml
@@ -10,10 +10,22 @@
- name: htpasswd utility
shell:
cmd: >-
- htpasswd -nb {{ traefik_basic_auth_user }} {{ traefik_basic_auth_passwd }} | base64
+ htpasswd -nb {{ traefik_basic_auth_user }} {{ traefik_basic_auth_passwd }}
register: htpasswd
changed_when: false
- name: Set htpasswd pair
set_fact:
traefik_auth_htpasswd_pair: "{{ htpasswd.stdout }}"
+
+
+- name: Create/update traefik/basic_auth credentials
+ ansible.builtin.uri:
+ url: "https://{{ vault_dns }}:8200/v1/secret/data/traefik/basic_auth"
+ method: POST
+ headers:
+ X-Vault-Token: "{{ token_data | community.hashi_vault.vault_login_token }}"
+ body:
+ data:
+ htpasswd-pair: "{{ traefik_auth_htpasswd_pair }}"
+ body_format: json
diff --git a/ansible/tasks/create_minio_bearer_token.yml b/ansible/tasks/create_minio_bearer_token.yml
new file mode 100644
index 00000000..500b2b74
--- /dev/null
+++ b/ansible/tasks/create_minio_bearer_token.yml
@@ -0,0 +1,24 @@
+---
+# Minio prometheus bearer token was previously created and stored in the filesystem on node1
+- name: Load prometheus bearer token from file in node1
+ command: "jq -r '.bearerToken' /etc/minio/prometheus_bearer.json"
+ register: root_token
+ become: true
+ changed_when: false
+ when: minio_prom_bearer_token is not defined
+ delegate_to: node1
+
+- name: Get bearer token
+ set_fact:
+ minio_prom_bearer_token: "{{ root_token.stdout }}"
+
+- name: Create/update minio/prometheus credentials
+ ansible.builtin.uri:
+ url: "https://{{ vault_dns }}:8200/v1/secret/data/minio/prometheus"
+ method: POST
+ headers:
+ X-Vault-Token: "{{ token_data | community.hashi_vault.vault_login_token }}"
+ body:
+ data:
+ bearer-token: "{{ minio_prom_bearer_token }}"
+ body_format: json
diff --git a/ansible/tasks/generate_ca_signed_cert.yml b/ansible/tasks/generate_ca_signed_cert.yml
new file mode 100644
index 00000000..4afb210f
--- /dev/null
+++ b/ansible/tasks/generate_ca_signed_cert.yml
@@ -0,0 +1,25 @@
+---
+- name: Create private key
+ openssl_privatekey:
+ path: "{{ selfsigned_certificates_path }}/{{ server_hostname }}.key"
+ size: "{{ ssl_key_size | int }}"
+ type: "{{ key_type }}"
+ mode: 0644
+
+- name: Create CSR
+ openssl_csr:
+ path: "{{ selfsigned_certificates_path }}/{{ server_hostname }}.csr"
+ privatekey_path: "{{ selfsigned_certificates_path }}/{{ server_hostname }}.key"
+ country_name: "{{ country_name }}"
+ organization_name: "{{ organization_name }}"
+ email_address: "{{ email_address }}"
+ common_name: "{{ server_hostname }}"
+ subject_alt_name: "DNS:{{ server_hostname }},IP:{{ ansible_default_ipv4.address }}"
+
+- name: CA signed CSR
+ openssl_certificate:
+ csr_path: "{{ selfsigned_certificates_path }}/{{ server_hostname }}.csr"
+ path: "{{ selfsigned_certificates_path }}/{{ server_hostname }}.pem"
+ provider: ownca
+ ownca_path: "{{ selfsigned_certificates_path }}/CA.pem"
+ ownca_privatekey_path: "{{ selfsigned_certificates_path }}/CA.key"
diff --git a/tasks/generate_custom_ca.yml b/ansible/tasks/generate_custom_ca.yml
similarity index 60%
rename from tasks/generate_custom_ca.yml
rename to ansible/tasks/generate_custom_ca.yml
index d111b5f6..b769a411 100644
--- a/tasks/generate_custom_ca.yml
+++ b/ansible/tasks/generate_custom_ca.yml
@@ -1,14 +1,14 @@
---
- name: Create CA key
openssl_privatekey:
- path: certificates/CA.key
+ path: "{{ selfsigned_certificates_path }}/CA.key"
size: "{{ ssl_key_size | int }}"
mode: 0644
register: ca_key
- name: create the CA CSR
openssl_csr:
- privatekey_path: certificates/CA.key
+ privatekey_path: "{{ selfsigned_certificates_path }}/CA.key"
common_name: Ricsanfre CA
use_common_name_for_san: false # since we do not specify SANs, don't use CN as a SAN
basic_constraints:
@@ -17,13 +17,13 @@
key_usage:
- keyCertSign
key_usage_critical: true
- path: certificates/CA.csr
+ path: "{{ selfsigned_certificates_path }}/CA.csr"
register: ca_csr
- name: sign the CA CSR
openssl_certificate:
- path: certificates/CA.pem
- csr_path: certificates/CA.csr
- privatekey_path: certificates/CA.key
+ path: "{{ selfsigned_certificates_path }}/CA.pem"
+ csr_path: "{{ selfsigned_certificates_path }}/CA.csr"
+ privatekey_path: "{{ selfsigned_certificates_path }}/CA.key"
provider: selfsigned
register: ca_crt
diff --git a/tasks/generate_selfsigned_cert.yml b/ansible/tasks/generate_selfsigned_cert.yml
similarity index 100%
rename from tasks/generate_selfsigned_cert.yml
rename to ansible/tasks/generate_selfsigned_cert.yml
diff --git a/ansible/tasks/install_cli_utils.yml b/ansible/tasks/install_cli_utils.yml
new file mode 100644
index 00000000..69f1e0de
--- /dev/null
+++ b/ansible/tasks/install_cli_utils.yml
@@ -0,0 +1,11 @@
+---
+# Execute roles to install cli and utils
+- name: Use role in loop
+ ansible.builtin.include_role:
+ name: '{{ role }}'
+ loop_control:
+ loop_var: role
+ loop:
+ - longhorn-util
+ - velero-cli
+ - linkerd-cli
diff --git a/ansible/tasks/load_vault_credentials.yml b/ansible/tasks/load_vault_credentials.yml
new file mode 100644
index 00000000..74d0b8ce
--- /dev/null
+++ b/ansible/tasks/load_vault_credentials.yml
@@ -0,0 +1,29 @@
+---
+
+# hashi_vault.vault_write module is not working
+#
+# - name: Create {{ secret_group.key }} credentials
+# community.hashi_vault.vault_write:
+# url: "https://{{ vault_dns }}:8200"
+# path: "secret/{{ secret_group.key }}/{{ secret.key }}"
+# data: "{{ secret.value }}"
+# auth_method: token
+# token: '{{ token_data | community.hashi_vault.vault_login_token }}'
+# loop: "{{ secret_group.value | dict2items }}"
+# loop_control:
+# loop_var: secret
+
+# https://developer.hashicorp.com/vault/api-docs/secret/kv/kv-v2#create-update-secret
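+# KV v2 expects the payload wrapped in a 'data' object, i.e. body = {"data": {<key>: <value>, ...}},
+# which is what the uri task below sends.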
+- name: Create/update {{ secret_group.key }} credentials
+ ansible.builtin.uri:
+ url: "https://{{ vault_dns }}:8200/v1/secret/data/{{ secret_group.key }}/{{ secret.key }}"
+ method: POST
+ headers:
+ X-Vault-Token: "{{ token_data | community.hashi_vault.vault_login_token }}"
+ body:
+ data: "{{ secret.value }}"
+ body_format: json
+ loop: "{{ secret_group.value | dict2items }}"
+ loop_control:
+ loop_var: secret
+ no_log: true
diff --git a/roles/prometheus/tasks/configure_grafana_dashboards.yml b/ansible/tasks/patch_grafana_dashboard.yml
similarity index 63%
rename from roles/prometheus/tasks/configure_grafana_dashboards.yml
rename to ansible/tasks/patch_grafana_dashboard.yml
index 4aeab8c0..d2d47844 100644
--- a/roles/prometheus/tasks/configure_grafana_dashboards.yml
+++ b/ansible/tasks/patch_grafana_dashboard.yml
@@ -4,7 +4,7 @@
# Check if json file contains DS_PROMETHEUS variable defined and patch json file
# See issue #18
-- name: Provisioninig dashboard {{ dashboard_name }} | Initialize loop variables
+- name: Patching dashboard {{ dashboard_name }} | Initialize loop variables
set_fact:
dashboard_name: "{{ dashboard_file | basename | splitext | first }}"
dashboard_file_name: "{{ dashboard_file | basename }}"
@@ -12,17 +12,17 @@
input_detected: false
input_variable: false
-- name: Provisioninig dashboard {{ dashboard_name }} | Check if __inputs key exits within json dashboard
+- name: Patching dashboard {{ dashboard_name }} | Check if __inputs key exists within json dashboard
set_fact:
input_detected: true
when: dashboard_content.__inputs is defined
-- name: Provisioninig dashboard {{ dashboard_name }} | Detect if variable DS_PROMETHEUS exits
+- name: Patching dashboard {{ dashboard_name }} | Detect if variable DS_PROMETHEUS exists
set_fact:
input_variable: "{{ dashboard_content.__inputs | selectattr('name','==', 'DS_PROMETHEUS') | length > 0 }}"
when: input_detected
-- name: Provisioninig dashboard {{ dashboard_name }} | Generating patch templating.list code block to add DS_PROMETHEUS variable
+- name: Patching dashboard {{ dashboard_name }} | Generate templating.list patch block to add DS_PROMETHEUS variable
set_fact:
patch: "{{ [{ 'hide': 0,
'label': 'datasource',
@@ -34,15 +34,15 @@
'type': 'datasource' }] + dashboard_content.templating.list }}"
when: input_variable
-- name: Provisioninig dashboard {{ dashboard_name }} | Patch json dashboard file
+- name: Patching dashboard {{ dashboard_name }} | Patch json dashboard file
set_fact:
dashboard_content: "{{ dashboard_content | combine(new_item, recursive=true) }}"
vars:
new_item: "{{ { 'templating': { 'list': patch } } }}"
when: input_variable
-# Create ConfigMap
-- name: "Provisioninig dashboard {{ dashboard_name }} | Kubernetes apply"
- kubernetes.core.k8s:
- definition: "{{ lookup('template', 'templates/grafana_dashboard.yml.j2' ) }}"
- state: present
+- name: "Patching dashboard {{ dashboard_name }} | Copying to patching directory"
+ copy:
+ dest: "temp/{{ dashboard_file_name }}"
+ content: "{{ dashboard_content | to_nice_json(indent=2) }}"
+ when: input_variable
diff --git a/ansible/tasks/vault_kubernetes_auth_method_config.yml b/ansible/tasks/vault_kubernetes_auth_method_config.yml
new file mode 100644
index 00000000..c5520163
--- /dev/null
+++ b/ansible/tasks/vault_kubernetes_auth_method_config.yml
@@ -0,0 +1,83 @@
+---
+
+- name: Vault login
+ ansible.builtin.shell: bash -ilc 'vault login -format=json $VAULT_TOKEN'
+  # Interactive bash so ~/.bashrc is loaded.
+  # 'source ~/.bashrc && ...' does not work because the Ansible shell is not interactive
+  # and the default ~/.bashrc implementation ignores non-interactive shells.
+  # See the lines at the beginning of ~/.bashrc:
+  #
+  # If not running interactively, don't do anything
+  # case $- in
+  #  *i*) ;;
+  #    *) return;;
+  # esac
+  #
+  # The best way to execute commands as the user with its full login environment is:
+  #   bash -ilc '<command>'
+  # '-i' means interactive shell, so ~/.bashrc is not ignored; '-l' means login shell, which sources the full user profile.
+ become: false
+ register: vault_login
+ changed_when: false
+ delegate_to: gateway
+
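+# The vault_login_token filter extracts the client token (auth.client_token) from the login JSON output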
+- name: Get vault token
+ set_fact:
+ vault_token: "{{ vault_login.stdout | from_json | community.hashi_vault.vault_login_token }}"
+
+- name: Get status of kubernetes auth method
+ ansible.builtin.uri:
+ url: "https://{{ vault_dns }}:8200/v1/sys/auth"
+ method: GET
+ headers:
+ X-Vault-Token: "{{ vault_token }}"
+ failed_when:
+ - false
+ register: vault_status_kubernetes_auth_method
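+  # failed_when is forced to false: this call never fails here, its JSON response is only
+  # inspected below to decide whether the kubernetes auth method still needs to be enabled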
+
+ # Enable kubernetes auth method
+ # vault auth enable kubernetes
+- name: Enable kubernetes auth method
+ ansible.builtin.uri:
+ url: "https://{{ vault_dns }}:8200/v1/sys/auth/kubernetes"
+ method: POST
+ headers:
+ X-Vault-Token: "{{ vault_token }}"
+ body:
+ type: "kubernetes"
+ description: "kubernetes auth"
+ body_format: json
+ status_code:
+ - 200
+ - 204
+ when:
+ - "'kubernetes/' not in vault_status_kubernetes_auth_method.json.data"
+
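+# Illustrative CLI equivalent of the next task (assumption, not used by the playbook):
+#   vault write auth/kubernetes/config kubernetes_host=<K8S_API_URL> \
+#     kubernetes_ca_cert=@k8s-ca.crt token_reviewer_jwt=<REVIEWER_JWT>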
+- name: Configure kubernetes auth method
+ ansible.builtin.uri:
+ url: "https://{{ vault_dns }}:8200/v1/auth/kubernetes/config"
+ method: POST
+ headers:
+ X-Vault-Token: "{{ vault_token }}"
+ body:
+ kubernetes_host: "{{ kubernetes_host }}"
+ kubernetes_ca_cert: "{{ kubernetes_ca_cert }}"
+ token_reviewer_jwt: "{{ vault_reviewer_token }}"
+ body_format: json
+ status_code:
+ - 200
+ - 204
+
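+# Illustrative CLI equivalent of the next task (assumption, not used by the playbook):
+#   vault write auth/kubernetes/role/external-secrets \
+#     bound_service_account_names=external-secrets \
+#     bound_service_account_namespaces=external-secrets policies=read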
+- name: Create External Secrets role
+ ansible.builtin.uri:
+ url: "https://{{ vault_dns }}:8200/v1/auth/kubernetes/role/external-secrets"
+ method: POST
+ headers:
+ X-Vault-Token: "{{ vault_token }}"
+ body:
+ bound_service_account_names: external-secrets
+ bound_service_account_namespaces: external-secrets
+ policies: ["read"]
+ body_format: json
+ status_code:
+ - 200
+ - 204
diff --git a/ansible/templates/argocd_root_app.yml.j2 b/ansible/templates/argocd_root_app.yml.j2
new file mode 100644
index 00000000..f5e57799
--- /dev/null
+++ b/ansible/templates/argocd_root_app.yml.j2
@@ -0,0 +1,26 @@
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+ name: root
+ namespace: argocd
+spec:
+ destination:
+ namespace: argocd
+ server: https://kubernetes.default.svc
+ project: default
+ source:
+ path: argocd/bootstrap/root
+ repoURL: https://github.com/ricsanfre/pi-cluster
+ targetRevision: master
+ syncPolicy:
+ automated:
+ prune: true
+ selfHeal: true
+ retry:
+ limit: 10
+ backoff:
+ duration: 1m
+ maxDuration: 16m
+ factor: 2
+ syncOptions:
+ - CreateNamespace=true
\ No newline at end of file
diff --git a/update.yml b/ansible/update.yml
similarity index 100%
rename from update.yml
rename to ansible/update.yml
diff --git a/vars/centralized_san/centralized_san_initiator.yml b/ansible/vars/centralized_san/centralized_san_initiator.yml
similarity index 100%
rename from vars/centralized_san/centralized_san_initiator.yml
rename to ansible/vars/centralized_san/centralized_san_initiator.yml
diff --git a/vars/centralized_san/centralized_san_target.yml b/ansible/vars/centralized_san/centralized_san_target.yml
similarity index 100%
rename from vars/centralized_san/centralized_san_target.yml
rename to ansible/vars/centralized_san/centralized_san_target.yml
diff --git a/vars/dedicated_disks/local_storage.yml b/ansible/vars/dedicated_disks/local_storage.yml
similarity index 100%
rename from vars/dedicated_disks/local_storage.yml
rename to ansible/vars/dedicated_disks/local_storage.yml
diff --git a/ansible/vars/picluster.yml b/ansible/vars/picluster.yml
new file mode 100644
index 00000000..676d0a8e
--- /dev/null
+++ b/ansible/vars/picluster.yml
@@ -0,0 +1,279 @@
+---
+# Pi Cluster variables
+
+#######
+# K3S #
+#######
+
+# k3s version
+k3s_version: v1.24.7+k3s1
+
+# k3s master node IP
+k3s_master_ip: 10.0.0.11
+
+# k3s shared token
+k3s_token: "{{ vault.cluster.k3s.token }}"
+
+# k3s config directory
+k3s_config_directory: /etc/rancher/k3s
+
+# kubelet configuration
+k3s_kubelet_config: |
+ apiVersion: kubelet.config.k8s.io/v1beta1
+ kind: KubeletConfiguration
+ shutdownGracePeriod: 30s
+ shutdownGracePeriodCriticalPods: 10s
+
+# Extra arguments for k3s server installation
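+# servicelb, traefik and local-storage embedded add-ons are disabled because MetalLB, Traefik
+# and Longhorn are deployed separately (see argocd/bootstrap/root/values.yaml)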
+k3s_server_extra_args: >-
+ --write-kubeconfig-mode '0644'
+ --disable 'servicelb'
+ --disable 'traefik'
+ --disable 'local-storage'
+ --node-taint 'node-role.kubernetes.io/master=true:NoSchedule'
+ --kube-controller-manager-arg 'bind-address=0.0.0.0'
+ --kube-proxy-arg 'metrics-bind-address=0.0.0.0'
+ --kube-scheduler-arg 'bind-address=0.0.0.0'
+ --kubelet-arg 'config=/etc/rancher/k3s/kubelet.config'
+ --kube-controller-manager-arg 'terminated-pod-gc-threshold=10'
+
+# Extra arguments for k3s agent installation
+k3s_worker_extra_args: >-
+ --node-label 'node_type=worker'
+ --kubelet-arg 'config=/etc/rancher/k3s/kubelet.config'
+ --kube-proxy-arg 'metrics-bind-address=0.0.0.0'
+
+
+###########
+# Traefik #
+###########
+
+# HTTP Basic auth credentials
+traefik_basic_auth_user: "{{ vault.traefik.admin.user }}"
+traefik_basic_auth_passwd: "{{ vault.traefik.admin.password }}"
+
+# DNS cluster service end-points
+traefik_dashboard_dns: "traefik.{{ dns_domain }}"
+longhorn_dashboard_dns: "storage.{{ dns_domain }}"
+kibana_dashboard_dns: "kibana.{{ dns_domain }}"
+elasticsearch_dns: "elasticsearch.{{ dns_domain }}"
+fluentd_dns: "fluentd.{{ dns_domain }}"
+monitoring_dns: "monitoring.{{ dns_domain }}"
+linkerd_dashboard_dns: "linkerd.{{ dns_domain }}"
+
+#################################
+# TLS Certificates: LetsEncrypt #
+#################################
+
+# Enable letsencrypt certificates
+enable_letsencrypt: true
+
+# IONOS API credentials
+ionos_public_prefix: "{{ vault.certmanager.ionos.public_prefix }}"
+ionos_secret: "{{ vault.certmanager.ionos.secret }}"
+
+# issuer email
+acme_issuer_email: admin@ricsanfre.com
+
+##########################
+# Minio S3 configuration #
+##########################
+
+# Minio S3 Server
+minio_hostname: "s3.{{ dns_domain }}"
+minio_endpoint: "{{ minio_hostname }}:9091"
+minio_url: "https://{{ minio_hostname }}:9091"
+
+# Minio data dirs
+minio_server_make_datadirs: true
+minio_server_datadirs:
+ - /storage/minio
+
+# Minio admin credentials
+minio_root_user: "minioadmin"
+minio_root_password: "{{ vault.minio.root.key }}"
+
+# Minio site region configuration
+minio_site_region: "eu-west-1"
+
+# Enabling TLS
+minio_enable_tls: true
+minio_validate_certificate: false
+
+# Create Prometheus bearer token
+minio_prometheus_bearer_token: true
+
+# Minio Buckets
+minio_buckets:
+ - name: restic
+ policy: read-write
+ - name: k3s-longhorn
+ policy: read-write
+ - name: k3s-velero
+ policy: read-write
+ - name: k3s-loki
+ policy: read-write
+ - name: k3s-tempo
+ policy: read-write
+
+# Minio users and ACLs
+minio_users:
+ - name: "{{ vault.minio.restic.user }}"
+ password: "{{ vault.minio.restic.key }}"
+ buckets_acl:
+ - name: restic
+ policy: read-write
+ - name: "{{ vault.minio.longhorn.user }}"
+ password: "{{ vault.minio.longhorn.key }}"
+ buckets_acl:
+ - name: k3s-longhorn
+ policy: read-write
+ - name: "{{ vault.minio.velero.user }}"
+ password: "{{ vault.minio.velero.key }}"
+ buckets_acl:
+ - name: k3s-velero
+ policy: custom
+ custom:
+ - rule: |
+ "Effect": "Allow",
+ "Action": [
+ "s3:GetObject",
+ "s3:DeleteObject",
+ "s3:PutObject",
+ "s3:AbortMultipartUpload",
+ "s3:ListMultipartUploadParts"
+ ],
+ "Resource": [
+ "arn:aws:s3:::k3s-velero/*"
+ ]
+ - rule: |
+ "Effect": "Allow",
+ "Action": [
+ "s3:ListBucket"
+ ],
+ "Resource": [
+ "arn:aws:s3:::k3s-velero"
+ ]
+
+ - name: "{{ vault.minio.loki.user }}"
+ password: "{{ vault.minio.loki.key }}"
+ buckets_acl:
+ - name: k3s-loki
+ policy: read-write
+
+ - name: "{{ vault.minio.tempo.user }}"
+ password: "{{ vault.minio.tempo.key }}"
+ buckets_acl:
+ - name: k3s-tempo
+ policy: custom
+ custom:
+ - rule: |
+ "Effect": "Allow",
+ "Action": [
+ "s3:PutObject",
+ "s3:GetObject",
+ "s3:ListBucket",
+ "s3:DeleteObject",
+ "s3:GetObjectTagging",
+ "s3:PutObjectTagging"
+ ],
+ "Resource": [
+ "arn:aws:s3:::k3s-tempo/*",
+ "arn:aws:s3:::k3s-tempo"
+ ]
+
+########################
+# Restic configuration #
+########################
+
+# Restic S3 repository configuration
+restic_repository: "s3:{{ minio_url }}/restic"
+restic_use_ca_cert: true
+restic_environment:
+ - name: AWS_ACCESS_KEY_ID
+ value: "{{ vault.minio.restic.user }}"
+ - name: AWS_SECRET_ACCESS_KEY
+ value: "{{ vault.minio.restic.key }}"
+
+#######################
+# Vault configuration
+#######################
+
+vault_hostname: "vault.{{ dns_domain }}"
+vault_address: 10.0.0.1
+vault_dns: "{{ vault_hostname }}"
+vault_enable_tls: true
+custom_ca: false
+vault_init: true
+vault_unseal: true
+vault_unseal_service: true
+tls_skip_verify: false
+
+# Configure KV
+vault_kv_secrets:
+ path: secret
+
+# Policies
+policies:
+ - name: write
+ hcl: |
+ path "secret/*" {
+ capabilities = [ "create", "read", "update", "delete", "list", "patch" ]
+ }
+ - name: read
+ hcl: |
+ path "secret/*" {
+ capabilities = [ "read" ]
+ }
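+
+# Illustrative Vault CLI equivalent for loading a policy (assumption, not part of this repo):
+#   vault policy write read read-policy.hcl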
+
+###################
+# Velero Secrets #
+###################
+
+# Minio user, key and bucket
+minio_velero_user: "{{ vault.minio.velero.user }}"
+minio_velero_key: "{{ vault.minio.velero.key }}"
+
+velero_secret_content: |
+ [default]
+ aws_access_key_id: "{{ minio_velero_user }}"
+ aws_secret_access_key: "{{ minio_velero_key }}"
+
+###################
+# Longhorn Secrets #
+###################
+
+# Minio user, key and bucket
+minio_longhorn_user: "{{ vault.minio.longhorn.user }}"
+minio_longhorn_key: "{{ vault.minio.longhorn.key }}"
+
+###################
+# Logging Secrets #
+###################
+
+# Fluentd-fluentbit shared key
+fluentd_shared_key: "{{ vault.logging.fluentd.shared_key }}"
+
+# Elasticsearch 'elastic' user password
+efk_elasticsearch_passwd: "{{ vault.logging.elasticsearch.password }}"
+
+# Loki minio user, key and bucket
+minio_loki_user: "{{ vault.minio.loki.user }}"
+minio_loki_key: "{{ vault.minio.loki.key }}"
+
+######################
+# Monitoring Secrets #
+######################
+
+# Grafana admin user password
+prometheus_grafana_password: "{{ vault.grafana.admin.password }}"
+
+
+#######################
+# Tracing Secrets #
+#######################
+
+# Tempo minio user, key and bucket
+minio_tempo_user: "{{ vault.minio.tempo.user }}"
+minio_tempo_key: "{{ vault.minio.tempo.key }}"
+minio_tempo_bucket: k3s-tempo
diff --git a/ansible/vars/selfsigned-certificates.yml b/ansible/vars/selfsigned-certificates.yml
new file mode 100644
index 00000000..742355ce
--- /dev/null
+++ b/ansible/vars/selfsigned-certificates.yml
@@ -0,0 +1,8 @@
+---
+# Self-signed certificate generation variables
+ssl_key_size: 4096
+ssl_certificate_provider: selfsigned
+key_type: RSA
+country_name: ES
+email_address: admin@ricsanfre.com
+organization_name: Ricsanfre
diff --git a/ansible/vars/vault.yml b/ansible/vars/vault.yml
new file mode 100644
index 00000000..71e91b2c
--- /dev/null
+++ b/ansible/vars/vault.yml
@@ -0,0 +1,55 @@
+---
+# Encrypted variables - Ansible Vault
+vault:
+ # SAN
+ san:
+ iscsi:
+ node_pass: s1cret0
+ password_mutual: 0tr0s1cret0
+ # K3s secrets
+ k3s:
+ k3s_token: s1cret0
+ # traefik secrets
+ traefik:
+ basic_auth:
+ user: admin
+ passwd: s1cret0
+ # Minio S3 secrets
+ minio:
+ root:
+ user: root
+ key: supers1cret0
+ restic:
+ user: restic
+ key: supers1cret0
+ longhorn:
+ user: longhorn
+ key: supers1cret0
+ velero:
+ user: velero
+ key: supers1cret0
+ loki:
+ user: loki
+ key: supers1cret0
+ tempo:
+ user: tempo
+ key: supers1cret0
+ # elastic search
+ elasticsearch:
+ elastic:
+ user: elastic
+ password: s1cret0
+ # Fluentd
+ fluentd:
+ shared_key: s1cret0
+ # Grafana
+ grafana:
+ admin:
+ user: admin
+ password: s1cret0
+
+ # Certmanager
+ certmanager:
+ ionos:
+ public_prefix: your-public-prefix
+ secret: your-key
diff --git a/ansible/vars/vault.yml.j2 b/ansible/vars/vault.yml.j2
new file mode 100644
index 00000000..d88f9e09
--- /dev/null
+++ b/ansible/vars/vault.yml.j2
@@ -0,0 +1,54 @@
+---
+# Encrypted variables - Ansible Vault
+vault:
+ # SAN
+ san:
+ iscsi:
+ node_pass: {{ san_iscsi_node_pass | default("") }}
+ password_mutual: {{ san_iscsi_mutual_pass | default("") }}
+ # K3s secrets
+ cluster:
+ k3s:
+ token: {{ k3s_token }}
+ # Traefik secrets
+ traefik:
+ admin:
+ user: admin
+ password: {{ traefik_basic_auth_password }}
+ # Minio S3 secrets
+ minio:
+ root:
+ user: root
+ key: {{ minio_root_password }}
+ restic:
+ user: restic
+ key: {{ minio_restic_password }}
+ longhorn:
+ user: longhorn
+ key: {{ minio_longhorn_password }}
+ velero:
+ user: velero
+ key: {{ minio_velero_password }}
+ loki:
+ user: loki
+ key: {{ minio_loki_password }}
+ tempo:
+ user: tempo
+ key: {{ minio_tempo_password }}
+ # elasticsearch and fluentd
+ logging:
+ elastic:
+ user: elastic
+ password: {{ elasticsearch_admin_password }}
+ fluentd:
+ shared_key: {{ fluentd_shared_key }}
+ # Grafana
+ grafana:
+ admin:
+ user: admin
+ password: {{ grafana_admin_password }}
+ # Certmanager
+ certmanager:
+ ionos:
+ public_prefix: {{ ionos_public_prefix }}
+ secret: {{ ionos_secret }}
diff --git a/argocd/bootstrap/argocd/Chart.yaml b/argocd/bootstrap/argocd/Chart.yaml
new file mode 100644
index 00000000..8e465fe9
--- /dev/null
+++ b/argocd/bootstrap/argocd/Chart.yaml
@@ -0,0 +1,7 @@
+apiVersion: v2
+name: argocd
+version: 0.0.0
+dependencies:
+ - name: argo-cd
+ version: 5.17.1
+ repository: https://argoproj.github.io/argo-helm
diff --git a/argocd/bootstrap/argocd/templates/ingress.yaml b/argocd/bootstrap/argocd/templates/ingress.yaml
new file mode 100644
index 00000000..8abfc9c3
--- /dev/null
+++ b/argocd/bootstrap/argocd/templates/ingress.yaml
@@ -0,0 +1,31 @@
+---
+# HTTPS Ingress
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+ name: argocd-ingress
+ namespace: {{ .Release.Namespace }}
+ annotations:
+ # HTTPS as entry point
+ traefik.ingress.kubernetes.io/router.entrypoints: websecure
+ # Enable TLS
+ traefik.ingress.kubernetes.io/router.tls: "true"
+    # Enable cert-manager to automatically create the SSL certificate and store it in a Secret
+ cert-manager.io/cluster-issuer: {{ .Values.ingress.certmanager.tlsIssuer }}-issuer
+ cert-manager.io/common-name: {{ .Values.ingress.host }}
+spec:
+ tls:
+ - hosts:
+ - {{ .Values.ingress.host }}
+ secretName: argocd-tls
+ rules:
+ - host: {{ .Values.ingress.host }}
+ http:
+ paths:
+ - path: /
+ pathType: Prefix
+ backend:
+ service:
+ name: argocd-server
+ port:
+ number: 80
diff --git a/argocd/bootstrap/argocd/values.yaml b/argocd/bootstrap/argocd/values.yaml
new file mode 100644
index 00000000..58b9b713
--- /dev/null
+++ b/argocd/bootstrap/argocd/values.yaml
@@ -0,0 +1,29 @@
+
+# Ingress configuration
+ingress:
+ host: argocd.picluster.ricsanfre.com
+ # configure cert-manager issuer
+ certmanager:
+ tlsIssuer: letsencrypt
+
+argo-cd:
+ configs:
+ params:
+ # Run server without TLS
+      # Traefik terminates TLS connections
+ server.insecure: true
+ cm:
+ statusbadge.enabled: 'true'
+ resource.customizations.health.argoproj.io_Application: |
+ hs = {}
+ hs.status = "Progressing"
+ hs.message = ""
+ if obj.status ~= nil then
+ if obj.status.health ~= nil then
+ hs.status = obj.status.health.status
+ if obj.status.health.message ~= nil then
+ hs.message = obj.status.health.message
+ end
+ end
+ end
+ return hs
diff --git a/argocd/bootstrap/crds/cert-manager/kustomization.yaml b/argocd/bootstrap/crds/cert-manager/kustomization.yaml
new file mode 100644
index 00000000..610dcf70
--- /dev/null
+++ b/argocd/bootstrap/crds/cert-manager/kustomization.yaml
@@ -0,0 +1,7 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+resources:
+# Cert-manager helm installation https://cert-manager.io/docs/installation/helm/
+# CRDs can be installed manually.
+# cert-manager helm chart to be installed with value installCRDs=false
+- https://github.com/cert-manager/cert-manager/releases/download/v1.10.0/cert-manager.crds.yaml
\ No newline at end of file
diff --git a/argocd/bootstrap/crds/external-secrets/kustomization.yaml b/argocd/bootstrap/crds/external-secrets/kustomization.yaml
new file mode 100644
index 00000000..2b8c9756
--- /dev/null
+++ b/argocd/bootstrap/crds/external-secrets/kustomization.yaml
@@ -0,0 +1,7 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+resources:
+# external-secrets https://github.com/external-secrets/external-secrets/tree/main/deploy/charts/external-secrets
+# helm chart version 0.7.1
+# external-secrets helm chart to be installed with value installCRDs=false
+- https://raw.githubusercontent.com/external-secrets/external-secrets/v0.7.1/deploy/crds/bundle.yaml
\ No newline at end of file
diff --git a/argocd/bootstrap/crds/kube-prometheus-stack/kustomization.yaml b/argocd/bootstrap/crds/kube-prometheus-stack/kustomization.yaml
new file mode 100644
index 00000000..2f929277
--- /dev/null
+++ b/argocd/bootstrap/crds/kube-prometheus-stack/kustomization.yaml
@@ -0,0 +1,15 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+resources:
+# Kube-prometheus-stack CRDs.
+# https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack
+# Helm chart version 43.3.1
+# kube-prometheus-stack helm chart to be installed with helm param --skip-crds
+- https://raw.githubusercontent.com/prometheus-community/helm-charts/kube-prometheus-stack-43.3.1/charts/kube-prometheus-stack/crds/crd-alertmanagerconfigs.yaml
+- https://raw.githubusercontent.com/prometheus-community/helm-charts/kube-prometheus-stack-43.3.1/charts/kube-prometheus-stack/crds/crd-alertmanagers.yaml
+- https://raw.githubusercontent.com/prometheus-community/helm-charts/kube-prometheus-stack-43.3.1/charts/kube-prometheus-stack/crds/crd-podmonitors.yaml
+- https://raw.githubusercontent.com/prometheus-community/helm-charts/kube-prometheus-stack-43.3.1/charts/kube-prometheus-stack/crds/crd-probes.yaml
+- https://raw.githubusercontent.com/prometheus-community/helm-charts/kube-prometheus-stack-43.3.1/charts/kube-prometheus-stack/crds/crd-prometheuses.yaml
+- https://raw.githubusercontent.com/prometheus-community/helm-charts/kube-prometheus-stack-43.3.1/charts/kube-prometheus-stack/crds/crd-prometheusrules.yaml
+- https://raw.githubusercontent.com/prometheus-community/helm-charts/kube-prometheus-stack-43.3.1/charts/kube-prometheus-stack/crds/crd-servicemonitors.yaml
+- https://raw.githubusercontent.com/prometheus-community/helm-charts/kube-prometheus-stack-43.3.1/charts/kube-prometheus-stack/crds/crd-thanosrulers.yaml
diff --git a/argocd/bootstrap/crds/kustomization.yaml b/argocd/bootstrap/crds/kustomization.yaml
new file mode 100644
index 00000000..c046d5d4
--- /dev/null
+++ b/argocd/bootstrap/crds/kustomization.yaml
@@ -0,0 +1,8 @@
+---
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+resources:
+ - ./cert-manager
+ - ./external-secrets
+ - ./traefik
+ - ./kube-prometheus-stack
diff --git a/argocd/bootstrap/crds/traefik/kustomization.yaml b/argocd/bootstrap/crds/traefik/kustomization.yaml
new file mode 100644
index 00000000..6916f7b6
--- /dev/null
+++ b/argocd/bootstrap/crds/traefik/kustomization.yaml
@@ -0,0 +1,9 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+resources:
+# Traefik helm chart v20.6.0 CRDs.
+# Traefik provides a CRDs upgrade process (https://github.com/traefik/traefik-helm-chart#upgrading-crds)
+# Issue #581 (https://github.com/traefik/traefik-helm-chart/issues/581)
+# Within the CRDs directory in the github repository there is a kustomization.yaml.
+# Traefik helm chart to be installed with helm param --skip-crds
+- https://github.com/traefik/traefik-helm-chart/traefik/crds/?ref=v20.6.0
\ No newline at end of file
diff --git a/argocd/bootstrap/root/Chart.yaml b/argocd/bootstrap/root/Chart.yaml
new file mode 100644
index 00000000..ec654b80
--- /dev/null
+++ b/argocd/bootstrap/root/Chart.yaml
@@ -0,0 +1,3 @@
+apiVersion: v2
+name: bootstrap
+version: 0.0.0
\ No newline at end of file
diff --git a/argocd/bootstrap/root/templates/app-set.yaml b/argocd/bootstrap/root/templates/app-set.yaml
new file mode 100644
index 00000000..fdb35ce7
--- /dev/null
+++ b/argocd/bootstrap/root/templates/app-set.yaml
@@ -0,0 +1,37 @@
+{{- range $index, $app := .Values.apps }}
+---
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+ name: {{ $app.name }}
+ namespace: {{ $.Release.Namespace }}
+ annotations:
+ argocd.argoproj.io/sync-wave: '{{ default 0 $app.syncWave }}'
+spec:
+ destination:
+ namespace: {{ $app.namespace }}
+ server: https://kubernetes.default.svc
+ project: default
+ source:
+ path: {{ $app.path }}
+ repoURL: {{ $.Values.gitops.repo }}
+ targetRevision: {{ $.Values.gitops.revision }}
+{{- if $app.helm }}
+ helm:
+{{ toYaml $app.helm | indent 6 }}
+{{- end }}
+ syncPolicy:
+ automated:
+ prune: true
+ selfHeal: true
+ retry:
+ limit: 10
+ backoff:
+ duration: 1m
+ maxDuration: 16m
+ factor: 2
+ syncOptions:
+ - CreateNamespace=true
+ - ServerSideApply=true
+ - ApplyOutOfSyncOnly=true
+{{- end }}
\ No newline at end of file
diff --git a/argocd/bootstrap/root/templates/namespaces.yaml b/argocd/bootstrap/root/templates/namespaces.yaml
new file mode 100644
index 00000000..e69029e8
--- /dev/null
+++ b/argocd/bootstrap/root/templates/namespaces.yaml
@@ -0,0 +1,39 @@
+# Create annotated namespaces
+# Adding Linkerd annotations
+
+# linkerd namespace
+kind: Namespace
+apiVersion: v1
+metadata:
+ name: linkerd
+ annotations:
+ linkerd.io/inject: disabled
+ labels:
+ linkerd.io/is-control-plane: "true"
+ config.linkerd.io/admission-webhooks: disabled
+ linkerd.io/control-plane-ns: linkerd
+---
+
+# logging namespace
+kind: Namespace
+apiVersion: v1
+metadata:
+ name: logging
+ annotations:
+ linkerd.io/inject: enabled
+---
+# monitoring
+kind: Namespace
+apiVersion: v1
+metadata:
+ name: monitoring
+ annotations:
+ linkerd.io/inject: enabled
+---
+# tracing
+kind: Namespace
+apiVersion: v1
+metadata:
+ name: tracing
+ annotations:
+ linkerd.io/inject: enabled
diff --git a/argocd/bootstrap/root/values.yaml b/argocd/bootstrap/root/values.yaml
new file mode 100644
index 00000000..9aabc112
--- /dev/null
+++ b/argocd/bootstrap/root/values.yaml
@@ -0,0 +1,87 @@
+gitops:
+ repo: https://github.com/ricsanfre/pi-cluster
+ revision: master
+
+# List of applications corresponding to different sync waves
+apps:
+  # CRDs App
+ - name: crds
+ namespace: default
+ path: argocd/bootstrap/crds
+ syncWave: 0
+ # External Secrets Operator
+ - name: external-secrets
+ namespace: external-secrets
+ path: argocd/system/external-secrets
+ syncWave: 1
+  # MetalLB
+ - name: metallb
+ namespace: metallb
+ path: argocd/system/metallb
+ syncWave: 1
+ # Cert-Manager and Trust Manager
+ - name: cert-manager
+ namespace: cert-manager
+ path: argocd/system/cert-manager
+ syncWave: 2
+ # Linkerd service mesh
+ - name: linkerd
+ namespace: linkerd
+ path: argocd/system/linkerd
+ syncWave: 3
+ # Traefik Ingress Controller
+ - name: traefik
+ namespace: traefik
+ path: argocd/system/traefik
+ syncWave: 4
+ helm:
+      # skip installation of Traefik CRDs
+ skipCrds: true
+ # CSI External Snapshotter
+ - name: csi-external-snapshotter
+ namespace: kube-system
+ path: argocd/system/csi-external-snapshotter
+ syncWave: 4
+ # Longhorn Distributed Block Storage
+ - name: longhorn
+ namespace: longhorn-system
+ path: argocd/system/longhorn-system
+ syncWave: 5
+ # Velero Backup
+ - name: velero
+ namespace: velero
+ path: argocd/system/velero
+ syncWave: 5
+ # Logging: Loki and EFK stack
+ - name: logging
+ namespace: logging
+ path: argocd/system/logging
+ syncWave: 6
+ # Kube-prometheus-stack
+ - name: monitoring
+ namespace: monitoring
+ path: argocd/system/monitoring
+ syncWave: 6
+ helm:
+      # skip installation of kube-prometheus-stack CRDs
+ skipCrds: true
+ # Linkerd-viz
+ - name: linkerd-viz
+ namespace: linkerd-viz
+ path: argocd/system/linkerd-viz
+ syncWave: 6
+ # Tracing: Tempo
+ - name: tracing
+ namespace: tracing
+ path: argocd/system/tracing
+ syncWave: 6
+ # Linkerd-jaeger
+ - name: linkerd-jaeger
+ namespace: linkerd-jaeger
+ path: argocd/system/linkerd-jaeger
+ syncWave: 6
+ # Argo CD App
+ - name: argocd
+ namespace: argocd
+ path: argocd/bootstrap/argocd
+ syncWave: 6
diff --git a/argocd/bootstrap/vault/vault-auth-serviceaccount.yaml b/argocd/bootstrap/vault/vault-auth-serviceaccount.yaml
new file mode 100644
index 00000000..a94a92c5
--- /dev/null
+++ b/argocd/bootstrap/vault/vault-auth-serviceaccount.yaml
@@ -0,0 +1,51 @@
+# Create service account to be used by Vault kubernetes authentication
+#
+# Kubernetes Auth Doc:
+# https://developer.hashicorp.com/vault/docs/auth/kubernetes
+# External Vault config:
+# https://developer.hashicorp.com/vault/tutorials/kubernetes/kubernetes-external-vault
+
+---
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+ name: vault-auth
+ namespace: vault
+
+
+# Vault kubernetes authentication
+# The auth method accesses the Kubernetes TokenReview API to validate that the provided JWT is still valid.
+# Service Accounts used in this auth method will need to have access to the TokenReview API.
+# If Kubernetes is configured to use RBAC roles, the Service Account should be granted permissions to access this API.
+# https://developer.hashicorp.com/vault/docs/auth/kubernetes#configuring-kubernetes
+
+---
+apiVersion: rbac.authorization.k8s.io/v1
+kind: ClusterRoleBinding
+metadata:
+ name: role-tokenreview-binding
+ namespace: vault
+roleRef:
+ apiGroup: rbac.authorization.k8s.io
+ kind: ClusterRole
+ name: system:auth-delegator
+subjects:
+ - kind: ServiceAccount
+ name: vault-auth
+ namespace: vault
+
+---
+# Long-lived token for vault-auth service account.
+# Since Kubernetes v1.24, secrets containing long-lived tokens associated with service accounts
+# are no longer created automatically.
+# See how to create it in Kubernetes documentation:
+# https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/#manually-create-a-long-lived-api-token-for-a-serviceaccount
+
+apiVersion: v1
+kind: Secret
+type: kubernetes.io/service-account-token
+metadata:
+ name: vault-auth-secret
+ namespace: vault
+ annotations:
+ kubernetes.io/service-account.name: vault-auth
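+
+# Illustrative: once the Secret is created, the JWT can be read with (assumption, not part of the manifests):
+#   kubectl get secret vault-auth-secret -n vault -o jsonpath='{.data.token}' | base64 -d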
diff --git a/argocd/system/cert-manager/Chart.yaml b/argocd/system/cert-manager/Chart.yaml
new file mode 100644
index 00000000..137b6feb
--- /dev/null
+++ b/argocd/system/cert-manager/Chart.yaml
@@ -0,0 +1,14 @@
+apiVersion: v2
+name: certmanager
+version: 0.0.0
+dependencies:
+ - name: cert-manager
+ version: v1.10.0
+ repository: https://charts.jetstack.io
+ - name: trust-manager
+ version: v0.3.0
+ repository: https://charts.jetstack.io
+ - name: cert-manager-webhook-ionos
+ version: 1.0.2
+ repository: https://fabmade.github.io/cert-manager-webhook-ionos
+ condition: acme.dns01.ionos.enabled
\ No newline at end of file
diff --git a/roles/certmanager/templates/ca_issuer.yml.j2 b/argocd/system/cert-manager/templates/ca-issuer.yaml
similarity index 73%
rename from roles/certmanager/templates/ca_issuer.yml.j2
rename to argocd/system/cert-manager/templates/ca-issuer.yaml
index b596eda0..38d64d1c 100644
--- a/roles/certmanager/templates/ca_issuer.yml.j2
+++ b/argocd/system/cert-manager/templates/ca-issuer.yaml
@@ -1,9 +1,10 @@
+{{- if .Values.ca.enabled }}
---
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: picluster-ca
- namespace: {{ k3s_certmanager_namespace }}
+ namespace: {{ .Release.Namespace }}
spec:
isCA: true
commonName: picluster-ca
@@ -20,7 +21,8 @@ apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
name: ca-issuer
- namespace: {{ k3s_certmanager_namespace }}
+ namespace: {{ .Release.Namespace }}
spec:
ca:
- secretName: root-secret
\ No newline at end of file
+ secretName: root-secret
+{{- end }}
\ No newline at end of file
diff --git a/argocd/system/cert-manager/templates/ionos-externalsecret.yaml b/argocd/system/cert-manager/templates/ionos-externalsecret.yaml
new file mode 100644
index 00000000..c34e4a57
--- /dev/null
+++ b/argocd/system/cert-manager/templates/ionos-externalsecret.yaml
@@ -0,0 +1,26 @@
+{{- if .Values.acme.dns01.ionos.enabled }}
+apiVersion: external-secrets.io/v1beta1
+kind: ExternalSecret
+metadata:
+ name: ionos-externalsecret
+ namespace: {{ .Release.Namespace }}
+spec:
+ secretStoreRef:
+ name: vault-backend
+ kind: ClusterSecretStore
+ target:
+ name: ionos-secret
+ data:
+ - secretKey: IONOS_PUBLIC_PREFIX
+ remoteRef:
+ key: certmanager/ionos
+ property: public_prefix
+ conversionStrategy: Default # ArgoCD sync issue
+ decodingStrategy: None # ArgoCD sync issue
+ - secretKey: IONOS_SECRET
+ remoteRef:
+ key: certmanager/ionos
+ property: secret
+ conversionStrategy: Default # ArgoCD sync issue
+ decodingStrategy: None # ArgoCD sync issue
+{{- end }}
\ No newline at end of file
diff --git a/roles/certmanager/templates/ionos_issuer.yml.j2 b/argocd/system/cert-manager/templates/ionos-issuer.yaml
similarity index 70%
rename from roles/certmanager/templates/ionos_issuer.yml.j2
rename to argocd/system/cert-manager/templates/ionos-issuer.yaml
index 8880c4ec..0ee468c1 100644
--- a/roles/certmanager/templates/ionos_issuer.yml.j2
+++ b/argocd/system/cert-manager/templates/ionos-issuer.yaml
@@ -1,16 +1,17 @@
----
-
+{{- if .Values.acme.dns01.ionos.enabled }}
+{{- if index .Values "cert-manager-webhook-ionos" -}}
+ {{- $webhook:= index .Values "cert-manager-webhook-ionos" }}
apiVersion: cert-manager.io/v1
kind: ClusterIssuer
metadata:
name: letsencrypt-issuer
- namespace: {{ k3s_certmanager_namespace }}
+ namespace: {{ .Release.Namespace }}
spec:
acme:
# The ACME server URL
server: https://acme-v02.api.letsencrypt.org/directory
# Email address used for ACME registration
- email: {{ acme_issuer_email }}
+ email: {{ .Values.acme.acme_issuer_email }}
# Name of a secret used to store the ACME account private key
privateKeySecretRef:
name: letsencrypt-ionos-prod
@@ -18,7 +19,7 @@ spec:
solvers:
- dns01:
webhook:
- groupName: {{ certmanager_ionos_webhook_groupName }}
+ groupName: {{ $webhook.groupName }}
solverName: ionos
config:
apiUrl: https://api.hosting.ionos.com/dns/v1
@@ -28,3 +29,5 @@ spec:
secretKeySecretRef:
key: IONOS_SECRET
name: ionos-secret
+{{- end -}}
+{{- end -}}
\ No newline at end of file
diff --git a/argocd/system/cert-manager/templates/selfsigned-issuer.yaml b/argocd/system/cert-manager/templates/selfsigned-issuer.yaml
new file mode 100644
index 00000000..931e3b49
--- /dev/null
+++ b/argocd/system/cert-manager/templates/selfsigned-issuer.yaml
@@ -0,0 +1,11 @@
+{{- if .Values.ca.enabled }}
+apiVersion: cert-manager.io/v1
+kind: ClusterIssuer
+metadata:
+ name: self-signed-issuer
+ namespace: {{ .Release.Namespace }}
+ annotations:
+ argocd.argoproj.io/sync-options: SkipDryRunOnMissingResource=true
+spec:
+ selfSigned: {}
+{{- end }}
\ No newline at end of file
diff --git a/argocd/system/cert-manager/values.yaml b/argocd/system/cert-manager/values.yaml
new file mode 100644
index 00000000..57f70bef
--- /dev/null
+++ b/argocd/system/cert-manager/values.yaml
@@ -0,0 +1,30 @@
+# Custom CA issuer configuration
+# Enable private PKI using a custom CA
+# It creates a ClusterIssuer resource `ca-issuer`
+ca:
+ enabled: true
+
+# ACME (Letsencrypt) issuer configuration
+# https://cert-manager.io/docs/configuration/acme/
+acme:
+ dns01:
+ # Enable ionos dns01 solver
+ # It creates ClusterIssuer resource `letsencrypt-issuer`
+ ionos:
+ enabled: true
+ acme_issuer_email: admin@ricsanfre.com
+
+########################
+# cert-manager subchart
+########################
+
+# CRDs installed as part of the CRDs application
+cert-manager:
+ installCRDs: false
+
+#####################################
+# cert-manager-webhook-ionos subchart
+#####################################
+cert-manager-webhook-ionos:
+ groupName: acme.ricsanfre.com
+
diff --git a/argocd/system/csi-external-snapshotter/crd/kustomization.yaml b/argocd/system/csi-external-snapshotter/crd/kustomization.yaml
new file mode 100644
index 00000000..5ca90915
--- /dev/null
+++ b/argocd/system/csi-external-snapshotter/crd/kustomization.yaml
@@ -0,0 +1,7 @@
+---
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+resources:
+ - https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/release-4.0/client/config/crd/snapshot.storage.k8s.io_volumesnapshotclasses.yaml
+ - https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/release-4.0/client/config/crd/snapshot.storage.k8s.io_volumesnapshotcontents.yaml
+ - https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/release-4.0/client/config/crd/snapshot.storage.k8s.io_volumesnapshots.yaml
diff --git a/argocd/system/csi-external-snapshotter/kustomization.yaml b/argocd/system/csi-external-snapshotter/kustomization.yaml
new file mode 100644
index 00000000..0bbb1339
--- /dev/null
+++ b/argocd/system/csi-external-snapshotter/kustomization.yaml
@@ -0,0 +1,6 @@
+---
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+resources:
+ - ./crd
+ - ./snapshot-controller
diff --git a/argocd/system/csi-external-snapshotter/snapshot-controller/kustomization.yaml b/argocd/system/csi-external-snapshotter/snapshot-controller/kustomization.yaml
new file mode 100644
index 00000000..86914e3a
--- /dev/null
+++ b/argocd/system/csi-external-snapshotter/snapshot-controller/kustomization.yaml
@@ -0,0 +1,7 @@
+---
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+namespace: kube-system
+resources:
+ - https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/release-4.0/deploy/kubernetes/snapshot-controller/rbac-snapshot-controller.yaml
+ - https://raw.githubusercontent.com/kubernetes-csi/external-snapshotter/release-4.0/deploy/kubernetes/snapshot-controller/setup-snapshot-controller.yaml
diff --git a/argocd/system/external-secrets/Chart.yaml b/argocd/system/external-secrets/Chart.yaml
new file mode 100644
index 00000000..8a8f18bd
--- /dev/null
+++ b/argocd/system/external-secrets/Chart.yaml
@@ -0,0 +1,7 @@
+apiVersion: v2
+name: external-secrets
+version: 0.0.0
+dependencies:
+ - name: external-secrets
+ version: 0.7.1
+ repository: https://charts.external-secrets.io
\ No newline at end of file
diff --git a/argocd/system/external-secrets/templates/vault-secretstore.yaml b/argocd/system/external-secrets/templates/vault-secretstore.yaml
new file mode 100644
index 00000000..40ac8211
--- /dev/null
+++ b/argocd/system/external-secrets/templates/vault-secretstore.yaml
@@ -0,0 +1,18 @@
+apiVersion: external-secrets.io/v1beta1
+kind: ClusterSecretStore
+metadata:
+ name: vault-backend
+ namespace: {{ .Release.Namespace }}
+spec:
+ provider:
+ vault:
+ server: {{ .Values.vault.vaultUrl }}
+{{- if .Values.vault.caBundle }}
+ caBundle: {{ .Values.vault.caBundle }}
+{{- end }}
+ path: {{ .Values.vault.kv.path }}
+ version: {{ .Values.vault.kv.version }}
+{{- if .Values.vault.auth }}
+ auth:
+{{ toYaml .Values.vault.auth | indent 8 }}
+{{- end }}
\ No newline at end of file
diff --git a/argocd/system/external-secrets/values.yaml b/argocd/system/external-secrets/values.yaml
new file mode 100644
index 00000000..19af8315
--- /dev/null
+++ b/argocd/system/external-secrets/values.yaml
@@ -0,0 +1,31 @@
+
+# Vault secret store
+vault:
+ # Vault server URL
+ vaultUrl: "https://vault.picluster.ricsanfre.com:8200"
+
+ # Vault CA cert
+  # caBundle is needed if Vault TLS is signed by a custom CA.
+  # If the Vault TLS certificate is signed by Letsencrypt (a publicly trusted CA), it should not be needed.
+  # CA cert base64-encoded with '\n' characters removed:
+  #   `cat vault-ca.pem | base64 | tr -d "\n"`
+ # caBundle:
+
+ # KV path and version
+ kv:
+ path: secret
+ version: v2
+
+ # auth method used
+ auth:
+ kubernetes:
+ mountPath: "kubernetes"
+ role: "external-secrets"
+
+############################
+# external-secrets subchart
+############################
+
+# CRDs installed as part of the CRDs application
+external-secrets:
+ installCRDs: false
diff --git a/argocd/system/linkerd-jaeger/Chart.yaml b/argocd/system/linkerd-jaeger/Chart.yaml
new file mode 100644
index 00000000..6ccf4678
--- /dev/null
+++ b/argocd/system/linkerd-jaeger/Chart.yaml
@@ -0,0 +1,7 @@
+apiVersion: v2
+name: linkerd-jaeger
+version: 0.0.0
+dependencies:
+ - name: linkerd-jaeger
+ version: 30.4.4
+ repository: https://helm.linkerd.io/stable
diff --git a/argocd/system/linkerd-jaeger/values.yaml b/argocd/system/linkerd-jaeger/values.yaml
new file mode 100644
index 00000000..ba5bf083
--- /dev/null
+++ b/argocd/system/linkerd-jaeger/values.yaml
@@ -0,0 +1,15 @@
+###########################
+# linkerd-jaeger subchart
+###########################
+
+linkerd-jaeger:
+ # Disable OTel collector installation
+ collector:
+ enabled: false
+ # Disable Jaeger installation
+ jaeger:
+ enabled: false
+ # Configure jaeger-injector to use tempo embedded OTel collector
+ webhook:
+ collectorSvcAddr: tracing-tempo-distributor.tracing:55678
+ collectorSvcAccount: tracing-tempo
diff --git a/argocd/system/linkerd-viz/Chart.yaml b/argocd/system/linkerd-viz/Chart.yaml
new file mode 100644
index 00000000..e3d3ea56
--- /dev/null
+++ b/argocd/system/linkerd-viz/Chart.yaml
@@ -0,0 +1,7 @@
+apiVersion: v2
+name: linkerd-viz
+version: 0.0.0
+dependencies:
+ - name: linkerd-viz
+ version: 30.3.4
+ repository: https://helm.linkerd.io/stable
diff --git a/roles/prometheus/dashboards/linkerd/linkerd-authority.json b/argocd/system/linkerd-viz/dashboards/linkerd-authority.json
similarity index 98%
rename from roles/prometheus/dashboards/linkerd/linkerd-authority.json
rename to argocd/system/linkerd-viz/dashboards/linkerd-authority.json
index 9d719f11..d4e77796 100644
--- a/roles/prometheus/dashboards/linkerd/linkerd-authority.json
+++ b/argocd/system/linkerd-viz/dashboards/linkerd-authority.json
@@ -1,56 +1,56 @@
{
+ "__elements": [],
"__inputs": [
{
- "name": "DS_PROMETHEUS",
- "label": "prometheus",
"description": "",
- "type": "datasource",
+ "label": "prometheus",
+ "name": "DS_PROMETHEUS",
"pluginId": "prometheus",
- "pluginName": "Prometheus"
+ "pluginName": "Prometheus",
+ "type": "datasource"
}
],
- "__elements": [],
"__requires": [
{
- "type": "panel",
"id": "gauge",
"name": "Gauge",
+ "type": "panel",
"version": ""
},
{
- "type": "grafana",
"id": "grafana",
"name": "Grafana",
+ "type": "grafana",
"version": "8.3.3"
},
{
- "type": "panel",
"id": "graph",
"name": "Graph (old)",
+ "type": "panel",
"version": ""
},
{
- "type": "panel",
"id": "heatmap",
"name": "Heatmap",
+ "type": "panel",
"version": ""
},
{
- "type": "datasource",
"id": "prometheus",
"name": "Prometheus",
+ "type": "datasource",
"version": "1.0.0"
},
{
- "type": "panel",
"id": "stat",
"name": "Stat",
+ "type": "panel",
"version": ""
},
{
- "type": "panel",
"id": "text",
"name": "Text",
+ "type": "panel",
"version": ""
}
],
@@ -489,7 +489,7 @@
"expr": "sum(irate(request_total{namespace=\"$namespace\", authority=\"$authority\", direction=\"inbound\", tls=\"true\"}[30s])) by (authority)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "🔒au/{{authority}}",
+ "legendFormat": "\ud83d\udd12au/{{authority}}",
"refId": "A"
},
{
@@ -788,7 +788,7 @@
"expr": "sum(irate(request_total{namespace=\"$namespace\", authority=\"$authority\", direction=\"outbound\", tls=\"true\"}[30s])) by (deployment)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "🔒deploy/{{deployment}}",
+ "legendFormat": "\ud83d\udd12deploy/{{deployment}}",
"refId": "A"
},
{
@@ -1070,7 +1070,7 @@
"expr": "sum(irate(request_total{namespace=\"$namespace\", authority=\"$authority\", direction=\"outbound\", tls=\"true\"}[30s])) by (pod)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "🔒po/{{pod}}",
+ "legendFormat": "\ud83d\udd12po/{{pod}}",
"refId": "A"
},
{
@@ -1234,6 +1234,16 @@
],
"templating": {
"list": [
+ {
+ "hide": 0,
+ "label": "datasource",
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
{
"allValue": ".*",
"current": {},
@@ -1313,4 +1323,4 @@
"title": "Linkerd Authority",
"uid": "linkerd-authority",
"version": 1
-}
+}
\ No newline at end of file
diff --git a/argocd/system/linkerd-viz/dashboards/linkerd-cronjob.json b/argocd/system/linkerd-viz/dashboards/linkerd-cronjob.json
new file mode 100644
index 00000000..bde974cf
--- /dev/null
+++ b/argocd/system/linkerd-viz/dashboards/linkerd-cronjob.json
@@ -0,0 +1,2364 @@
+{
+ "__elements": [],
+ "__inputs": [
+ {
+ "description": "",
+ "label": "prometheus",
+ "name": "DS_PROMETHEUS",
+ "pluginId": "prometheus",
+ "pluginName": "Prometheus",
+ "type": "datasource"
+ }
+ ],
+ "__requires": [
+ {
+ "id": "gauge",
+ "name": "Gauge",
+ "type": "panel",
+ "version": ""
+ },
+ {
+ "id": "grafana",
+ "name": "Grafana",
+ "type": "grafana",
+ "version": "8.3.3"
+ },
+ {
+ "id": "graph",
+ "name": "Graph (old)",
+ "type": "panel",
+ "version": ""
+ },
+ {
+ "id": "heatmap",
+ "name": "Heatmap",
+ "type": "panel",
+ "version": ""
+ },
+ {
+ "id": "prometheus",
+ "name": "Prometheus",
+ "type": "datasource",
+ "version": "1.0.0"
+ },
+ {
+ "id": "stat",
+ "name": "Stat",
+ "type": "panel",
+ "version": ""
+ },
+ {
+ "id": "text",
+ "name": "Text",
+ "type": "panel",
+ "version": ""
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 1,
+ "id": null,
+ "iteration": 1531763681685,
+ "links": [],
+ "panels": [
+ {
+ "content": "\n
\n
cj/$cronjob \n
",
+ "gridPos": {
+ "h": 2,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 20,
+ "links": [],
+ "mode": "html",
+ "options": {},
+ "title": "",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#d44a3a",
+ "rgba(237, 129, 40, 0.89)",
+ "#299c46"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": null,
+ "format": "percentunit",
+ "gauge": {
+ "maxValue": 1,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 8,
+ "x": 0,
+ "y": 2
+ },
+ "id": 5,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "options": {},
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": true,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(irate(response_total{classification=\"success\", namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"inbound\"}[30s])) / sum(irate(response_total{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"inbound\"}[30s]))",
+ "format": "time_series",
+ "instant": false,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "0.9,.99",
+ "title": "SUCCESS RATE",
+ "transparent": true,
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": null,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 8,
+ "x": 8,
+ "y": 2
+ },
+ "id": 4,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "options": {},
+ "postfix": " RPS",
+ "postfixFontSize": "100%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": true,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(irate(request_total{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"inbound\"}[30s]))",
+ "format": "time_series",
+ "instant": false,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "REQUEST RATE",
+ "transparent": true,
+ "type": "singlestat",
+ "valueFontSize": "100%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": null,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 4,
+ "x": 16,
+ "y": 2
+ },
+ "id": 11,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "options": {},
+ "postfix": "",
+ "postfixFontSize": "100%",
+ "prefix": "",
+ "prefixFontSize": "100%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "count(count(request_total{dst_namespace=\"$namespace\", cronjob!=\"\", dst_cronjob!=\"\", dst_cronjob=\"$cronjob\", direction=\"outbound\"}) by (namespace, cronjob))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "INBOUND CRONJOBS",
+ "transparent": true,
+ "type": "singlestat",
+ "valueFontSize": "100%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 4,
+ "x": 20,
+ "y": 2
+ },
+ "id": 15,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "options": {},
+ "postfix": "",
+ "postfixFontSize": "100%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "count(count(request_total{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"outbound\"}) by (namespace, dst_cronjob))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "OUTBOUND CRONJOBS",
+ "transparent": true,
+ "type": "singlestat",
+ "valueFontSize": "100%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "content": "\n INBOUND TRAFFIC \n
",
+ "gridPos": {
+ "h": 2,
+ "w": 24,
+ "x": 0,
+ "y": 6
+ },
+ "id": 17,
+ "links": [],
+ "mode": "html",
+ "options": {},
+ "title": "",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 0,
+ "y": 8
+ },
+ "id": 67,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(response_total{classification=\"success\", namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"inbound\"}[30s])) by (cronjob) / sum(irate(response_total{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"inbound\"}[30s])) by (cronjob)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "cj/{{cronjob}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "SUCCESS RATE",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": null,
+ "format": "percentunit",
+ "label": "",
+ "logBase": 1,
+ "max": "1",
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 8,
+ "y": 8
+ },
+ "id": 2,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(request_total{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"inbound\", tls=\"true\"}[30s])) by (cronjob)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "\ud83d\udd12cj/{{cronjob}}",
+ "refId": "A"
+ },
+ {
+ "expr": "sum(irate(request_total{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"inbound\", tls!=\"true\"}[30s])) by (cronjob)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "cj/{{cronjob}}",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "REQUEST RATE",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": null,
+ "format": "rps",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 16,
+ "y": 8
+ },
+ "id": 68,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.5, sum(irate(response_latency_ms_bucket{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"inbound\"}[30s])) by (le, cronjob))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "p50 cj/{{cronjob}}",
+ "refId": "A"
+ },
+ {
+ "expr": "histogram_quantile(0.95, sum(irate(response_latency_ms_bucket{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"inbound\"}[30s])) by (le, cronjob))",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 1,
+ "legendFormat": "p95 cj/{{cronjob}}",
+ "refId": "B"
+ },
+ {
+ "expr": "histogram_quantile(0.99, sum(irate(response_latency_ms_bucket{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"inbound\"}[30s])) by (le, cronjob))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "p99 cj/{{cronjob}}",
+ "refId": "C"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "LATENCY",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": null,
+ "format": "ms",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": true,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 15
+ },
+ "id": 148,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 0,
+ "y": 16
+ },
+ "id": 167,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "tcp_close_total{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"inbound\",errno!=\"\"}",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{peer}} {{errno}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "TCP CONNECTION FAILURES",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": null,
+ "format": "none",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 8,
+ "y": 16
+ },
+ "id": 168,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "tcp_open_connections{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"inbound\"}",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{peer}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "TCP CONNECTIONS OPEN",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "cards": {
+ "cardPadding": null,
+ "cardRound": null
+ },
+ "color": {
+ "cardColor": "#b4ff00",
+ "colorScale": "sqrt",
+ "colorScheme": "interpolateOranges",
+ "exponent": 0.5,
+ "mode": "spectrum"
+ },
+ "dataFormat": "timeseries",
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 16,
+ "y": 16
+ },
+ "heatmap": {},
+ "hideZeroBuckets": false,
+ "highlightCards": true,
+ "id": 169,
+ "legend": {
+ "show": false
+ },
+ "links": [],
+ "options": {},
+ "reverseYBuckets": false,
+ "targets": [
+ {
+ "expr": "tcp_connection_duration_ms_bucket{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"inbound\"}",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "title": "TCP CONNECTION DURATION",
+ "tooltip": {
+ "show": true,
+ "showHistogram": true
+ },
+ "type": "heatmap",
+ "xAxis": {
+ "show": true
+ },
+ "xBucketNumber": null,
+ "xBucketSize": null,
+ "yAxis": {
+ "decimals": null,
+ "format": "dtdurationms",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true,
+ "splitFactor": null
+ },
+ "yBucketBound": "auto",
+ "yBucketNumber": null,
+ "yBucketSize": null
+ }
+ ],
+ "title": "Inbound TCP Metrics",
+ "type": "row"
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 16
+ },
+ "id": 152,
+ "panels": [],
+ "title": "",
+ "type": "row"
+ },
+ {
+ "content": "\n INBOUND CRONJOBS \n
",
+ "gridPos": {
+ "h": 2,
+ "w": 24,
+ "x": 0,
+ "y": 17
+ },
+ "id": 76,
+ "links": [],
+ "mode": "html",
+ "options": {},
+ "title": "",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "collapsed": true,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 19
+ },
+ "id": 59,
+ "panels": [
+ {
+ "content": "\n
\n
cj/$inbound \n
",
+ "gridPos": {
+ "h": 2,
+ "w": 24,
+ "x": 0,
+ "y": 22.2
+ },
+ "id": 39,
+ "links": [],
+ "mode": "html",
+ "options": {},
+ "title": "",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 0,
+ "y": 24.2
+ },
+ "id": 36,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(response_total{classification=\"success\", cronjob!=\"\", cronjob=\"$inbound\", dst_namespace=\"$namespace\", dst_cronjob=\"$cronjob\", direction=\"outbound\"}[30s])) by (cronjob, pod) / sum(irate(response_total{cronjob!=\"\", cronjob=\"$inbound\", dst_namespace=\"$namespace\", dst_cronjob=\"$cronjob\", direction=\"outbound\"}[30s])) by (cronjob, pod)",
+ "format": "time_series",
+ "instant": false,
+ "intervalFactor": 1,
+ "legendFormat": "po/{{pod}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "SUCCESS RATE",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": null,
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": "1",
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 8,
+ "y": 24.2
+ },
+ "id": 22,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(request_total{cronjob!=\"\", cronjob=\"$inbound\", dst_namespace=\"$namespace\", dst_cronjob=\"$cronjob\", direction=\"outbound\", tls=\"true\"}[30s])) by (cronjob, pod)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "\ud83d\udd12po/{{pod}}",
+ "refId": "A"
+ },
+ {
+ "expr": "sum(irate(request_total{cronjob!=\"\", cronjob=\"$inbound\", dst_namespace=\"$namespace\", dst_cronjob=\"$cronjob\", direction=\"outbound\", tls!=\"true\"}[30s])) by (cronjob, pod)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "po/{{pod}}",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "REQUEST RATE",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": null,
+ "format": "rps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 16,
+ "y": 24.2
+ },
+ "id": 29,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.5, sum(rate(response_latency_ms_bucket{cronjob!=\"\", cronjob=\"$inbound\", dst_namespace=\"$namespace\", dst_cronjob=\"$cronjob\", direction=\"outbound\"}[30s])) by (le, cronjob))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "P50 cj/{{cronjob}}",
+ "refId": "A"
+ },
+ {
+ "expr": "histogram_quantile(0.95, sum(rate(response_latency_ms_bucket{cronjob!=\"\", cronjob=\"$inbound\", dst_namespace=\"$namespace\", dst_cronjob=\"$cronjob\", direction=\"outbound\"}[30s])) by (le, cronjob))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "P95 cj/{{cronjob}}",
+ "refId": "B"
+ },
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(response_latency_ms_bucket{cronjob!=\"\", cronjob=\"$inbound\", dst_namespace=\"$namespace\", dst_cronjob=\"$cronjob\", direction=\"outbound\"}[30s])) by (le, cronjob))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "P99 cj/{{cronjob}}",
+ "refId": "C"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "LATENCY",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "repeat": "inbound",
+ "title": "cj/$inbound",
+ "type": "row"
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 20
+ },
+ "id": 34,
+ "panels": [],
+ "repeat": null,
+ "title": "",
+ "type": "row"
+ },
+ {
+ "content": "\n OUTBOUND TRAFFIC \n
",
+ "gridPos": {
+ "h": 2,
+ "w": 24,
+ "x": 0,
+ "y": 21
+ },
+ "id": 32,
+ "links": [],
+ "mode": "html",
+ "options": {},
+ "title": "",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 0,
+ "y": 23
+ },
+ "id": 77,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(response_total{classification=\"success\", namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"outbound\"}[30s])) by (dst_cronjob) / sum(irate(response_total{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"outbound\"}[30s])) by (dst_cronjob)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "cj/{{dst_cronjob}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "SUCCESS RATE",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": null,
+ "format": "percentunit",
+ "label": "",
+ "logBase": 1,
+ "max": "1",
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 8,
+ "y": 23
+ },
+ "id": 78,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(request_total{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_cronjob)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "\ud83d\udd12cj/{{dst_cronjob}}",
+ "refId": "A"
+ },
+ {
+ "expr": "sum(irate(request_total{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"outbound\", tls!=\"true\"}[30s])) by (dst_cronjob)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "cj/{{dst_cronjob}}",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "REQUEST RATE",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "rps",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 16,
+ "y": 23
+ },
+ "id": 79,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.95, sum(rate(response_latency_ms_bucket{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"outbound\"}[30s])) by (le, dst_cronjob))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "P95 cj/{{dst_cronjob}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "P95 LATENCY",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": true,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 30
+ },
+ "id": 154,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 0,
+ "y": 29
+ },
+ "id": 157,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "tcp_close_total{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"outbound\",errno!=\"\"}",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{peer}} {{errno}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "TCP CONNECTION FAILURES",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": null,
+ "format": "none",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 8,
+ "y": 29
+ },
+ "id": 166,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "tcp_open_connections{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"outbound\"}",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{peer}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "TCP CONNECTIONS OPEN",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "cards": {
+ "cardPadding": null,
+ "cardRound": null
+ },
+ "color": {
+ "cardColor": "#b4ff00",
+ "colorScale": "sqrt",
+ "colorScheme": "interpolateOranges",
+ "exponent": 0.5,
+ "mode": "spectrum"
+ },
+ "dataFormat": "timeseries",
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 16,
+ "y": 29
+ },
+ "heatmap": {},
+ "hideZeroBuckets": false,
+ "highlightCards": true,
+ "id": 160,
+ "legend": {
+ "show": false
+ },
+ "links": [],
+ "options": {},
+ "reverseYBuckets": false,
+ "targets": [
+ {
+ "expr": "tcp_connection_duration_ms_bucket{namespace=\"$namespace\", cronjob=\"$cronjob\", direction=\"outbound\"}",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "title": "TCP CONNECTION DURATION",
+ "tooltip": {
+ "show": true,
+ "showHistogram": true
+ },
+ "type": "heatmap",
+ "xAxis": {
+ "show": true
+ },
+ "xBucketNumber": null,
+ "xBucketSize": null,
+ "yAxis": {
+ "decimals": null,
+ "format": "dtdurationms",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true,
+ "splitFactor": null
+ },
+ "yBucketBound": "auto",
+ "yBucketNumber": null,
+ "yBucketSize": null
+ }
+ ],
+ "title": "Outbound TCP Metrics",
+ "type": "row"
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 31
+ },
+ "id": 156,
+ "panels": [],
+ "title": "",
+ "type": "row"
+ },
+ {
+ "content": "\n OUTBOUND CRONJOBS \n
",
+ "gridPos": {
+ "h": 2,
+ "w": 24,
+ "x": 0,
+ "y": 32
+ },
+ "id": 80,
+ "links": [],
+ "mode": "html",
+ "options": {},
+ "title": "",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "collapsed": true,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 34
+ },
+ "id": 27,
+ "panels": [
+ {
+ "content": "\n
\n
cj/$outbound \n
",
+ "gridPos": {
+ "h": 2,
+ "w": 24,
+ "x": 0,
+ "y": 36
+ },
+ "id": 40,
+ "links": [],
+ "mode": "html",
+ "options": {},
+ "title": "",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 0,
+ "y": 38
+ },
+ "id": 28,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(response_total{classification=\"success\", namespace=\"$namespace\", cronjob=\"$cronjob\", dst_cronjob=\"$outbound\", direction=\"outbound\"}[30s])) by (dst_cronjob) / sum(irate(response_total{namespace=\"$namespace\", cronjob=\"$cronjob\", dst_cronjob=\"$outbound\", direction=\"outbound\"}[30s])) by (dst_cronjob)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "cj/{{dst_cronjob}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "SUCCESS RATE",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": null,
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": "1",
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 8,
+ "y": 38
+ },
+ "id": 35,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(request_total{namespace=\"$namespace\", cronjob=\"$cronjob\", dst_cronjob=\"$outbound\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_cronjob)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "\ud83d\udd12cj/{{dst_cronjob}}",
+ "refId": "A"
+ },
+ {
+ "expr": "sum(irate(request_total{namespace=\"$namespace\", cronjob=\"$cronjob\", dst_cronjob=\"$outbound\", direction=\"outbound\", tls!=\"true\"}[30s])) by (dst_cronjob)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "cj/{{dst_cronjob}}",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "REQUEST RATE",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "rps",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 16,
+ "y": 38
+ },
+ "id": 41,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.5, sum(rate(response_latency_ms_bucket{namespace=\"$namespace\", cronjob=\"$cronjob\", dst_cronjob=\"$outbound\", direction=\"outbound\"}[30s])) by (le, dst_cronjob))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "P50 cj/{{dst_cronjob}}",
+ "refId": "A"
+ },
+ {
+ "expr": "histogram_quantile(0.95, sum(rate(response_latency_ms_bucket{namespace=\"$namespace\", cronjob=\"$cronjob\", dst_cronjob=\"$outbound\", direction=\"outbound\"}[30s])) by (le, dst_cronjob))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "P95 cj/{{dst_cronjob}}",
+ "refId": "B"
+ },
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(response_latency_ms_bucket{namespace=\"$namespace\", cronjob=\"$cronjob\", dst_cronjob=\"$outbound\", direction=\"outbound\"}[30s])) by (le, dst_cronjob))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "P99 cj/{{dst_cronjob}}",
+ "refId": "C"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "LATENCY",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "repeat": "outbound",
+ "title": "cj/$outbound",
+ "type": "row"
+ },
+ {
+ "content": "\n
\n
\n
\n
\n
\n
\n\n\n
",
+ "gridPos": {
+ "h": 3,
+ "w": 24,
+ "x": 0,
+ "y": 35
+ },
+ "height": "1px",
+ "id": 171,
+ "links": [],
+ "mode": "html",
+ "options": {},
+ "title": "",
+ "transparent": true,
+ "type": "text"
+ }
+ ],
+ "refresh": "1m",
+ "schemaVersion": 18,
+ "style": "dark",
+ "tags": [
+ "linkerd"
+ ],
+ "templating": {
+ "list": [
+ {
+ "hide": 0,
+ "label": "datasource",
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
+ {
+ "allValue": ".*",
+ "current": {},
+ "datasource": "${DS_PROMETHEUS}",
+ "definition": "",
+ "hide": 0,
+ "includeAll": false,
+ "label": "Namespace",
+ "multi": false,
+ "name": "namespace",
+ "options": [],
+ "query": "label_values(process_start_time_seconds{cronjob!=\"\"}, namespace)",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".*",
+ "current": {},
+ "datasource": "${DS_PROMETHEUS}",
+ "definition": "",
+ "hide": 0,
+ "includeAll": false,
+ "label": "Deployment",
+ "multi": false,
+ "name": "cronjob",
+ "options": [],
+ "query": "label_values(process_start_time_seconds{namespace=\"$namespace\"}, cronjob)",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".*",
+ "current": {},
+ "datasource": "${DS_PROMETHEUS}",
+ "definition": "",
+ "hide": 2,
+ "includeAll": true,
+ "label": null,
+ "multi": false,
+ "name": "inbound",
+ "options": [],
+ "query": "label_values(request_total{dst_namespace=\"$namespace\", dst_cronjob=\"$cronjob\"}, cronjob)",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".*",
+ "current": {},
+ "datasource": "${DS_PROMETHEUS}",
+ "definition": "",
+ "hide": 2,
+ "includeAll": true,
+ "label": null,
+ "multi": false,
+ "name": "outbound",
+ "options": [],
+ "query": "label_values(request_total{namespace=\"$namespace\", cronjob=\"$cronjob\"}, dst_cronjob)",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-5m",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "",
+ "title": "Linkerd CronJob",
+ "uid": "linkerd-cronjob",
+ "version": 1
+}
\ No newline at end of file
diff --git a/roles/prometheus/dashboards/linkerd/linkerd-daemonset.json b/argocd/system/linkerd-viz/dashboards/linkerd-daemonset.json
similarity index 99%
rename from roles/prometheus/dashboards/linkerd/linkerd-daemonset.json
rename to argocd/system/linkerd-viz/dashboards/linkerd-daemonset.json
index 3f4eab3c..f1685d90 100644
--- a/roles/prometheus/dashboards/linkerd/linkerd-daemonset.json
+++ b/argocd/system/linkerd-viz/dashboards/linkerd-daemonset.json
@@ -1,56 +1,56 @@
{
+ "__elements": [],
"__inputs": [
{
- "name": "DS_PROMETHEUS",
- "label": "prometheus",
"description": "",
- "type": "datasource",
+ "label": "prometheus",
+ "name": "DS_PROMETHEUS",
"pluginId": "prometheus",
- "pluginName": "Prometheus"
+ "pluginName": "Prometheus",
+ "type": "datasource"
}
],
- "__elements": [],
"__requires": [
{
- "type": "panel",
"id": "gauge",
"name": "Gauge",
+ "type": "panel",
"version": ""
},
{
- "type": "grafana",
"id": "grafana",
"name": "Grafana",
+ "type": "grafana",
"version": "8.3.3"
},
{
- "type": "panel",
"id": "graph",
"name": "Graph (old)",
+ "type": "panel",
"version": ""
},
{
- "type": "panel",
"id": "heatmap",
"name": "Heatmap",
+ "type": "panel",
"version": ""
},
{
- "type": "datasource",
"id": "prometheus",
"name": "Prometheus",
+ "type": "datasource",
"version": "1.0.0"
},
{
- "type": "panel",
"id": "stat",
"name": "Stat",
+ "type": "panel",
"version": ""
},
{
- "type": "panel",
"id": "text",
"name": "Text",
+ "type": "panel",
"version": ""
}
],
@@ -571,7 +571,7 @@
"expr": "sum(irate(request_total{namespace=\"$namespace\", daemonset=\"$daemonset\", direction=\"inbound\", tls=\"true\"}[30s])) by (daemonset)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "🔒ds/{{daemonset}}",
+ "legendFormat": "\ud83d\udd12ds/{{daemonset}}",
"refId": "A"
},
{
@@ -1159,7 +1159,7 @@
"expr": "sum(irate(request_total{daemonset!=\"\", daemonset=\"$inbound\", dst_namespace=\"$namespace\", dst_daemonset=\"$daemonset\", direction=\"outbound\", tls=\"true\"}[30s])) by (daemonset, pod)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "🔒po/{{pod}}",
+ "legendFormat": "\ud83d\udd12po/{{pod}}",
"refId": "A"
},
{
@@ -1475,7 +1475,7 @@
"expr": "sum(irate(request_total{namespace=\"$namespace\", daemonset=\"$daemonset\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_daemonset)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "🔒ds/{{dst_daemonset}}",
+ "legendFormat": "\ud83d\udd12ds/{{dst_daemonset}}",
"refId": "A"
},
{
@@ -2045,7 +2045,7 @@
"expr": "sum(irate(request_total{namespace=\"$namespace\", daemonset=\"$daemonset\", dst_daemonset=\"$outbound\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_daemonset)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "🔒ds/{{dst_daemonset}}",
+ "legendFormat": "\ud83d\udd12ds/{{dst_daemonset}}",
"refId": "A"
},
{
@@ -2228,6 +2228,16 @@
],
"templating": {
"list": [
+ {
+ "hide": 0,
+ "label": "datasource",
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
{
"allValue": ".*",
"current": {},
@@ -2351,4 +2361,4 @@
"title": "Linkerd DaemonSet",
"uid": "linkerd-daemonset",
"version": 1
-}
+}
\ No newline at end of file
diff --git a/roles/prometheus/dashboards/linkerd/linkerd-deployment.json b/argocd/system/linkerd-viz/dashboards/linkerd-deployment.json
similarity index 99%
rename from roles/prometheus/dashboards/linkerd/linkerd-deployment.json
rename to argocd/system/linkerd-viz/dashboards/linkerd-deployment.json
index 4d8cb63c..aa940b86 100644
--- a/roles/prometheus/dashboards/linkerd/linkerd-deployment.json
+++ b/argocd/system/linkerd-viz/dashboards/linkerd-deployment.json
@@ -1,56 +1,56 @@
{
+ "__elements": [],
"__inputs": [
{
- "name": "DS_PROMETHEUS",
- "label": "prometheus",
"description": "",
- "type": "datasource",
+ "label": "prometheus",
+ "name": "DS_PROMETHEUS",
"pluginId": "prometheus",
- "pluginName": "Prometheus"
+ "pluginName": "Prometheus",
+ "type": "datasource"
}
],
- "__elements": [],
"__requires": [
{
- "type": "panel",
"id": "gauge",
"name": "Gauge",
+ "type": "panel",
"version": ""
},
{
- "type": "grafana",
"id": "grafana",
"name": "Grafana",
+ "type": "grafana",
"version": "8.3.3"
},
{
- "type": "panel",
"id": "graph",
"name": "Graph (old)",
+ "type": "panel",
"version": ""
},
{
- "type": "panel",
"id": "heatmap",
"name": "Heatmap",
+ "type": "panel",
"version": ""
},
{
- "type": "datasource",
"id": "prometheus",
"name": "Prometheus",
+ "type": "datasource",
"version": "1.0.0"
},
{
- "type": "panel",
"id": "stat",
"name": "Stat",
+ "type": "panel",
"version": ""
},
{
- "type": "panel",
"id": "text",
"name": "Text",
+ "type": "panel",
"version": ""
}
],
@@ -571,7 +571,7 @@
"expr": "sum(irate(request_total{namespace=\"$namespace\", deployment=\"$deployment\", direction=\"inbound\", tls=\"true\"}[30s])) by (deployment)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "🔒deploy/{{deployment}}",
+ "legendFormat": "\ud83d\udd12deploy/{{deployment}}",
"refId": "A"
},
{
@@ -1159,7 +1159,7 @@
"expr": "sum(irate(request_total{deployment!=\"\", deployment=\"$inbound\", dst_namespace=\"$namespace\", dst_deployment=\"$deployment\", direction=\"outbound\", tls=\"true\"}[30s])) by (deployment, pod)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "🔒po/{{pod}}",
+ "legendFormat": "\ud83d\udd12po/{{pod}}",
"refId": "A"
},
{
@@ -1475,7 +1475,7 @@
"expr": "sum(irate(request_total{namespace=\"$namespace\", deployment=\"$deployment\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_deployment)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "🔒deploy/{{dst_deployment}}",
+ "legendFormat": "\ud83d\udd12deploy/{{dst_deployment}}",
"refId": "A"
},
{
@@ -2045,7 +2045,7 @@
"expr": "sum(irate(request_total{namespace=\"$namespace\", deployment=\"$deployment\", dst_deployment=\"$outbound\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_deployment)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "🔒deploy/{{dst_deployment}}",
+ "legendFormat": "\ud83d\udd12deploy/{{dst_deployment}}",
"refId": "A"
},
{
@@ -2228,6 +2228,16 @@
],
"templating": {
"list": [
+ {
+ "hide": 0,
+ "label": "datasource",
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
{
"allValue": ".*",
"current": {},
@@ -2351,4 +2361,4 @@
"title": "Linkerd Deployment",
"uid": "linkerd-deployment",
"version": 1
-}
+}
\ No newline at end of file
diff --git a/roles/prometheus/dashboards/linkerd/linkerd-health.json b/argocd/system/linkerd-viz/dashboards/linkerd-health.json
similarity index 99%
rename from roles/prometheus/dashboards/linkerd/linkerd-health.json
rename to argocd/system/linkerd-viz/dashboards/linkerd-health.json
index 1d520de7..f2bae782 100644
--- a/roles/prometheus/dashboards/linkerd/linkerd-health.json
+++ b/argocd/system/linkerd-viz/dashboards/linkerd-health.json
@@ -1,56 +1,56 @@
{
+ "__elements": [],
"__inputs": [
{
- "name": "DS_PROMETHEUS",
- "label": "prometheus",
"description": "",
- "type": "datasource",
+ "label": "prometheus",
+ "name": "DS_PROMETHEUS",
"pluginId": "prometheus",
- "pluginName": "Prometheus"
+ "pluginName": "Prometheus",
+ "type": "datasource"
}
],
- "__elements": [],
"__requires": [
{
- "type": "panel",
"id": "gauge",
"name": "Gauge",
+ "type": "panel",
"version": ""
},
{
- "type": "grafana",
"id": "grafana",
"name": "Grafana",
+ "type": "grafana",
"version": "8.3.3"
},
{
- "type": "panel",
"id": "graph",
"name": "Graph (old)",
+ "type": "panel",
"version": ""
},
{
- "type": "panel",
"id": "heatmap",
"name": "Heatmap",
+ "type": "panel",
"version": ""
},
{
- "type": "datasource",
"id": "prometheus",
"name": "Prometheus",
+ "type": "datasource",
"version": "1.0.0"
},
{
- "type": "panel",
"id": "stat",
"name": "Stat",
+ "type": "panel",
"version": ""
},
{
- "type": "panel",
"id": "text",
"name": "Text",
+ "type": "panel",
"version": ""
}
],
@@ -2287,6 +2287,16 @@
],
"templating": {
"list": [
+ {
+ "hide": 0,
+ "label": "datasource",
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
{
"allValue": ".*",
"current": {
@@ -2416,4 +2426,4 @@
"title": "Linkerd Health",
"uid": "linkerd-health",
"version": 1
-}
+}
\ No newline at end of file
diff --git a/roles/prometheus/dashboards/linkerd/linkerd-job.json b/argocd/system/linkerd-viz/dashboards/linkerd-job.json
similarity index 99%
rename from roles/prometheus/dashboards/linkerd/linkerd-job.json
rename to argocd/system/linkerd-viz/dashboards/linkerd-job.json
index 375e39ba..833a2174 100644
--- a/roles/prometheus/dashboards/linkerd/linkerd-job.json
+++ b/argocd/system/linkerd-viz/dashboards/linkerd-job.json
@@ -1,56 +1,56 @@
{
+ "__elements": [],
"__inputs": [
{
- "name": "DS_PROMETHEUS",
- "label": "prometheus",
"description": "",
- "type": "datasource",
+ "label": "prometheus",
+ "name": "DS_PROMETHEUS",
"pluginId": "prometheus",
- "pluginName": "Prometheus"
+ "pluginName": "Prometheus",
+ "type": "datasource"
}
],
- "__elements": [],
"__requires": [
{
- "type": "panel",
"id": "gauge",
"name": "Gauge",
+ "type": "panel",
"version": ""
},
{
- "type": "grafana",
"id": "grafana",
"name": "Grafana",
+ "type": "grafana",
"version": "8.3.3"
},
{
- "type": "panel",
"id": "graph",
"name": "Graph (old)",
+ "type": "panel",
"version": ""
},
{
- "type": "panel",
"id": "heatmap",
"name": "Heatmap",
+ "type": "panel",
"version": ""
},
{
- "type": "datasource",
"id": "prometheus",
"name": "Prometheus",
+ "type": "datasource",
"version": "1.0.0"
},
{
- "type": "panel",
"id": "stat",
"name": "Stat",
+ "type": "panel",
"version": ""
},
{
- "type": "panel",
"id": "text",
"name": "Text",
+ "type": "panel",
"version": ""
}
],
@@ -571,7 +571,7 @@
"expr": "sum(irate(request_total{namespace=\"$namespace\", k8s_job=\"$job\", direction=\"inbound\", tls=\"true\"}[30s])) by (k8s_job)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "🔒job/{{k8s_job}}",
+ "legendFormat": "\ud83d\udd12job/{{k8s_job}}",
"refId": "A"
},
{
@@ -1159,7 +1159,7 @@
"expr": "sum(irate(request_total{k8s_job!=\"\", k8s_job=\"$inbound\", dst_namespace=\"$namespace\", dst_k8s_job=\"$job\", direction=\"outbound\", tls=\"true\"}[30s])) by (k8s_job, pod)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "🔒po/{{pod}}",
+ "legendFormat": "\ud83d\udd12po/{{pod}}",
"refId": "A"
},
{
@@ -1475,7 +1475,7 @@
"expr": "sum(irate(request_total{namespace=\"$namespace\", k8s_job=\"$job\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_k8s_job)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "🔒job/{{dst_k8s_job}}",
+ "legendFormat": "\ud83d\udd12job/{{dst_k8s_job}}",
"refId": "A"
},
{
@@ -2045,7 +2045,7 @@
"expr": "sum(irate(request_total{namespace=\"$namespace\", k8s_job=\"$job\", dst_k8s_job=\"$outbound\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_k8s_job)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "🔒job/{{dst_k8s_job}}",
+ "legendFormat": "\ud83d\udd12job/{{dst_k8s_job}}",
"refId": "A"
},
{
@@ -2228,6 +2228,16 @@
],
"templating": {
"list": [
+ {
+ "hide": 0,
+ "label": "datasource",
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
{
"allValue": ".*",
"current": {},
@@ -2351,4 +2361,4 @@
"title": "Linkerd Job",
"uid": "linkerd-job",
"version": 1
-}
+}
\ No newline at end of file
diff --git a/roles/prometheus/dashboards/linkerd/linkerd-kubernetes.json b/argocd/system/linkerd-viz/dashboards/linkerd-kubernetes.json
similarity index 99%
rename from roles/prometheus/dashboards/linkerd/linkerd-kubernetes.json
rename to argocd/system/linkerd-viz/dashboards/linkerd-kubernetes.json
index c6f6b317..d141aaba 100644
--- a/roles/prometheus/dashboards/linkerd/linkerd-kubernetes.json
+++ b/argocd/system/linkerd-viz/dashboards/linkerd-kubernetes.json
@@ -1,56 +1,56 @@
{
+ "__elements": [],
"__inputs": [
{
- "name": "DS_PROMETHEUS",
- "label": "prometheus",
"description": "",
- "type": "datasource",
+ "label": "prometheus",
+ "name": "DS_PROMETHEUS",
"pluginId": "prometheus",
- "pluginName": "Prometheus"
+ "pluginName": "Prometheus",
+ "type": "datasource"
}
],
- "__elements": [],
"__requires": [
{
- "type": "panel",
"id": "gauge",
"name": "Gauge",
+ "type": "panel",
"version": ""
},
{
- "type": "grafana",
"id": "grafana",
"name": "Grafana",
+ "type": "grafana",
"version": "8.3.3"
},
{
- "type": "panel",
"id": "graph",
"name": "Graph (old)",
+ "type": "panel",
"version": ""
},
{
- "type": "panel",
"id": "heatmap",
"name": "Heatmap",
+ "type": "panel",
"version": ""
},
{
- "type": "datasource",
"id": "prometheus",
"name": "Prometheus",
+ "type": "datasource",
"version": "1.0.0"
},
{
- "type": "panel",
"id": "stat",
"name": "Stat",
+ "type": "panel",
"version": ""
},
{
- "type": "panel",
"id": "text",
"name": "Text",
+ "type": "panel",
"version": ""
}
],
@@ -2239,6 +2239,16 @@
],
"templating": {
"list": [
+ {
+ "hide": 0,
+ "label": "datasource",
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
{
"allValue": ".*",
"current": {
@@ -2299,4 +2309,4 @@
"title": "Kubernetes cluster monitoring (via Prometheus)",
"uid": "k8s",
"version": 1
-}
+}
\ No newline at end of file
diff --git a/argocd/system/linkerd-viz/dashboards/linkerd-multicluster.json b/argocd/system/linkerd-viz/dashboards/linkerd-multicluster.json
new file mode 100644
index 00000000..62762627
--- /dev/null
+++ b/argocd/system/linkerd-viz/dashboards/linkerd-multicluster.json
@@ -0,0 +1,1008 @@
+{
+ "__elements": [],
+ "__inputs": [
+ {
+ "description": "",
+ "label": "prometheus",
+ "name": "DS_PROMETHEUS",
+ "pluginId": "prometheus",
+ "pluginName": "Prometheus",
+ "type": "datasource"
+ }
+ ],
+ "__requires": [
+ {
+ "id": "gauge",
+ "name": "Gauge",
+ "type": "panel",
+ "version": ""
+ },
+ {
+ "id": "grafana",
+ "name": "Grafana",
+ "type": "grafana",
+ "version": "8.3.3"
+ },
+ {
+ "id": "graph",
+ "name": "Graph (old)",
+ "type": "panel",
+ "version": ""
+ },
+ {
+ "id": "heatmap",
+ "name": "Heatmap",
+ "type": "panel",
+ "version": ""
+ },
+ {
+ "id": "prometheus",
+ "name": "Prometheus",
+ "type": "datasource",
+ "version": "1.0.0"
+ },
+ {
+ "id": "stat",
+ "name": "Stat",
+ "type": "panel",
+ "version": ""
+ },
+ {
+ "id": "text",
+ "name": "Text",
+ "type": "panel",
+ "version": ""
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 1,
+ "id": null,
+ "iteration": 1531434867463,
+ "links": [],
+ "panels": [
+ {
+ "content": "\n
\n
Cluster: $cluster \n
",
+ "gridPos": {
+ "h": 2,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 20,
+ "links": [],
+ "mode": "html",
+ "options": {},
+ "title": "",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#d44a3a",
+ "rgba(237, 129, 40, 0.89)",
+ "#299c46"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": null,
+ "format": "percentunit",
+ "gauge": {
+ "maxValue": 1,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 8,
+ "x": 0,
+ "y": 2
+ },
+ "id": 5,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "options": {},
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": true,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(irate(response_total{classification=\"success\", dst_target_cluster=\"$cluster\", dst_target_cluster!=\"\", direction=\"outbound\"}[30s])) / sum(irate(response_total{dst_target_cluster=\"$cluster\", dst_target_cluster!=\"\", direction=\"outbound\"}[30s]))",
+ "format": "time_series",
+ "instant": false,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "0.9,.99",
+ "title": "SUCCESS RATE",
+ "transparent": true,
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": null,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 8,
+ "x": 8,
+ "y": 2
+ },
+ "id": 4,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "options": {},
+ "postfix": " RPS",
+ "postfixFontSize": "100%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": true,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(irate(request_total{dst_target_cluster=\"$cluster\", dst_target_cluster!=\"\", direction=\"outbound\"}[30s]))",
+ "format": "time_series",
+ "instant": false,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "REQUEST RATE",
+ "transparent": true,
+ "type": "singlestat",
+ "valueFontSize": "100%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": null,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 8,
+ "x": 16,
+ "y": 2
+ },
+ "id": 81,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "options": {},
+ "postfix": " ms",
+ "postfixFontSize": "100%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": true,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.95, sum(irate(response_latency_ms_bucket{dst_target_cluster=\"$cluster\", dst_target_cluster!=\"\", direction=\"outbound\"}[30s])) by (le))",
+ "format": "time_series",
+ "instant": false,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "P95 LATENCY",
+ "transparent": true,
+ "type": "singlestat",
+ "valueFontSize": "100%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "content": "\n TOP-LINE TRAFFIC \n
",
+ "gridPos": {
+ "h": 2,
+ "w": 24,
+ "x": 0,
+ "y": 6
+ },
+ "id": 17,
+ "links": [],
+ "mode": "html",
+ "options": {},
+ "title": "",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 0,
+ "y": 8
+ },
+ "id": 67,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(response_total{classification=\"success\",dst_target_cluster=\"$cluster\", dst_target_cluster!=\"\", direction=\"outbound\"}[30s])) / sum(irate(response_total{dst_target_cluster=\"$cluster\", dst_target_cluster!=\"\", direction=\"outbound\"}[30s]))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "SUCCESS RATE",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": null,
+ "format": "percentunit",
+ "label": "",
+ "logBase": 1,
+ "max": "1",
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 8,
+ "y": 8
+ },
+ "id": 2,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(request_total{dst_target_cluster=\"$cluster\", dst_target_cluster!=\"\", direction=\"outbound\", tls=\"true\"}[30s]))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ },
+ {
+ "expr": "sum(irate(request_total{dst_target_cluster=\"$cluster\", dst_target_cluster!=\"\", tls!=\"true\"}[30s]))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "REQUEST RATE",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": null,
+ "format": "rps",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 16,
+ "y": 8
+ },
+ "id": 68,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.5, sum(irate(response_latency_ms_bucket{dst_target_cluster=\"$cluster\", dst_target_cluster!=\"\", direction=\"outbound\"}[30s])) by (le))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "p50 gateway",
+ "refId": "A"
+ },
+ {
+ "expr": "histogram_quantile(0.95, sum(irate(response_latency_ms_bucket{dst_target_cluster=\"$cluster\", dst_target_cluster!=\"\", direction=\"outbound\"}[30s])) by (le))",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 1,
+ "legendFormat": "p95 gateway",
+ "refId": "B"
+ },
+ {
+ "expr": "histogram_quantile(0.99, sum(irate(response_latency_ms_bucket{dst_target_cluster=\"$cluster\", dst_target_cluster!=\"\", direction=\"outbound\"}[30s])) by (le))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "p99 gateway",
+ "refId": "C"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "LATENCY",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": null,
+ "format": "ms",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "content": "\n TRAFFIC BY TARGET SERVICE \n
",
+ "gridPos": {
+ "h": 2,
+ "w": 24,
+ "x": 0,
+ "y": 15
+ },
+ "id": 32,
+ "links": [],
+ "mode": "html",
+ "options": {},
+ "title": "",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 0,
+ "y": 17
+ },
+ "id": 77,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(response_total{classification=\"success\", dst_target_cluster=\"$cluster\", dst_target_cluster!=\"\", direction=\"outbound\"}[30s])) by (dst_target_service) / sum(irate(response_total{dst_target_cluster=\"$cluster\", dst_target_cluster!=\"\", direction=\"outbound\"}[30s])) by (dst_target_service)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "target-svc/{{dst_target_service}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "SUCCESS RATE",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": null,
+ "format": "percentunit",
+ "label": "",
+ "logBase": 1,
+ "max": "1",
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 8,
+ "y": 17
+ },
+ "id": 78,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(request_total{dst_target_cluster=\"$cluster\", dst_target_cluster!=\"\", tls=\"true\"}[30s])) by (dst_target_service)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "\ud83d\udd12target-svc/{{dst_target_service}}",
+ "refId": "A"
+ },
+ {
+ "expr": "sum(irate(request_total{dst_target_cluster=\"$cluster\", dst_target_cluster!=\"\", tls!=\"true\"}[30s])) by (dst_target_service)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "target-svc/{{dst_target_service}}",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "REQUEST RATE",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "rps",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 16,
+ "y": 17
+ },
+ "id": 79,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.95, sum(rate(response_latency_ms_bucket{dst_target_cluster=\"$cluster\", dst_target_cluster!=\"\", direction=\"outbound\"}[30s])) by (le, dst_target_service))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "P95 target-svc/{{dst_target_service}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "P95 LATENCY",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "refresh": "1m",
+ "schemaVersion": 18,
+ "style": "dark",
+ "tags": [
+ "linkerd"
+ ],
+ "templating": {
+ "list": [
+ {
+ "hide": 0,
+ "label": "datasource",
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
+ {
+ "allValue": ".*",
+ "current": {
+ "text": "All",
+ "value": "$__all"
+ },
+ "datasource": "${DS_PROMETHEUS}",
+ "definition": "",
+ "hide": 0,
+ "includeAll": false,
+ "label": "Cluster",
+ "multi": false,
+ "name": "cluster",
+ "options": [],
+ "query": "label_values(request_total, dst_target_cluster)",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-5m",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "",
+ "title": "Linkerd Multicluster",
+ "uid": "linkerd-multicluster",
+ "version": 1
+}
\ No newline at end of file
diff --git a/roles/prometheus/dashboards/linkerd/linkerd-namespace.json b/argocd/system/linkerd-viz/dashboards/linkerd-namespace.json
similarity index 98%
rename from roles/prometheus/dashboards/linkerd/linkerd-namespace.json
rename to argocd/system/linkerd-viz/dashboards/linkerd-namespace.json
index c4261dbc..b178bb99 100644
--- a/roles/prometheus/dashboards/linkerd/linkerd-namespace.json
+++ b/argocd/system/linkerd-viz/dashboards/linkerd-namespace.json
@@ -1,56 +1,56 @@
{
+ "__elements": [],
"__inputs": [
{
- "name": "DS_PROMETHEUS",
- "label": "prometheus",
"description": "",
- "type": "datasource",
+ "label": "prometheus",
+ "name": "DS_PROMETHEUS",
"pluginId": "prometheus",
- "pluginName": "Prometheus"
+ "pluginName": "Prometheus",
+ "type": "datasource"
}
],
- "__elements": [],
"__requires": [
{
- "type": "panel",
"id": "gauge",
"name": "Gauge",
+ "type": "panel",
"version": ""
},
{
- "type": "grafana",
"id": "grafana",
"name": "Grafana",
+ "type": "grafana",
"version": "8.3.3"
},
{
- "type": "panel",
"id": "graph",
"name": "Graph (old)",
+ "type": "panel",
"version": ""
},
{
- "type": "panel",
"id": "heatmap",
"name": "Heatmap",
+ "type": "panel",
"version": ""
},
{
- "type": "datasource",
"id": "prometheus",
"name": "Prometheus",
+ "type": "datasource",
"version": "1.0.0"
},
{
- "type": "panel",
"id": "stat",
"name": "Stat",
+ "type": "panel",
"version": ""
},
{
- "type": "panel",
"id": "text",
"name": "Text",
+ "type": "panel",
"version": ""
}
],
@@ -484,7 +484,7 @@
"expr": "sum(irate(request_total{namespace=~\"$namespace\", deployment=~\"$deployment\", direction=\"inbound\", tls=\"true\"}[30s])) by (deployment)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "🔒deploy/{{deployment}}",
+ "legendFormat": "\ud83d\udd12deploy/{{deployment}}",
"refId": "A"
},
{
@@ -798,7 +798,7 @@
"format": "time_series",
"hide": false,
"intervalFactor": 1,
- "legendFormat": "🔒deploy/{{deployment}}",
+ "legendFormat": "\ud83d\udd12deploy/{{deployment}}",
"refId": "A"
},
{
@@ -947,6 +947,16 @@
],
"templating": {
"list": [
+ {
+ "hide": 0,
+ "label": "datasource",
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
{
"allValue": ".*",
"current": {},
@@ -1026,4 +1036,4 @@
"title": "Linkerd Namespace",
"uid": "linkerd-namespace",
"version": 15
-}
+}
\ No newline at end of file
diff --git a/roles/prometheus/dashboards/linkerd/linkerd-pod.json b/argocd/system/linkerd-viz/dashboards/linkerd-pod.json
similarity index 99%
rename from roles/prometheus/dashboards/linkerd/linkerd-pod.json
rename to argocd/system/linkerd-viz/dashboards/linkerd-pod.json
index f57e9ea9..c539d52b 100644
--- a/roles/prometheus/dashboards/linkerd/linkerd-pod.json
+++ b/argocd/system/linkerd-viz/dashboards/linkerd-pod.json
@@ -1,56 +1,56 @@
{
+ "__elements": [],
"__inputs": [
{
- "name": "DS_PROMETHEUS",
- "label": "prometheus",
"description": "",
- "type": "datasource",
+ "label": "prometheus",
+ "name": "DS_PROMETHEUS",
"pluginId": "prometheus",
- "pluginName": "Prometheus"
+ "pluginName": "Prometheus",
+ "type": "datasource"
}
],
- "__elements": [],
"__requires": [
{
- "type": "panel",
"id": "gauge",
"name": "Gauge",
+ "type": "panel",
"version": ""
},
{
- "type": "grafana",
"id": "grafana",
"name": "Grafana",
+ "type": "grafana",
"version": "8.3.3"
},
{
- "type": "panel",
"id": "graph",
"name": "Graph (old)",
+ "type": "panel",
"version": ""
},
{
- "type": "panel",
"id": "heatmap",
"name": "Heatmap",
+ "type": "panel",
"version": ""
},
{
- "type": "datasource",
"id": "prometheus",
"name": "Prometheus",
+ "type": "datasource",
"version": "1.0.0"
},
{
- "type": "panel",
"id": "stat",
"name": "Stat",
+ "type": "panel",
"version": ""
},
{
- "type": "panel",
"id": "text",
"name": "Text",
+ "type": "panel",
"version": ""
}
],
@@ -572,7 +572,7 @@
"expr": "sum(irate(request_total{namespace=\"$namespace\", pod=\"$pod\", direction=\"inbound\", tls=\"true\"}[30s])) by (pod)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "🔒po/{{pod}}",
+ "legendFormat": "\ud83d\udd12po/{{pod}}",
"refId": "A"
},
{
@@ -1133,7 +1133,7 @@
"expr": "sum(irate(request_total{dst_namespace=\"$namespace\", dst_pod!=\"\", dst_pod=\"$pod\", direction=\"outbound\", tls=\"true\"}[30s])) by (pod)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "🔒po/{{pod}}",
+ "legendFormat": "\ud83d\udd12po/{{pod}}",
"refId": "A"
},
{
@@ -1433,7 +1433,7 @@
"expr": "sum(irate(request_total{namespace=\"$namespace\", pod=\"$pod\", direction=\"outbound\", tls=\"true\"}[30s])) by (pod)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "🔒po/{{pod}}",
+ "legendFormat": "\ud83d\udd12po/{{pod}}",
"refId": "A"
},
{
@@ -1994,7 +1994,7 @@
"expr": "sum(irate(request_total{namespace=\"$namespace\", pod=\"$pod\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_pod)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "🔒po/{{dst_pod}}",
+ "legendFormat": "\ud83d\udd12po/{{dst_pod}}",
"refId": "A"
},
{
@@ -2175,6 +2175,16 @@
],
"templating": {
"list": [
+ {
+ "hide": 0,
+ "label": "datasource",
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
{
"allValue": ".*",
"current": {},
@@ -2326,4 +2336,4 @@
"title": "Linkerd Pod",
"uid": "linkerd-pod",
"version": 1
-}
+}
\ No newline at end of file
diff --git a/roles/prometheus/dashboards/linkerd/linkerd-prometheus-benchmark.json b/argocd/system/linkerd-viz/dashboards/linkerd-prometheus-benchmark.json
similarity index 99%
rename from roles/prometheus/dashboards/linkerd/linkerd-prometheus-benchmark.json
rename to argocd/system/linkerd-viz/dashboards/linkerd-prometheus-benchmark.json
index 4e2025ff..a4aa530e 100644
--- a/roles/prometheus/dashboards/linkerd/linkerd-prometheus-benchmark.json
+++ b/argocd/system/linkerd-viz/dashboards/linkerd-prometheus-benchmark.json
@@ -1,56 +1,56 @@
{
+ "__elements": [],
"__inputs": [
{
- "name": "DS_PROMETHEUS",
- "label": "prometheus",
"description": "",
- "type": "datasource",
+ "label": "prometheus",
+ "name": "DS_PROMETHEUS",
"pluginId": "prometheus",
- "pluginName": "Prometheus"
+ "pluginName": "Prometheus",
+ "type": "datasource"
}
],
- "__elements": [],
"__requires": [
{
- "type": "panel",
"id": "gauge",
"name": "Gauge",
+ "type": "panel",
"version": ""
},
{
- "type": "grafana",
"id": "grafana",
"name": "Grafana",
+ "type": "grafana",
"version": "8.3.3"
},
{
- "type": "panel",
"id": "graph",
"name": "Graph (old)",
+ "type": "panel",
"version": ""
},
{
- "type": "panel",
"id": "heatmap",
"name": "Heatmap",
+ "type": "panel",
"version": ""
},
{
- "type": "datasource",
"id": "prometheus",
"name": "Prometheus",
+ "type": "datasource",
"version": "1.0.0"
},
{
- "type": "panel",
"id": "stat",
"name": "Stat",
+ "type": "panel",
"version": ""
},
{
- "type": "panel",
"id": "text",
"name": "Text",
+ "type": "panel",
"version": ""
}
],
@@ -3695,6 +3695,16 @@
],
"templating": {
"list": [
+ {
+ "hide": 0,
+ "label": "datasource",
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
{
"allValue": ".*",
"current": {},
@@ -3774,4 +3784,4 @@
"title": "Prometheus Benchmark - 2.7.x",
"uid": "prometheus-benchmark",
"version": 10
-}
+}
\ No newline at end of file
diff --git a/roles/prometheus/dashboards/linkerd/linkerd-prometheus.json b/argocd/system/linkerd-viz/dashboards/linkerd-prometheus.json
similarity index 99%
rename from roles/prometheus/dashboards/linkerd/linkerd-prometheus.json
rename to argocd/system/linkerd-viz/dashboards/linkerd-prometheus.json
index d2939f37..6d1e4d25 100644
--- a/roles/prometheus/dashboards/linkerd/linkerd-prometheus.json
+++ b/argocd/system/linkerd-viz/dashboards/linkerd-prometheus.json
@@ -1,56 +1,56 @@
{
+ "__elements": [],
"__inputs": [
{
- "name": "DS_PROMETHEUS",
- "label": "prometheus",
"description": "",
- "type": "datasource",
+ "label": "prometheus",
+ "name": "DS_PROMETHEUS",
"pluginId": "prometheus",
- "pluginName": "Prometheus"
+ "pluginName": "Prometheus",
+ "type": "datasource"
}
],
- "__elements": [],
"__requires": [
{
- "type": "panel",
"id": "gauge",
"name": "Gauge",
+ "type": "panel",
"version": ""
},
{
- "type": "grafana",
"id": "grafana",
"name": "Grafana",
+ "type": "grafana",
"version": "8.3.3"
},
{
- "type": "panel",
"id": "graph",
"name": "Graph (old)",
+ "type": "panel",
"version": ""
},
{
- "type": "panel",
"id": "heatmap",
"name": "Heatmap",
+ "type": "panel",
"version": ""
},
{
- "type": "datasource",
"id": "prometheus",
"name": "Prometheus",
+ "type": "datasource",
"version": "1.0.0"
},
{
- "type": "panel",
"id": "stat",
"name": "Stat",
+ "type": "panel",
"version": ""
},
{
- "type": "panel",
"id": "text",
"name": "Text",
+ "type": "panel",
"version": ""
}
],
@@ -1353,7 +1353,18 @@
"prometheus"
],
"templating": {
- "list": []
+ "list": [
+ {
+ "hide": 0,
+ "label": "datasource",
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ }
+ ]
},
"time": {
"from": "now-1h",
@@ -1389,4 +1400,4 @@
"title": "Prometheus 2.0 Stats",
"uid": "prometheus",
"version": 1
-}
+}
\ No newline at end of file
diff --git a/argocd/system/linkerd-viz/dashboards/linkerd-replicaset.json b/argocd/system/linkerd-viz/dashboards/linkerd-replicaset.json
new file mode 100644
index 00000000..96d502df
--- /dev/null
+++ b/argocd/system/linkerd-viz/dashboards/linkerd-replicaset.json
@@ -0,0 +1,2410 @@
+{
+ "__elements": [],
+ "__inputs": [
+ {
+ "description": "",
+ "label": "prometheus",
+ "name": "DS_PROMETHEUS",
+ "pluginId": "prometheus",
+ "pluginName": "Prometheus",
+ "type": "datasource"
+ }
+ ],
+ "__requires": [
+ {
+ "id": "gauge",
+ "name": "Gauge",
+ "type": "panel",
+ "version": ""
+ },
+ {
+ "id": "grafana",
+ "name": "Grafana",
+ "type": "grafana",
+ "version": "8.3.3"
+ },
+ {
+ "id": "graph",
+ "name": "Graph (old)",
+ "type": "panel",
+ "version": ""
+ },
+ {
+ "id": "heatmap",
+ "name": "Heatmap",
+ "type": "panel",
+ "version": ""
+ },
+ {
+ "id": "prometheus",
+ "name": "Prometheus",
+ "type": "datasource",
+ "version": "1.0.0"
+ },
+ {
+ "id": "stat",
+ "name": "Stat",
+ "type": "panel",
+ "version": ""
+ },
+ {
+ "id": "text",
+ "name": "Text",
+ "type": "panel",
+ "version": ""
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 1,
+ "id": 16,
+ "iteration": 1573121539385,
+ "links": [],
+ "panels": [
+ {
+ "content": "\n
\n
replicaset/$replicaset \n
",
+ "gridPos": {
+ "h": 2,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 20,
+ "links": [],
+ "mode": "html",
+ "options": {},
+ "title": "",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#d44a3a",
+ "rgba(237, 129, 40, 0.89)",
+ "#299c46"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": null,
+ "format": "percentunit",
+ "gauge": {
+ "maxValue": 1,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 8,
+ "x": 0,
+ "y": 2
+ },
+ "id": 5,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "options": {},
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": true,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(irate(response_total{classification=\"success\", namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"inbound\"}[30s])) / sum(irate(response_total{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"inbound\"}[30s]))",
+ "format": "time_series",
+ "instant": false,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "0.9,.99",
+ "title": "SUCCESS RATE",
+ "transparent": true,
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": null,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 8,
+ "x": 8,
+ "y": 2
+ },
+ "id": 4,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "options": {},
+ "postfix": " RPS",
+ "postfixFontSize": "100%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": true,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(irate(request_total{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"inbound\"}[30s]))",
+ "format": "time_series",
+ "instant": false,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "REQUEST RATE",
+ "transparent": true,
+ "type": "singlestat",
+ "valueFontSize": "100%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": null,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 4,
+ "x": 16,
+ "y": 2
+ },
+ "id": 11,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "options": {},
+ "postfix": "",
+ "postfixFontSize": "100%",
+ "prefix": "",
+ "prefixFontSize": "100%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "count(count(request_total{dst_namespace=\"$namespace\", replicaset!=\"\", dst_replicaset!=\"\", dst_replicaset=\"$replicaset\", direction=\"outbound\"}) by (namespace, replicaset))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "INBOUND REPLICASETS",
+ "transparent": true,
+ "type": "singlestat",
+ "valueFontSize": "100%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 4,
+ "x": 20,
+ "y": 2
+ },
+ "id": 15,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "options": {},
+ "postfix": "",
+ "postfixFontSize": "100%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "count(count(request_total{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"outbound\"}) by (namespace, dst_replicaset))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "OUTBOUND REPLICASETS",
+ "transparent": true,
+ "type": "singlestat",
+ "valueFontSize": "100%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "content": "\n INBOUND TRAFFIC \n
",
+ "gridPos": {
+ "h": 2,
+ "w": 24,
+ "x": 0,
+ "y": 6
+ },
+ "id": 17,
+ "links": [],
+ "mode": "html",
+ "options": {},
+ "title": "",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 0,
+ "y": 8
+ },
+ "id": 67,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(response_total{classification=\"success\", namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"inbound\"}[30s])) by (replicaset) / sum(irate(response_total{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"inbound\"}[30s])) by (replicaset)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "rs/{{replicaset}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "SUCCESS RATE",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": null,
+ "format": "percentunit",
+ "label": "",
+ "logBase": 1,
+ "max": "1",
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 8,
+ "y": 8
+ },
+ "id": 2,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(request_total{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"inbound\", tls=\"true\"}[30s])) by (replicaset)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "\ud83d\udd12rs/{{replicaset}}",
+ "refId": "A"
+ },
+ {
+ "expr": "sum(irate(request_total{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"inbound\", tls!=\"true\"}[30s])) by (replicaset)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "rs/{{replicaset}}",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "REQUEST RATE",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": null,
+ "format": "rps",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 16,
+ "y": 8
+ },
+ "id": 68,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.5, sum(irate(response_latency_ms_bucket{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"inbound\"}[30s])) by (le, replicaset))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "p50 rs/{{replicaset}}",
+ "refId": "A"
+ },
+ {
+ "expr": "histogram_quantile(0.95, sum(irate(response_latency_ms_bucket{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"inbound\"}[30s])) by (le, replicaset))",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 1,
+ "legendFormat": "p95 rs/{{replicaset}}",
+ "refId": "B"
+ },
+ {
+ "expr": "histogram_quantile(0.99, sum(irate(response_latency_ms_bucket{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"inbound\"}[30s])) by (le, replicaset))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "p99 rs/{{replicaset}}",
+ "refId": "C"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "LATENCY",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": null,
+ "format": "ms",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": true,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 15
+ },
+ "id": 148,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 0,
+ "y": 16
+ },
+ "id": 167,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "tcp_close_total{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"inbound\",errno!=\"\"}",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{peer}} {{errno}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "TCP CONNECTION FAILURES",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": null,
+ "format": "none",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 8,
+ "y": 16
+ },
+ "id": 168,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "tcp_open_connections{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"inbound\"}",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{peer}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "TCP CONNECTIONS OPEN",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "cards": {
+ "cardPadding": null,
+ "cardRound": null
+ },
+ "color": {
+ "cardColor": "#b4ff00",
+ "colorScale": "sqrt",
+ "colorScheme": "interpolateOranges",
+ "exponent": 0.5,
+ "mode": "spectrum"
+ },
+ "dataFormat": "timeseries",
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 16,
+ "y": 16
+ },
+ "heatmap": {},
+ "hideZeroBuckets": false,
+ "highlightCards": true,
+ "id": 169,
+ "legend": {
+ "show": false
+ },
+ "links": [],
+ "options": {},
+ "reverseYBuckets": false,
+ "targets": [
+ {
+ "expr": "tcp_connection_duration_ms_bucket{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"inbound\"}",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "title": "TCP CONNECTION DURATION",
+ "tooltip": {
+ "show": true,
+ "showHistogram": true
+ },
+ "type": "heatmap",
+ "xAxis": {
+ "show": true
+ },
+ "xBucketNumber": null,
+ "xBucketSize": null,
+ "yAxis": {
+ "decimals": null,
+ "format": "dtdurationms",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true,
+ "splitFactor": null
+ },
+ "yBucketBound": "auto",
+ "yBucketNumber": null,
+ "yBucketSize": null
+ }
+ ],
+ "title": "Inbound TCP Metrics",
+ "type": "row"
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 16
+ },
+ "id": 152,
+ "panels": [],
+ "title": "",
+ "type": "row"
+ },
+ {
+ "content": "\n INBOUND REPLICASETS \n
",
+ "gridPos": {
+ "h": 2,
+ "w": 24,
+ "x": 0,
+ "y": 17
+ },
+ "id": 76,
+ "links": [],
+ "mode": "html",
+ "options": {},
+ "title": "",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 19
+ },
+ "id": 59,
+ "panels": [],
+ "repeat": "inbound",
+ "scopedVars": {
+ "inbound": {
+ "selected": false,
+ "text": "web",
+ "value": "web"
+ }
+ },
+ "title": "rs/$inbound",
+ "type": "row"
+ },
+ {
+ "content": "\n
\n
rs/$inbound \n
",
+ "gridPos": {
+ "h": 2,
+ "w": 24,
+ "x": 0,
+ "y": 20
+ },
+ "id": 39,
+ "links": [],
+ "mode": "html",
+ "options": {},
+ "scopedVars": {
+ "inbound": {
+ "selected": false,
+ "text": "web",
+ "value": "web"
+ }
+ },
+ "title": "",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 0,
+ "y": 22
+ },
+ "id": 36,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "scopedVars": {
+ "inbound": {
+ "selected": false,
+ "text": "web",
+ "value": "web"
+ }
+ },
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(response_total{classification=\"success\", replicaset!=\"\", replicaset=\"$inbound\", dst_namespace=\"$namespace\", dst_replicaset=\"$replicaset\", direction=\"outbound\"}[30s])) by (replicaset, pod) / sum(irate(response_total{replicaset!=\"\", replicaset=\"$inbound\", dst_namespace=\"$namespace\", dst_replicaset=\"$replicaset\", direction=\"outbound\"}[30s])) by (replicaset, pod)",
+ "format": "time_series",
+ "instant": false,
+ "intervalFactor": 1,
+ "legendFormat": "po/{{pod}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "SUCCESS RATE",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": null,
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": "1",
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 8,
+ "y": 22
+ },
+ "id": 22,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "scopedVars": {
+ "inbound": {
+ "selected": false,
+ "text": "web",
+ "value": "web"
+ }
+ },
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(request_total{replicaset!=\"\", replicaset=\"$inbound\", dst_namespace=\"$namespace\", dst_replicaset=\"$replicaset\", direction=\"outbound\", tls=\"true\"}[30s])) by (replicaset, pod)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "\ud83d\udd12po/{{pod}}",
+ "refId": "A"
+ },
+ {
+ "expr": "sum(irate(request_total{replicaset!=\"\", replicaset=\"$inbound\", dst_namespace=\"$namespace\", dst_replicaset=\"$replicaset\", direction=\"outbound\", tls!=\"true\"}[30s])) by (replicaset, pod)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "po/{{pod}}",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "REQUEST RATE",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": null,
+ "format": "rps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 16,
+ "y": 22
+ },
+ "id": 29,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "scopedVars": {
+ "inbound": {
+ "selected": false,
+ "text": "web",
+ "value": "web"
+ }
+ },
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.5, sum(rate(response_latency_ms_bucket{replicaset!=\"\", replicaset=\"$inbound\", dst_namespace=\"$namespace\", dst_replicaset=\"$replicaset\", direction=\"outbound\"}[30s])) by (le, replicaset))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "P50 rs/{{replicaset}}",
+ "refId": "A"
+ },
+ {
+ "expr": "histogram_quantile(0.95, sum(rate(response_latency_ms_bucket{replicaset!=\"\", replicaset=\"$inbound\", dst_namespace=\"$namespace\", dst_replicaset=\"$replicaset\", direction=\"outbound\"}[30s])) by (le, replicaset))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "P95 rs/{{replicaset}}",
+ "refId": "B"
+ },
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(response_latency_ms_bucket{replicaset!=\"\", replicaset=\"$inbound\", dst_namespace=\"$namespace\", dst_replicaset=\"$replicaset\", direction=\"outbound\"}[30s])) by (le, replicaset))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "P99 rs/{{replicaset}}",
+ "refId": "C"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "LATENCY",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 29
+ },
+ "id": 34,
+ "panels": [],
+ "repeat": null,
+ "title": "",
+ "type": "row"
+ },
+ {
+ "content": "\n OUTBOUND TRAFFIC \n
",
+ "gridPos": {
+ "h": 2,
+ "w": 24,
+ "x": 0,
+ "y": 30
+ },
+ "id": 32,
+ "links": [],
+ "mode": "html",
+ "options": {},
+ "title": "",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 0,
+ "y": 32
+ },
+ "id": 77,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(response_total{classification=\"success\", namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"outbound\"}[30s])) by (dst_replicaset) / sum(irate(response_total{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"outbound\"}[30s])) by (dst_replicaset)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "rs/{{dst_replicaset}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "SUCCESS RATE",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": null,
+ "format": "percentunit",
+ "label": "",
+ "logBase": 1,
+ "max": "1",
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 8,
+ "y": 32
+ },
+ "id": 78,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(request_total{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_replicaset)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "\ud83d\udd12rs/{{dst_replicaset}}",
+ "refId": "A"
+ },
+ {
+ "expr": "sum(irate(request_total{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"outbound\", tls!=\"true\"}[30s])) by (dst_replicaset)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "rs/{{dst_replicaset}}",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "REQUEST RATE",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "rps",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 16,
+ "y": 32
+ },
+ "id": 79,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.95, sum(rate(response_latency_ms_bucket{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"outbound\"}[30s])) by (le, dst_replicaset))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "P95 rs/{{dst_replicaset}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "P95 LATENCY",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "collapsed": true,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 39
+ },
+ "id": 154,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 0,
+ "y": 29
+ },
+ "id": 157,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "tcp_close_total{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"outbound\",errno!=\"\"}",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{peer}} {{errno}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "TCP CONNECTION FAILURES",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": null,
+ "format": "none",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 8,
+ "y": 29
+ },
+ "id": 166,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "tcp_open_connections{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"outbound\"}",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "{{peer}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "TCP CONNECTIONS OPEN",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "cards": {
+ "cardPadding": null,
+ "cardRound": null
+ },
+ "color": {
+ "cardColor": "#b4ff00",
+ "colorScale": "sqrt",
+ "colorScheme": "interpolateOranges",
+ "exponent": 0.5,
+ "mode": "spectrum"
+ },
+ "dataFormat": "timeseries",
+ "datasource": "${DS_PROMETHEUS}",
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 16,
+ "y": 29
+ },
+ "heatmap": {},
+ "hideZeroBuckets": false,
+ "highlightCards": true,
+ "id": 160,
+ "legend": {
+ "show": false
+ },
+ "links": [],
+ "options": {},
+ "reverseYBuckets": false,
+ "targets": [
+ {
+ "expr": "tcp_connection_duration_ms_bucket{namespace=\"$namespace\", replicaset=\"$replicaset\", direction=\"outbound\"}",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "refId": "A"
+ }
+ ],
+ "title": "TCP CONNECTION DURATION",
+ "tooltip": {
+ "show": true,
+ "showHistogram": true
+ },
+ "type": "heatmap",
+ "xAxis": {
+ "show": true
+ },
+ "xBucketNumber": null,
+ "xBucketSize": null,
+ "yAxis": {
+ "decimals": null,
+ "format": "dtdurationms",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true,
+ "splitFactor": null
+ },
+ "yBucketBound": "auto",
+ "yBucketNumber": null,
+ "yBucketSize": null
+ }
+ ],
+ "title": "Outbound TCP Metrics",
+ "type": "row"
+ },
+ {
+ "collapsed": false,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 40
+ },
+ "id": 156,
+ "panels": [],
+ "title": "",
+ "type": "row"
+ },
+ {
+ "content": "\n OUTBOUND REPLICASETS \n
",
+ "gridPos": {
+ "h": 2,
+ "w": 24,
+ "x": 0,
+ "y": 41
+ },
+ "id": 80,
+ "links": [],
+ "mode": "html",
+ "options": {},
+ "title": "",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "collapsed": true,
+ "gridPos": {
+ "h": 1,
+ "w": 24,
+ "x": 0,
+ "y": 43
+ },
+ "id": 27,
+ "panels": [
+ {
+ "content": "\n
\n
rs/$outbound \n
",
+ "gridPos": {
+ "h": 2,
+ "w": 24,
+ "x": 0,
+ "y": 36
+ },
+ "id": 40,
+ "links": [],
+ "mode": "html",
+ "options": {},
+ "title": "",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 0,
+ "y": 38
+ },
+ "id": 28,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(response_total{classification=\"success\", namespace=\"$namespace\", replicaset=\"$replicaset\", dst_replicaset=\"$outbound\", direction=\"outbound\"}[30s])) by (dst_replicaset) / sum(irate(response_total{namespace=\"$namespace\", replicaset=\"$replicaset\", dst_replicaset=\"$outbound\", direction=\"outbound\"}[30s])) by (dst_replicaset)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "rs/{{dst_replicaset}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "SUCCESS RATE",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": null,
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": "1",
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 8,
+ "y": 38
+ },
+ "id": 35,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(request_total{namespace=\"$namespace\", replicaset=\"$replicaset\", dst_replicaset=\"$outbound\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_replicaset)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "\ud83d\udd12rs/{{dst_replicaset}}",
+ "refId": "A"
+ },
+ {
+ "expr": "sum(irate(request_total{namespace=\"$namespace\", replicaset=\"$replicaset\", dst_replicaset=\"$outbound\", direction=\"outbound\", tls!=\"true\"}[30s])) by (dst_replicaset)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "rs/{{dst_replicaset}}",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "REQUEST RATE",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "rps",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 16,
+ "y": 38
+ },
+ "id": 41,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.5, sum(rate(response_latency_ms_bucket{namespace=\"$namespace\", replicaset=\"$replicaset\", dst_replicaset=\"$outbound\", direction=\"outbound\"}[30s])) by (le, dst_replicaset))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "P50 rs/{{dst_replicaset}}",
+ "refId": "A"
+ },
+ {
+ "expr": "histogram_quantile(0.95, sum(rate(response_latency_ms_bucket{namespace=\"$namespace\", replicaset=\"$replicaset\", dst_replicaset=\"$outbound\", direction=\"outbound\"}[30s])) by (le, dst_replicaset))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "P95 rs/{{dst_replicaset}}",
+ "refId": "B"
+ },
+ {
+ "expr": "histogram_quantile(0.99, sum(rate(response_latency_ms_bucket{namespace=\"$namespace\", replicaset=\"$replicaset\", dst_replicaset=\"$outbound\", direction=\"outbound\"}[30s])) by (le, dst_replicaset))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "P99 rs/{{dst_replicaset}}",
+ "refId": "C"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "LATENCY",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ }
+ ],
+ "repeat": "outbound",
+ "title": "rs/$outbound",
+ "type": "row"
+ },
+ {
+ "content": "\n
\n
\n
\n
\n
\n
\n\n\n
",
+ "gridPos": {
+ "h": 3,
+ "w": 24,
+ "x": 0,
+ "y": 44
+ },
+ "height": "1px",
+ "id": 171,
+ "links": [],
+ "mode": "html",
+ "options": {},
+ "title": "",
+ "transparent": true,
+ "type": "text"
+ }
+ ],
+ "refresh": "1m",
+ "schemaVersion": 18,
+ "style": "dark",
+ "tags": [
+ "linkerd"
+ ],
+ "templating": {
+ "list": [
+ {
+ "hide": 0,
+ "label": "datasource",
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
+ {
+ "allValue": ".*",
+ "current": {
+ "text": "default",
+ "value": "default"
+ },
+ "datasource": "${DS_PROMETHEUS}",
+ "definition": "label_values(process_start_time_seconds{replicaset!=\"\"}, namespace)",
+ "hide": 0,
+ "includeAll": false,
+ "label": "Namespace",
+ "multi": false,
+ "name": "namespace",
+ "options": [],
+ "query": "label_values(process_start_time_seconds{replicaset!=\"\"}, namespace)",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".*",
+ "current": {
+ "text": "rs1",
+ "value": "rs1"
+ },
+ "datasource": "${DS_PROMETHEUS}",
+ "definition": "label_values(process_start_time_seconds{namespace=\"$namespace\"}, replicaset)",
+ "hide": 0,
+ "includeAll": false,
+ "label": "ReplicaSet",
+ "multi": false,
+ "name": "replicaset",
+ "options": [],
+ "query": "label_values(process_start_time_seconds{namespace=\"$namespace\"}, replicaset)",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".*",
+ "current": {
+ "text": "All",
+ "value": "$__all"
+ },
+ "datasource": "${DS_PROMETHEUS}",
+ "definition": "label_values(request_total{dst_namespace=\"$namespace\", dst_replicaset=\"$replicaset\"}, replicaset)",
+ "hide": 2,
+ "includeAll": true,
+ "label": null,
+ "multi": false,
+ "name": "inbound",
+ "options": [],
+ "query": "label_values(request_total{dst_namespace=\"$namespace\", dst_replicaset=\"$replicaset\"}, replicaset)",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": ".*",
+ "current": {
+ "text": "All",
+ "value": "$__all"
+ },
+ "datasource": "${DS_PROMETHEUS}",
+ "definition": "label_values(request_total{namespace=\"$namespace\", replicaset=\"$replicaset\"}, dst_replicaset)",
+ "hide": 2,
+ "includeAll": true,
+ "label": null,
+ "multi": false,
+ "name": "outbound",
+ "options": [],
+ "query": "label_values(request_total{namespace=\"$namespace\", replicaset=\"$replicaset\"}, dst_replicaset)",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-5m",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "",
+ "title": "Linkerd ReplicaSet",
+ "uid": "linkerd-replicaset",
+ "version": 1
+}
\ No newline at end of file
diff --git a/roles/prometheus/dashboards/linkerd/linkerd-replicationcontroller.json b/argocd/system/linkerd-viz/dashboards/linkerd-replicationcontroller.json
similarity index 99%
rename from roles/prometheus/dashboards/linkerd/linkerd-replicationcontroller.json
rename to argocd/system/linkerd-viz/dashboards/linkerd-replicationcontroller.json
index c552d749..bde22817 100644
--- a/roles/prometheus/dashboards/linkerd/linkerd-replicationcontroller.json
+++ b/argocd/system/linkerd-viz/dashboards/linkerd-replicationcontroller.json
@@ -1,56 +1,56 @@
{
+ "__elements": [],
"__inputs": [
{
- "name": "DS_PROMETHEUS",
- "label": "prometheus",
"description": "",
- "type": "datasource",
+ "label": "prometheus",
+ "name": "DS_PROMETHEUS",
"pluginId": "prometheus",
- "pluginName": "Prometheus"
+ "pluginName": "Prometheus",
+ "type": "datasource"
}
],
- "__elements": [],
"__requires": [
{
- "type": "panel",
"id": "gauge",
"name": "Gauge",
+ "type": "panel",
"version": ""
},
{
- "type": "grafana",
"id": "grafana",
"name": "Grafana",
+ "type": "grafana",
"version": "8.3.3"
},
{
- "type": "panel",
"id": "graph",
"name": "Graph (old)",
+ "type": "panel",
"version": ""
},
{
- "type": "panel",
"id": "heatmap",
"name": "Heatmap",
+ "type": "panel",
"version": ""
},
{
- "type": "datasource",
"id": "prometheus",
"name": "Prometheus",
+ "type": "datasource",
"version": "1.0.0"
},
{
- "type": "panel",
"id": "stat",
"name": "Stat",
+ "type": "panel",
"version": ""
},
{
- "type": "panel",
"id": "text",
"name": "Text",
+ "type": "panel",
"version": ""
}
],
@@ -571,7 +571,7 @@
"expr": "sum(irate(request_total{namespace=\"$namespace\", replicationcontroller=\"$replicationcontroller\", direction=\"inbound\", tls=\"true\"}[30s])) by (replicationcontroller)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "🔒rc/{{replicationcontroller}}",
+ "legendFormat": "\ud83d\udd12rc/{{replicationcontroller}}",
"refId": "A"
},
{
@@ -1159,7 +1159,7 @@
"expr": "sum(irate(request_total{replicationcontroller!=\"\", replicationcontroller=\"$inbound\", dst_namespace=\"$namespace\", dst_replicationcontroller=\"$replicationcontroller\", direction=\"outbound\", tls=\"true\"}[30s])) by (replicationcontroller, pod)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "🔒po/{{pod}}",
+ "legendFormat": "\ud83d\udd12po/{{pod}}",
"refId": "A"
},
{
@@ -1475,7 +1475,7 @@
"expr": "sum(irate(request_total{namespace=\"$namespace\", replicationcontroller=\"$replicationcontroller\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_replicationcontroller)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "🔒rc/{{dst_replicationcontroller}}",
+ "legendFormat": "\ud83d\udd12rc/{{dst_replicationcontroller}}",
"refId": "A"
},
{
@@ -2045,7 +2045,7 @@
"expr": "sum(irate(request_total{namespace=\"$namespace\", replicationcontroller=\"$replicationcontroller\", dst_replicationcontroller=\"$outbound\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_replicationcontroller)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "🔒rc/{{dst_replicationcontroller}}",
+ "legendFormat": "\ud83d\udd12rc/{{dst_replicationcontroller}}",
"refId": "A"
},
{
@@ -2228,6 +2228,16 @@
],
"templating": {
"list": [
+ {
+ "hide": 0,
+ "label": "datasource",
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
{
"allValue": ".*",
"current": {},
@@ -2351,4 +2361,4 @@
"title": "Linkerd ReplicationController",
"uid": "linkerd-replicationcontroller",
"version": 1
-}
+}
\ No newline at end of file
diff --git a/argocd/system/linkerd-viz/dashboards/linkerd-route.json b/argocd/system/linkerd-viz/dashboards/linkerd-route.json
new file mode 100644
index 00000000..97d17cca
--- /dev/null
+++ b/argocd/system/linkerd-viz/dashboards/linkerd-route.json
@@ -0,0 +1,1326 @@
+{
+ "__elements": [],
+ "__inputs": [
+ {
+ "description": "",
+ "label": "prometheus",
+ "name": "DS_PROMETHEUS",
+ "pluginId": "prometheus",
+ "pluginName": "Prometheus",
+ "type": "datasource"
+ }
+ ],
+ "__requires": [
+ {
+ "id": "gauge",
+ "name": "Gauge",
+ "type": "panel",
+ "version": ""
+ },
+ {
+ "id": "grafana",
+ "name": "Grafana",
+ "type": "grafana",
+ "version": "8.3.3"
+ },
+ {
+ "id": "graph",
+ "name": "Graph (old)",
+ "type": "panel",
+ "version": ""
+ },
+ {
+ "id": "heatmap",
+ "name": "Heatmap",
+ "type": "panel",
+ "version": ""
+ },
+ {
+ "id": "prometheus",
+ "name": "Prometheus",
+ "type": "datasource",
+ "version": "1.0.0"
+ },
+ {
+ "id": "stat",
+ "name": "Stat",
+ "type": "panel",
+ "version": ""
+ },
+ {
+ "id": "text",
+ "name": "Text",
+ "type": "panel",
+ "version": ""
+ }
+ ],
+ "annotations": {
+ "list": [
+ {
+ "builtIn": 1,
+ "datasource": "-- Grafana --",
+ "enable": true,
+ "hide": true,
+ "iconColor": "rgba(0, 211, 255, 1)",
+ "name": "Annotations & Alerts",
+ "type": "dashboard"
+ }
+ ]
+ },
+ "editable": true,
+ "gnetId": null,
+ "graphTooltip": 1,
+ "id": null,
+ "iteration": 1539806914987,
+ "links": [],
+ "panels": [
+ {
+ "content": "\n
\n
route/$rt_route \n
",
+ "gridPos": {
+ "h": 2,
+ "w": 24,
+ "x": 0,
+ "y": 0
+ },
+ "id": 2,
+ "links": [],
+ "mode": "html",
+ "options": {},
+ "title": "",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#d44a3a",
+ "rgba(237, 129, 40, 0.89)",
+ "#299c46"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": null,
+ "format": "percentunit",
+ "gauge": {
+ "maxValue": 1,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 8,
+ "x": 0,
+ "y": 2
+ },
+ "id": 4,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "options": {},
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": true,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(irate(route_response_total{classification=\"success\", namespace=\"$namespace\", direction=\"inbound\", rt_route=\"$rt_route\"}[30s])) / sum(irate(route_response_total{namespace=\"$namespace\", direction=\"inbound\", rt_route=\"$rt_route\"}[30s]))",
+ "format": "time_series",
+ "instant": false,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "0.9,.99",
+ "title": "SUCCESS RATE",
+ "transparent": true,
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": null,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 8,
+ "x": 8,
+ "y": 2
+ },
+ "id": 6,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "options": {},
+ "postfix": " RPS",
+ "postfixFontSize": "100%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": true,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(irate(route_request_total{namespace=\"$namespace\", direction=\"inbound\", rt_route=\"$rt_route\"}[30s]))",
+ "format": "time_series",
+ "instant": false,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "REQUEST RATE",
+ "transparent": true,
+ "type": "singlestat",
+ "valueFontSize": "100%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "#299c46",
+ "rgba(237, 129, 40, 0.89)",
+ "#d44a3a"
+ ],
+ "datasource": "${DS_PROMETHEUS}",
+ "decimals": null,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "gridPos": {
+ "h": 4,
+ "w": 8,
+ "x": 16,
+ "y": 2
+ },
+ "id": 8,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "options": {},
+ "postfix": " ms",
+ "postfixFontSize": "100%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": true,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": true
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.95, sum(irate(route_response_latency_ms_bucket{namespace=\"$namespace\", direction=\"inbound\", rt_route=\"$rt_route\"}[30s])) by (le, rt_route))",
+ "format": "time_series",
+ "instant": false,
+ "intervalFactor": 1,
+ "legendFormat": "",
+ "refId": "A"
+ }
+ ],
+ "thresholds": "",
+ "title": "P95 LATENCY",
+ "transparent": true,
+ "type": "singlestat",
+ "valueFontSize": "100%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "content": "\n TOP-LINE TRAFFIC \n
",
+ "gridPos": {
+ "h": 2,
+ "w": 24,
+ "x": 0,
+ "y": 6
+ },
+ "id": 10,
+ "links": [],
+ "mode": "html",
+ "options": {},
+ "title": "",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 0,
+ "y": 8
+ },
+ "id": 12,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(route_response_total{classification=\"success\", namespace=\"$namespace\", direction=\"inbound\", rt_route=\"$rt_route\"}[30s])) by (rt_route) / sum(irate(route_response_total{namespace=\"$namespace\", direction=\"inbound\", rt_route=\"$rt_route\"}[30s])) by (rt_route)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "route/{{rt_route}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "SUCCESS RATE",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": null,
+ "format": "percentunit",
+ "label": "",
+ "logBase": 1,
+ "max": "1",
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 8,
+ "y": 8
+ },
+ "id": 14,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(route_request_total{namespace=\"$namespace\", direction=\"inbound\", rt_route=\"$rt_route\", tls=\"true\"}[30s])) by (rt_route)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "\ud83d\udd12route/{{rt_route}}",
+ "refId": "A"
+ },
+ {
+ "expr": "sum(irate(route_request_total{namespace=\"$namespace\", direction=\"inbound\", rt_route=\"$rt_route\", tls!=\"true\"}[30s])) by (rt_route)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "route/{{rt_route}}",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "REQUEST RATE",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": null,
+ "format": "rps",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 16,
+ "y": 8
+ },
+ "id": 16,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.5, sum(irate(route_response_latency_ms_bucket{namespace=\"$namespace\", direction=\"inbound\", rt_route=\"$rt_route\"}[30s])) by (le, rt_route))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "p50 route/{{rt_route}}",
+ "refId": "A"
+ },
+ {
+ "expr": "histogram_quantile(0.95, sum(irate(route_response_latency_ms_bucket{namespace=\"$namespace\", direction=\"inbound\", rt_route=\"$rt_route\"}[30s])) by (le, rt_route))",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 1,
+ "legendFormat": "p95 route/{{rt_route}}",
+ "refId": "B"
+ },
+ {
+ "expr": "histogram_quantile(0.99, sum(irate(route_response_latency_ms_bucket{namespace=\"$namespace\", direction=\"inbound\", rt_route=\"$rt_route\"}[30s])) by (le, rt_route))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "p99 route/{{rt_route}}",
+ "refId": "C"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "LATENCY",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": null,
+ "format": "ms",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "content": "\n INBOUND TRAFFIC BY DEPLOYMENT \n
",
+ "gridPos": {
+ "h": 2,
+ "w": 24,
+ "x": 0,
+ "y": 15
+ },
+ "id": 18,
+ "links": [],
+ "mode": "html",
+ "options": {},
+ "title": "",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 0,
+ "y": 17
+ },
+ "id": 20,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(route_response_total{classification=\"success\", namespace=\"$namespace\", direction=\"outbound\", rt_route=\"$rt_route\"}[30s])) by (deployment, rt_route) / sum(irate(route_response_total{namespace=\"$namespace\", direction=\"outbound\", rt_route=\"$rt_route\"}[30s])) by (deployment, rt_route)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "deploy/{{deployment}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "SUCCESS RATE",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": null,
+ "format": "percentunit",
+ "label": "",
+ "logBase": 1,
+ "max": "1",
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 8,
+ "y": 17
+ },
+ "id": 22,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(route_request_total{namespace=\"$namespace\", direction=\"outbound\", tls=\"true\", rt_route=\"$rt_route\"}[30s])) by (deployment, rt_route)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "\ud83d\udd12deploy/{{deployment}}",
+ "refId": "A"
+ },
+ {
+ "expr": "sum(irate(route_request_total{namespace=\"$namespace\", direction=\"outbound\", tls!=\"true\", rt_route=\"$rt_route\"}[30s])) by (deployment, rt_route)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "deploy/{{deployment}}",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "REQUEST RATE",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "rps",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 16,
+ "y": 17
+ },
+ "id": 24,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.95, sum(rate(route_response_latency_ms_bucket{namespace=\"$namespace\", direction=\"outbound\", rt_route=\"$rt_route\"}[30s])) by (le, deployment, rt_route))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "P95 deploy/{{deployment}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "P95 LATENCY",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "content": "\n INBOUND TRAFFIC BY POD \n
",
+ "gridPos": {
+ "h": 2,
+ "w": 24,
+ "x": 0,
+ "y": 24
+ },
+ "id": 26,
+ "links": [],
+ "mode": "html",
+ "options": {},
+ "title": "",
+ "transparent": true,
+ "type": "text"
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 0,
+ "y": 26
+ },
+ "id": 28,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(route_response_total{classification=\"success\", namespace=\"$namespace\", direction=\"outbound\", rt_route=\"$rt_route\"}[30s])) by (pod, rt_route) / sum(irate(route_response_total{namespace=\"$namespace\", direction=\"outbound\", rt_route=\"$rt_route\"}[30s])) by (pod, rt_route)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "po/{{pod}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "SUCCESS RATE",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "decimals": null,
+ "format": "percentunit",
+ "label": "",
+ "logBase": 1,
+ "max": "1",
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 0,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 8,
+ "y": 26
+ },
+ "id": 30,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(irate(route_request_total{namespace=\"$namespace\", direction=\"outbound\", tls=\"true\", rt_route=\"$rt_route\"}[30s])) by (pod, rt_route)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "\ud83d\udd12po/{{pod}}",
+ "refId": "A"
+ },
+ {
+ "expr": "sum(irate(route_request_total{namespace=\"$namespace\", direction=\"outbound\", tls!=\"true\", rt_route=\"$rt_route\"}[30s])) by (pod, rt_route)",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "po/{{pod}}",
+ "refId": "B"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "REQUEST RATE",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "rps",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": "0",
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "${DS_PROMETHEUS}",
+ "fill": 1,
+ "gridPos": {
+ "h": 7,
+ "w": 8,
+ "x": 16,
+ "y": 26
+ },
+ "id": 32,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null",
+ "options": {},
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "histogram_quantile(0.95, sum(rate(route_response_latency_ms_bucket{namespace=\"$namespace\", direction=\"outbound\", rt_route=\"$rt_route\"}[30s])) by (le, pod))",
+ "format": "time_series",
+ "intervalFactor": 1,
+ "legendFormat": "P95 po/{{pod, rt_route}}",
+ "refId": "A"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeRegions": [],
+ "timeShift": null,
+ "title": "P95 LATENCY",
+ "tooltip": {
+ "shared": true,
+ "sort": 2,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ],
+ "yaxis": {
+ "align": false,
+ "alignLevel": null
+ }
+ },
+ {
+ "content": "\n
\n
\n
\n
\n
\n
\n\n\n
",
+ "gridPos": {
+ "h": 3,
+ "w": 24,
+ "x": 0,
+ "y": 33
+ },
+ "height": "1px",
+ "id": 34,
+ "links": [],
+ "mode": "html",
+ "options": {},
+ "title": "",
+ "transparent": true,
+ "type": "text"
+ }
+ ],
+ "refresh": "1m",
+ "schemaVersion": 18,
+ "style": "dark",
+ "tags": [
+ "linkerd"
+ ],
+ "templating": {
+ "list": [
+ {
+ "hide": 0,
+ "label": "datasource",
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
+ {
+ "allValue": null,
+ "current": {},
+ "datasource": "${DS_PROMETHEUS}",
+ "definition": "",
+ "hide": 0,
+ "includeAll": false,
+ "label": "Namespace",
+ "multi": false,
+ "name": "namespace",
+ "options": [],
+ "query": "label_values(route_request_total, namespace)",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "allValue": null,
+ "current": {},
+ "datasource": "${DS_PROMETHEUS}",
+ "definition": "",
+ "hide": 0,
+ "includeAll": false,
+ "label": "Route",
+ "multi": false,
+ "name": "rt_route",
+ "options": [],
+ "query": "label_values(route_request_total{namespace=\"$namespace\"}, rt_route)",
+ "refresh": 2,
+ "regex": "",
+ "skipUrlSync": false,
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-5m",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "",
+ "title": "Linkerd Route",
+ "uid": "route",
+ "version": 1
+}
\ No newline at end of file
diff --git a/roles/prometheus/dashboards/linkerd/linkerd-service.json b/argocd/system/linkerd-viz/dashboards/linkerd-service.json
similarity index 98%
rename from roles/prometheus/dashboards/linkerd/linkerd-service.json
rename to argocd/system/linkerd-viz/dashboards/linkerd-service.json
index 71fe14bb..c14d698e 100644
--- a/roles/prometheus/dashboards/linkerd/linkerd-service.json
+++ b/argocd/system/linkerd-viz/dashboards/linkerd-service.json
@@ -1,56 +1,56 @@
{
+ "__elements": [],
"__inputs": [
{
- "name": "DS_PROMETHEUS",
- "label": "prometheus",
"description": "",
- "type": "datasource",
+ "label": "prometheus",
+ "name": "DS_PROMETHEUS",
"pluginId": "prometheus",
- "pluginName": "Prometheus"
+ "pluginName": "Prometheus",
+ "type": "datasource"
}
],
- "__elements": [],
"__requires": [
{
- "type": "panel",
"id": "gauge",
"name": "Gauge",
+ "type": "panel",
"version": ""
},
{
- "type": "grafana",
"id": "grafana",
"name": "Grafana",
+ "type": "grafana",
"version": "8.3.3"
},
{
- "type": "panel",
"id": "graph",
"name": "Graph (old)",
+ "type": "panel",
"version": ""
},
{
- "type": "panel",
"id": "heatmap",
"name": "Heatmap",
+ "type": "panel",
"version": ""
},
{
- "type": "datasource",
"id": "prometheus",
"name": "Prometheus",
+ "type": "datasource",
"version": "1.0.0"
},
{
- "type": "panel",
"id": "stat",
"name": "Stat",
+ "type": "panel",
"version": ""
},
{
- "type": "panel",
"id": "text",
"name": "Text",
+ "type": "panel",
"version": ""
}
],
@@ -489,7 +489,7 @@
"expr": "sum(irate(request_total{namespace=\"$namespace\", dst_service=\"$service\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_service)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "🔒svc/{{dst_service}}",
+ "legendFormat": "\ud83d\udd12svc/{{dst_service}}",
"refId": "A"
},
{
@@ -788,7 +788,7 @@
"expr": "sum(irate(request_total{namespace=\"$namespace\", dst_service=\"$service\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_service, deployment)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "🔒deploy/{{deployment}}",
+ "legendFormat": "\ud83d\udd12deploy/{{deployment}}",
"refId": "A"
},
{
@@ -1070,7 +1070,7 @@
"expr": "sum(irate(request_total{namespace=\"$namespace\", dst_service=\"$service\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_service, pod)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "🔒po/{{pod}}",
+ "legendFormat": "\ud83d\udd12po/{{pod}}",
"refId": "A"
},
{
@@ -1234,6 +1234,16 @@
],
"templating": {
"list": [
+ {
+ "hide": 0,
+ "label": "datasource",
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
{
"allValue": ".*",
"current": {},
@@ -1363,4 +1373,4 @@
"title": "Linkerd Service",
"uid": "linkerd-service",
"version": 1
-}
+}
\ No newline at end of file
diff --git a/roles/prometheus/dashboards/linkerd/linkerd-statefulset.json b/argocd/system/linkerd-viz/dashboards/linkerd-statefulset.json
similarity index 99%
rename from roles/prometheus/dashboards/linkerd/linkerd-statefulset.json
rename to argocd/system/linkerd-viz/dashboards/linkerd-statefulset.json
index 03405f9f..d223c2fc 100644
--- a/roles/prometheus/dashboards/linkerd/linkerd-statefulset.json
+++ b/argocd/system/linkerd-viz/dashboards/linkerd-statefulset.json
@@ -1,56 +1,56 @@
{
+ "__elements": [],
"__inputs": [
{
- "name": "DS_PROMETHEUS",
- "label": "prometheus",
"description": "",
- "type": "datasource",
+ "label": "prometheus",
+ "name": "DS_PROMETHEUS",
"pluginId": "prometheus",
- "pluginName": "Prometheus"
+ "pluginName": "Prometheus",
+ "type": "datasource"
}
],
- "__elements": [],
"__requires": [
{
- "type": "panel",
"id": "gauge",
"name": "Gauge",
+ "type": "panel",
"version": ""
},
{
- "type": "grafana",
"id": "grafana",
"name": "Grafana",
+ "type": "grafana",
"version": "8.3.3"
},
{
- "type": "panel",
"id": "graph",
"name": "Graph (old)",
+ "type": "panel",
"version": ""
},
{
- "type": "panel",
"id": "heatmap",
"name": "Heatmap",
+ "type": "panel",
"version": ""
},
{
- "type": "datasource",
"id": "prometheus",
"name": "Prometheus",
+ "type": "datasource",
"version": "1.0.0"
},
{
- "type": "panel",
"id": "stat",
"name": "Stat",
+ "type": "panel",
"version": ""
},
{
- "type": "panel",
"id": "text",
"name": "Text",
+ "type": "panel",
"version": ""
}
],
@@ -571,7 +571,7 @@
"expr": "sum(irate(request_total{namespace=\"$namespace\", statefulset=\"$statefulset\", direction=\"inbound\", tls=\"true\"}[30s])) by (statefulset)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "🔒sts/{{statefulset}}",
+ "legendFormat": "\ud83d\udd12sts/{{statefulset}}",
"refId": "A"
},
{
@@ -1159,7 +1159,7 @@
"expr": "sum(irate(request_total{statefulset!=\"\", statefulset=\"$inbound\", dst_namespace=\"$namespace\", dst_statefulset=\"$statefulset\", direction=\"outbound\", tls=\"true\"}[30s])) by (statefulset, pod)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "🔒po/{{pod}}",
+ "legendFormat": "\ud83d\udd12po/{{pod}}",
"refId": "A"
},
{
@@ -1475,7 +1475,7 @@
"expr": "sum(irate(request_total{namespace=\"$namespace\", statefulset=\"$statefulset\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_statefulset)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "🔒sts/{{dst_statefulset}}",
+ "legendFormat": "\ud83d\udd12sts/{{dst_statefulset}}",
"refId": "A"
},
{
@@ -2045,7 +2045,7 @@
"expr": "sum(irate(request_total{namespace=\"$namespace\", statefulset=\"$statefulset\", dst_statefulset=\"$outbound\", direction=\"outbound\", tls=\"true\"}[30s])) by (dst_statefulset)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "🔒sts/{{dst_statefulset}}",
+ "legendFormat": "\ud83d\udd12sts/{{dst_statefulset}}",
"refId": "A"
},
{
@@ -2228,6 +2228,16 @@
],
"templating": {
"list": [
+ {
+ "hide": 0,
+ "label": "datasource",
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
{
"allValue": ".*",
"current": {},
@@ -2351,4 +2361,4 @@
"title": "Linkerd StatefulSet",
"uid": "linkerd-statefulset",
"version": 1
-}
+}
\ No newline at end of file
diff --git a/roles/prometheus/dashboards/linkerd/linkerd-top-line.json b/argocd/system/linkerd-viz/dashboards/linkerd-top-line.json
similarity index 98%
rename from roles/prometheus/dashboards/linkerd/linkerd-top-line.json
rename to argocd/system/linkerd-viz/dashboards/linkerd-top-line.json
index b449d334..748e52e9 100644
--- a/roles/prometheus/dashboards/linkerd/linkerd-top-line.json
+++ b/argocd/system/linkerd-viz/dashboards/linkerd-top-line.json
@@ -1,56 +1,56 @@
{
+ "__elements": [],
"__inputs": [
{
- "name": "DS_PROMETHEUS",
- "label": "prometheus",
"description": "",
- "type": "datasource",
+ "label": "prometheus",
+ "name": "DS_PROMETHEUS",
"pluginId": "prometheus",
- "pluginName": "Prometheus"
+ "pluginName": "Prometheus",
+ "type": "datasource"
}
],
- "__elements": [],
"__requires": [
{
- "type": "panel",
"id": "gauge",
"name": "Gauge",
+ "type": "panel",
"version": ""
},
{
- "type": "grafana",
"id": "grafana",
"name": "Grafana",
+ "type": "grafana",
"version": "8.3.3"
},
{
- "type": "panel",
"id": "graph",
"name": "Graph (old)",
+ "type": "panel",
"version": ""
},
{
- "type": "panel",
"id": "heatmap",
"name": "Heatmap",
+ "type": "panel",
"version": ""
},
{
- "type": "datasource",
"id": "prometheus",
"name": "Prometheus",
+ "type": "datasource",
"version": "1.0.0"
},
{
- "type": "panel",
"id": "stat",
"name": "Stat",
+ "type": "panel",
"version": ""
},
{
- "type": "panel",
"id": "text",
"name": "Text",
+ "type": "panel",
"version": ""
}
],
@@ -567,7 +567,7 @@
"expr": "sum(irate(request_total{namespace=~\"$namespace\", direction=\"inbound\", tls=\"true\"}[30s])) by (namespace)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "🔒ns/{{namespace}}",
+ "legendFormat": "\ud83d\udd12ns/{{namespace}}",
"refId": "A"
},
{
@@ -900,7 +900,7 @@
"expr": "sum(irate(request_total{namespace=\"$namespace\", direction=\"inbound\", tls=\"true\"}[30s])) by (deployment)",
"format": "time_series",
"intervalFactor": 1,
- "legendFormat": "🔒deploy/{{deployment}}",
+ "legendFormat": "\ud83d\udd12deploy/{{deployment}}",
"refId": "A"
},
{
@@ -1053,6 +1053,16 @@
],
"templating": {
"list": [
+ {
+ "hide": 0,
+ "label": "datasource",
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
{
"allValue": ".*",
"current": {
@@ -1138,4 +1148,4 @@
"title": "Linkerd Top Line",
"uid": "linkerd-top-line",
"version": 1
-}
+}
\ No newline at end of file
diff --git a/argocd/system/linkerd-viz/templates/grafana-dashboards.yaml b/argocd/system/linkerd-viz/templates/grafana-dashboards.yaml
new file mode 100644
index 00000000..55a1ad77
--- /dev/null
+++ b/argocd/system/linkerd-viz/templates/grafana-dashboards.yaml
@@ -0,0 +1,17 @@
+{{- $files := .Files.Glob "dashboards/*.json" }}
+{{- if $files }}
+{{- range $path, $fileContents := $files }}
+{{- $dashboardName := regexReplaceAll "(^.*/)(.*)\\.json$" $path "${2}" }}
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: {{ $dashboardName }}
+ namespace: {{ $.Release.Namespace }}
+ labels:
+ grafana_dashboard: "1"
+data:
+ {{ $dashboardName }}.json: |-
+ {{- $.Files.Get $path | nindent 4 }}
+{{- end }}
+{{- end }}
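For context, the grafana-dashboards.yaml template above globs every JSON file under dashboards/ and wraps each one in a ConfigMap labelled grafana_dashboard: "1", which the Grafana dashboard sidecar discovers automatically. A rough sketch of what it renders for a single file (assuming a dashboard named linkerd-top-line.json and a release installed into the linkerd-viz namespace):

    # Rendered by grafana-dashboards.yaml for dashboards/linkerd-top-line.json (illustrative)
    apiVersion: v1
    kind: ConfigMap
    metadata:
      name: linkerd-top-line
      namespace: linkerd-viz
      labels:
        grafana_dashboard: "1"
    data:
      linkerd-top-line.json: |-
        # full contents of dashboards/linkerd-top-line.json inlined here by .Files.Get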
diff --git a/argocd/system/linkerd-viz/templates/ingress.yaml b/argocd/system/linkerd-viz/templates/ingress.yaml
new file mode 100644
index 00000000..9513a83b
--- /dev/null
+++ b/argocd/system/linkerd-viz/templates/ingress.yaml
@@ -0,0 +1,48 @@
+# Linkerd Middlewares
+---
+apiVersion: traefik.containo.us/v1alpha1
+kind: Middleware
+metadata:
+ name: l5d-header-middleware-linkerd-viz
+ namespace: {{ .Release.Namespace }}
+spec:
+ headers:
+ customRequestHeaders:
+ l5d-dst-override: "web.{{ .Release.Namespace }}.svc.cluster.local:8084"
+
+---
+# HTTPS Ingress
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+ name: linkerd-viz-ingress
+ namespace: {{ .Release.Namespace }}
+ annotations:
+ # HTTPS as entry point
+ traefik.ingress.kubernetes.io/router.entrypoints: websecure
+ # Enable TLS
+ traefik.ingress.kubernetes.io/router.tls: "true"
+{{- if .Values.ingress.basicAuth.enable }}
+    # Use the configured Basic Auth middleware
+ traefik.ingress.kubernetes.io/router.middlewares:
+ {{ .Values.ingress.basicAuth.middlewareNamespace }}-{{ .Values.ingress.basicAuth.middlewareName }}@kubernetescrd
+{{- end }}
+    # Enable cert-manager to automatically create the SSL certificate and store it in a Secret
+ cert-manager.io/cluster-issuer: {{ .Values.ingress.certmanager.tlsIssuer }}-issuer
+ cert-manager.io/common-name: {{ .Values.ingress.host }}
+spec:
+ tls:
+ - hosts:
+ - {{ .Values.ingress.host }}
+ secretName: linkerd-viz-tls
+ rules:
+ - host: {{ .Values.ingress.host }}
+ http:
+ paths:
+ - path: /
+ pathType: Prefix
+ backend:
+ service:
+ name: web
+ port:
+ number: 8084
diff --git a/roles/linkerd/linkerd-viz/templates/linkerd_viz_prometheus.yml.j2 b/argocd/system/linkerd-viz/templates/podmonitor.yaml
similarity index 87%
rename from roles/linkerd/linkerd-viz/templates/linkerd_viz_prometheus.yml.j2
rename to argocd/system/linkerd-viz/templates/podmonitor.yaml
index 0f019a49..7ff059d7 100644
--- a/roles/linkerd/linkerd-viz/templates/linkerd_viz_prometheus.yml.j2
+++ b/argocd/system/linkerd-viz/templates/podmonitor.yaml
@@ -1,17 +1,18 @@
+{{- if and .Values.serviceMonitor.enable (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/PodMonitor") }}
---
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
labels:
app: linkerd
- release: kube-prometheus-stack
+ release: {{ .Values.serviceMonitor.release }}
name: linkerd-controller
- namespace: {{ k3s_monitoring_namespace }}
+ namespace: {{ .Release.Namespace }}
spec:
namespaceSelector:
matchNames:
- - linkerd-viz
- - linkerd
+ - {{ .Release.Namespace }}-viz
+ - {{ .Release.Namespace }}
selector:
matchLabels: {}
podMetricsEndpoints:
@@ -37,9 +38,9 @@ kind: PodMonitor
metadata:
labels:
app: linkerd
- release: kube-prometheus-stack
+ release: {{ .Values.serviceMonitor.release }}
name: linkerd-service-mirror
- namespace: {{ k3s_monitoring_namespace }}
+ namespace: {{ .Release.Namespace }}
spec:
namespaceSelector:
any: true
@@ -68,9 +69,9 @@ kind: PodMonitor
metadata:
labels:
app: linkerd
- release: kube-prometheus-stack
+ release: {{ .Values.serviceMonitor.release }}
name: linkerd-proxy
- namespace: {{ k3s_monitoring_namespace }}
+ namespace: {{ .Release.Namespace }}
spec:
namespaceSelector:
any: true
@@ -119,3 +120,5 @@ spec:
action: replace
targetLabel: job
replacement: linkerd-proxy
+
+{{- end }}
\ No newline at end of file
diff --git a/argocd/system/linkerd-viz/values.yaml b/argocd/system/linkerd-viz/values.yaml
new file mode 100644
index 00000000..77df545f
--- /dev/null
+++ b/argocd/system/linkerd-viz/values.yaml
@@ -0,0 +1,38 @@
+
+# Prometheus monitoring
+serviceMonitor:
+ enable: true
+ release: monitoring
+
+# Ingress configuration
+ingress:
+ host: linkerd.picluster.ricsanfre.com
+ # configure cert-manager issuer
+ certmanager:
+    # tlsIssuer=letsencrypt to generate a valid TLS certificate using the IONOS API
+ # tlsIssuer=ca to generate a CA-signed certificate (not valid)
+ tlsIssuer: letsencrypt
+ # tlsIssuer: ca
+  # Enable Traefik basic auth, reusing the global middleware created for Traefik
+ basicAuth:
+ enable: true
+ middlewareName: basic-auth
+ middlewareNamespace: traefik
+
+########################
+# linkerd-viz subchart
+########################
+
+linkerd-viz:
+ # Skip namespace creation
+ installNamespace: false
+ # External Prometheus
+ prometheusUrl: http://monitoring-prometheus.monitoring.svc.cluster.local:9090
+ prometheus:
+ enabled: false
+ # External Grafana
+ grafana:
+ url: monitoring-grafana.monitoring.svc.cluster.local
+ # Disabling DNS rebinding protection
+ dashboard:
+ enforcedHostRegexp: .*
diff --git a/argocd/system/linkerd/Chart.yaml b/argocd/system/linkerd/Chart.yaml
new file mode 100644
index 00000000..99d6e576
--- /dev/null
+++ b/argocd/system/linkerd/Chart.yaml
@@ -0,0 +1,10 @@
+apiVersion: v2
+name: linkerd
+version: 0.0.0
+dependencies:
+ - name: linkerd-crds
+ version: 1.4.0
+ repository: https://helm.linkerd.io/stable
+ - name: linkerd-control-plane
+ version: 1.9.4
+ repository: https://helm.linkerd.io/stable
diff --git a/roles/linkerd/control-plane/templates/linkerd_issuer.yml.j2 b/argocd/system/linkerd/templates/linkerd-identity-issuer.yaml
similarity index 63%
rename from roles/linkerd/control-plane/templates/linkerd_issuer.yml.j2
rename to argocd/system/linkerd/templates/linkerd-identity-issuer.yaml
index a788d8fe..cc6e428e 100644
--- a/roles/linkerd/control-plane/templates/linkerd_issuer.yml.j2
+++ b/argocd/system/linkerd/templates/linkerd-identity-issuer.yaml
@@ -1,15 +1,14 @@
----
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: linkerd-identity-issuer
- namespace: {{ k3s_linkerd_namespace }}
+ namespace: linkerd
spec:
secretName: linkerd-identity-issuer
- duration: {{ issuer_certificate_duration }}
- renewBefore: {{ issuer_certificate_renewbefore }}
+ duration: {{ .Values.identityIssuer.certificate.duration }}
+ renewBefore: {{ .Values.identityIssuer.certificate.renewBefore }}
issuerRef:
- name: ca-issuer
+ name: {{ .Values.identityIssuer.certificate.issuer }}
kind: ClusterIssuer
group: cert-manager.io
commonName: identity.linkerd.cluster.local
@@ -22,4 +21,4 @@ spec:
- cert sign
- crl sign
- server auth
- - client auth
+ - client auth
\ No newline at end of file
diff --git a/argocd/system/linkerd/templates/trust-bundle.yaml b/argocd/system/linkerd/templates/trust-bundle.yaml
new file mode 100644
index 00000000..7b1576f8
--- /dev/null
+++ b/argocd/system/linkerd/templates/trust-bundle.yaml
@@ -0,0 +1,18 @@
+# Cert-manager TrustBundle
+# Share the CA root certificate as a ConfigMap in the linkerd namespace
+
+apiVersion: trust.cert-manager.io/v1alpha1
+kind: Bundle
+metadata:
+ name: linkerd-identity-trust-roots
+spec:
+ sources:
+ - secret:
+ name: "root-secret"
+ key: "ca.crt"
+ target:
+ configMap:
+ key: "ca-bundle.crt"
+ namespaceSelector:
+ matchLabels:
+ kubernetes.io/metadata.name: {{ .Release.Namespace }}
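As a rough sketch of the expected result (assuming trust-manager is running and the chart is released into the linkerd namespace), the Bundle above should materialize the CA root certificate as a ConfigMap in that namespace, which the linkerd-control-plane subchart (configured with identity.externalCA: true in the values file below) is expected to use as its trust anchors:

    # ConfigMap created by trust-manager from the Bundle above (illustrative)
    apiVersion: v1
    kind: ConfigMap
    metadata:
      name: linkerd-identity-trust-roots   # trust-manager names it after the Bundle
      namespace: linkerd
    data:
      ca-bundle.crt: |
        # CA root certificate copied from the ca.crt key of the root-secret Secret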
diff --git a/argocd/system/linkerd/values.yaml b/argocd/system/linkerd/values.yaml
new file mode 100644
index 00000000..25e7eb73
--- /dev/null
+++ b/argocd/system/linkerd/values.yaml
@@ -0,0 +1,17 @@
+
+# Identity Issuer certificate
+identityIssuer:
+ certificate:
+ issuer: ca-issuer
+ renewBefore: 360h0m0s # 15d
+ duration: 2160h0m0s # 90d
+
+################################
+# linkerd-control-plane subchart
+################################
+
+linkerd-control-plane:
+ identity:
+ externalCA: true
+ issuer:
+ scheme: kubernetes.io/tls
diff --git a/argocd/system/logging/Chart.yaml b/argocd/system/logging/Chart.yaml
new file mode 100644
index 00000000..3f4b6f7d
--- /dev/null
+++ b/argocd/system/logging/Chart.yaml
@@ -0,0 +1,19 @@
+apiVersion: v2
+name: logging
+version: 0.0.0
+dependencies:
+ - name: eck-operator
+ version: 2.4.0
+ repository: https://helm.elastic.co
+ - name: fluentd
+ version: 0.3.9
+ repository: https://fluent.github.io/helm-charts
+ - name: fluent-bit
+ version: 0.21.0
+ repository: https://fluent.github.io/helm-charts
+ - name: loki
+ version: 3.8.0
+ repository: https://grafana.github.io/helm-charts
+ - name: prometheus-elasticsearch-exporter
+ version: 4.15.1
+ repository: https://prometheus-community.github.io/helm-charts
\ No newline at end of file
diff --git a/argocd/system/logging/templates/elasticsearch-ingress.yaml b/argocd/system/logging/templates/elasticsearch-ingress.yaml
new file mode 100644
index 00000000..40c3c444
--- /dev/null
+++ b/argocd/system/logging/templates/elasticsearch-ingress.yaml
@@ -0,0 +1,45 @@
+---
+apiVersion: traefik.containo.us/v1alpha1
+kind: Middleware
+metadata:
+ name: l5d-header-middleware-elasticsearch
+ namespace: {{ .Release.Namespace }}
+spec:
+ headers:
+ customRequestHeaders:
+ l5d-dst-override: {{ .Values.eck.clusterName }}-es-http.{{ .Release.Namespace }}.svc.cluster.local:9200
+
+---
+# HTTPS Ingress
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+ name: elasticsearch-ingress
+ namespace: {{ .Release.Namespace }}
+ annotations:
+ # HTTPS as entry point
+ traefik.ingress.kubernetes.io/router.entrypoints: websecure
+ # Enable TLS
+ traefik.ingress.kubernetes.io/router.tls: "true"
+    # Enable cert-manager to automatically create the SSL certificate and store it in a Secret
+ cert-manager.io/cluster-issuer: {{ .Values.elasticsearch.ingress.certmanager.tlsIssuer }}-issuer
+ cert-manager.io/common-name: {{ .Values.elasticsearch.ingress.host }}
+ # Linkerd header
+ traefik.ingress.kubernetes.io/router.middlewares:
+ {{ .Release.Namespace }}-l5d-header-middleware-elasticsearch@kubernetescrd
+spec:
+ tls:
+ - hosts:
+ - {{ .Values.elasticsearch.ingress.host }}
+ secretName: elasticsearch-tls
+ rules:
+ - host: {{ .Values.elasticsearch.ingress.host }}
+ http:
+ paths:
+ - path: /
+ pathType: Prefix
+ backend:
+ service:
+ name: {{ .Values.eck.clusterName }}-es-http
+ port:
+ number: 9200
diff --git a/roles/logging/k3s/templates/elasticsearch.yml.j2 b/argocd/system/logging/templates/elasticsearch.yaml
similarity index 55%
rename from roles/logging/k3s/templates/elasticsearch.yml.j2
rename to argocd/system/logging/templates/elasticsearch.yaml
index ba7cc93a..eb6a7e14 100644
--- a/roles/logging/k3s/templates/elasticsearch.yml.j2
+++ b/argocd/system/logging/templates/elasticsearch.yaml
@@ -1,20 +1,19 @@
----
apiVersion: elasticsearch.k8s.elastic.co/v1
kind: Elasticsearch
metadata:
- name: "{{ efk_cluster_name }}"
- namespace: "{{ k3s_logging_namespace }}"
+ name: {{ .Values.eck.clusterName }}
+ namespace: {{ .Release.Namespace }}
spec:
- version: {{ efk_elasticsearch_version }}
+ version: {{ .Values.elasticsearch.version }}
   http: # Making elasticsearch service available from outside the cluster
tls:
selfSignedCertificate:
disabled: true
nodeSets:
- name: default
- count: {{ efk_elasticsearch_nodes }}
+ count: {{ .Values.elasticsearch.clusterNodes }}
config:
- node.store.allow_mmap: {{ efk_elasticsearch_enable_mmap }}
+ node.store.allow_mmap: {{ .Values.elasticsearch.enableMmap }}
volumeClaimTemplates:
- metadata:
name: elasticsearch-data
@@ -23,20 +22,16 @@ spec:
- ReadWriteOnce
resources:
requests:
- storage: "{{ efk_elasticsearch_storage_size }}"
- storageClassName: "{{ efk_elasticsearch_storage_class }}"
-
+ storage: {{ .Values.elasticsearch.storage.size }}
+ storageClassName: {{ .Values.elasticsearch.storage.class }}
podTemplate:
spec:
-{% if enable_linkerd is sameas true %}
# Enabling service account token. linkerd requirement
automountServiceAccountToken: true
-{% endif %}
+{{- if .Values.elasticsearch.podSpecResources }}
# Limiting Resources consumption
containers:
- name: elasticsearch
resources:
- requests:
- memory: {{ efk_elasticsearch_mem_limit }}
- limits:
- memory: {{ efk_elasticsearch_mem_limit }}
+{{ toYaml .Values.elasticsearch.podSpecResources | indent 14 }}
+{{- end }}
\ No newline at end of file
diff --git a/roles/logging/k3s/templates/fluentd_certificate.yml.j2 b/argocd/system/logging/templates/fluentd-certificate.yaml
similarity index 51%
rename from roles/logging/k3s/templates/fluentd_certificate.yml.j2
rename to argocd/system/logging/templates/fluentd-certificate.yaml
index d12d0a2d..78173082 100644
--- a/roles/logging/k3s/templates/fluentd_certificate.yml.j2
+++ b/argocd/system/logging/templates/fluentd-certificate.yaml
@@ -1,16 +1,14 @@
----
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
name: fluentd-tls
- namespace: "{{ k3s_logging_namespace }}"
+ namespace: {{ .Release.Namespace }}
spec:
# Secret names are always required.
secretName: fluentd-tls
- duration: 2160h # 90d
- renewBefore: 360h # 15d
- commonName: "{{ fluentd_dns }}"
- isCA: false
+ duration: 2160h0m0s # 90d
+ renewBefore: 360h0m0s # 15d
+ commonName: {{ .Values.external.fluentd.dns }}
privateKey:
algorithm: ECDSA
size: 256
@@ -19,18 +17,9 @@ spec:
- client auth
# At least one of a DNS Name, URI, or IP address is required.
dnsNames:
- - "{{ fluentd_dns }}"
+  - {{ .Values.external.fluentd.dns }}
# ClusterIssuer: ca-issuer.
issuerRef:
name: ca-issuer
kind: ClusterIssuer
- group: cert-manager.io
----
-apiVersion: v1
-kind: Secret
-metadata:
- name: fluentd-shared-key
- namespace: "{{ k3s_logging_namespace }}"
-type: Opaque
-data:
- fluentd-shared-key: "{{ fluentd_shared_key | b64encode }}"
+ group: cert-manager.io
\ No newline at end of file
diff --git a/argocd/system/logging/templates/fluentd-externalsecret.yaml b/argocd/system/logging/templates/fluentd-externalsecret.yaml
new file mode 100644
index 00000000..443006f5
--- /dev/null
+++ b/argocd/system/logging/templates/fluentd-externalsecret.yaml
@@ -0,0 +1,18 @@
+apiVersion: external-secrets.io/v1beta1
+kind: ExternalSecret
+metadata:
+ name: fluentd-externalsecret
+ namespace: {{ .Release.Namespace }}
+spec:
+ secretStoreRef:
+ name: vault-backend
+ kind: ClusterSecretStore
+ target:
+ name: fluentd-shared-key
+ data:
+ - secretKey: fluentd-shared-key
+ remoteRef:
+ key: logging/fluentd
+ property: shared_key
+ conversionStrategy: Default # ArgoCD sync issue
+ decodingStrategy: None # ArgoCD sync issue
diff --git a/roles/logging/k3s/templates/fluentd_service_ext.yml.j2 b/argocd/system/logging/templates/fluentd-extservice.yaml
similarity index 73%
rename from roles/logging/k3s/templates/fluentd_service_ext.yml.j2
rename to argocd/system/logging/templates/fluentd-extservice.yaml
index a1ee225a..9a2dc3d8 100644
--- a/roles/logging/k3s/templates/fluentd_service_ext.yml.j2
+++ b/argocd/system/logging/templates/fluentd-extservice.yaml
@@ -1,12 +1,10 @@
----
-#External Service
apiVersion: v1
kind: Service
metadata:
labels:
app: fluentd
name: fluentd-ext
- namespace: "{{ k3s_logging_namespace }}"
+ namespace: {{ .Release.Namespace }}
spec:
ports:
- name: forward-ext
@@ -18,4 +16,4 @@ spec:
app.kubernetes.io/name: fluentd
sessionAffinity: None
type: LoadBalancer
- loadBalancerIP: {{ k3s_fluentd_external_ip }}
+ loadBalancerIP: {{ .Values.external.fluentd.loadBalancerIp }}
diff --git a/argocd/system/logging/templates/fluentd-hpa.yaml b/argocd/system/logging/templates/fluentd-hpa.yaml
new file mode 100644
index 00000000..2ef9a1d0
--- /dev/null
+++ b/argocd/system/logging/templates/fluentd-hpa.yaml
@@ -0,0 +1,22 @@
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+ labels:
+ app.kubernetes.io/instance: logging
+ app.kubernetes.io/name: fluentd
+ name: logging-fluentd
+ namespace: {{ .Release.Namespace }}
+spec:
+ maxReplicas: 100
+ metrics:
+ - resource:
+ name: cpu
+ target:
+ averageUtilization: 80
+ type: Utilization
+ type: Resource
+ minReplicas: 1
+ scaleTargetRef:
+ apiVersion: apps/v1
+ kind: Deployment
+ name: logging-fluentd
diff --git a/argocd/system/logging/templates/kibana-ingress.yaml b/argocd/system/logging/templates/kibana-ingress.yaml
new file mode 100644
index 00000000..7fa8c7fd
--- /dev/null
+++ b/argocd/system/logging/templates/kibana-ingress.yaml
@@ -0,0 +1,45 @@
+---
+# Traefik Middleware adding linkerd custom header
+apiVersion: traefik.containo.us/v1alpha1
+kind: Middleware
+metadata:
+ name: l5d-header-middleware-kibana
+ namespace: {{ .Release.Namespace }}
+spec:
+ headers:
+ customRequestHeaders:
+ l5d-dst-override: {{ .Values.eck.clusterName }}-kb-http.{{ .Release.Namespace }}.svc.cluster.local:5601
+---
+# HTTPS Ingress
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+ name: kibana-ingress
+ namespace: {{ .Release.Namespace }}
+ annotations:
+ # HTTPS as entry point
+ traefik.ingress.kubernetes.io/router.entrypoints: websecure
+ # Enable TLS
+ traefik.ingress.kubernetes.io/router.tls: "true"
+    # Enable cert-manager to automatically create the SSL certificate and store it in a Secret
+ cert-manager.io/cluster-issuer: {{ .Values.kibana.ingress.certmanager.tlsIssuer }}-issuer
+ cert-manager.io/common-name: {{ .Values.kibana.ingress.host }}
+ # Linkerd header
+ traefik.ingress.kubernetes.io/router.middlewares:
+ {{ .Release.Namespace }}-l5d-header-middleware-kibana@kubernetescrd
+spec:
+ tls:
+ - hosts:
+ - {{ .Values.kibana.ingress.host }}
+ secretName: kibana-tls
+ rules:
+ - host: {{ .Values.kibana.ingress.host }}
+ http:
+ paths:
+ - path: /
+ pathType: Prefix
+ backend:
+ service:
+ name: {{ .Values.eck.clusterName }}-kb-http
+ port:
+ number: 5601
diff --git a/roles/logging/k3s/templates/kibana.yml.j2 b/argocd/system/logging/templates/kibana.yaml
similarity index 56%
rename from roles/logging/k3s/templates/kibana.yml.j2
rename to argocd/system/logging/templates/kibana.yaml
index d26e614c..c89f99c2 100644
--- a/roles/logging/k3s/templates/kibana.yml.j2
+++ b/argocd/system/logging/templates/kibana.yaml
@@ -1,30 +1,26 @@
----
apiVersion: kibana.k8s.elastic.co/v1
kind: Kibana
metadata:
- name: "{{ efk_cluster_name }}"
- namespace: "{{ k3s_logging_namespace }}"
+ name: {{ .Values.eck.clusterName }}
+ namespace: {{ .Release.Namespace }}
spec:
- version: {{ efk_elasticsearch_version }}
+ version: {{ .Values.elasticsearch.version }}
count: 1
elasticsearchRef:
- name: "{{ efk_cluster_name }}"
+ name: {{ .Values.eck.clusterName }}
http:
tls:
selfSignedCertificate:
disabled: true
podTemplate:
spec:
-{% if enable_linkerd is sameas true %}
# Enabling service account token. linkerd requirement
automountServiceAccountToken: true
-{% endif %}
+{{- if .Values.kibana.podSpecResources }}
# Limiting Resources consumption
containers:
- name: kibana
resources:
- requests:
- memory: 1Gi
- limits:
- memory: 1Gi
+{{ toYaml .Values.kibana.podSpecResources | indent 10 }}
+{{- end }}
diff --git a/argocd/system/logging/templates/logging-cm.yaml b/argocd/system/logging/templates/logging-cm.yaml
new file mode 100644
index 00000000..3cbe0d7d
--- /dev/null
+++ b/argocd/system/logging/templates/logging-cm.yaml
@@ -0,0 +1,10 @@
+# Logging internal services endpoints
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: logging-stack-cm
+ namespace: {{ .Release.Namespace }}
+data:
+ lokiURL: "http://loki-gateway"
+ elasticsearchHost: {{ .Values.eck.clusterName }}-es-http
+ fluentdHost: {{ .Release.Name }}-fluentd
diff --git a/argocd/system/logging/templates/loki-externalsecret.yaml b/argocd/system/logging/templates/loki-externalsecret.yaml
new file mode 100644
index 00000000..137adb76
--- /dev/null
+++ b/argocd/system/logging/templates/loki-externalsecret.yaml
@@ -0,0 +1,29 @@
+apiVersion: external-secrets.io/v1beta1
+kind: ExternalSecret
+metadata:
+ name: minio-externalsecret
+ namespace: {{ .Release.Namespace }}
+spec:
+ secretStoreRef:
+ name: vault-backend
+ kind: ClusterSecretStore
+ target:
+ name: loki-minio-secret
+ template:
+ engineVersion: v2
+ data:
+ MINIO_ACCESS_KEY_ID: '{{ printf "{{ .user }}" }}'
+ MINIO_SECRET_ACCESS_KEY: '{{ printf "{{ .key }}" }}'
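+ # The printf calls make Helm emit the literal placeholders "{{ .user }}" and
+ # "{{ .key }}", so they are resolved by External Secrets' own template engine
+ # (engineVersion v2) from the remoteRef data below, not by Helm at render time.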
+ data:
+ - secretKey: user
+ remoteRef:
+ key: minio/loki
+ property: user
+ conversionStrategy: Default # ArgoCD sync issue
+ decodingStrategy: None # ArgoCD sync issue
+ - secretKey: key
+ remoteRef:
+ key: minio/loki
+ property: key
+ conversionStrategy: Default # ArgoCD sync issue
+ decodingStrategy: None # ArgoCD sync issue
diff --git a/argocd/system/logging/templates/servicemonitor.yaml b/argocd/system/logging/templates/servicemonitor.yaml
new file mode 100644
index 00000000..5bc090e9
--- /dev/null
+++ b/argocd/system/logging/templates/servicemonitor.yaml
@@ -0,0 +1,74 @@
+{{- if and .Values.serviceMonitor.enable (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/ServiceMonitor") }}
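+{{/* The guard above renders ServiceMonitors only when enabled in values.yaml and the
+     Prometheus Operator API (monitoring.coreos.com/v1) is already registered in the
+     cluster, so an initial sync without kube-prometheus-stack does not fail. */}}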
+---
+# Elasticsearch ServiceMonitor
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+ labels:
+ app: prometheus-elasticsearch-exporter
+ release: {{ .Values.serviceMonitor.release }}
+ name: elasticsearch-prometheus-servicemonitor
+ namespace: {{ .Release.Namespace }}
+spec:
+ endpoints:
+ - port: http
+ path: /metrics
+ namespaceSelector:
+ matchNames:
+ - {{ .Release.Namespace }}
+ selector:
+ matchLabels:
+ app: prometheus-elasticsearch-exporter
+
+---
+# Fluentd ServiceMonitor
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+ labels:
+ app: fluentd
+ release: {{ .Values.serviceMonitor.release }}
+ name: fluentd-prometheus-servicemonitor
+ namespace: {{ .Release.Namespace }}
+spec:
+ jobLabel: app.kubernetes.io/name
+ endpoints:
+ - port: metrics
+ path: /metrics
+ namespaceSelector:
+ matchNames:
+ - {{ .Release.Namespace }}
+ selector:
+ matchLabels:
+ app.kubernetes.io/instance: {{ .Release.Name }}
+ app.kubernetes.io/name: fluentd
+
+---
+# Fluent-bit ServiceMonitor
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+ labels:
+ app: fluent-bit
+ release: {{ .Values.serviceMonitor.release }}
+ name: fluentbit-prometheus-servicemonitor
+ namespace: {{ .Release.Namespace }}
+spec:
+ jobLabel: app.kubernetes.io/name
+ endpoints:
+ - path: /api/v1/metrics/prometheus
+ targetPort: 2020
+ - params:
+ target:
+ - http://127.0.0.1:2020/api/v1/storage
+ path: /probe
+ targetPort: 7979
+ namespaceSelector:
+ matchNames:
+ - {{ .Release.Namespace }}
+ selector:
+ matchLabels:
+ app.kubernetes.io/instance: {{ .Release.Name }}
+ app.kubernetes.io/name: fluent-bit
+
+{{- end }}
\ No newline at end of file
diff --git a/argocd/system/logging/values.yaml b/argocd/system/logging/values.yaml
new file mode 100644
index 00000000..ea5d7e55
--- /dev/null
+++ b/argocd/system/logging/values.yaml
@@ -0,0 +1,678 @@
+
+# Elasticsearch configuration
+eck:
+ # elasticsearch cluster name
+ clusterName: efk
+
+elasticsearch:
+ # elastic search version
+ version: 8.1.2
+ # Number of Elastic Search nodes
+ clusterNodes: 1
+ # Enable/Disable memory map
+ enableMmap: false
+ # Storage Settings
+ storage:
+ size: "5Gi"
+ class: "longhorn"
+ # Limiting resources of elasticsearch pod
+ podSpecResources:
+ requests:
+ memory: "1Gi"
+ limits:
+ memory: "1Gi"
+
+ # Ingress configuration
+ ingress:
+ host: elasticsearch.picluster.ricsanfre.com
+ # configure cert-manager issuer
+ certmanager:
+ # tlsIssuer=letsencrypt to generate a valid TLS certificate using the IONOS API
+ # tlsIssuer=ca to generate a CA-signed certificate (not publicly trusted)
+ tlsIssuer: letsencrypt
+
+# Kibana configuration
+kibana:
+ # Limiting resources of kibana pod
+ # podSpecResources:
+ # requests:
+ # memory: "512Mi"
+ # limits:
+ # memory: "512Mi"
+
+ # Ingress configuration
+ ingress:
+ host: kibana.picluster.ricsanfre.com
+ # configure cert-manager issuer
+ certmanager:
+ # tlsIssuer=letsencrypt to generate a valid TLS certificate using the IONOS API
+ # tlsIssuer=ca to generate a CA-signed certificate (not publicly trusted)
+ tlsIssuer: letsencrypt
+
+# LoadBalancer and DNS config
+external:
+ fluentd:
+ loadBalancerIp: 10.0.0.101
+ dns: fluentd.picluster.ricsanfre.com
+
+# Prometheus monitoring
+serviceMonitor:
+ enable: true
+ release: monitoring
+
+
+##################
+# Loki subchart
+##################
+
+loki:
+ loki:
+ # Disable multi-tenant support
+ auth_enabled: false
+
+ # S3 backend storage configuration
+ storage:
+ bucketNames:
+ chunks: k3s-loki
+ ruler: k3s-loki
+ type: s3
+ s3:
+ endpoint: s3.picluster.ricsanfre.com:9091
+ region: eu-west-1
+ secretAccessKey: ${MINIO_SECRET_ACCESS_KEY}
+ accessKeyId: ${MINIO_ACCESS_KEY_ID}
+ s3ForcePathStyle: true
+ insecure: false
+ http_config:
+ idle_conn_timeout: 90s
+ response_header_timeout: 0s
+ insecure_skip_verify: false
+
+ # Configuration for the write
+ write:
+ # Number of replicas for the write
+ replicas: 2
+ persistence:
+ # -- Size of persistent disk
+ size: 10Gi
+ # -- Storage class to be used.
+ storageClass: longhorn
+
+ # Enable environment variables in config file
+ # https://grafana.com/docs/loki/latest/configuration/#use-environment-variables-in-the-configuration
+ extraArgs:
+ - '-config.expand-env=true'
+ extraEnv:
+ - name: MINIO_ACCESS_KEY_ID
+ valueFrom:
+ secretKeyRef:
+ name: loki-minio-secret
+ key: MINIO_ACCESS_KEY_ID
+ - name: MINIO_SECRET_ACCESS_KEY
+ valueFrom:
+ secretKeyRef:
+ name: loki-minio-secret
+ key: MINIO_SECRET_ACCESS_KEY
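+ # Because of -config.expand-env=true, the ${MINIO_ACCESS_KEY_ID} and
+ # ${MINIO_SECRET_ACCESS_KEY} placeholders in loki.storage.s3 above are expanded at
+ # runtime from these variables, sourced from the ExternalSecret-managed loki-minio-secret.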
+
+ # Configuration for the read
+ read:
+ # Number of replicas for the read
+ replicas: 2
+ persistence:
+ # -- Size of persistent disk
+ size: 10Gi
+ # -- Storage class to be used.
+ storageClass: longhorn
+
+ # Enable environment variables in config file
+ # https://grafana.com/docs/loki/latest/configuration/#use-environment-variables-in-the-configuration
+ extraArgs:
+ - '-config.expand-env=true'
+ extraEnv:
+ - name: MINIO_ACCESS_KEY_ID
+ valueFrom:
+ secretKeyRef:
+ name: loki-minio-secret
+ key: MINIO_ACCESS_KEY_ID
+ - name: MINIO_SECRET_ACCESS_KEY
+ valueFrom:
+ secretKeyRef:
+ name: loki-minio-secret
+ key: MINIO_SECRET_ACCESS_KEY
+
+ # Configuration for the gateway
+ gateway:
+ # -- Specifies whether the gateway should be enabled
+ enabled: true
+ # -- Number of replicas for the gateway
+ replicas: 1
+
+ # Disable MinIO installation
+ minio:
+ enabled: false
+
+ # Disable self-monitoring
+ monitoring:
+ selfMonitoring:
+ enabled: false
+ grafanaAgent:
+ installOperator: false
+ lokiCanary:
+ enabled: false
+
+ # Disable helm-test
+ test:
+ enabled: false
+
+#########################
+# Fluentd Subchart
+#########################
+
+fluentd:
+
+ # Fluentd image
+ image:
+ repository: "ricsanfre/fluentd-aggregator"
+ pullPolicy: "IfNotPresent"
+ tag: "v1.15.2-debian-1.0"
+
+ # Deploy fluentd as deployment
+ kind: "Deployment"
+ # Number of replicas
+ replicaCount: 1
+ # Enabling HPA
+ autoscaling:
+ enabled: false
+ minReplicas: 1
+ maxReplicas: 100
+ targetCPUUtilizationPercentage: 80
+
+ # Do not create ServiceAccount or RBAC resources. Fluentd does not need access to the Kubernetes API.
+ serviceAccount:
+ create: false
+ rbac:
+ create: false
+
+ ## Additional environment variables to set for fluentd pods
+ env:
+ # Path to fluentd conf file
+ - name: "FLUENTD_CONF"
+ value: "../../../etc/fluent/fluent.conf"
+ # Elastic operator creates elastic service name with format cluster_name-es-http
+ - name: FLUENT_ELASTICSEARCH_HOST
+ valueFrom:
+ configMapKeyRef:
+ name: "logging-stack-cm"
+ key: elasticsearchHost
+ # value: efk-es-http
+ # Default elasticsearch default port
+ - name: FLUENT_ELASTICSEARCH_PORT
+ value: "9200"
+ # Elasticsearch user
+ - name: FLUENT_ELASTICSEARCH_USER
+ value: "elastic"
+ # Elastic operator stores elastic user password in a secret
+ - name: FLUENT_ELASTICSEARCH_PASSWORD
+ valueFrom:
+ secretKeyRef:
+ name: "efk-es-elastic-user"
+ key: elastic
+ # Setting an index prefix for fluentd. By default the prefix is logstash
+ - name: FLUENT_ELASTICSEARCH_LOGSTASH_PREFIX
+ value: fluentd
+ - name: FLUENT_ELASTICSEARCH_LOG_ES_400_REASON
+ value: "true"
+ # Fluentd forward security
+ - name: FLUENTD_FORWARD_SEC_SHARED_KEY
+ valueFrom:
+ secretKeyRef:
+ name: fluentd-shared-key
+ key: fluentd-shared-key
+ # Loki url
+ - name: LOKI_URL
+ valueFrom:
+ configMapKeyRef:
+ name: "logging-stack-cm"
+ key: lokiURL
+ # value: "http://loki-gateway"
+ # Loki username
+ - name: LOKI_USERNAME
+ value: ""
+ # Loki password
+ - name: LOKI_PASSWORD
+ value: ""
+
+ # Volumes and VolumeMounts (only configuration files and certificates)
+ volumes:
+ - name: etcfluentd-main
+ configMap:
+ name: fluentd-main
+ defaultMode: 0777
+ - name: etcfluentd-config
+ configMap:
+ name: fluentd-config
+ defaultMode: 0777
+ - name: fluentd-tls
+ secret:
+ secretName: fluentd-tls
+
+ volumeMounts:
+ - name: etcfluentd-main
+ mountPath: /etc/fluent
+ - name: etcfluentd-config
+ mountPath: /etc/fluent/config.d/
+ - mountPath: /etc/fluent/certs
+ name: fluentd-tls
+ readOnly: true
+
+ # Service. Exposing the forwarder port (the metrics port is already exposed by the chart)
+ service:
+ type: "ClusterIP"
+ annotations: {}
+ ports:
+ - name: forwarder
+ protocol: TCP
+ containerPort: 24224
+
+ ## Fluentd list of plugins to install
+ ##
+ plugins: []
+ # - fluent-plugin-out-http
+
+ ## Do not create additional config maps
+ ##
+ configMapConfigs: []
+
+ ## Fluentd configurations:
+ ##
+ fileConfigs:
+ 01_sources.conf: |-
+   ## logs from fluentbit forwarders
+   <source>
+     @type forward
+     @label @FORWARD
+     bind "#{ENV['FLUENTD_FORWARD_BIND'] || '0.0.0.0'}"
+     port "#{ENV['FLUENTD_FORWARD_PORT'] || '24224'}"
+     # Enabling TLS
+     <transport tls>
+       cert_path /etc/fluent/certs/tls.crt
+       private_key_path /etc/fluent/certs/tls.key
+     </transport>
+     # Enabling access security
+     <security>
+       self_hostname "#{ENV['FLUENTD_FORWARD_SEC_SELFHOSTNAME'] || 'fluentd-aggregator'}"
+       shared_key "#{ENV['FLUENTD_FORWARD_SEC_SHARED_KEY'] || 'sharedkey'}"
+     </security>
+   </source>
+   ## Enable Prometheus end point
+   <source>
+     @type prometheus
+     @id in_prometheus
+     bind "0.0.0.0"
+     port 24231
+     metrics_path "/metrics"
+   </source>
+   <source>
+     @type prometheus_monitor
+     @id in_prometheus_monitor
+   </source>
+   <source>
+     @type prometheus_output_monitor
+     @id in_prometheus_output_monitor
+   </source>
+ 02_filters.conf: |-
+   <label @FORWARD>
+     # Re-route fluentd logs
+     <match fluent.**>
+       @type relabel
+       @label @FLUENT_LOG
+     </match>
+     ## Get kubernetes fields
+     <filter kube.**>
+       @type record_modifier
+       remove_keys kubernetes, __dummy__, __dummy2__
+       <record>
+         __dummy__ ${ p = record["kubernetes"]["labels"]["app"]; p.nil? ? p : record['app'] = p; }
+         __dummy2__ ${ p = record["kubernetes"]["labels"]["app.kubernetes.io/name"]; p.nil? ? p : record['app'] = p; }
+         namespace ${ record.dig("kubernetes","namespace_name") }
+         pod ${ record.dig("kubernetes", "pod_name") }
+         container ${ record.dig("kubernetes", "container_name") }
+         host ${ record.dig("kubernetes", "host")}
+       </record>
+     </filter>
+     <match **>
+       @type relabel
+       @label @DISPATCH
+     </match>
+   </label>
+ 03_dispatch.conf: |-
+   <label @DISPATCH>
+     # Calculate prometheus metrics
+     <filter **>
+       @type prometheus
+       <metric>
+         name fluentd_input_status_num_records_total
+         type counter
+         desc The total number of incoming records
+         <labels>
+           tag ${tag}
+           hostname ${host}
+         </labels>
+       </metric>
+     </filter>
+     # Copy log stream to different outputs
+     <match **>
+       @type copy
+       <store>
+         @type relabel
+         @label @OUTPUT_ES
+       </store>
+       <store>
+         @type relabel
+         @label @OUTPUT_LOKI
+       </store>
+     </match>
+   </label>
+ 04_outputs.conf: |-
+   <label @OUTPUT_ES>
+     ## Avoid ES rejection due to conflicting field types when using fluentbit merge_log
+     <filter kube.**>
+       @type record_transformer
+       enable_ruby true
+       remove_keys log_processed
+       <record>
+         message_${record["container"]} ${(record.has_key?('log_processed'))? record['log_processed'] : nil}
+       </record>
+     </filter>
+     # Send received logs to elasticsearch
+     <match **>
+       @type elasticsearch
+       @id out_es
+       @log_level info
+       include_tag_key true
+       host "#{ENV['FLUENT_ELASTICSEARCH_HOST']}"
+       port "#{ENV['FLUENT_ELASTICSEARCH_PORT']}"
+       path "#{ENV['FLUENT_ELASTICSEARCH_PATH']}"
+       scheme "#{ENV['FLUENT_ELASTICSEARCH_SCHEME'] || 'http'}"
+       ssl_verify "#{ENV['FLUENT_ELASTICSEARCH_SSL_VERIFY'] || 'true'}"
+       ssl_version "#{ENV['FLUENT_ELASTICSEARCH_SSL_VERSION'] || 'TLSv1_2'}"
+       user "#{ENV['FLUENT_ELASTICSEARCH_USER'] || use_default}"
+       password "#{ENV['FLUENT_ELASTICSEARCH_PASSWORD'] || use_default}"
+       reload_connections "#{ENV['FLUENT_ELASTICSEARCH_RELOAD_CONNECTIONS'] || 'false'}"
+       reconnect_on_error "#{ENV['FLUENT_ELASTICSEARCH_RECONNECT_ON_ERROR'] || 'true'}"
+       reload_on_failure "#{ENV['FLUENT_ELASTICSEARCH_RELOAD_ON_FAILURE'] || 'true'}"
+       log_es_400_reason "#{ENV['FLUENT_ELASTICSEARCH_LOG_ES_400_REASON'] || 'false'}"
+       logstash_prefix "#{ENV['FLUENT_ELASTICSEARCH_LOGSTASH_PREFIX'] || 'logstash'}"
+       logstash_dateformat "#{ENV['FLUENT_ELASTICSEARCH_LOGSTASH_DATEFORMAT'] || '%Y.%m.%d'}"
+       logstash_format "#{ENV['FLUENT_ELASTICSEARCH_LOGSTASH_FORMAT'] || 'true'}"
+       index_name "#{ENV['FLUENT_ELASTICSEARCH_LOGSTASH_INDEX_NAME'] || 'logstash'}"
+       target_index_key "#{ENV['FLUENT_ELASTICSEARCH_TARGET_INDEX_KEY'] || use_nil}"
+       type_name "#{ENV['FLUENT_ELASTICSEARCH_LOGSTASH_TYPE_NAME'] || 'fluentd'}"
+       include_timestamp "#{ENV['FLUENT_ELASTICSEARCH_INCLUDE_TIMESTAMP'] || 'false'}"
+       template_name "#{ENV['FLUENT_ELASTICSEARCH_TEMPLATE_NAME'] || use_nil}"
+       template_file "#{ENV['FLUENT_ELASTICSEARCH_TEMPLATE_FILE'] || use_nil}"
+       template_overwrite "#{ENV['FLUENT_ELASTICSEARCH_TEMPLATE_OVERWRITE'] || use_default}"
+       sniffer_class_name "#{ENV['FLUENT_SNIFFER_CLASS_NAME'] || 'Fluent::Plugin::ElasticsearchSimpleSniffer'}"
+       request_timeout "#{ENV['FLUENT_ELASTICSEARCH_REQUEST_TIMEOUT'] || '5s'}"
+       application_name "#{ENV['FLUENT_ELASTICSEARCH_APPLICATION_NAME'] || use_default}"
+       suppress_type_name "#{ENV['FLUENT_ELASTICSEARCH_SUPPRESS_TYPE_NAME'] || 'true'}"
+       enable_ilm "#{ENV['FLUENT_ELASTICSEARCH_ENABLE_ILM'] || 'false'}"
+       ilm_policy_id "#{ENV['FLUENT_ELASTICSEARCH_ILM_POLICY_ID'] || use_default}"
+       ilm_policy "#{ENV['FLUENT_ELASTICSEARCH_ILM_POLICY'] || use_default}"
+       ilm_policy_overwrite "#{ENV['FLUENT_ELASTICSEARCH_ILM_POLICY_OVERWRITE'] || 'false'}"
+       <buffer>
+         flush_thread_count "#{ENV['FLUENT_ELASTICSEARCH_BUFFER_FLUSH_THREAD_COUNT'] || '8'}"
+         flush_interval "#{ENV['FLUENT_ELASTICSEARCH_BUFFER_FLUSH_INTERVAL'] || '5s'}"
+         chunk_limit_size "#{ENV['FLUENT_ELASTICSEARCH_BUFFER_CHUNK_LIMIT_SIZE'] || '2M'}"
+         queue_limit_length "#{ENV['FLUENT_ELASTICSEARCH_BUFFER_QUEUE_LIMIT_LENGTH'] || '32'}"
+         retry_max_interval "#{ENV['FLUENT_ELASTICSEARCH_BUFFER_RETRY_MAX_INTERVAL'] || '30'}"
+         retry_forever true
+       </buffer>
+     </match>
+   </label>
+   <label @OUTPUT_LOKI>
+     # Rename log_processed to message
+     <filter kube.**>
+       @type record_modifier
+       remove_keys __dummy__, log_processed
+       <record>
+         __dummy__ ${if record.has_key?('log_processed'); record['message'] = record['log_processed']; end; nil}
+       </record>
+     </filter>
+     <match **>
+       @type loki
+       @id out_loki_kube
+       @log_level info
+       url "#{ENV['LOKI_URL']}"
+       username "#{ENV['LOKI_USERNAME'] || use_default}"
+       password "#{ENV['LOKI_PASSWORD'] || use_default}"
+       extra_labels {"job": "fluentd"}
+       line_format json
+       <label>
+         app
+         container
+         pod
+         namespace
+         host
+         filename
+       </label>
+       <buffer>
+         flush_thread_count 8
+         flush_interval 5s
+         chunk_limit_size 2M
+         queue_limit_length 32
+         retry_max_interval 30
+         retry_forever true
+       </buffer>
+     </match>
+   </label>
+
+#########################
+# Fluent-bit configuration
+#########################
+
+fluent-bit:
+
+ # Fluentbit container environment variables:
+ env:
+ # Fluentd deployment service
+ - name: FLUENT_AGGREGATOR_HOST
+ #value: "fluentd"
+ valueFrom:
+ configMapKeyRef:
+ name: "logging-stack-cm"
+ key: fluentdHost
+ # Default fluentd forward port
+ - name: FLUENT_AGGREGATOR_PORT
+ value: "24224"
+ - name: FLUENT_AGGREGATOR_SHARED_KEY
+ valueFrom:
+ secretKeyRef:
+ name: fluentd-shared-key
+ key: fluentd-shared-key
+ - name: FLUENT_SELFHOSTNAME
+ valueFrom:
+ fieldRef:
+ fieldPath: spec.nodeName
+ # Specify TZ
+ - name: TZ
+ value: "Europe/Madrid"
+ # Fluentbit config
+ config:
+ # Helm chart combines service, inputs, outputs, custom_parsers and filters section
+ # fluent-bit.config SERVICE
+ service: |
+
+ [SERVICE]
+ Daemon Off
+ Flush 1
+ Log_Level info
+ Parsers_File parsers.conf
+ Parsers_File custom_parsers.conf
+ HTTP_Server On
+ HTTP_Listen 0.0.0.0
+ HTTP_Port 2020
+ Health_Check On
+ storage.path /var/log/fluentbit/storage
+ storage.sync normal
+ storage.checksum off
+ storage.backlog.mem_limit 5M
+ storage.metrics on
+
+ # fluent-bit.config INPUT:
+ inputs: |
+
+ [INPUT]
+ Name tail
+ Alias input.kube
+ Path /var/log/containers/*.log
+ Path_Key filename
+ multiline.parser docker, cri
+ DB /var/log/fluentbit/flb_kube.db
+ Tag kube.*
+ Mem_Buf_Limit 5MB
+ storage.type filesystem
+ Skip_Long_Lines On
+
+ [INPUT]
+ Name tail
+ Alias input.host
+ Tag host.*
+ DB /var/log/fluentbit/flb_host.db
+ Path /var/log/auth.log,/var/log/syslog
+ Path_Key filename
+ Mem_Buf_Limit 5MB
+ storage.type filesystem
+ Parser syslog-rfc3164-nopri
+
+ # fluent-bit.config OUTPUT
+ outputs: |
+
+ [OUTPUT]
+ Name forward
+ Alias output.aggregator
+ match *
+ Host ${FLUENT_AGGREGATOR_HOST}
+ Port ${FLUENT_AGGREGATOR_PORT}
+ Self_Hostname ${FLUENT_SELFHOSTNAME}
+ Shared_Key ${FLUENT_AGGREGATOR_SHARED_KEY}
+ tls On
+ tls.verify Off
+
+ # fluent-bit.config PARSERS:
+ customParsers: |
+
+ [PARSER]
+ Name syslog-rfc3164-nopri
+ Format regex
+ Regex /^(?<time>[^ ]* {1,2}[^ ]* [^ ]*) (?<host>[^ ]*) (?<ident>[a-zA-Z0-9_\/\.\-]*)(?:\[(?<pid>[0-9]+)\])?(?:[^\:]*\:)? *(?<message>.*)$/
+ Time_Key time
+ Time_Format %b %d %H:%M:%S
+ Time_Keep Off
+
+ # fluent-bit.config FILTERS:
+ filters: |
+ [FILTER]
+ name multiline
+ match *
+ multiline.key_content log
+ multiline.parser java,python,go
+
+ [FILTER]
+ Name kubernetes
+ Match kube.*
+ Buffer_Size 512k
+ Kube_Tag_Prefix kube.var.log.containers.
+ Merge_Log On
+ Merge_Log_Trim Off
+ Merge_Log_Key log_processed
+ Keep_Log Off
+ K8S-Logging.Parser On
+ K8S-Logging.Exclude On
+ Annotations Off
+ Labels On
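+ # Merge_Log_Key nests the parsed JSON log fields under "log_processed"; the fluentd
+ # aggregator's output filters (see 04_outputs.conf above) later flatten or rename that key.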
+
+ [FILTER]
+ Name modify
+ Match kube.*
+ Remove _p
+ Rename log message
+
+ [FILTER]
+ Name lua
+ Match host.*
+ script /fluent-bit/scripts/adjust_ts.lua
+ call local_timestamp_to_UTC
+ # json-exporter config
+ extraFiles:
+ json-exporter-config.yml: |
+ modules:
+ default:
+ metrics:
+ - name: fluenbit_storage_layer
+ type: object
+ path: '{.storage_layer}'
+ help: The total number of chunks in the fs storage
+ values:
+ fs_chunks_up: '{.chunks.fs_chunks_up}'
+ fs_chunks_down: '{.chunks.fs_chunks_down}'
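+ # This module is scraped via the ServiceMonitor /probe endpoint: the json-exporter
+ # sidecar (port 7979) queries fluent-bit's local http://127.0.0.1:2020/api/v1/storage
+ # API and exposes the chunk counters above as Prometheus metrics.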
+
+ # Fluentbit config Lua Scripts.
+ luaScripts:
+ adjust_ts.lua: |
+ function local_timestamp_to_UTC(tag, timestamp, record)
+ local utcdate = os.date("!*t", ts)
+ local localdate = os.date("*t", ts)
+ localdate.isdst = false -- this is the trick
+ utc_time_diff = os.difftime(os.time(localdate), os.time(utcdate))
+ return 1, timestamp - utc_time_diff, record
+ end
+
+ # Enable fluentbit installation on the master node.
+ tolerations:
+ - key: node-role.kubernetes.io/master
+ operator: Exists
+ effect: NoSchedule
+
+ # Init container. Create directory for fluentbit
+ initContainers:
+ - name: init-fluentbit-directory
+ image: busybox
+ command: ['/bin/sh', '-c', 'if [ ! -d /var/log/fluentbit ]; then mkdir -p /var/log/fluentbit; fi ; if [ ! -d /var/log/fluentbit/tail-db ]; then mkdir -p /var/log/fluentbit/tail-db; fi ; if [ ! -d /var/log/fluentbit/storage ]; then mkdir -p /var/log/fluentbit/storage; fi']
+ volumeMounts:
+ - name: varlog
+ mountPath: /var/log
+ # Sidecar container to export storage metrics
+ extraContainers:
+ - name: json-exporter
+ image: quay.io/prometheuscommunity/json-exporter
+ command: ['/bin/json_exporter']
+ args: ['--config.file=/json-exporter-config.yml']
+ ports:
+ - containerPort: 7979
+ name: http
+ protocol: TCP
+ volumeMounts:
+ - mountPath: /json-exporter-config.yml
+ name: config
+ subPath: json-exporter-config.yml
+
+
+#########################
+# prometheus-elasticsearch-exporter subchart
+#########################
+
+prometheus-elasticsearch-exporter:
+ # Elastic search user
+ env:
+ ES_USERNAME: elastic
+
+ # Elasticsearch password from secret
+ extraEnvSecrets:
+ ES_PASSWORD:
+ secret: efk-es-elastic-user
+ key: elastic
+
+ # Elastic search URI
+ es:
+ uri: http://efk-es-http:9200
diff --git a/argocd/system/longhorn-system/Chart.yaml b/argocd/system/longhorn-system/Chart.yaml
new file mode 100644
index 00000000..d678dc22
--- /dev/null
+++ b/argocd/system/longhorn-system/Chart.yaml
@@ -0,0 +1,7 @@
+apiVersion: v2
+name: longhorn
+version: 0.0.0
+dependencies:
+ - name: longhorn
+ version: 1.3.2
+ repository: https://charts.longhorn.io
\ No newline at end of file
diff --git a/argocd/system/longhorn-system/templates/ingress.yaml b/argocd/system/longhorn-system/templates/ingress.yaml
new file mode 100644
index 00000000..7a49baf8
--- /dev/null
+++ b/argocd/system/longhorn-system/templates/ingress.yaml
@@ -0,0 +1,36 @@
+---
+# HTTPS Ingress
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+ name: longhorn-ingress
+ namespace: {{ .Release.Namespace }}
+ annotations:
+ # HTTPS as entry point
+ traefik.ingress.kubernetes.io/router.entrypoints: websecure
+ # Enable TLS
+ traefik.ingress.kubernetes.io/router.tls: "true"
+{{- if .Values.ingress.basicAuth.enable }}
+ # Use the Basic Auth Middleware already configured
+ traefik.ingress.kubernetes.io/router.middlewares:
+ {{ .Values.ingress.basicAuth.middlewareNamespace }}-{{ .Values.ingress.basicAuth.middlewareName }}@kubernetescrd
+{{- end }}
+ # Enable cert-manager to create automatically the SSL certificate and store in Secret
+ cert-manager.io/cluster-issuer: {{ .Values.ingress.certmanager.tlsIssuer }}-issuer
+ cert-manager.io/common-name: {{ .Values.ingress.host }}
+spec:
+ tls:
+ - hosts:
+ - {{ .Values.ingress.host }}
+ secretName: longhorn-tls
+ rules:
+ - host: {{ .Values.ingress.host }}
+ http:
+ paths:
+ - path: /
+ pathType: Prefix
+ backend:
+ service:
+ name: longhorn-frontend
+ port:
+ number: 80
\ No newline at end of file
diff --git a/argocd/system/longhorn-system/templates/minio-externalsecret.yaml b/argocd/system/longhorn-system/templates/minio-externalsecret.yaml
new file mode 100644
index 00000000..507a895f
--- /dev/null
+++ b/argocd/system/longhorn-system/templates/minio-externalsecret.yaml
@@ -0,0 +1,30 @@
+apiVersion: external-secrets.io/v1beta1
+kind: ExternalSecret
+metadata:
+ name: minio-externalsecret
+ namespace: {{ .Release.Namespace }}
+spec:
+ secretStoreRef:
+ name: vault-backend
+ kind: ClusterSecretStore
+ target:
+ name: longhorn-minio-secret
+ template:
+ engineVersion: v2
+ data:
+ AWS_ENDPOINTS: {{ .Values.backup.minioUrl }}
+ AWS_ACCESS_KEY_ID: '{{ printf "{{ .user }}" }}'
+ AWS_SECRET_ACCESS_KEY: '{{ printf "{{ .key }}" }}'
+ data:
+ - secretKey: user
+ remoteRef:
+ key: minio/longhorn
+ property: user
+ conversionStrategy: Default # ArgoCD sync issue
+ decodingStrategy: None # ArgoCD sync issue
+ - secretKey: key
+ remoteRef:
+ key: minio/longhorn
+ property: key
+ conversionStrategy: Default # ArgoCD sync issue
+ decodingStrategy: None # ArgoCD sync issue
\ No newline at end of file
diff --git a/roles/prometheus/templates/longhorn_servicemonitor.yml.j2 b/argocd/system/longhorn-system/templates/service-monitor.yaml
similarity index 55%
rename from roles/prometheus/templates/longhorn_servicemonitor.yml.j2
rename to argocd/system/longhorn-system/templates/service-monitor.yaml
index fce461d7..6c3efa20 100644
--- a/roles/prometheus/templates/longhorn_servicemonitor.yml.j2
+++ b/argocd/system/longhorn-system/templates/service-monitor.yaml
@@ -1,12 +1,13 @@
+{{- if and .Values.serviceMonitor.enable (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/ServiceMonitor") }}
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
app: longhorn
- release: kube-prometheus-stack
+ release: {{ .Values.serviceMonitor.release }}
name: longhorn-prometheus-servicemonitor
- namespace: {{ k3s_monitoring_namespace }}
+ namespace: {{ .Release.Namespace }}
spec:
jobLabel: app.kubernetes.io/name
selector:
@@ -14,6 +15,8 @@ spec:
app: longhorn-manager
namespaceSelector:
matchNames:
- - longhorn-system
+ - {{ .Release.Namespace }}
endpoints:
- port: manager
+
+{{- end }}
\ No newline at end of file
diff --git a/roles/longhorn/templates/longhorn_volumesnapshotclass.yml.j2 b/argocd/system/longhorn-system/templates/volume-snapshot-class.yaml
similarity index 96%
rename from roles/longhorn/templates/longhorn_volumesnapshotclass.yml.j2
rename to argocd/system/longhorn-system/templates/volume-snapshot-class.yaml
index e5b0d675..10ec9f99 100644
--- a/roles/longhorn/templates/longhorn_volumesnapshotclass.yml.j2
+++ b/argocd/system/longhorn-system/templates/volume-snapshot-class.yaml
@@ -1,4 +1,5 @@
----
+# VolumeSnapshotClasses
+
# Default VolumeSnapshotClass
kind: VolumeSnapshotClass
diff --git a/argocd/system/longhorn-system/values.yaml b/argocd/system/longhorn-system/values.yaml
new file mode 100644
index 00000000..a5a8ddf5
--- /dev/null
+++ b/argocd/system/longhorn-system/values.yaml
@@ -0,0 +1,35 @@
+
+# Ingress configuration
+ingress:
+ host: longhorn.picluster.ricsanfre.com
+ # configure cert-manager issuer
+ certmanager:
+ # tlsIssuer=letsencrypt to generate a valid TLS certificate using the IONOS API
+ # tlsIssuer=ca to generate a CA-signed certificate (not publicly trusted)
+ tlsIssuer: letsencrypt
+ # Enable Traefik basic authentication, reusing the global middleware created for Traefik
+ basicAuth:
+ enable: true
+ middlewareName: basic-auth
+ middlewareNamespace: traefik
+
+# Backup S3 backend URL
+backup:
+ minioUrl: "https://s3.picluster.ricsanfre.com:9091"
+
+# Prometheus servicemonitor configuration
+serviceMonitor:
+ enable: true
+ release: monitoring
+
+#####################
+# longhorn subchart
+#####################
+
+longhorn:
+ defaultSettings:
+ defaultDataPath: "/storage"
+ # Backup S3 configuration
+ backupTarget: "s3://k3s-longhorn@eu-west-1/"
+ backupTargetCredentialSecret: longhorn-minio-secret
+
diff --git a/argocd/system/metallb/Chart.yaml b/argocd/system/metallb/Chart.yaml
new file mode 100644
index 00000000..216550c2
--- /dev/null
+++ b/argocd/system/metallb/Chart.yaml
@@ -0,0 +1,7 @@
+apiVersion: v2
+name: metallb
+version: 0.0.0
+dependencies:
+ - name: metallb
+ version: 0.13.7
+ repository: https://metallb.github.io/metallb
diff --git a/roles/metallb/templates/metallb_config.yml.j2 b/argocd/system/metallb/templates/metallb-addresspool.yaml
similarity index 54%
rename from roles/metallb/templates/metallb_config.yml.j2
rename to argocd/system/metallb/templates/metallb-addresspool.yaml
index 559eede8..d9348c19 100644
--- a/roles/metallb/templates/metallb_config.yml.j2
+++ b/argocd/system/metallb/templates/metallb-addresspool.yaml
@@ -1,21 +1,24 @@
----
+{{- if .Values.ipAddressPool.addresses }}
+
# Metallb address pool
apiVersion: metallb.io/v1beta1
kind: IPAddressPool
metadata:
name: picluster-pool
- namespace: {{ k3s_metallb_namespace }}
+ namespace: {{ .Release.Namespace }}
spec:
addresses:
- - "{{ k3s_external_ip_range }}"
+{{ toYaml .Values.ipAddressPool.addresses | indent 4 }}
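+# With the default values (ipAddressPool.addresses) this renders, for example:
+#   - "10.0.0.100-10.0.0.200"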
---
# L2 configuration
apiVersion: metallb.io/v1beta1
kind: L2Advertisement
metadata:
- name: example
- namespace: {{ k3s_metallb_namespace }}
+ name: l2-picluster-pool
+ namespace: {{ .Release.Namespace }}
spec:
ipAddressPools:
- picluster-pool
+
+{{- end }}
\ No newline at end of file
diff --git a/argocd/system/metallb/values.yaml b/argocd/system/metallb/values.yaml
new file mode 100644
index 00000000..59944139
--- /dev/null
+++ b/argocd/system/metallb/values.yaml
@@ -0,0 +1,4 @@
+# MetalLB IP address pool
+ipAddressPool:
+ addresses:
+ - "10.0.0.100-10.0.0.200"
\ No newline at end of file
diff --git a/argocd/system/monitoring/Chart.yaml b/argocd/system/monitoring/Chart.yaml
new file mode 100644
index 00000000..a3f78915
--- /dev/null
+++ b/argocd/system/monitoring/Chart.yaml
@@ -0,0 +1,7 @@
+apiVersion: v2
+name: monitoring
+version: 0.0.0
+dependencies:
+ - name: kube-prometheus-stack
+ version: 43.3.1
+ repository: https://prometheus-community.github.io/helm-charts
\ No newline at end of file
diff --git a/roles/prometheus/dashboards/elasticsearch.json b/argocd/system/monitoring/dashboards/elasticsearch.json
similarity index 100%
rename from roles/prometheus/dashboards/elasticsearch.json
rename to argocd/system/monitoring/dashboards/elasticsearch.json
diff --git a/roles/prometheus/dashboards/fluent-bit-dashboard.json b/argocd/system/monitoring/dashboards/fluent-bit-dashboard.json
similarity index 99%
rename from roles/prometheus/dashboards/fluent-bit-dashboard.json
rename to argocd/system/monitoring/dashboards/fluent-bit-dashboard.json
index d93f72fa..448c7f9f 100644
--- a/roles/prometheus/dashboards/fluent-bit-dashboard.json
+++ b/argocd/system/monitoring/dashboards/fluent-bit-dashboard.json
@@ -2876,6 +2876,16 @@
"tags": [],
"templating": {
"list": [
+ {
+ "hide": 0,
+ "label": "datasource",
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
{
"current": {
"selected": false,
@@ -3014,7 +3024,7 @@
]
},
"timezone": "",
- "title": "Logging Dashboard",
+ "title": "Fluentbit/Fluentd Dashboard",
"uid": "bNn5LUtiz",
"version": 1
}
\ No newline at end of file
diff --git a/roles/prometheus/dashboards/k3s/k3s-apiserver.json b/argocd/system/monitoring/dashboards/k3s-apiserver.json
similarity index 100%
rename from roles/prometheus/dashboards/k3s/k3s-apiserver.json
rename to argocd/system/monitoring/dashboards/k3s-apiserver.json
diff --git a/roles/prometheus/dashboards/k3s/k3s-controllermanager-dashboard.json b/argocd/system/monitoring/dashboards/k3s-controllermanager-dashboard.json
similarity index 100%
rename from roles/prometheus/dashboards/k3s/k3s-controllermanager-dashboard.json
rename to argocd/system/monitoring/dashboards/k3s-controllermanager-dashboard.json
diff --git a/roles/prometheus/dashboards/k3s/k3s-etcd.json b/argocd/system/monitoring/dashboards/k3s-etcd.json
similarity index 100%
rename from roles/prometheus/dashboards/k3s/k3s-etcd.json
rename to argocd/system/monitoring/dashboards/k3s-etcd.json
diff --git a/roles/prometheus/dashboards/k3s/k3s-kubelet.json b/argocd/system/monitoring/dashboards/k3s-kubelet.json
similarity index 100%
rename from roles/prometheus/dashboards/k3s/k3s-kubelet.json
rename to argocd/system/monitoring/dashboards/k3s-kubelet.json
diff --git a/roles/prometheus/dashboards/k3s/k3s-proxy-dashboard.json b/argocd/system/monitoring/dashboards/k3s-proxy-dashboard.json
similarity index 100%
rename from roles/prometheus/dashboards/k3s/k3s-proxy-dashboard.json
rename to argocd/system/monitoring/dashboards/k3s-proxy-dashboard.json
diff --git a/roles/prometheus/dashboards/k3s/k3s-scheduler-dashboard.json b/argocd/system/monitoring/dashboards/k3s-scheduler-dashboard.json
similarity index 100%
rename from roles/prometheus/dashboards/k3s/k3s-scheduler-dashboard.json
rename to argocd/system/monitoring/dashboards/k3s-scheduler-dashboard.json
diff --git a/roles/prometheus/dashboards/longhorn-dashboard.json b/argocd/system/monitoring/dashboards/longhorn-dashboard.json
similarity index 99%
rename from roles/prometheus/dashboards/longhorn-dashboard.json
rename to argocd/system/monitoring/dashboards/longhorn-dashboard.json
index fa1190f8..119f0495 100644
--- a/roles/prometheus/dashboards/longhorn-dashboard.json
+++ b/argocd/system/monitoring/dashboards/longhorn-dashboard.json
@@ -1,49 +1,49 @@
{
"__inputs": [
{
- "name": "DS_PROMETHEUS",
- "label": "prometheus",
"description": "",
- "type": "datasource",
+ "label": "prometheus",
+ "name": "DS_PROMETHEUS",
"pluginId": "prometheus",
- "pluginName": "Prometheus"
+ "pluginName": "Prometheus",
+ "type": "datasource"
}
],
"__requires": [
{
- "type": "grafana",
"id": "grafana",
"name": "Grafana",
+ "type": "grafana",
"version": "7.1.5"
},
{
- "type": "panel",
"id": "graph",
"name": "Graph",
+ "type": "panel",
"version": ""
},
{
- "type": "datasource",
"id": "prometheus",
"name": "Prometheus",
+ "type": "datasource",
"version": "1.0.0"
},
{
- "type": "panel",
"id": "stat",
"name": "Stat",
+ "type": "panel",
"version": ""
},
{
- "type": "panel",
"id": "table",
"name": "Table",
+ "type": "panel",
"version": ""
},
{
- "type": "panel",
"id": "text",
"name": "Text",
+ "type": "panel",
"version": "7.1.0"
}
],
@@ -60,6 +60,7 @@
}
]
},
+ "description": "An example dashboard for Longhorn v1.1.0",
"editable": true,
"gnetId": 13032,
"graphTooltip": 0,
@@ -2131,7 +2132,18 @@
"style": "dark",
"tags": [],
"templating": {
- "list": []
+ "list": [
+ {
+ "hide": 0,
+ "label": "datasource",
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ }
+ ]
},
"time": {
"from": "now-1h",
@@ -2152,8 +2164,7 @@
]
},
"timezone": "",
- "title": "Longhorn Example v1.1.0",
+ "title": "Longhorn Dashboard",
"uid": "2BCgsldGz",
- "version": 101,
- "description": "An example dashboard for Longhorn v1.1.0"
+ "version": 101
}
\ No newline at end of file
diff --git a/roles/prometheus/dashboards/minio-dashboard.json b/argocd/system/monitoring/dashboards/minio-dashboard.json
similarity index 99%
rename from roles/prometheus/dashboards/minio-dashboard.json
rename to argocd/system/monitoring/dashboards/minio-dashboard.json
index 42fa5eb3..dfcd7474 100644
--- a/roles/prometheus/dashboards/minio-dashboard.json
+++ b/argocd/system/monitoring/dashboards/minio-dashboard.json
@@ -1,49 +1,49 @@
{
"__inputs": [
{
- "name": "DS_PROMETHEUS",
- "label": "Prometheus",
"description": "",
- "type": "datasource",
+ "label": "Prometheus",
+ "name": "DS_PROMETHEUS",
"pluginId": "prometheus",
- "pluginName": "Prometheus"
+ "pluginName": "Prometheus",
+ "type": "datasource"
}
],
"__requires": [
{
- "type": "panel",
"id": "bargauge",
"name": "Bar gauge",
+ "type": "panel",
"version": ""
},
{
- "type": "panel",
"id": "gauge",
"name": "Gauge",
+ "type": "panel",
"version": ""
},
{
- "type": "grafana",
"id": "grafana",
"name": "Grafana",
+ "type": "grafana",
"version": "8.0.6"
},
{
- "type": "panel",
"id": "graph",
"name": "Graph",
+ "type": "panel",
"version": ""
},
{
- "type": "datasource",
"id": "prometheus",
"name": "Prometheus",
+ "type": "datasource",
"version": "1.0.0"
},
{
- "type": "panel",
"id": "stat",
"name": "Stat",
+ "type": "panel",
"version": ""
}
],
@@ -2614,6 +2614,16 @@
],
"templating": {
"list": [
+ {
+ "hide": 0,
+ "label": "datasource",
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
{
"allValue": null,
"current": {},
diff --git a/roles/prometheus/dashboards/pi-cluster-dashboard.json b/argocd/system/monitoring/dashboards/pi-cluster-dashboard.json
similarity index 100%
rename from roles/prometheus/dashboards/pi-cluster-dashboard.json
rename to argocd/system/monitoring/dashboards/pi-cluster-dashboard.json
diff --git a/roles/prometheus/dashboards/prometheus-dashboard-2.json b/argocd/system/monitoring/dashboards/prometheus-dashboard-2.json
similarity index 100%
rename from roles/prometheus/dashboards/prometheus-dashboard-2.json
rename to argocd/system/monitoring/dashboards/prometheus-dashboard-2.json
diff --git a/roles/prometheus/dashboards/traefik-dashboard.json b/argocd/system/monitoring/dashboards/traefik-dashboard.json
similarity index 99%
rename from roles/prometheus/dashboards/traefik-dashboard.json
rename to argocd/system/monitoring/dashboards/traefik-dashboard.json
index 3a2bd739..150715ec 100644
--- a/roles/prometheus/dashboards/traefik-dashboard.json
+++ b/argocd/system/monitoring/dashboards/traefik-dashboard.json
@@ -1202,6 +1202,16 @@
],
"templating": {
"list": [
+ {
+ "hide": 0,
+ "label": "datasource",
+ "name": "DS_PROMETHEUS",
+ "options": [],
+ "query": "prometheus",
+ "refresh": 1,
+ "regex": "",
+ "type": "datasource"
+ },
{
"allValue": null,
"current": {},
diff --git a/roles/prometheus/dashboards/velero-dashboard.json b/argocd/system/monitoring/dashboards/velero-dashboard.json
similarity index 100%
rename from roles/prometheus/dashboards/velero-dashboard.json
rename to argocd/system/monitoring/dashboards/velero-dashboard.json
diff --git a/roles/prometheus/rules/k3s.rule b/argocd/system/monitoring/rules/k3s.rule
similarity index 100%
rename from roles/prometheus/rules/k3s.rule
rename to argocd/system/monitoring/rules/k3s.rule
diff --git a/argocd/system/monitoring/templates/_help.tpl b/argocd/system/monitoring/templates/_help.tpl
new file mode 100644
index 00000000..f2a81e8f
--- /dev/null
+++ b/argocd/system/monitoring/templates/_help.tpl
@@ -0,0 +1,9 @@
+{{/* Set kube-prometheus stack name */}}
+{{- define "monitoring-stack.name" -}}
+ {{- if index .Values "kube-prometheus-stack" -}}
+ {{- $stack:= index .Values "kube-prometheus-stack" }}
+ {{- $stack.fullnameOverride }}
+ {{- else -}}
+ {{- .Release.Name -}}
+ {{- end -}}
+{{- end -}}
\ No newline at end of file
diff --git a/roles/prometheus/templates/external_node_service_metrics.yml.j2 b/argocd/system/monitoring/templates/external-node-service-metrics.yaml
similarity index 56%
rename from roles/prometheus/templates/external_node_service_metrics.yml.j2
rename to argocd/system/monitoring/templates/external-node-service-metrics.yaml
index dee93c55..b4ad7cd5 100644
--- a/roles/prometheus/templates/external_node_service_metrics.yml.j2
+++ b/argocd/system/monitoring/templates/external-node-service-metrics.yaml
@@ -1,3 +1,4 @@
+{{- if .Values.externalServices.nodeMetrics.enabled }}
---
# Headless service for External Node metrics. No Selector
apiVersion: v1
@@ -5,10 +6,12 @@ kind: Service
metadata:
name: external-node-metrics-service
labels:
+ # Labels matching the kube-prometheus-stack ServiceMonitor that discovers
+ # prometheus-node-exporter metrics.
app: prometheus-node-exporter
- release: kube-prometheus-stack
+ release: {{ .Release.Namespace }}
jobLabel: node-exporter
- namespace: {{ k3s_monitoring_namespace }}
+ namespace: {{ .Release.Namespace }}
spec:
clusterIP: None
ports:
@@ -18,17 +21,20 @@ spec:
targetPort: 9100
type: ClusterIP
+{{- if .Values.externalServices.nodeMetrics.servers }}
---
# Endpoint for the headless service without selector
apiVersion: v1
kind: Endpoints
metadata:
name: external-node-metrics-service
- namespace: {{ k3s_monitoring_namespace }}
+ namespace: {{ .Release.Namespace }}
subsets:
- addresses:
- - ip: 10.0.0.1
+{{ toYaml .Values.externalServices.nodeMetrics.servers | indent 2 }}
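+# With the default values (externalServices.nodeMetrics.servers) this renders, e.g.:
+#   - ip: 10.0.0.1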
ports:
- name: http-metrics
port: 9100
protocol: TCP
+{{- end }}
+{{- end }}
diff --git a/argocd/system/monitoring/templates/grafana-dashboards.yaml b/argocd/system/monitoring/templates/grafana-dashboards.yaml
new file mode 100644
index 00000000..55a1ad77
--- /dev/null
+++ b/argocd/system/monitoring/templates/grafana-dashboards.yaml
@@ -0,0 +1,17 @@
+{{- $files := .Files.Glob "dashboards/*.json" }}
+{{- if $files }}
+{{- range $path, $fileContents := $files }}
+{{- $dashboardName := regexReplaceAll "(^.*/)(.*)\\.json$" $path "${2}" }}
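+{{/* e.g. "dashboards/longhorn-dashboard.json" becomes ConfigMap "longhorn-dashboard";
+     the grafana_dashboard label below lets Grafana's dashboard sidecar
+     (sidecar.dashboards.searchNamespace: ALL) load it automatically. */}}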
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+ name: {{ $dashboardName }}
+ namespace: {{ $.Release.Namespace }}
+ labels:
+ grafana_dashboard: "1"
+data:
+ {{ $dashboardName }}.json: |-
+ {{- $.Files.Get $path | nindent 4 }}
+{{- end }}
+{{- end }}
diff --git a/argocd/system/monitoring/templates/grafana-externalsecret.yaml b/argocd/system/monitoring/templates/grafana-externalsecret.yaml
new file mode 100644
index 00000000..c55136dd
--- /dev/null
+++ b/argocd/system/monitoring/templates/grafana-externalsecret.yaml
@@ -0,0 +1,24 @@
+apiVersion: external-secrets.io/v1beta1
+kind: ExternalSecret
+metadata:
+ name: grafana-externalsecret
+ namespace: {{ .Release.Namespace }}
+spec:
+ secretStoreRef:
+ name: vault-backend
+ kind: ClusterSecretStore
+ target:
+ name: grafana
+ data:
+ - secretKey: admin-user
+ remoteRef:
+ key: grafana/admin
+ property: user
+ conversionStrategy: Default # ArgoCD sync issue
+ decodingStrategy: None # ArgoCD sync issue
+ - secretKey: admin-password
+ remoteRef:
+ key: grafana/admin
+ property: password
+ conversionStrategy: Default # ArgoCD sync issue
+ decodingStrategy: None # ArgoCD sync issue
\ No newline at end of file
diff --git a/argocd/system/monitoring/templates/ingress.yaml b/argocd/system/monitoring/templates/ingress.yaml
new file mode 100644
index 00000000..96c05780
--- /dev/null
+++ b/argocd/system/monitoring/templates/ingress.yaml
@@ -0,0 +1,123 @@
+# Linkerd Middlewares
+---
+apiVersion: traefik.containo.us/v1alpha1
+kind: Middleware
+metadata:
+ name: l5d-header-middleware-prometheus
+ namespace: {{ .Release.Namespace }}
+spec:
+ headers:
+ customRequestHeaders:
+ l5d-dst-override: "{{ template "monitoring-stack.name" . }}-prometheus.{{ .Release.Namespace }}.svc.cluster.local:9090"
+
+---
+apiVersion: traefik.containo.us/v1alpha1
+kind: Middleware
+metadata:
+ name: l5d-header-middleware-alertmanager
+ namespace: {{ .Release.Namespace }}
+spec:
+ headers:
+ customRequestHeaders:
+ l5d-dst-override: "{{ template "monitoring-stack.name" . }}-alertmanager.{{ .Release.Namespace }}.svc.cluster.local:9093"
+
+---
+#Linkerd middleware
+apiVersion: traefik.containo.us/v1alpha1
+kind: Middleware
+metadata:
+ name: l5d-header-middleware-grafana
+ namespace: {{ .Release.Namespace }}
+spec:
+ headers:
+ customRequestHeaders:
+ l5d-dst-override: "{{ .Release.Name }}-grafana.{{ .Release.Namespace }}.svc.cluster.local:80"
+
+---
+# Strip prefix middleware
+
+apiVersion: traefik.containo.us/v1alpha1
+kind: Middleware
+metadata:
+ name: stripprefix
+ namespace: {{ .Release.Namespace }}
+spec:
+ stripPrefix:
+ prefixes:
+ - "/prometheus"
+ - "/alertmanager"
+ - "/grafana"
+ forceSlash: false
+
+---
+# Create certificate
+apiVersion: cert-manager.io/v1
+kind: Certificate
+metadata:
+ name: monitoring-cert
+ namespace: {{ .Release.Namespace }}
+spec:
+ secretName: monitoring-secret
+ issuerRef:
+ name: {{ .Values.ingress.certmanager.tlsIssuer }}-issuer
+ kind: ClusterIssuer
+ commonName: {{ .Values.ingress.host }}
+ dnsNames:
+ - {{ .Values.ingress.host }}
+ privateKey:
+ algorithm: ECDSA
+
+---
+# IngressRoute https
+apiVersion: traefik.containo.us/v1alpha1
+kind: IngressRoute
+metadata:
+ name: monitoring-https
+ namespace: {{ .Release.Namespace }}
+spec:
+ entryPoints:
+ - websecure
+ routes:
+ - kind: Rule
+ match: Host(`{{ .Values.ingress.host }}`) && PathPrefix(`/prometheus`)
+ services:
+ - name: {{ template "monitoring-stack.name" . }}-prometheus
+ port: 9090
+ namespace: {{ .Release.Namespace }}
+ middlewares:
+{{- if .Values.ingress.basicAuth.enable }}
+ - name: {{ .Values.ingress.basicAuth.middlewareName }}
+ namespace: {{ .Values.ingress.basicAuth.middlewareNamespace }}
+{{- end }}
+ - name: stripprefix
+ namespace: {{ .Release.Namespace }}
+ - name: l5d-header-middleware-prometheus
+ namespace: {{ .Release.Namespace }}
+ - kind: Rule
+ match: Host(`{{ .Values.ingress.host }}`) && PathPrefix(`/alertmanager`)
+ services:
+ - name: {{ template "monitoring-stack.name" . }}-alertmanager
+ port: 9093
+ namespace: {{ .Release.Namespace }}
+ middlewares:
+{{- if .Values.ingress.basicAuth.enable }}
+ - name: {{ .Values.ingress.basicAuth.middlewareName }}
+ namespace: {{ .Values.ingress.basicAuth.middlewareNamespace }}
+{{- end }}
+ - name: stripprefix
+ namespace: {{ .Release.Namespace }}
+ - name: l5d-header-middleware-alertmanager
+ namespace: {{ .Release.Namespace }}
+ - kind: Rule
+ match: Host(`{{ .Values.ingress.host }}`) && PathPrefix(`/grafana`)
+ services:
+ - name: {{ .Release.Name }}-grafana
+ port: 80
+ namespace: {{ .Release.Namespace }}
+ middlewares:
+ - name: stripprefix
+ namespace: {{ .Release.Namespace }}
+ - name: l5d-header-middleware-grafana
+ namespace: {{ .Release.Namespace }}
+ tls:
+ secretName: monitoring-secret
\ No newline at end of file
diff --git a/roles/prometheus/templates/k3s_service_metrics.yml.j2 b/argocd/system/monitoring/templates/k3s-service-metric.yaml
similarity index 82%
rename from roles/prometheus/templates/k3s_service_metrics.yml.j2
rename to argocd/system/monitoring/templates/k3s-service-metric.yaml
index 1fe5ad2a..c7e5361f 100644
--- a/roles/prometheus/templates/k3s_service_metrics.yml.j2
+++ b/argocd/system/monitoring/templates/k3s-service-metric.yaml
@@ -1,3 +1,4 @@
+{{- if .Values.k3sMetrics.enabled }}
---
# Headless service (no selector) for K3S metrics.
# Only kubelet port (TCP: 10250) need to be scrapped
@@ -18,6 +19,7 @@ spec:
targetPort: 10250
type: ClusterIP
+{{- if .Values.k3sMetrics.servers }}
---
# Endpoint for the headless service without selector
apiVersion: v1
@@ -27,12 +29,11 @@ metadata:
namespace: kube-system
subsets:
- addresses:
- - ip: 10.0.0.11
- - ip: 10.0.0.12
- - ip: 10.0.0.13
- - ip: 10.0.0.14
- - ip: 10.0.0.15
+{{ toYaml .Values.k3sMetrics.servers | indent 2 }}
ports:
- name: https-metrics
port: 10250
protocol: TCP
+
+{{- end }}
+{{- end }}
\ No newline at end of file
diff --git a/roles/prometheus/templates/k3s_servicemonitor.yml.j2 b/argocd/system/monitoring/templates/k3s-servicemonitor.yaml
similarity index 95%
rename from roles/prometheus/templates/k3s_servicemonitor.yml.j2
rename to argocd/system/monitoring/templates/k3s-servicemonitor.yaml
index 2bb57c10..2129d129 100644
--- a/roles/prometheus/templates/k3s_servicemonitor.yml.j2
+++ b/argocd/system/monitoring/templates/k3s-servicemonitor.yaml
@@ -1,11 +1,11 @@
----
+{{- if .Values.k3sMetrics.enabled }}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
- release: kube-prometheus-stack
+ release: {{ .Release.Name | quote }}
name: k3s-monitoring
- namespace: {{ k3s_monitoring_namespace }}
+ namespace: {{ .Release.Namespace }}
spec:
endpoints:
# /metrics endpoint
@@ -84,3 +84,4 @@ spec:
selector:
matchLabels:
app.kubernetes.io/name: kubelet
+{{- end }}
diff --git a/argocd/system/monitoring/templates/minio-bearer-externalsecret.yaml b/argocd/system/monitoring/templates/minio-bearer-externalsecret.yaml
new file mode 100644
index 00000000..6a95c5c0
--- /dev/null
+++ b/argocd/system/monitoring/templates/minio-bearer-externalsecret.yaml
@@ -0,0 +1,22 @@
+apiVersion: external-secrets.io/v1beta1
+kind: ExternalSecret
+metadata:
+ name: minio-bearer-externalsecret
+ namespace: {{ .Release.Namespace }}
+spec:
+ secretStoreRef:
+ name: vault-backend
+ kind: ClusterSecretStore
+ target:
+ name: minio-monitor-token
+ template:
+ engineVersion: v2
+ data:
+ token: '{{ printf "{{ .token }}" }}'
+ data:
+ - secretKey: token
+ remoteRef:
+ key: minio/prometheus
+ property: bearer-token
+ conversionStrategy: Default # ArgoCD sync issue
+ decodingStrategy: None # ArgoCD sync issue
diff --git a/roles/prometheus/templates/minio_service_metrics.yml.j2 b/argocd/system/monitoring/templates/minio-service-metrics.yaml
similarity index 73%
rename from roles/prometheus/templates/minio_service_metrics.yml.j2
rename to argocd/system/monitoring/templates/minio-service-metrics.yaml
index 106a50ec..7d8fcfaa 100644
--- a/roles/prometheus/templates/minio_service_metrics.yml.j2
+++ b/argocd/system/monitoring/templates/minio-service-metrics.yaml
@@ -1,3 +1,4 @@
+{{- if .Values.externalServices.minioMetrics.enabled }}
---
# Headless service for Minio metrics. No Selector
apiVersion: v1
@@ -16,6 +17,7 @@ spec:
targetPort: 9091
type: ClusterIP
+{{- if .Values.externalServices.minioMetrics.servers }}
---
# Endpoint for the headless service without selector
apiVersion: v1
@@ -25,8 +27,11 @@ metadata:
namespace: kube-system
subsets:
- addresses:
- - ip: 10.0.0.11
+{{ toYaml .Values.externalServices.minioMetrics.servers | indent 2 }}
ports:
- name: http-metrics
port: 9091
protocol: TCP
+
+{{- end }}
+{{- end }}
\ No newline at end of file
diff --git a/roles/prometheus/templates/minio_servicemonitor.yml.j2 b/argocd/system/monitoring/templates/minio-servicemonitor.yaml
similarity index 66%
rename from roles/prometheus/templates/minio_servicemonitor.yml.j2
rename to argocd/system/monitoring/templates/minio-servicemonitor.yaml
index b058ff92..8095d2bc 100644
--- a/roles/prometheus/templates/minio_servicemonitor.yml.j2
+++ b/argocd/system/monitoring/templates/minio-servicemonitor.yaml
@@ -1,22 +1,12 @@
----
-apiVersion: v1
-kind: Secret
-type: Opaque
-metadata:
- name: minio-monitor-token
- namespace: {{ k3s_monitoring_namespace }}
-data:
- token: {{ minio_bearer_token | b64encode }}
-
----
+{{- if .Values.externalServices.minioMetrics.enabled }}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
app: minio
- release: kube-prometheus-stack
+ release: {{ .Release.Name | quote }}
name: minio-prometheus-servicemonitor
- namespace: {{ k3s_monitoring_namespace }}
+ namespace: {{ .Release.Namespace }}
spec:
jobLabel: app.kubernetes.io/name
endpoints:
@@ -34,3 +24,4 @@ spec:
selector:
matchLabels:
app.kubernetes.io/name: minio
+{{- end }}
diff --git a/argocd/system/monitoring/templates/prometheus-rules.yaml b/argocd/system/monitoring/templates/prometheus-rules.yaml
new file mode 100644
index 00000000..cf16344f
--- /dev/null
+++ b/argocd/system/monitoring/templates/prometheus-rules.yaml
@@ -0,0 +1,18 @@
+{{- $files := .Files.Glob "rules/*.rule" }}
+{{- if $files }}
+{{- $release:= include "monitoring-stack.name" . }}
+{{- range $path, $fileContents := $files }}
+{{- $ruleName := regexReplaceAll "(^.*/)(.*)\\.rule$" $path "${2}" }}
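+{{/* e.g. "rules/k3s.rule" becomes PrometheusRule "k3s", labelled with the
+     monitoring-stack release name so the operator's rule selector discovers it. */}}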
+---
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+ labels:
+ release: {{ $release }}
+ name: {{ $ruleName }}
+ namespace: {{ $.Release.Namespace }}
+spec:
+{{- $.Files.Get $path | nindent 2 }}
+
+{{- end }}
+{{- end }}
\ No newline at end of file
diff --git a/argocd/system/monitoring/values.yaml b/argocd/system/monitoring/values.yaml
new file mode 100644
index 00000000..1f1a549f
--- /dev/null
+++ b/argocd/system/monitoring/values.yaml
@@ -0,0 +1,207 @@
+
+# Ingress configuration
+ingress:
+ host: monitoring.picluster.ricsanfre.com
+ # configure cert-manager issuer
+ certmanager:
+ # tlsIssuer=letsencrypt to generate a valid TLS certificate using the IONOS API
+ # tlsIssuer=ca to generate a CA-signed certificate (not publicly trusted)
+ tlsIssuer: letsencrypt
+ # Enable Traefik basic authentication, reusing the global middleware created for Traefik
+ basicAuth:
+ enable: true
+ middlewareName: basic-auth
+ middlewareNamespace: traefik
+
+# Enable K3s metrics
+k3sMetrics:
+ enabled: true
+ servers:
+ - ip: 10.0.0.11
+ - ip: 10.0.0.12
+ - ip: 10.0.0.13
+ - ip: 10.0.0.14
+ - ip: 10.0.0.15
+
+# Enable monitoring of external services
+externalServices:
+ # External Minio service
+ minioMetrics:
+ enabled: true
+ servers:
+ - ip: 10.0.0.11
+ # External node (gateway)
+ nodeMetrics:
+ enabled: true
+ servers:
+ - ip: 10.0.0.1
+
+################################
+# kube-prometheus-stack subchart
+################################
+
+kube-prometheus-stack:
+ # Making full name stack: monitoring
+ fullnameOverride: monitoring
+ prometheusOperator:
+ # Disable linkerd injection for admission webhooks jobs
+ admissionWebhooks:
+ patch:
+ podAnnotations:
+ linkerd.io/inject: disabled
+ # Relabeling job name for operator metrics
+ serviceMonitor:
+ relabelings:
+ # Replace job value
+ - sourceLabels:
+ - __address__
+ action: replace
+ targetLabel: job
+ replacement: prometheus-operator
+ # Disable creation of kubelet service
+ kubeletService:
+ enabled: false
+
+ # Enable serviceaccount automount
+ prometheus-node-exporter:
+ serviceAccount:
+ automountServiceAccountToken: true
+
+ alertmanager:
+ alertmanagerSpec:
+ # Subpath /alertmanager configuration
+ externalUrl: http://monitoring.picluster.ricsanfre.com/alertmanager/
+ routePrefix: /
+ # PVC config
+ storage:
+ volumeClaimTemplate:
+ spec:
+ storageClassName: longhorn
+ accessModes: ["ReadWriteOnce"]
+ resources:
+ requests:
+ storage: 5Gi
+ serviceMonitor:
+ relabelings:
+ # Replace job value
+ - sourceLabels:
+ - __address__
+ action: replace
+ targetLabel: job
+ replacement: alertmanager
+ prometheus:
+ prometheusSpec:
+ # Subpath /prometheus configuration
+ externalUrl: http://monitoring.picluster.ricsanfre.com/prometheus/
+ routePrefix: /
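+ # The stripprefix Traefik middleware removes /prometheus before forwarding, so
+ # routePrefix stays "/" while externalUrl advertises the public subpath.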
+ # Resources request and limits
+ resources:
+ requests:
+ memory: 1Gi
+ limits:
+ memory: 1Gi
+ # PVC configuration
+ storageSpec:
+ volumeClaimTemplate:
+ spec:
+ storageClassName: longhorn
+ accessModes: ["ReadWriteOnce"]
+ resources:
+ requests:
+ storage: 5Gi
+ serviceMonitor:
+ relabelings:
+ # Replace job value
+ - sourceLabels:
+ - __address__
+ action: replace
+ targetLabel: job
+ replacement: prometheus
+ grafana:
+ # Use an existing secret for the admin user.
+ adminUser: ""
+ adminPassword: ""
+ admin:
+ existingSecret: grafana
+ userKey: admin-user
+ passwordKey: admin-password
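+ # The "grafana" secret referenced here is created by grafana-externalsecret.yaml
+ # from Vault (grafana/admin), instead of hard-coding credentials in values.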
+ # Configuring /grafana subpath
+ grafana.ini:
+ server:
+ domain: monitoring.picluster.ricsanfre.com
+ root_url: "%(protocol)s://%(domain)s:%(http_port)s/grafana/"
+ serve_from_sub_path: true
+ # Set admin password
+ # adminPassword: ""
+ # Install required plugins
+ plugins:
+ - grafana-piechart-panel
+ # Relabel job name of Grafana's metrics
+ serviceMonitor:
+ labels:
+ release: kube-prometheus-stack
+ relabelings:
+ # Replace job value
+ - sourceLabels:
+ - __address__
+ action: replace
+ targetLabel: job
+ replacement: grafana
+ # Additional data source
+ additionalDataSources:
+ - name: Loki
+ type: loki
+ uid: loki
+ access: proxy
+ url: http://loki-gateway.logging.svc.cluster.local
+ jsonData:
+ derivedFields:
+ - datasourceUid: tempo
+ matcherRegex: '"request_X-B3-Traceid":"(\w+)"'
+ name: TraceID
+ url: $${__value.raw}
+ - name: Tempo
+ uid: tempo
+ type: tempo
+ access: proxy
+ url: http://tracing-tempo-query-frontend.tracing.svc.cluster.local:3100
+
+ # Additional configuration to grafana dashboards sidecar
+ # Search in all namespaces for configMaps containing label `grafana_dashboard`
+ sidecar:
+ dashboards:
+ searchNamespace: ALL
+
+ # Disable built-in monitoring of K8s components (kubelet, apiserver, etc.).
+ # Monitoring of K3S components is configured outside kube-prometheus-stack
+ kubelet:
+ enabled: false
+ kubeApiServer:
+ enabled: false
+ kubeControllerManager:
+ enabled: false
+ kubeScheduler:
+ enabled: false
+ kubeProxy:
+ enabled: false
+ kubeEtcd:
+ enabled: false
+ # Disable default K8S Prometheus rules.
+ # Rules for K3S components are configured outside kube-prometheus-stack
+ defaultRules:
+ create: true
+ rules:
+ etcd: false
+ k8s: false
+ kubeApiserverAvailability: false
+ kubeApiserverBurnrate: false
+ kubeApiserverHistogram: false
+ kubeApiserverSlos: false
+ kubeControllerManager: false
+ kubelet: false
+ kubeProxy: false
+ kubernetesApps: false
+ kubernetesResources: false
+ kubernetesStorage: false
+ kubernetesSystem: false
+ kubeScheduler: false
diff --git a/argocd/system/tracing/Chart.yaml b/argocd/system/tracing/Chart.yaml
new file mode 100644
index 00000000..b205dc6d
--- /dev/null
+++ b/argocd/system/tracing/Chart.yaml
@@ -0,0 +1,7 @@
+apiVersion: v2
+name: tracing
+version: 0.0.0
+dependencies:
+ - name: tempo-distributed
+ version: 0.27.4
+ repository: https://grafana.github.io/helm-charts
\ No newline at end of file
diff --git a/argocd/system/tracing/templates/tempo-externalsecret.yaml b/argocd/system/tracing/templates/tempo-externalsecret.yaml
new file mode 100644
index 00000000..5835520c
--- /dev/null
+++ b/argocd/system/tracing/templates/tempo-externalsecret.yaml
@@ -0,0 +1,29 @@
+apiVersion: external-secrets.io/v1beta1
+kind: ExternalSecret
+metadata:
+ name: tempo-externalsecret
+ namespace: {{ .Release.Namespace }}
+spec:
+ secretStoreRef:
+ name: vault-backend
+ kind: ClusterSecretStore
+ target:
+ name: tempo-minio-secret
+ template:
+ engineVersion: v2
+ data:
+ MINIO_ACCESS_KEY_ID: '{{ printf "{{ .user }}" }}'
+ MINIO_SECRET_ACCESS_KEY: '{{ printf "{{ .key }}" }}'
+ data:
+ - secretKey: user
+ remoteRef:
+ key: minio/tempo
+ property: user
+ conversionStrategy: Default # ArgoCD sync issue
+ decodingStrategy: None # ArgoCD sync issue
+ - secretKey: key
+ remoteRef:
+ key: minio/tempo
+ property: key
+ conversionStrategy: Default # ArgoCD sync issue
+ decodingStrategy: None # ArgoCD sync issue
diff --git a/argocd/system/tracing/values.yaml b/argocd/system/tracing/values.yaml
new file mode 100644
index 00000000..218fe550
--- /dev/null
+++ b/argocd/system/tracing/values.yaml
@@ -0,0 +1,124 @@
+#############################
+# tempo-distributed subchart
+#############################
+tempo-distributed:
+ # Enable trace ingestion
+ traces:
+ otlp:
+ grpc:
+ enabled: true
+ http:
+ enabled: true
+ zipkin:
+ enabled: true
+ jaeger:
+ thriftCompact:
+ enabled: true
+ thriftHttp:
+ enabled: true
+ opencensus:
+ enabled: true
+
+ # Configure S3 backend
+ storage:
+ trace:
+ backend: s3
+ s3:
+ bucket: k3s-tempo
+ endpoint: s3.picluster.ricsanfre.com:9091
+ region: eu-west-1
+ access_key: ${MINIO_ACCESS_KEY_ID}
+ secret_key: ${MINIO_SECRET_ACCESS_KEY}
+ insecure: false
+
+ # Configure distributor
+ distributor:
+ config:
+ log_received_spans:
+ enabled: true
+ # Enable environment variables in config file
+ # https://grafana.com/docs/tempo/latest/configuration/#use-environment-variables-in-the-configuration
+ extraArgs:
+ - '-config.expand-env=true'
+ extraEnv:
+ - name: MINIO_ACCESS_KEY_ID
+ valueFrom:
+ secretKeyRef:
+ name: tempo-minio-secret
+ key: MINIO_ACCESS_KEY_ID
+ - name: MINIO_SECRET_ACCESS_KEY
+ valueFrom:
+ secretKeyRef:
+ name: tempo-minio-secret
+ key: MINIO_SECRET_ACCESS_KEY
+ # Configure ingester
+ ingester:
+ # Enable environment variables in config file
+ # https://grafana.com/docs/tempo/latest/configuration/#use-environment-variables-in-the-configuration
+ extraArgs:
+ - '-config.expand-env=true'
+ extraEnv:
+ - name: MINIO_ACCESS_KEY_ID
+ valueFrom:
+ secretKeyRef:
+ name: tempo-minio-secret
+ key: MINIO_ACCESS_KEY_ID
+ - name: MINIO_SECRET_ACCESS_KEY
+ valueFrom:
+ secretKeyRef:
+ name: tempo-minio-secret
+ key: MINIO_SECRET_ACCESS_KEY
+ # Configure compactor
+ compactor:
+ # Enable environment variables in config file
+ # https://grafana.com/docs/tempo/latest/configuration/#use-environment-variables-in-the-configuration
+ extraArgs:
+ - '-config.expand-env=true'
+ extraEnv:
+ - name: MINIO_ACCESS_KEY_ID
+ valueFrom:
+ secretKeyRef:
+ name: tempo-minio-secret
+ key: MINIO_ACCESS_KEY_ID
+ - name: MINIO_SECRET_ACCESS_KEY
+ valueFrom:
+ secretKeyRef:
+ name: tempo-minio-secret
+ key: MINIO_SECRET_ACCESS_KEY
+ # Configure querier
+ querier:
+ # Enable environment variables in config file
+ # https://grafana.com/docs/tempo/latest/configuration/#use-environment-variables-in-the-configuration
+ extraArgs:
+ - '-config.expand-env=true'
+ extraEnv:
+ - name: MINIO_ACCESS_KEY_ID
+ valueFrom:
+ secretKeyRef:
+ name: tempo-minio-secret
+ key: MINIO_ACCESS_KEY_ID
+ - name: MINIO_SECRET_ACCESS_KEY
+ valueFrom:
+ secretKeyRef:
+ name: tempo-minio-secret
+ key: MINIO_SECRET_ACCESS_KEY
+ # Configure query-frontend
+ queryFrontend:
+ # Enable environment variables in config file
+ # https://grafana.com/docs/tempo/latest/configuration/#use-environment-variables-in-the-configuration
+ extraArgs:
+ - '-config.expand-env=true'
+ extraEnv:
+ - name: MINIO_ACCESS_KEY_ID
+ valueFrom:
+ secretKeyRef:
+ name: tempo-minio-secret
+ key: MINIO_ACCESS_KEY_ID
+ - name: MINIO_SECRET_ACCESS_KEY
+ valueFrom:
+ secretKeyRef:
+ name: tempo-minio-secret
+ key: MINIO_SECRET_ACCESS_KEY
+ # Disable Minio server installation
+ minio:
+ enabled: false
\ No newline at end of file
diff --git a/argocd/system/traefik/Chart.yaml b/argocd/system/traefik/Chart.yaml
new file mode 100644
index 00000000..f1bd8e51
--- /dev/null
+++ b/argocd/system/traefik/Chart.yaml
@@ -0,0 +1,7 @@
+apiVersion: v2
+name: traefik
+version: 0.0.0
+dependencies:
+ - name: traefik
+ version: 20.6.0
+ repository: https://helm.traefik.io/traefik
diff --git a/argocd/system/traefik/templates/basicauth-middleware.yaml b/argocd/system/traefik/templates/basicauth-middleware.yaml
new file mode 100644
index 00000000..fbebdf0d
--- /dev/null
+++ b/argocd/system/traefik/templates/basicauth-middleware.yaml
@@ -0,0 +1,10 @@
+---
+# Basic-auth middleware
+apiVersion: traefik.containo.us/v1alpha1
+kind: Middleware
+metadata:
+ name: basic-auth
+ namespace: {{ .Release.Namespace }}
+spec:
+ basicAuth:
+ secret: basic-auth-secret
\ No newline at end of file
diff --git a/argocd/system/traefik/templates/redirect-to-https.yaml b/argocd/system/traefik/templates/redirect-to-https.yaml
new file mode 100644
index 00000000..a57231a5
--- /dev/null
+++ b/argocd/system/traefik/templates/redirect-to-https.yaml
@@ -0,0 +1,27 @@
+apiVersion: traefik.containo.us/v1alpha1
+kind: IngressRoute
+metadata:
+ name: http-to-https-redirect
+ namespace: {{ .Release.Namespace }}
+spec:
+ entryPoints:
+ - web
+ routes:
+ - kind: Rule
+ match: PathPrefix(`/`)
+ priority: 1
+ middlewares:
+ - name: redirect-to-https
+ services:
+ - kind: TraefikService
+ name: noop@internal
+---
+apiVersion: traefik.containo.us/v1alpha1
+kind: Middleware
+metadata:
+ name: redirect-to-https
+ namespace: {{ .Release.Namespace }}
+spec:
+ redirectScheme:
+ scheme: https
+ permanent: true
\ No newline at end of file
diff --git a/roles/prometheus/templates/traefik_servicemonitor.yml.j2 b/argocd/system/traefik/templates/servicemonitor.yaml
similarity index 53%
rename from roles/prometheus/templates/traefik_servicemonitor.yml.j2
rename to argocd/system/traefik/templates/servicemonitor.yaml
index c508f6cd..830e8e4b 100644
--- a/roles/prometheus/templates/traefik_servicemonitor.yml.j2
+++ b/argocd/system/traefik/templates/servicemonitor.yaml
@@ -1,12 +1,13 @@
+{{- if and .Values.serviceMonitor.enable (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/ServiceMonitor") }}
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
app: traefik
- release: kube-prometheus-stack
+ release: {{ .Values.serviceMonitor.release }}
name: traefik-prometheus-servicemonitor
- namespace: {{ k3s_monitoring_namespace }}
+ namespace: {{ .Release.Namespace }}
spec:
jobLabel: app.kubernetes.io/name
endpoints:
@@ -14,9 +15,9 @@ spec:
path: /metrics
namespaceSelector:
matchNames:
- - {{ k3s_traefik_namespace }}
+ - {{ .Release.Namespace }}
selector:
matchLabels:
- app.kubernetes.io/instance: traefik
app.kubernetes.io/name: traefik
- app.kubernetes.io/component: traefik-metrics
+ app.kubernetes.io/component: metrics
+{{- end -}}
\ No newline at end of file
diff --git a/argocd/system/traefik/templates/traefik-dashboard.yaml b/argocd/system/traefik/templates/traefik-dashboard.yaml
new file mode 100644
index 00000000..aea4f49a
--- /dev/null
+++ b/argocd/system/traefik/templates/traefik-dashboard.yaml
@@ -0,0 +1,42 @@
+{{- if .Values.ingress.enabled }}
+---
+# Create certificate
+apiVersion: cert-manager.io/v1
+kind: Certificate
+metadata:
+ name: traefik-cert
+ namespace: {{ .Release.Namespace }}
+spec:
+ secretName: traefik-secret
+ issuerRef:
+ name: {{ .Values.ingress.certmanager.tlsIssuer }}-issuer
+ kind: ClusterIssuer
+ commonName: {{ .Values.ingress.host }}
+ dnsNames:
+ - {{ .Values.ingress.host }}
+ privateKey:
+ algorithm: ECDSA
+---
+# IngressRoute https
+apiVersion: traefik.containo.us/v1alpha1
+kind: IngressRoute
+metadata:
+ name: traefik-dashboard
+ namespace: {{ .Release.Namespace }}
+spec:
+ entryPoints:
+ - websecure
+ routes:
+ - kind: Rule
+ match: Host(`{{ .Values.ingress.host }}`) && (PathPrefix(`/dashboard`) || PathPrefix(`/api`))
+ services:
+ - kind: TraefikService
+ name: api@internal
+ middlewares:
+{{- if .Values.ingress.basicAuth.enable }}
+ - name: {{ .Values.ingress.basicAuth.middlewareName }}
+ namespace: {{ .Values.ingress.basicAuth.middlewareNamespace }}
+{{- end }}
+ tls:
+ secretName: traefik-secret
+{{- end }}
\ No newline at end of file
diff --git a/argocd/system/traefik/templates/traefik-externalsecret.yaml b/argocd/system/traefik/templates/traefik-externalsecret.yaml
new file mode 100644
index 00000000..ad95565b
--- /dev/null
+++ b/argocd/system/traefik/templates/traefik-externalsecret.yaml
@@ -0,0 +1,23 @@
+apiVersion: external-secrets.io/v1beta1
+kind: ExternalSecret
+metadata:
+ name: basic-auth-externalsecret
+ namespace: {{ .Release.Namespace }}
+spec:
+ secretStoreRef:
+ name: vault-backend
+ kind: ClusterSecretStore
+ target:
+ name: basic-auth-secret
+ template:
+ engineVersion: v2
+ data:
+ users: |2
+ {{ printf "{{ .htpasswdPair }}" }}
+ data:
+ - secretKey: htpasswdPair
+ remoteRef:
+ key: traefik/basic_auth
+ property: htpasswd-pair
+ conversionStrategy: Default # ArgoCD sync issue
+ decodingStrategy: None # ArgoCD sync issue
diff --git a/argocd/system/traefik/values.yaml b/argocd/system/traefik/values.yaml
new file mode 100644
index 00000000..f17a93dd
--- /dev/null
+++ b/argocd/system/traefik/values.yaml
@@ -0,0 +1,111 @@
+# Dashboard Ingress configuration
+# helm chart creates dashboard ingress-route but it does not support tls specification yet
+# https://github.com/traefik/traefik-helm-chart/issues/763
+# Creating with specific template
+ingress:
+ enabled: true
+ host: traefik.picluster.ricsanfre.com
+ # configure cert-manager issuer
+ certmanager:
+    # tlsIssuer=letsencrypt to generate a valid TLS certificate using the IONOS API
+ # tlsIssuer=ca to generate a CA-signed certificate (not valid)
+ tlsIssuer: letsencrypt
+  # Enabling Traefik basic authentication, reusing the global middleware created for Traefik
+ basicAuth:
+ enable: true
+ middlewareName: basic-auth
+ middlewareNamespace: traefik
+
+# Prometheus servicemonitor configuration
+serviceMonitor:
+ enable: true
+ release: monitoring
+
+###################
+# Traefik subchart
+###################
+
+traefik:
+ # Configure access log
+ logs:
+ access:
+ # Enable access log
+ enabled: true
+ # Generate access log in json format
+ format: json
+ # Configure access log field
+ fields:
+ # keep all access fields
+ general:
+ defaultmode: keep
+        # Keep the B3 tracing protocol header field (X-B3-Traceid), drop the rest
+ headers:
+ defaultmode: drop
+ names:
+ X-B3-Traceid: keep
+
+ # Print access log to file instead of stdout
+ additionalArguments:
+ - "--accesslog.filepath=/data/access.log"
+
+ deployment:
+ # Linkerd ingress configuration
+ podAnnotations:
+ linkerd.io/inject: ingress
+ config.linkerd.io/skip-outbound-ports: "443"
+ # Access log sidecar container
+ additionalContainers:
+ - name: stream-accesslog
+ image: busybox
+ args:
+ - /bin/sh
+ - -c
+ - tail -n+1 -F /data/access.log
+ imagePullPolicy: Always
+ resources: {}
+ terminationMessagePath: /dev/termination-log
+ terminationMessagePolicy: File
+ volumeMounts:
+ - mountPath: /data
+ name: data
+ # Set specific LoadBalancer IP address for Ingress service
+ service:
+ spec:
+ loadBalancerIP: 10.0.0.100
+
+ providers:
+ kubernetesCRD:
+ enabled: true
+ # Enable cross namespace references
+ allowCrossNamespace: true
+    # Traefik by default does not update the status.loadbalancer field.
+    # Argo CD uses this field to obtain the ingress object health status:
+    # without it, ingress resources never become healthy and the application gets stuck.
+    # See issue https://github.com/argoproj/argo-cd/issues/968
+    # Traefik needs to be configured enabling the published service
+    # https://doc.traefik.io/traefik/providers/kubernetes-ingress/#publishedservice
+ kubernetesIngress:
+ publishedService:
+ enabled: true
+
+ # Enable prometheus metric service
+ metrics:
+ prometheus:
+ service:
+ enabled: true
+
+ # Configuring tracing backend
+ tracing:
+ zipkin:
+ httpEndpoint: http://tracing-tempo-distributor.tracing.svc.cluster.local:9411/api/v2/spans
+ sameSpan: true
+ id128Bit: true
+ sampleRate: 1.0
+
+ # Disable dashboard ingress-route
+ # It does not support tls specification yet
+ # https://github.com/traefik/traefik-helm-chart/issues/763
+ ingressRoute:
+ dashboard:
+ enabled: false
+
diff --git a/argocd/system/velero/Chart.yaml b/argocd/system/velero/Chart.yaml
new file mode 100644
index 00000000..dc4eb8f9
--- /dev/null
+++ b/argocd/system/velero/Chart.yaml
@@ -0,0 +1,7 @@
+apiVersion: v2
+name: velero
+version: 0.0.0
+dependencies:
+ - name: velero
+ version: 2.32.1
+ repository: https://vmware-tanzu.github.io/helm-charts
\ No newline at end of file
diff --git a/argocd/system/velero/templates/minio-externalsecret.yaml b/argocd/system/velero/templates/minio-externalsecret.yaml
new file mode 100644
index 00000000..cebb4716
--- /dev/null
+++ b/argocd/system/velero/templates/minio-externalsecret.yaml
@@ -0,0 +1,31 @@
+apiVersion: external-secrets.io/v1beta1
+kind: ExternalSecret
+metadata:
+ name: minio-externalsecret
+ namespace: {{ .Release.Namespace }}
+spec:
+ secretStoreRef:
+ name: vault-backend
+ kind: ClusterSecretStore
+ target:
+ name: velero-secret
+ template:
+ engineVersion: v2
+ data:
+ cloud: |
+ [default]
+ aws_access_key_id: {{ printf "{{ .user }}" }}
+ aws_secret_access_key: {{ printf "{{ .key }}" }}
+ data:
+ - secretKey: user
+ remoteRef:
+ key: minio/velero
+ property: user
+ conversionStrategy: Default # ArgoCD sync issue
+ decodingStrategy: None # ArgoCD sync issue
+ - secretKey: key
+ remoteRef:
+ key: minio/velero
+ property: key
+ conversionStrategy: Default # ArgoCD sync issue
+ decodingStrategy: None # ArgoCD sync issue
diff --git a/roles/backup/velero/templates/velero_full_schedule.yml.j2 b/argocd/system/velero/templates/schedule.yaml
similarity index 80%
rename from roles/backup/velero/templates/velero_full_schedule.yml.j2
rename to argocd/system/velero/templates/schedule.yaml
index fba6a995..e01dcfd4 100644
--- a/roles/backup/velero/templates/velero_full_schedule.yml.j2
+++ b/argocd/system/velero/templates/schedule.yaml
@@ -1,16 +1,15 @@
----
apiVersion: velero.io/v1
kind: Schedule
metadata:
name: full
- namespace: {{ k3s_velero_namespace }}
+ namespace: {{ .Release.Namespace }}
spec:
schedule: 0 4 * * *
template:
hooks: {}
includedNamespaces:
- '*'
- included_resources:
+ includedResources:
- '*'
includeClusterResources: true
metadata:
diff --git a/roles/prometheus/templates/velero_servicemonitor.yml.j2 b/argocd/system/velero/templates/servicemonitor.yaml
similarity index 60%
rename from roles/prometheus/templates/velero_servicemonitor.yml.j2
rename to argocd/system/velero/templates/servicemonitor.yaml
index 430aaac6..bddf4e08 100644
--- a/roles/prometheus/templates/velero_servicemonitor.yml.j2
+++ b/argocd/system/velero/templates/servicemonitor.yaml
@@ -1,12 +1,13 @@
+{{- if and .Values.serviceMonitor.enable (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/ServiceMonitor") }}
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
labels:
app: velero
- release: kube-prometheus-stack
+ release: {{ .Values.serviceMonitor.release }}
name: velero-prometheus-servicemonitor
- namespace: {{ k3s_monitoring_namespace }}
+ namespace: {{ .Release.Namespace }}
spec:
jobLabel: app.kubernetes.io/name
endpoints:
@@ -14,8 +15,9 @@ spec:
path: /metrics
namespaceSelector:
matchNames:
- - {{ k3s_velero_namespace }}
+ - {{ .Release.Namespace }}
selector:
matchLabels:
app.kubernetes.io/instance: velero
app.kubernetes.io/name: velero
+{{- end -}}
\ No newline at end of file
diff --git a/roles/backup/velero/templates/velero_volume_snapshot_class.yml.j2 b/argocd/system/velero/templates/volume-snapshot.yaml
similarity index 98%
rename from roles/backup/velero/templates/velero_volume_snapshot_class.yml.j2
rename to argocd/system/velero/templates/volume-snapshot.yaml
index e0aa1613..7e9e6e35 100644
--- a/roles/backup/velero/templates/velero_volume_snapshot_class.yml.j2
+++ b/argocd/system/velero/templates/volume-snapshot.yaml
@@ -1,4 +1,3 @@
----
# CSI VolumeSnapshot Associated With Longhorn Backup
kind: VolumeSnapshotClass
apiVersion: snapshot.storage.k8s.io/v1
diff --git a/argocd/system/velero/values.yaml b/argocd/system/velero/values.yaml
new file mode 100644
index 00000000..07833731
--- /dev/null
+++ b/argocd/system/velero/values.yaml
@@ -0,0 +1,54 @@
+
+# Prometheus servicemonitor configuration
+serviceMonitor:
+ enable: true
+ release: monitoring
+
+###################
+# velero subchart
+###################
+
+velero:
+ # AWS backend and CSI plugins configuration
+ initContainers:
+ - name: velero-plugin-for-aws
+ image: velero/velero-plugin-for-aws:v1.3.0
+ imagePullPolicy: IfNotPresent
+ volumeMounts:
+ - mountPath: /target
+ name: plugins
+ - name: velero-plugin-for-csi
+ image: ricsanfre/velero-plugin-for-csi:v0.3.1
+ imagePullPolicy: IfNotPresent
+ volumeMounts:
+ - mountPath: /target
+ name: plugins
+
+  # Use a kubectl image supporting ARM64
+  # (the bitnami default image does not support it)
+ kubectl:
+ image:
+ repository: ricsanfre/docker-kubectl-helm
+ tag: latest
+
+ # Velero Configuration
+ configuration:
+ # Configure Minio as backup backend
+ provider: aws
+ backupStorageLocation:
+ provider: aws
+ bucket: k3s-velero
+ config:
+ region: eu-west-1
+ s3ForcePathStyle: true
+ s3Url: https://s3.picluster.ricsanfre.com:9091
+ # insecureSkipTLSVerify: true
+ # Enable CSI snapshot support
+ features: EnableCSI
+ credentials:
+ existingSecret: velero-secret
+
+ # Enable linkerd
+ podAnnotations:
+ linkerd.io/inject: enabled
+
diff --git a/backup_configuration.yml b/backup_configuration.yml
deleted file mode 100644
index 1f2c47ec..00000000
--- a/backup_configuration.yml
+++ /dev/null
@@ -1,141 +0,0 @@
----
-
-- name: Install certbot in localhost and create minio certificate
- hosts: localhost
- gather_facts: true
- vars:
- certbot_venv: "{{ playbook_dir }}/certificates"
- certbot_email: "{{ acme_issuer_email }}"
- dns_ionos_prefix: "{{ vault.certmanager.ionos.public_prefix }}"
- dns_ionos_secret: "{{ vault.certmanager.ionos.secret }}"
- pre_tasks:
- # Include vault variables
- - name: Include vault variables
- include_vars: "vars/vault.yml"
- tags: ["always"]
- # Include picluster variables
- - name: Include picluster variables
- include_vars: "vars/picluster.yml"
- tags: ["always"]
- roles:
- - role: certbot
- vars:
- - install_python_packages: false
- when: enable_letsencrypt
-
- tasks:
- - name: Create Letsencrytp certificate for minio
- command: "{{ certbot_venv }}/certbot-create-cert.sh {{ minio_hostname }}"
- register: certbot_create
- changed_when:
- - certbot_create.rc==0
- - '"Certificate not yet due for renewal; no action taken." not in certbot_create.stdout'
- when: enable_letsencrypt
-
-- name: Configure Backup Server - S3 Storage
- hosts: node1
- gather_facts: true
- tags: [backup]
- become: true
- vars:
- server_hostname: "{{ minio_hostname }}"
- ssl_key_size: 4096
- ssl_certificate_provider: selfsigned
- key_type: RSA
- country_name: ES
- email_address: admin@ricsanfre.com
- organization_name: Ricsanfre
- certbot_venv: "{{ playbook_dir }}/certificates"
-
- pre_tasks:
- # Include vault variables
- - name: Include vault variables
- include_vars: "vars/vault.yml"
- tags: ["always"]
- # Include picluster variables
- - name: Include picluster variables
- include_vars: "vars/picluster.yml"
- tags: ["always"]
-
- - name: Create customCA-signed TLS certificate for minio
- block:
- # Generate custom CA
- - name: Generate custom CA
- include_tasks: tasks/generate_custom_ca.yml
- args:
- apply:
- delegate_to: localhost
- become: false
- # Generate selfsigned TLS certificate
- - name: Generate customCA-signed SSL certificates for minio
- include_tasks: tasks/generate_ca_signed_cert.yml
- args:
- apply:
- delegate_to: localhost
- become: false
- # Load tls key and cert into variables
- - name: Load tls key and cert
- set_fact:
- minio_key: "{{ lookup('file','certificates/' + server_hostname + '.key') }}"
- minio_cert: "{{ lookup('file','certificates/' + server_hostname + '.pem') }}"
- when: not enable_letsencrypt
-
- - name: Get letsencrypt TLS certificate for minio
- block:
- - name: check TLS certificate for minio exits
- command: "{{ certbot_venv }}/certbot-wrapper.sh certificates -d {{ minio_hostname }}"
- register: certbot_certificates
- delegate_to: localhost
- become: false
- changed_when: false
- failed_when:
- - '"Certificate Name: " + minio_hostname not in certbot_certificates.stdout'
- - name: Get certificate and key paths for minio
- set_fact:
- cert_path: "{{ certbot_certificates.stdout | regex_search(regexp1,'\\1') }}"
- cert_key_path: "{{ certbot_certificates.stdout | regex_search(regexp2,'\\1') }}"
- vars:
- regexp1: 'Certificate Path: (\S+)'
- regexp2: 'Private Key Path: (\S+)'
- when:
- - certbot_certificates.rc==0
- - '"Certificate Name: " + minio_hostname in certbot_certificates.stdout'
-
- - name: Load tls key and cert
- set_fact:
- minio_key: "{{ lookup('file', cert_key_path[0] ) }}"
- minio_cert: "{{ lookup('file', cert_path[0] ) }}"
- when: enable_letsencrypt
-
- tasks:
- - name: Include S3 configuration variables
- include_vars:
- file: vars/backup/s3_minio.yml
- - name: Configure Minio S3 server
- include_role:
- name: ricsanfre.minio
-
-- name: Configure Pi-cluster nodes backup
- hosts: raspberrypi
- gather_facts: true
- tags: [backup]
- become: true
- pre_tasks:
- - name: Include vault variables
- include_vars: "vars/vault.yml"
- tags: ["always"]
- # Include picluster variables
- - name: Include picluster variables
- include_vars: "vars/picluster.yml"
- tags: ["always"]
- - name: Load CA certificate for restic
- set_fact:
- restic_ca_cert: "{{ lookup('file','certificates/CA.pem') }}"
- when: not enable_letsencrypt
- - name: Do not use CA certificate
- set_fact:
- restic_use_ca_cert: false
- when: enable_letsencrypt
- roles:
- - role: ricsanfre.backup
- tags: [backup]
diff --git a/docs/_data/docs.yml b/docs/_data/docs.yml
index cc65945f..7e2cf24c 100644
--- a/docs/_data/docs.yml
+++ b/docs/_data/docs.yml
@@ -15,12 +15,24 @@
- san
- gateway
- node
+- title: External Services
+ docs:
+ - minio
+ - vault
- title: K3S Installation
docs:
- k3s-installation
+- title: Networking
+ docs:
- k3s-networking
+ - metallb
- traefik
+ - service-mesh
+- title: Certificate Management
+ docs:
- certmanager
+- title: Storage
+ docs:
- longhorn
- title: Monitoring
docs:
@@ -34,9 +46,9 @@
- title: Backup
docs:
- backup
-- title: Service Mesh
+- title: GitOps
docs:
- - service-mesh
+ - argocd
- title: Reference Docs
docs:
- k8s-networking
diff --git a/docs/_docs/ansible-instructions.md b/docs/_docs/ansible-instructions.md
index 3dfb9faa..c32a5f41 100644
--- a/docs/_docs/ansible-instructions.md
+++ b/docs/_docs/ansible-instructions.md
@@ -1,15 +1,18 @@
---
title: Quick Start Instructions
permalink: /docs/ansible/
-description: Quick Start guide to deploy our Raspberry Pi Kuberentes Cluster using cloud-init and ansible playbooks.
-last_modified_at: "02-10-2022"
+description: Quick Start guide to deploy our Raspberry Pi Kubernetes Cluster using cloud-init, Ansible playbooks and ArgoCD
+last_modified_at: "23-01-2023"
---
-This are the instructions to quickly deploy Kuberentes Pi-cluster using cloud-init and Ansible Playbooks
+These are the instructions to quickly deploy the Kubernetes Pi-cluster using the following tools:
+- [cloud-init](https://cloudinit.readthedocs.io/en/latest/): to automate the initial OS installation/configuration on each node of the cluster
+- [Ansible](https://docs.ansible.com/): to automatically configure cluster nodes, install and configure external services (DNS, DHCP, Firewall, S3 storage server, Hashicorp Vault), install K3S, and bootstrap the cluster by installing and configuring ArgoCD
+- [Argo CD](https://argo-cd.readthedocs.io/en/stable/): to automatically deploy applications to the Kubernetes cluster from manifest files in the Git repository.
{{site.data.alerts.note}}
-Step-by-step manual process is also described in this documentation.
+The step-by-step manual process to deploy and configure each component is also described in this documentation.
{{site.data.alerts.end}}
@@ -25,23 +28,30 @@ Step-by-step manual process is also described in this documentation.
git clone https://github.com/ricsanfre/pi-cluster.git
```
-- Install Ansible requirements:
+- Edit the GPG_NAME and GPG_EMAIL variables in the `Makefile`
- Developed Ansible playbooks depend on external roles that need to be installed.
+- Prepare the Ansible execution environment:
```shell
- ansible-galaxy install -r requirements.yml
+ make prepare-ansible
```
+  This target installs the external Ansible roles the playbooks depend on (`ansible-galaxy install -r requirements.yml`), creates a GPG key for encrypting passwords, configures Ansible Vault automatic encryption, and creates and encrypts the pi-cluster credentials file (`vault.yml`).
-## Ansible playbooks configuration
+{{site.data.alerts.important}}
+
+All Ansible commands (`ansible`, `ansible-galaxy`, `ansible-playbook`, `ansible-vault`) need to be executed within the [`/ansible`] directory, so the configuration file [`/ansible/ansible.cfg`]({{ site.git_edit_address }}/ansible/ansible.cfg) is used. Playbooks are configured to be launched from this directory.
+
+{{site.data.alerts.end}}
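+
+For example, to run one of the playbooks manually instead of using the corresponding `make` target:
+
+```shell
+cd ansible
+ansible-playbook setup_picluster.yml --tags "gateway"
+```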
+
+## Ansible configuration
### Inventory file
-Adjust [`inventory.yml`]({{ site.git_edit_address }}/inventory.yml) inventory file to meet your cluster configuration: IPs, hostnames, number of nodes, etc.
+Adjust [`ansible/inventory.yml`]({{ site.git_edit_address }}/ansible/inventory.yml) inventory file to meet your cluster configuration: IPs, hostnames, number of nodes, etc.
{{site.data.alerts.tip}}
-If you maintain the private network assigned to the cluster (10.0.0.0/24) and the hostnames and IP addresses. The only field that you must change in `inventory.yml` file is the field `mac` containing the node's mac address. This information will be used to configure automatically DHCP server and assign the proper IP to each node.
+If you keep the private network assigned to the cluster (10.0.0.0/24) and the nodes' hostnames and IP addresses, the `mac` field (node's MAC address) is the only field you need to change in the `inventory.yml` file. MAC addresses will be used to automatically configure the DHCP server and assign the proper IP to each node.
This information can be taken when Raspberry PI is booted for first time during the firmware update step: see [Raspberry PI Firmware Update](/docs/firmware).
@@ -51,21 +61,25 @@ This information can be taken when Raspberry PI is booted for first time during
The UNIX user to be used in remote connection (i.e.: `ansible`) user and its SSH key file location need to be specified
-- Set in [`group_vars/all.yml`]({{ site.git_edit_address }}/group_vars/all.yml) file the UNIX user to be used by Ansible in the remote connection (default value `ansible`)
+- Modify [`ansible/group_vars/all.yml`]({{ site.git_edit_address }}/ansible/group_vars/all.yml) to set the UNIX user to be used by Ansible in the remote connection (default value `ansible`)
-- Modify [`ansible.cfg`]({{ site.git_edit_address }}/ansible.cfg) file to include the path to the SSH key of the `ansible` user used in remote connections (`private-file-key` variable)
+- Modify [`ansible/ansible.cfg`]({{ site.git_edit_address }}/ansible/ansible.cfg) file to include the path to the SSH key of the `ansible` user used in remote connections (`private_key_file` variable)
```
# SSH key
private_key_file = $HOME/ansible-ssh-key.pem
```
-- Modify [`all.yml`]({{ site.git_edit_address }}/group_vars/all.yml) file to include your ansible remote UNIX user (`ansible_user` variable) and
-
+### Encrypting secrets/key variables
+
+{{site.data.alerts.important}}
+
+All tasks in this section are automated as part of the execution of the preparation command `make prepare-ansible`
-### Configuring Ansible Playbooks
+This command will initialize the GPG encryption key, configure Ansible Vault encryption, and generate `vault.yml` automatically with random passwords.
+
+{{site.data.alerts.end}}
-#### Encrypting secrets/key variables
All secrets/key/passwords variables are stored in a dedicated file, `vars/vault.yml`, so this file can be encrypted using [Ansible Vault](https://docs.ansible.com/ansible/latest/user_guide/vault.html)
@@ -75,32 +89,31 @@ vault.yml sample file is like this:
```yml
---
-# Encrypted variables - Ansible Vault
vault:
# K3s secrets
k3s:
k3s_token: s1cret0
# traefik secrets
traefik:
- basic_auth_passwd: s1cret0
+ basic_auth:
+ user: admin
+ passwd: s1cret0
# Minio S3 secrets
minio:
- root_password: supers1cret0
- longhorn_key: supers1cret0
- velero_key: supers1cret0
- restic_key: supers1cret0
- # elastic search
+ root:
+ user: root
+ key: supers1cret0
+ restic:
+ user: restic
+ key: supers1cret0
....
```
-All needed password-type variables used by the Playbooks are in the sample file `var/picluster-vault.yml`. This file is not encrypted and must be used to start the ansible setup.
-The steps to configure passwords/keys used in all Playbooks is the following:
+The manual steps to encrypt passwords/keys used in all playbooks are the following:
-1. Copy sample yaml `var/picluster-vault.yml` file and rename it as `var/vault.yml`
+1. Edit the content of the `var/vault.yml` file, specifying your own values for each key/password/secret.
-2. Edit content of the file specifying your own values for each of the key/password/secret specified.
-
-3. Encrypt file using ansible-vault
+2. Encrypt file using ansible-vault
```shell
ansible-vault encrypt vault.yml
@@ -118,20 +131,159 @@ The steps to configure passwords/keys used in all Playbooks is the following:
The password using during encryption need to be provided to decrypt the file
After executing the command the file `vault.yml` is decrypted and show the content in plain text.
- {{site.data.alerts.end}}
+   The file can be viewed decrypted, without modifying it, using the command
+ ```shell
+   ansible-vault view vault.yml
+ ```
+ {{site.data.alerts.end}}
{{site.data.alerts.important}}
+You do not need to modify and encrypt the `vault.yml` file manually. The file is generated and encrypted automatically by executing an Ansible playbook; see instructions below.
+
+{{site.data.alerts.end}}
+
+#### Automate Ansible Vault decryption with GPG
+
When using encrypted vault.yaml file all playbooks executed with `ansible-playbook` command need the argument `--ask-vault-pass`, so the password used to encrypt vault file can be provided when starting the playbook.
```shell
ansible-playbook playbook.yml --ask-vault-pass
```
-{{site.data.alerts.end}}
+Ansible vault password decryption can be automated using the `--vault-password-file` parameter, instead of manually providing the password with each execution (`--ask-vault-pass`).
+
+The Ansible vault password file can contain the password in plain text or be a script able to obtain the password.
+
+The vault-password-file location can be added to the `ansible.cfg` file, so it does not need to be passed as a parameter each time the `ansible-playbook` command is executed.
+
+GnuPG will be used to encrypt the Ansible Vault passphrase, and a vault-password-file script will automatically obtain the vault password.
+
+- [GnuPG](https://gnupg.org/) Installation and configuration
+
+  In Linux, GPG encryption can be used to encrypt/decrypt passwords and tokens using a GPG key pair
+
+  The GnuPG package has to be installed and a GPG key pair needs to be created for encryption/decryption
+
+  - Step 1. Install GnuPG package
+
+ ```shell
+ sudo apt install gnupg
+ ```
+
+ Check if it is installed
+ ```shell
+ gpg --help
+ ```
+
+ - Step 2. Generating Your GPG Key Pair
+
+    A GPG key pair consists of a public and a private key used to encrypt/decrypt
+
+ ```shell
+ gpg --gen-key
+ ```
+
+    The process requires providing a name and email address, which form the user ID identifying the recipient
+
+ The output of the command is like this:
+
+ ```
+ gpg (GnuPG) 2.2.4; Copyright (C) 2017 Free Software Foundation, Inc.
+ This is free software: you are free to change and redistribute it.
+ There is NO WARRANTY, to the extent permitted by law.
+
+ Note: Use "gpg --full-generate-key" for a full featured key generation dialog.
+
+ GnuPG needs to construct a user ID to identify your key.
+
+ Real name: Ricardo
+ Email address: ricsanfre@gmail.com
+ You selected this USER-ID:
+    "Ricardo <ricsanfre@gmail.com>"
+
+ Change (N)ame, (E)mail, or (O)kay/(Q)uit? O
+ We need to generate a lot of random bytes. It is a good idea to perform
+ some other action (type on the keyboard, move the mouse, utilize the
+ disks) during the prime generation; this gives the random number
+ generator a better chance to gain enough entropy.
+ We need to generate a lot of random bytes. It is a good idea to perform
+ some other action (type on the keyboard, move the mouse, utilize the
+ disks) during the prime generation; this gives the random number
+ generator a better chance to gain enough entropy.
+ gpg: /home/ansible/.gnupg/trustdb.gpg: trustdb created
+ gpg: key D59E854B5DD93199 marked as ultimately trusted
+ gpg: directory '/home/ansible/.gnupg/openpgp-revocs.d' created
+ gpg: revocation certificate stored as '/home/ansible/.gnupg/openpgp-revocs.d/A4745167B84C8C9A227DC898D59E854B5DD93199.rev'
+ public and secret key created and signed.
+
+ pub rsa3072 2021-08-13 [SC] [expires: 2023-08-13]
+ A4745167B84C8C9A227DC898D59E854B5DD93199
+    uid                      Ricardo <ricsanfre@gmail.com>
+ sub rsa3072 2021-08-13 [E] [expires: 2023-08-13]
+
+ ```
+
+ During the generation process you will be prompted to provide a passphrase.
+
+    This passphrase is needed whenever the private key is used to decrypt data (for example, the GPG-encrypted vault password).
+
+
+- Generate the Vault password and store it encrypted with GPG
+
+  Generate the password to be used in the ansible-vault encrypt/decrypt process and encrypt it using GPG
+
+  - Step 1. Install pwgen package
+
+ ```shell
+ sudo apt install pwgen
+ ```
+
+ - Step 2: Generate Vault password and encrypt it using GPG. Store the result as a file in $HOME/.vault
+
+ ```shell
+ mkdir -p $HOME/.vault
+    pwgen -n 71 -C | head -n1 | gpg --armor --recipient <gpg_email> -e -o $HOME/.vault/vault_passphrase.gpg
+ ```
+
+    where `<gpg_email>` must be the email address configured during GPG key creation.
+
+ - Step 3: Generate a script `vault_pass.sh`
-#### Modify Ansible Playbook variables
+ ```shell
+ #!/bin/sh
+ gpg --batch --use-agent --decrypt $HOME/.vault/vault_passphrase.gpg
+ ```
+ - Step 4: Modify `ansible.cfg` file, so you can omit the `--vault-password-file` argument.
+
+ ```
+ [defaults]
+ vault_password_file=vault_pass.sh
+ ```
+
+ {{site.data.alerts.note}}
+  If this repository is cloned, steps 3 and 4 are not needed since the files are already there.
+ {{site.data.alerts.end}}
+
+- Encrypt the `vault.yml` file using ansible-vault and the GPG password
+
+ ```shell
+  ansible-vault encrypt vault.yml
+ ```
+  This time only your GPG key passphrase will be requested to automatically encrypt/decrypt the file
+
+#### Vault credentials generation
+
+Execute the playbook that generates the Ansible vault variable file (`var/vault.yml`) containing all credentials/passwords. Randomly generated passwords will be used for all cluster services.
+
+Execute the following command:
+```shell
+ansible-playbook create_vault_credentials.yml
+```
+Credentials for external cloud services (IONOS DNS API credentials) will be requested during the execution of the playbook.
+
+### Modify Ansible Playbook variables
Adjust ansible playbooks/roles variables defined within `group_vars`, `host_vars` and `vars` directories to meet your specific configuration.
@@ -139,31 +291,30 @@ The following table shows the variable files defined at ansible's group and host
| Group/Host Variable file | Nodes affected |
|----|----|
-| [`group_vars/all.yml`]({{ site.git_edit_address }}/group_vars/all.yml) | all nodes of cluster + gateway node + pimaster |
-| [`group_vars/control.yml`]({{ site.git_edit_address }}/group_vars/control.yml) | control group: gateway node + pimaster |
-| [`group_vars/k3s_cluster.yml`]({{ site.git_edit_address }}/group_vars/k3s_cluster.yml) | all nodes of the k3s cluster |
-| [`group_vars/k3s_master.yml`]({{ site.git_edit_address }}/group_vars/k3s_master.yml) | K3s master nodes |
-| [`host_vars/gateway.yml`]({{ site.git_edit_address }}/host_vars/gateway.yml) | gateway node specific variables|
-{: .table }
+| [ansible/group_vars/all.yml]({{ site.git_edit_address }}/ansible/group_vars/all.yml) | all nodes of cluster + gateway node + pimaster |
+| [ansible/group_vars/control.yml]({{ site.git_edit_address }}/ansible/group_vars/control.yml) | control group: gateway node + pimaster |
+| [ansible/group_vars/k3s_cluster.yml]({{ site.git_edit_address }}/ansible/group_vars/k3s_cluster.yml) | all nodes of the k3s cluster |
+| [ansible/group_vars/k3s_master.yml]({{ site.git_edit_address }}/ansible/group_vars/k3s_master.yml) | K3s master nodes |
+| [ansible/host_vars/gateway.yml]({{ site.git_edit_address }}/ansible/host_vars/gateway.yml) | gateway node specific variables|
+{: .table .table-white .border-dark }
The following table shows the variable files used for configuring the storage, backup server and K3S cluster and services.
| Specific Variable File | Configuration |
|----|----|
-| [`vars/picluster.yml`]({{ site.git_edit_address }}/vars/picluster.yml) | K3S cluster and services configuration variables |
-| [`vars/dedicated_disks/local_storage.yml`]({{ site.git_edit_address }}/vars/dedicated_disks/local_storage.yml) | Configuration nodes local storage: Dedicated disks setup|
-| [`vars/centralized_san/centralized_san_target.yml`]({{ site.git_edit_address }}/vars/centralized_san/centralized_san_target.yml) | Configuration iSCSI target local storage and LUNs: Centralized SAN setup|
-| [`vars/centralized_san/centralized_san_initiator.yml`]({{ site.git_edit_address }}/vars/centralized_san/centralized_san_initiator.yml) | Configuration iSCSI Initiator: Centralized SAN setup|
-| [`vars/backup/s3_minio.yml`]({{ site.git_edit_address }}/vars/backup/s3_minio.yml) | Configuration S3 Minio server |
-{: .table }
+| [ansible/vars/picluster.yml]({{ site.git_edit_address }}/ansible/vars/picluster.yml) | K3S cluster and external services configuration variables |
+| [ansible/vars/dedicated_disks/local_storage.yml]({{ site.git_edit_address }}/ansible/vars/dedicated_disks/local_storage.yml) | Configuration nodes local storage: Dedicated disks setup|
+| [ansible/vars/centralized_san/centralized_san_target.yml]({{ site.git_edit_address }}/ansible/vars/centralized_san/centralized_san_target.yml) | Configuration iSCSI target local storage and LUNs: Centralized SAN setup|
+| [ansible/vars/centralized_san/centralized_san_initiator.yml]({{ site.git_edit_address }}/ansible/vars/centralized_san/centralized_san_initiator.yml) | Configuration iSCSI Initiator: Centralized SAN setup|
+{: .table .table-white .border-dark }
{{site.data.alerts.important}}: **About storage configuration**
-Ansible Playbook used for doing the basic OS configuration (`setup_picluster.yml`) is able to configure two different storage setups (dedicated disks or centralized SAN) depending on the value of the variable `centralized_san` located in [`group_vars/all.yml`]({{ site.git_edit_address }}/group_vars/all.yml). If `centralized_san` is `false` (default value) dedicated disk setup will be applied, otherwise centralized san setup will be configured.
+Ansible Playbook used for doing the basic OS configuration (`setup_picluster.yml`) is able to configure two different storage setups (dedicated disks or centralized SAN) depending on the value of the variable `centralized_san` located in [`ansible/group_vars/all.yml`]({{ site.git_edit_address }}/ansible/group_vars/all.yml). If `centralized_san` is `false` (default value) dedicated disk setup will be applied, otherwise centralized san setup will be configured.
-- **Centralized SAN** setup assumes `gateway` node has a SSD disk attached (`/dev/sda`) that it is partitioned the first time the server is booted (part of the cloud-init configuration) reserving 30Gb for the root partition and the rest of available disk for hosting the LUNs
+- **Centralized SAN** setup assumes `gateway` node has a SSD disk attached (`/dev/sda`) that has been partitioned during server first boot (part of the cloud-init configuration) reserving 30Gb for the root partition and the rest of available disk for hosting the LUNs
Final `gateway` disk configuration is:
@@ -174,7 +325,7 @@ Ansible Playbook used for doing the basic OS configuration (`setup_picluster.yml
LVM configuration is done by `setup_picluster.yml` Ansible's playbook and the variables used in the configuration can be found in `vars/centralized_san/centralized_san_target.yml`: `storage_volumegroups` and `storage_volumes` variables. Sizes of the different LUNs can be tweaked to fit the size of the SSD Disk used. I used a 480GB disk so, I was able to create LUNs of 100GB for each of the nodes.
-- **Dedicated disks** setup assumes that all cluster nodes (`node1-5`) have a SSD disk attached that it is partitioned the first time the server is booted (part of the cloud-init configuration) reserving 30Gb for the root partition and the rest of available disk for creating a logical volume (LVM) mounted as `/storage`
+- **Dedicated disks** setup assumes that all cluster nodes (`node1-5`) have a SSD disk attached that has been partitioned during server first boot (part of the cloud-init configuration) reserving 30Gb for the root partition and the rest of available disk for creating a logical volume (LVM) mounted as `/storage`
Final `node1-5` disk configuration is:
@@ -187,6 +338,19 @@ Ansible Playbook used for doing the basic OS configuration (`setup_picluster.yml
{{site.data.alerts.end}}
+{{site.data.alerts.important}}: **About TLS Certificates configuration**
+
+The default configuration assumes the use of Let's Encrypt TLS certificates and the IONOS DNS API for the DNS-01 challenge.
+
+As an alternative, a custom CA can be created and used to sign all certificates.
+The following changes need to be made:
+
+- Modify Ansible variable `enable_letsencrypt` to false in `/ansible/vars/picluster.yml` file
+- Modify Kubernetes applications' `ingress.tlsIssuer` (`/argocd/system/<app>/values.yaml`) to `ca` instead of `letsencrypt`.
+
+{{site.data.alerts.end}}
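+
+For example, for the Traefik application the change in `/argocd/system/traefik/values.yaml` would look like this (a sketch showing only the relevant keys of that values file):
+
+```yml
+ingress:
+  enabled: true
+  host: traefik.picluster.ricsanfre.com
+  certmanager:
+    # Use the custom CA ClusterIssuer instead of Let's Encrypt
+    tlsIssuer: ca
+```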
+
+
## Installing the nodes
### Update Raspberry Pi firmware
@@ -203,7 +367,7 @@ The installation procedure followed is the described in ["Ubuntu OS Installation
|--------------------| ------------- |-------------|
| Dedicated Disks |[user-data]({{ site.git_edit_address }}/cloud-init/dedicated_disks/gateway/user-data) | [network-config]({{ site.git_edit_address }}/cloud-init/dedicated_disks/gateway/network-config)|
| Centralized SAN | [user-data]({{ site.git_edit_address }}/cloud-init/centralized_san/gateway/user-data) | [network-config]({{ site.git_edit_address }}/cloud-init/centralized_san/gateway/network-config) |
-{: .table }
+{: .table .table-white .border-dark }
{{site.data.alerts.warning}}**About SSH keys**
@@ -221,10 +385,10 @@ Before applying the cloud-init files of the table above, remember to change the
### Configure gateway node
-For automatically execute basic OS setup tasks and configuration of gateway's services (DNS, DHCP, NTP, Firewall, etc.), executes the playbook:
+To automatically execute basic OS setup tasks and configure the gateway's services (DNS, DHCP, NTP, Firewall, etc.), execute the command:
```shell
-ansible-playbook setup_picluster.yml --tags "gateway" [--ask-vault-pass]
+make gateway-setup
```
### Install cluster nodes.
@@ -239,7 +403,7 @@ Follow the installation procedure indicated in ["Ubuntu OS Installation"](/docs/
|-----------| ------- |-------|-------|--------|--------|
| Dedicated Disks | [user-data]({{ site.git_edit_address }}/cloud-init/dedicated_disks/node1/user-data) | [user-data]({{ site.git_edit_address }}/cloud-init/dedicated_disks/node2/user-data)| [user-data]({{ site.git_edit_address }}/cloud-init/dedicated_disks/node3/user-data) | [user-data]({{ site.git_edit_address }}/cloud-init/dedicated_disks/node4/user-data) | [user-data]({{ site.git_edit_address }}/cloud-init/dedicated_disks/node5/user-data) |
| Centralized SAN | [user-data]({{ site.git_edit_address }}/cloud-init/centralized_san/node1/user-data) | [user-data]({{ site.git_edit_address }}/cloud-init/centralized_san/node2/user-data)| [user-data]({{ site.git_edit_address }}/cloud-init/centralized_san/node3/user-data) | [user-data]({{ site.git_edit_address }}/cloud-init/centralized_san/node4/user-data) | [user-data]({{ site.git_edit_address }}/cloud-init/centralized_san/node5/user-data) |
-{: .table }
+{: .table .table-white .border-dark }
{{site.data.alerts.warning}}**About SSH keys**
@@ -253,120 +417,89 @@ Before applying the cloud-init files of the table above, remember to change the
### Configure cluster nodes
-For automatically execute basic OS setup tasks (DNS, DHCP, NTP, etc.), executes the playbook:
+To automatically execute basic OS setup tasks (DNS, DHCP, NTP, etc.), execute the command:
```shell
-ansible-playbook setup_picluster.yml --tags "node"
+make nodes-setup
```
-### Configuring backup server (S3) and OS level backup
+## Configuring external services (Minio and Hashicorp Vault)
-Configure backup server (Playbook assumes S3 server is installed in `node1`) and automated backup tasks at OS level with restic in all nodes (`node1-node5` and `gateway`) running the playbook:
+Install and configure the S3 storage server (Minio) and the secrets manager (Hashicorp Vault) by running the command:
```shell
-ansible-playbook backup_configuration.yml
+make external-services
```
+The Ansible playbook assumes the S3 server is installed in `node1` and Hashicorp Vault in `gateway`.
{{site.data.alerts.note}}
-
-List of directories to be backed up by restic in each node can be found in variables file `var/all.yml`: `restic_backups_dirs`
-
-Variable `restic_clean_service` which configure and schedule restic's purging activities need to be set to "true" only in one of the nodes. Defaul configuration set `gateway` as the node for executing these tasks.
-
+All Ansible vault credentials (`vault.yml`) are also stored in Hashicorp Vault
{{site.data.alerts.end}}
-## K3S
-
-### K3S Installation
+## Configuring OS level backup (restic)
-To install K3S cluster execute the playbook:
+Automate backup tasks at OS level with restic in all nodes (`node1-node5` and `gateway`) by running the command:
```shell
-ansible-playbook k3s_install.yml
+make configure-os-backup
```
+The Minio S3 server running in `node1` will be used as the backup backend.
-### K3S basic services deployment
-
-To deploy and configure basic services (metallb, traefik, certmanager, linkerd, longhorn, EFK, Prometheus, Velero) run the playbook:
-
-```shell
-ansible-playbook k3s_deploy.yml
-```
+{{site.data.alerts.note}}
-Different ansible tags can be used to select the componentes to deploy:
+The list of directories to be backed up by restic in each node can be found in the variables file `var/all.yml`: `restic_backups_dirs`
-```shell
-ansible-playbook k3s_deploy.yml --tags
-```
+The variable `restic_clean_service`, which configures and schedules restic's purging activities, needs to be set to `true` only in one of the nodes. The default configuration sets `gateway` as the node executing these tasks.
-The following table shows the different components and their dependencies.
-
-| Ansible Tag | Component to configure/deploy | Dependencies
-|---|---|
-| `metallb` | Metal LB | - |
-| `certmanager` | Cert-manager | - |
-| `linkerd` | Linkerd | Cert-manager |
-| `traefik` | Traefik | Linkerd |
-| `longhorn` | Longhorn | Linkerd |
-| `monitoring` | Prometheus Stack | Longhorn, Linkerd |
-| `linkerd-viz` | Linkerd Viz | Prometheus Stack, Linkerd |
-| `logging` | EFK Stack | Longhorn, Linkerd |
-| `backup` | Velero | Linkerd |
-{: .table }
+{{site.data.alerts.end}}
-### K3s Cluster reset
+## Kubernetes Applications (GitOps)
-If you mess anything up in your Kubernetes cluster, and want to start fresh, the K3s Ansible playbook includes a reset playbook, that you can use to remove the installation of K3S:
+ArgoCD is used to automatically deploy the packaged applications contained in the repository. These applications are located in the [`/argocd`]({{site.git_address}}/tree/master/argocd) directory.
-```shell
-ansible-playbook k3s_reset.yml
-```
+- Modify Root application (App of Apps pattern) to point to your own repository
-### Updating K3S and cluster component releases
+ Edit file [`/argocd/bootstrap/root/values.yaml`]({{ site.git_edit_address }}/argocd/bootstrap/root/values.yaml).
+
+ `gitops.repo` should point to your own cloned repository.
+
+ ```yml
+  gitops:
+    repo: https://github.com/<your_user>/pi-cluster
+ ```
-Release version of each component to be installed is specified within variables in `var/pi_cluster.yml`
+- Tune parameters of the different packaged Applications to meet your specific configuration
-```yml
-# k3s version
-k3s_version: v1.24.7+k3s1
+  Edit the `values.yaml` file of the different applications located in the [`/argocd/system`]({{site.git_address}}/tree/master/argocd/system) directory.
-# Metallb helm chart version
-metallb_chart_version: 0.13.7
+## K3S
-# Traefik chart version
-traefik_chart_version: 18.1.0
+### K3S Installation
-# Cert-manager chart version
-certmanager_chart_version: v1.10.0
-certmanager_ionos_chart_version: 1.0.1
+To install the K3S cluster, execute the command:
-# Linkerd version
-linkerd_version: "stable-2.12.2"
-linkerd_chart_version: 1.9.4
-linkerd_viz_chart_version: 30.3.4
+```shell
+make k3s-install
+```
-# Velero version
-velero_chart_version: 2.32.1
-velero_version: v1.9.2
+### K3S Bootstrap
-# Longhorn chart version
-longhorn_chart_version: 1.3.2
+To bootstrap the cluster, run the command:
-# ECK operator chart version
-eck_operator_chart_version: 2.4.0
+```shell
+make k3s-bootstrap
+```
+Argo CD will be installed and it will automatically deploy all cluster applications from the git repository:
-# Promethes-eslasticsearch-exporter helm chart
-prometheus_es_exporter_chart_version: 4.15.1
+- `argocd/bootstrap/root`: contains the root application (ArgoCD App of Apps pattern)
+- `argocd/system/`: contains the manifest files for each application
-# Fluentbit/Fluentd helm chart version
-fluentd_chart_version: 0.3.9
-fluentbit_chart_version: 0.20.9
+### K3s Cluster reset
-# Loki helm version
-loki_chart_version: 3.3.0
+If you mess anything up in your Kubernetes cluster and want to start fresh, the K3s Ansible playbook includes a reset playbook that you can use to remove the K3S installation:
-# kube-prometheus-stack helm chart
-kube_prometheus_stack_chart_version: 41.6.1
+```shell
+make k3s-reset
```
## Shutting down the Raspberry Pi Cluster
@@ -380,32 +513,30 @@ For doing a controlled shutdown of the cluster execute the following commands
- Step 1: Shutdown K3S workers nodes:
```shell
- ansible-playbook shutdown.yml --limit k3s_worker
+ make shutdown-k3s-worker
```
- Command `shutdown -h 1m` is sent to each k3s-worker. Wait for workers nodes to shutdown.
- Step 2: Shutdown K3S master nodes:
```shell
- ansible-playbook shutdown.yml --limit k3s_master
+ make shutdown-k3s-master
```
- Command `shutdown -h 1m` is sent to each k3s-master. Wait for master nodes to shutdown.
- Step 3: Shutdown gateway node:
```shell
- ansible-playbook shutdown.yml --limit gateway
+ make shutdown-gateway
```
-`shutdown.yml` playbook connects to each Raspberry PI in the cluster and execute the command `sudo shutdown -h 1m`, commanding the raspberry-pi to shutdown in 1 minute.
+The `shutdown` commands connect to each Raspberry Pi in the cluster and execute `sudo shutdown -h 1m`, commanding the Raspberry Pi to shut down in 1 minute.
After a few minutes, all raspberry pi will be shutdown. You can notice that when the Switch ethernet ports LEDs are off. Then it is safe to unplug the Raspberry PIs.
## Updating Ubuntu packages
-To automatically update Ubuntu OS packages run the following playbook:
+To automatically update Ubuntu OS packages, run the following command:
```shell
-ansible-playbook update.yml
+make os-upgrade
```
This playbook automatically updates OS packages to the latest stable version and it performs a system reboot if needed.
\ No newline at end of file
diff --git a/docs/_docs/argocd.md b/docs/_docs/argocd.md
new file mode 100644
index 00000000..c2232df1
--- /dev/null
+++ b/docs/_docs/argocd.md
@@ -0,0 +1,502 @@
+---
+title: GitOps (ArgoCD)
+permalink: /docs/argocd/
+description: How to apply GitOps to Pi cluster configuration using ArgoCD.
+last_modified_at: "23-01-2023"
+---
+
+
+[Argo CD](https://argo-cd.readthedocs.io/) is a declarative, GitOps continuous delivery tool for Kubernetes.
+
+It can be integrated with Git repositories, and used jointly with CI tools, like [Jenkins](https://www.jenkins.io/) or [Github-actions](https://docs.github.com/en/actions), to define an end-to-end CI/CD pipeline to automatically build and deploy applications in Kubernetes.
+
+![picluster-cicd-gitops-architecture](/assets/img/cicd-gitops-architecture.png)
+
+Argo CD follows the GitOps pattern of using Git repositories as the source of truth for defining the desired application state, through a set of kubernetes manifests. Kubernetes manifests can be specified in several ways:
+
+- kustomize applications
+- helm charts
+- Plain directory of YAML/json manifests
+
+Argo CD automates the deployment of the desired application states in the specified target environments (git repository). Application deployments can track updates to branches or tags, or be pinned to a specific version of manifests at a Git commit.
+
+ArgoCD will be used in the Pi Cluster to automatically deploy the different applications in the Kubernetes cluster.
+
+## ArgoCD installation
+
+### Helm Chart installation
+ArgoCD can be installed through its Helm chart.
+
+- Step 1: Add ArgoCD helm repository:
+ ```shell
+ helm repo add argo https://argoproj.github.io/argo-helm
+ ```
+- Step 2: Fetch the latest charts from the repository:
+ ```shell
+ helm repo update
+ ```
+- Step 3: Create namespace
+ ```shell
+ kubectl create namespace argocd
+ ```
+- Step 4: Create argocd-values.yml
+
+ ```yml
+ configs:
+ params:
+ # Run server without TLS
+ # Traefik finishes TLS connections
+ server.insecure: true
+ cm:
+ statusbadge.enabled: 'true'
+ # Adding Applications health check
+ resource.customizations.health.argoproj.io_Application: |
+ hs = {}
+ hs.status = "Progressing"
+ hs.message = ""
+ if obj.status ~= nil then
+ if obj.status.health ~= nil then
+ hs.status = obj.status.health.status
+ if obj.status.health.message ~= nil then
+ hs.message = obj.status.health.message
+ end
+ end
+ end
+ return hs
+ ```
+
+  With this config, the Application resource health check is included so that the App of Apps pattern can be used (see below).
+
+- Step 5: Install helm chart
+ ```shell
+ helm install argocd argo/argo-cd --namespace argocd -f argocd-values.yml
+ ```
+
+- Step 6: Check Argo CD admin password
+
+ ```
+ kubectl get secret argocd-initial-admin-secret -o jsonpath='{.data.password}' -n argocd | base64 -d
+ ```
+
+- Step 7: Configure Port Forward
+
+ ```
+ kubectl port-forward svc/argocd-server -n argocd 8080:80 --address 0.0.0.0
+ ```
+
+- Step 8: Access Argo CD UI, using `admin` user and password obtained from step 6.
+
+ ```
+  http://<server_ip>:8080
+ ```
+
+### Configuring Ingress
+
+Traefik will be used as ingress controller, terminating TLS traffic, so ArgoCD does not need to expose its API using HTTPS.
+
+- Configure ArgoCD to run its API server with TLS disabled
+
+ The following helm chart values need to be provided:
+ ```yml
+ configs:
+ params:
+ # Run server without TLS
+ # Traefik finishes TLS connections
+ server.insecure: true
+ ```
+
+- Create Ingress resource yaml file
+
+ ```yml
+ # HTTPS Ingress
+ apiVersion: networking.k8s.io/v1
+ kind: Ingress
+ metadata:
+ name: argocd-ingress
+ namespace: argocd
+ annotations:
+ # HTTPS as entry point
+ traefik.ingress.kubernetes.io/router.entrypoints: websecure
+ # Enable TLS
+ traefik.ingress.kubernetes.io/router.tls: "true"
+ # Enable cert-manager to create automatically the SSL certificate and store in Secret
+ cert-manager.io/cluster-issuer: ca-issuer
+ cert-manager.io/common-name: argocd.picluster.ricsanfre.com
+ spec:
+ tls:
+ - hosts:
+ - argocd.picluster.ricsanfre.com
+ secretName: argocd-tls
+ rules:
+ - host: argocd.picluster.ricsanfre.com
+ http:
+ paths:
+ - path: /
+ pathType: Prefix
+ backend:
+ service:
+ name: argocd-server
+ port:
+ number: 80
+
+ ```
+
+See more details in [Argo-CD Ingress configuration doc](https://argo-cd.readthedocs.io/en/stable/operator-manual/ingress/)
+
+
+## ArgoCD Applications
+
+ArgoCD applications to be deployed can be configured using ArgoCD UI or using ArgoCD specific CRDs (Application/ApplicationSet).
+
+Different types of applications will be needed for the Pi Cluster:
+
+- Directory Applications
+
+ A [directory-type application](https://argo-cd.readthedocs.io/en/stable/user-guide/directory/) loads plain manifest files from .yml, .yaml, and .json files from a specific directory in a git repository.
+
+  Using the declarative Application CRD, a directory application can be created by applying the following manifest file:
+
+ ```yml
+ apiVersion: argoproj.io/v1alpha1
+ kind: Application
+ metadata:
+ name: test-app
+ spec:
+ destination:
+      namespace: <target_namespace>
+ server: https://kubernetes.default.svc
+ project: default
+ source:
+ # Enabling Recursive Resource Detection
+ directory:
+ recurse: true
+ # Repo path
+ path: test-app
+ # Repo URL
+      repoURL: https://github.com/<user>/<repo>.git
+ # Branch, tag tracking
+ targetRevision: HEAD
+ syncPolicy:
+ # Automatic sync options
+ automated:
+ prune: true
+ selfHeal: true
+ ```
+
+ Where:
+ - `destination.namespace`: namespace to deploy the application
+  - `destination.server`: cluster to deploy the application (`https://kubernetes.default.svc` indicates the local cluster)
+ - `source.repoURL` is the URL of the Git Repository
+  - `source.path` is the path within the Git repository where the application is located
+ - `source.targetRevision` is the Git tag, branch or commit to track
+  - `syncPolicy.automated` are [ArgoCD auto-sync policies](https://argo-cd.readthedocs.io/en/stable/user-guide/auto_sync/), used to automatically keep the application manifest files in the cluster in sync, delete old resources (`prune` option) and launch a sync when changes are made to the cluster (`selfHeal` option)
+
+- Helm Chart Applications in ArgoCD
+
+ [Helm chart applications](https://argo-cd.readthedocs.io/en/stable/user-guide/helm/) can be installed in a declarative GitOps way using ArgoCD's Application CRD.
+
+ ```yml
+ apiVersion: argoproj.io/v1alpha1
+ kind: Application
+ metadata:
+ name: cert-manager
+ namespace: argocd
+ spec:
+ project: default
+ source:
+ chart: cert-manager
+ repoURL: https://charts.jetstack.io
+ targetRevision: v1.10.0
+ helm:
+ releaseName: cert-manager
+ parameters:
+ - name: installCRDs
+ value: "true"
+ # valueFiles:
+ # - values.yaml
+ destination:
+ server: "https://kubernetes.default.svc"
+ namespace: cert-manager
+ ```
+
+ Where:
+ - `chart` is the name of the chart to deploy from the Helm Repository.
+ - `repoURL` is the URL of the Helm Repository.
+  - `targetRevision` is the version of the chart to deploy
+  - `releaseName` is the name of the Helm release
+  - `parameters`: Helm chart parameters (overriding values in the `values.yaml` file)
+
+  Alternatively, instead of providing individual parameters, a values file can be specified (`.spec.source.helm.valueFiles`).
+
+- Kustomize Application
+
+  [Kustomize](https://kustomize.io/) traverses a Kubernetes manifest to add, remove or update configuration options without forking. It is available both as a standalone binary and as a native feature of kubectl.
+  Kustomize can be used over a set of plain YAML manifest files or over a Helm chart.
+
+ Argo CD has native support for Kustomize and has the ability to read a kustomization.yaml file to enable deployment with Kustomize and allow ArgoCD to manage the state of the YAML files.
+
+  A directory-type application can be configured to apply Kustomize to a directory simply by placing a `kustomization.yaml` file in it (a minimal example is sketched below).
+
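+  As an illustrative sketch (the resource file names are hypothetical), a minimal `kustomization.yaml` placed in the application directory could look like:
+
+  ```yml
+  apiVersion: kustomize.config.k8s.io/v1beta1
+  kind: Kustomization
+  # Plain manifest files to be managed by Kustomize/ArgoCD
+  resources:
+    - deployment.yaml
+    - service.yaml
+  ```
+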
+### Helm Umbrella Charts
+
+ArgoCD Helm applications have the limitation that the Helm values file must be in the same git repository as the Helm chart.
+
+Since all the charts we want to deploy in the cluster belong to third-party repositories, we cannot use the values file option (the values file would live in our repository and not in the 3rd party repository), and specifying all chart parameters within the Application definition is not manageable since some of the charts contain lots of parameters.
+
+As an alternative, the Helm Umbrella Chart pattern can be used. A Helm umbrella chart is a sort of "meta" (empty) Helm chart that lists other Helm charts as dependencies ([subcharts](https://helm.sh/docs/chart_template_guide/subcharts_and_globals/)).
+It consists of an empty Helm chart in a repo directory containing only the chart definition file (`Chart.yaml`), listing all subcharts, and its corresponding `values.yaml` file.
+
+- `/Chart.yaml`
+
+ ```yml
+ apiVersion: v2
+ name: certmanager
+ version: 0.0.0
+ dependencies:
+ - name: cert-manager
+ version: v1.10.0
+ repository: https://charts.jetstack.io
+ ```
+- `/values.yaml`
+
+ ```yml
+ cert-manager:
+ installCRDs: true
+ ```
+
+Using this pattern, an ArgoCD application pointing to the umbrella chart directory can be deployed declaratively:
+
+```yml
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+ name: umbrella-chart-app
+spec:
+ destination:
+ namespace:
+ server: https://kubernetes.default.svc
+ project: default
+ source:
+ path:
+    repoURL: https://github.com/<user>/<repo>.git
+ targetRevision: HEAD
+ helm:
+
+ syncPolicy:
+ automated:
+ prune: true
+ selfHeal: true
+```
+
+Argo CD looks for a `Chart.yaml` file under the configured `path`. If present, it will check the `apiVersion` inside it: for `v2` it uses Helm 3 to render the chart. Note that ArgoCD will not use `helm install` to install charts; it will render the chart with the `helm template` command and then apply the output with `kubectl`:
+
+```shell
+helm template \
+  --dependency-update \
+  --namespace <namespace> \
+  <release-name> <path> \
+  | kubectl apply -n <namespace> -f -
+```
+
+Additional options can be passed to the `helm template` command using `.spec.source.helm` parameters in the Application resource.
+
+- `helm.valueFiles`: To specify the name of the values file (default values.yaml)
+- `helm.skipCrds`: To skip installation of CRDs defined in the helm chart
+
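+For illustration only (the repository URL, path and file names below are placeholders), these options are set under `spec.source.helm` in the Application resource:
+
+```yml
+source:
+  repoURL: https://github.com/<user>/<repo>.git
+  path: argocd/system/cert-manager
+  targetRevision: HEAD
+  helm:
+    # Use a custom values file (relative to "path") instead of the default values.yaml
+    valueFiles:
+      - custom-values.yaml
+    # Skip installation of the CRDs bundled in the chart
+    skipCrds: true
+```
+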
+{{site.data.alerts.note}}
+
+Packaged helm applications, using the umbrella helm chart pattern, and Kustomize applications have been created to deploy all Kubernetes services in the Pi Cluster.
+
+When using umbrella helm charts, the empty chart pattern has not always been used: a `templates` directory, containing additional manifest files required to configure the application, has been added whenever necessary.
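+
+For example (an illustrative layout; file names are hypothetical), an umbrella chart with additional templates could look like:
+
+```shell
+certmanager
+├── Chart.yaml
+├── templates
+│   └── cluster-issuer.yaml
+└── values.yaml
+```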
+
+{{site.data.alerts.end}}
+
+## Bootstrapping the cluster using App of Apps pattern
+
+For bootstrapping the cluster, the [app of apps pattern](https://argo-cd.readthedocs.io/en/stable/operator-manual/cluster-bootstrapping/) can be used. The App-of-Apps design is basically an Argo CD Application made up of other Argo CD Applications.
+
+Basically it will consist of an ArgoCD application (root application) containing a set of Application manifest files.
+
+Sync waves can be used to specify the order in which each application needs to be deployed.
+
+[Sync waves and sync hooks](https://argo-cd.readthedocs.io/en/stable/user-guide/sync-waves/) are a way to order how Argo CD applies individual manifests within an Argo CD Application. The order is specified by annotating the object (`argocd.argoproj.io/sync-wave` annotation) with the desired order in which to apply the manifest. The sync-wave is an integer number (negative numbers are allowed) indicating the order; manifests with lower sync-wave numbers are applied first.
+
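+For example, the annotation is set in the manifest metadata (a minimal sketch):
+
+```yml
+metadata:
+  annotations:
+    # Applied in the second wave (waves are processed in ascending order; default is 0)
+    argocd.argoproj.io/sync-wave: "1"
+```
+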
+All resources belonging to the same sync-wave have to report a healthy status before ArgoCD decides to apply the next sync-wave.
+
+Argo CD has built-in health checks for several standard Kubernetes objects. These checks are then bubbled up to the overall Application health status as one unit. For example, an Application that has a Service and a Deployment will be marked “healthy” only if both objects are considered healthy.
+
+There are built-in health checks for Deployment, ReplicaSet, StatefulSet, DaemonSet, Service, Ingress, PersistentVolumeClaim, etc. Custom health checks can also be defined. See [ArgoCD documentation - Resource Health](https://argo-cd.readthedocs.io/en/stable/operator-manual/health/)
+
+[As described in the documentation](https://argo-cd.readthedocs.io/en/stable/operator-manual/health/#argocd-app), ArgoCD removed the Application CRD health check in release 1.8. If the App of Apps pattern is used, the Application health status check needs to be added back to the ArgoCD configuration:
+
+```
+resource.customizations.health.argoproj.io_Application: |
+ hs = {}
+ hs.status = "Progressing"
+ hs.message = ""
+ if obj.status ~= nil then
+ if obj.status.health ~= nil then
+ hs.status = obj.status.health.status
+ if obj.status.health.message ~= nil then
+ hs.message = obj.status.health.message
+ end
+ end
+ end
+ return hs
+```
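+
+When Argo CD is deployed with its helm chart, this customization can be provided, for instance, through the `configs.cm` values section (a sketch, assuming the argo-cd helm chart 5.x values layout):
+
+```yml
+configs:
+  cm:
+    resource.customizations.health.argoproj.io_Application: |
+      hs = {}
+      hs.status = "Progressing"
+      hs.message = ""
+      if obj.status ~= nil then
+        if obj.status.health ~= nil then
+          hs.status = obj.status.health.status
+          if obj.status.health.message ~= nil then
+            hs.message = obj.status.health.message
+          end
+        end
+      end
+      return hs
+```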
+
+### Root App
+
+The root application will be specified as a helm chart, so Helm templating can be leveraged to automatically create and configure the Application resources and other initial resources needed.
+
+Within the git repo, the following directory structure can be created:
+
+```shell
+root
+├── Chart.yaml
+├── templates
+│ ├── app-set.yaml
+│ └── namespaces.yaml
+└── values.yaml
+```
+
+- Chart.yaml
+ ```yml
+ apiVersion: v2
+ name: bootstrap
+ version: 0.0.0
+ ```
+- values.yaml
+
+ ```yml
+ gitops:
+ repo: https://github.com/ricsanfre/pi-cluster
+ revision: master
+
+ # List of application corresponding to different sync waves
+ apps:
+    # CRDs App
+ - name: crds
+ namespace: default
+ path: argocd/bootstrap/crds
+ syncWave: 0
+ # External Secrets Operator
+ - name: external-secrets
+ namespace: external-secrets
+ path: argocd/system/external-secrets
+ syncWave: 1
+ # Metal LB
+ - name: metallb
+ namespace: metallb
+ path: argocd/system/metallb
+ syncWave: 1
+ ```
+
+- templates/app-set.yaml
+
+  This will create an ArgoCD Application for each item defined in the values file under the `apps` dictionary. Each item contains information about the name of the application (`name`), the namespace to be used during deployment (`namespace`), the sync-wave to be used (`syncWave`), and the path under `gitops.repo` where the application is located (`path`).
+
+ {% raw %}
+ ```yml
+ {{- range $index, $app := .Values.apps }}
+ ---
+ apiVersion: argoproj.io/v1alpha1
+ kind: Application
+ metadata:
+ name: {{ $app.name }}
+ namespace: {{ $.Release.Namespace }}
+ annotations:
+ argocd.argoproj.io/sync-wave: '{{ default 0 $app.syncWave }}'
+ spec:
+ destination:
+ namespace: {{ $app.namespace }}
+ server: https://kubernetes.default.svc
+ project: default
+ source:
+ path: {{ $app.path }}
+ repoURL: {{ $.Values.gitops.repo }}
+ targetRevision: {{ $.Values.gitops.revision }}
+ {{- if $app.helm }}
+ helm:
+ {{ toYaml $app.helm | indent 6 }}
+ {{- end }}
+ syncPolicy:
+ automated:
+ prune: true
+ selfHeal: true
+ retry:
+ limit: 10
+ backoff:
+ duration: 1m
+ maxDuration: 16m
+ factor: 2
+ syncOptions:
+ - CreateNamespace=true
+ - ServerSideApply=true
+ - ApplyOutOfSyncOnly=true
+ {{- end }}
+ ```
+ {% endraw %}
+- templates/namespaces.yaml
+
+  Creates the namespaces, annotated for automatic Linkerd proxy injection (see the sketch after this list).
+
+- templates/other-manifests.yaml
+
+ Other manifest files can be provided to bootstrap the cluster.
+
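+As a sketch of the `namespaces.yaml` template mentioned above, assuming a hypothetical `namespaces` list in `values.yaml` (the actual template in the repository may differ):
+
+{% raw %}
+```yml
+{{- range $namespace := .Values.namespaces }}
+---
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: {{ $namespace }}
+  annotations:
+    # Enable automatic Linkerd proxy injection for pods in this namespace
+    linkerd.io/inject: enabled
+{{- end }}
+```
+{% endraw %}
+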
+{{site.data.alerts.note}}
+
+The root application created for the Pi Cluster can be found in [/argocd/bootstrap/root]({{ site.git_address }}/tree/master/argocd/bootstrap/root)
+
+{{site.data.alerts.end}}
+
+#### Deploying Root application
+
+The root application can be deployed declaratively by applying the following manifest file:
+
+```yml
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+ name: root
+ namespace: argocd
+spec:
+ destination:
+ namespace: argocd
+ server: https://kubernetes.default.svc
+ project: default
+ source:
+ path: argocd/bootstrap/root
+ repoURL: https://github.com/ricsanfre/pi-cluster
+ targetRevision: master
+ syncPolicy:
+ automated:
+ prune: true
+ selfHeal: true
+ retry:
+ limit: 10
+ backoff:
+ duration: 1m
+ maxDuration: 16m
+ factor: 2
+ syncOptions:
+ - CreateNamespace=true
+```
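+
+For instance, assuming the manifest above is saved as `root-app.yaml` (illustrative file name):
+
+```shell
+kubectl apply -f root-app.yaml
+```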
+
+### CRDs Application
+
+An Application containing all CRDs can be created and deployed in the first sync-wave, so all other applications making use of those CRDs can be deployed successfully even when the corresponding controller services are not yet deployed. For example, deploying Prometheus Operator CRDs as part of a CRDs Application allows Prometheus monitoring objects (ServiceMonitor, PodMonitor, etc.) to be created for applications that are deployed before the kube-prometheus-stack application.
+
+For an example of such CRDs application, check repository [/argocd/bootstrap/crds]({{ site.git_address }}/tree/master/argocd/bootstrap/crds).
+
+## References
+
+- [Argo CD Working With Helm](https://kubebyexample.com/learning-paths/argo-cd/argo-cd-working-helm)
+
+- [ArgoCD App of Apps pattern to bootstrap the cluster](https://argo-cd.readthedocs.io/en/stable/operator-manual/cluster-bootstrapping/)
+
+- [ArgoCD SyncWaves](https://argo-cd.readthedocs.io/en/stable/user-guide/sync-waves/)
+
+- [How to set Argo CD Application Dependencies](https://codefresh.io/blog/argo-cd-application-dependencies/)
+
+- [How to avoid CRD too long error](https://www.arthurkoziel.com/fixing-argocd-crd-too-long-error/)
\ No newline at end of file
diff --git a/docs/_docs/backup.md b/docs/_docs/backup.md
index 7205b285..8733d5bf 100644
--- a/docs/_docs/backup.md
+++ b/docs/_docs/backup.md
@@ -2,7 +2,7 @@
title: Backup & Restore
permalink: /docs/backup/
description: How to deploy a backup solution based on Velero and Restic in our Raspberry Pi Kubernetes Cluster.
-last_modified_at: "11-10-2022"
+last_modified_at: "27-12-2022"
---
## Backup Architecture and Design
@@ -72,286 +72,13 @@ The backup architecture is the following:
- Minio as backup backend
- All the above mechanisms supports as backup backend, a S3-compliant storage infrastructure. For this reason, open-source project [Minio](https://min.io/) will be deployed for the Pi Cluster.
+  All the above mechanisms support, as backup backend, an S3-compliant storage infrastructure. For this reason, the open-source project [Minio](https://min.io/) has been deployed for the Pi Cluster.
+{{site.data.alerts.note}}
-## Backup server hardware infrastructure
-
-For installing Minio S3 storage server, `node1` will be used. `node1` has attached a SSD Disk of 480 GB that is not being used by Longhorn Distributed Storage solution. Longhorn storage solution is not deployed in k3s master node and thus storage replicas are only using storage resources of `node2`, `node3` and `node4`.
-
-## Minio S3 Object Storage Server
-
-Official [documentation](https://docs.min.io/minio/baremetal/installation/deploy-minio-standalone.html) can be used for installing stand-alone Minio Server in bare-metal environment.
-
-Minio can be also installed as a Kuberentes service, to offer S3 storage service to Cluster users. Since I want to use Minio Server for backing-up/restoring the cluster itself, I will go with a bare-metal installation.
-
-For a more secured and multi-user Minio installation the instructions of this [post](https://www.civo.com/learn/create-a-multi-user-minio-server-for-s3-compatible-object-hosting) can be used
-
-Minio installation and configuration tasks have been automated with Ansible developing a role: **ricsanfre.minio**. This role, installs Minio Server and Minio Client and automatically create S3 buckets, and configure users and ACLs for securing the access.
-
-### Minio installation (baremetal server)
-
-- Step 1. Create minio's UNIX user/group
-
- ```shell
- sudo groupadd minio
- sudo useradd minio -g minio
- sudo
- $ sudo chown minio-user -R /srv/minio/data
- ```
-- Step 2. Create minio's S3 storage directory
-
- ```shell
- sudo mkdir /storage/minio
- chown -R minio:minio /storage/minio
- chmod -R 750 /storage/minio
- ```
-
-- Step 3. Create minio's config directories
-
- ```shell
- sudo mkdir -p /etc/minio
- sudo mkdir -p /etc/minio/ssl
- sudo mkdir -p /etc/minio/policy
- chown -R minio:minio /etc/minio
- chmod -R 750 /etc/minio
- ```
-
-- Step 4. Download server binary (`minio`) and minio client (`mc`) and copy them to `/usr/local/bin`
-
- ```shell
- wget https://dl.min.io/server/minio/release/linux-/minio
- wget https://dl.minio.io/client/mc/release/linux-/mc
- chmod +x minio
- chmod +x mc
- sudo mv minio /usr/local/bin/minio
- sudo mv mc /usr/local/bin/mc
- ```
- where `` is amd64 or arm64.
-
-- Step 5: Create minio Config file `/etc/minio/minio.conf`
-
- This file contains environment variables that will be used by minio server.
- ```
- # Minio local volumes.
- MINIO_VOLUMES="/storage/minio"
-
- # Minio cli options.
- MINIO_OPTS="--address :9091 --console-address :9092 --certs-dir /etc/minio/ssl"
-
- # Access Key of the server.
- MINIO_ROOT_USER=""
- # Secret key of the server.
- MINIO_ROOT_PASSWORD=""
- # Minio server region
- MINIO_SITE_REGION="eu-west-1"
- # Minio server URL
- MINIO_SERVER_URL="https://s3.picluster.ricsanfre.com:9091"
- ```
-
- Minio is configured with the following parameters:
-
- - Minio Server API Port 9091 (`MINIO_OPTS`="--address :9091")
- - Minio Console Port: 9092 (`MINIO_OPTS`="--console-address :9092")
- - Minio Storage data dir (`MINIO_VOLUMES`): `/storage/minio`
- - Minio Site Region (`MINIO_SITE_REGION`): `eu-west-1`
- - SSL certificates stored in (`MINIO_OPTS`="--certs-dir /etc/minio/ssl"): `/etc/minio/ssl`.
- - Minio server URL (`MINIO_SERVER_URL`): Url used to connecto to Minio Server API
-
-- Step 6. Create systemd minio service file `/etc/systemd/system/minio.service`
-
- ```
- [Unit]
- Description=MinIO
- Documentation=https://docs.min.io
- Wants=network-online.target
- After=network-online.target
- AssertFileIsExecutable=/usr/local/bin/minio
-
- [Service]
- WorkingDirectory=/usr/local/
-
- User=minio
- Group=minio
- ProtectProc=invisible
-
- EnvironmentFile=/etc/minio/minio.conf
- ExecStartPre=/bin/bash -c "if [ -z \"${MINIO_VOLUMES}\" ]; then echo \"Variable MINIO_VOLUMES not set in /etc/minio/minio.conf\"; exit 1; fi"
-
- ExecStart=/usr/local/bin/minio server $MINIO_OPTS $MINIO_VOLUMES
-
- # Let systemd restart this service always
- Restart=always
-
- # Specifies the maximum file descriptor number that can be opened by this process
- LimitNOFILE=65536
-
- # Specifies the maximum number of threads this process can create
- TasksMax=infinity
-
- # Disable timeout logic and wait until process is stopped
- TimeoutStopSec=infinity
- SendSIGKILL=no
-
- [Install]
- WantedBy=multi-user.target
- ```
- This service start minio server using minio UNIX group, loading environment variables located in `/etc/minio/minio.conf` and executing the following startup command:
-
- ```shell
- /usr/local/minio server $MINIO_OPTS $MINIO_VOLUMES
- ```
-
-- Step 7. Enable minio systemd service
-
- ```shell
- sudo systemctl enable minio.service
- ```
-
-- Step 8. Create Minio SSL certificate
-
- In case you have your own domain, a valid SSL certificate signed by [Letsencrypt](https://letsencrypt.org/) can be obtained for Minio server, using [Certbot](https://certbot.eff.org/).
-
- See certbot installation instructions in [CertManager - Letsencrypt Certificates Section](/docs/certmanager/#installing-certbot-ionos). Those instructions indicate how to install certbot using DNS challenge with IONOS DNS provider (my DNS provider). Similar procedures can be followed for other DNS providers.
-
- Letsencrypt using HTTP challenge is avoided for security reasons (cluster services are not exposed to public internet).
-
- If generating valid SSL certificate is not possible, selfsigned certificates with a custom CA can be used instead.
-
- {{site.data.alerts.important}}
-
- `restic` backup to a S3 Object Storage backend using self-signed certificates does not work (See issue [#26](https://github.com/ricsanfre/pi-cluster/issues/26)). However, it works if SSL certificates are signed using a custom CA.
-
- {{site.data.alerts.end}}
-
- Follow this procedure for creating a self-signed certificate for Minio Server
-
- 1. Create a self-signed CA key and self-signed certificate
-
- ```shell
- openssl req -x509 \
- -sha256 \
- -nodes \
- -newkey rsa:4096 \
- -subj "/CN=Ricsanfre CA" \
- -keyout rootCA.key -out rootCA.crt
- ```
- 2. Create a SSL certificate for Minio server signed using the custom CA
-
- ```shell
- openssl req -new -nodes -newkey rsa:4096 \
- -keyout minio.key \
- -out minio.csr \
- -batch \
- -subj "/C=ES/ST=Madrid/L=Madrid/O=Ricsanfre CA/OU=picluster/CN=s3.picluster.ricsanfre.com"
-
- openssl x509 -req -days 365000 -set_serial 01 \
- -extfile <(printf "subjectAltName=DNS:s3.picluster.ricsanfre.com") \
- -in minio.csr \
- -out minio.crt \
- -CA rootCA.crt \
- -CAkey rootCA.key
- ```
-
- Once the certificate is created, public certificate and private key need to be installed in Minio server following this procedure:
-
-
- 1. Copy public certificate `minio.crt` as `/etc/minio/ssl/public.crt`
-
- ```shell
- sudo cp minio.crt /etc/minio/ssl/public.crt
- sudo chown minio:minio /etc/minio/ssl/public.crt
- ```
- 2. Copy private key `minio.key` as `/etc/minio/ssl/private.key`
-
- ```shell
- cp minio.key /etc/minio/ssl/private.key
- sudo chown minio:minio /etc/minio/ssl/private.key
- ```
- 3. Restart minio server.
-
- ```shell
- sudo systemctl restart minio.service
- ```
-
- {{site.data.alerts.note}}
-
- Certificate must be created for the DNS name associated to MINIO S3 service, i.e `s3.picluster.ricsanfre.com`.
-
- `MINIO_SERVER_URL` environment variable need to be configured, to avoid issues with TLS certificates without IP Subject Alternative Names.
-
- {{site.data.alerts.end}}
-
- To connect to Minio console use the URL https://s3.picluster.ricsanfre.com:9091
-
-- Step 9. Configure minio client: `mc`
-
- Configure connection alias to minio server.
-
- ```shell
- mc alias set minio_alias
- ```
-
-- Step 10. Create Minio Buckets using `mc`
-
- The following buckets need to be created for backing-up different cluster components:
-
- - Longhorn Backup: `k3s-longhorn`
- - Velero Backup: `k3s-velero`
- - OS backup: `restic`
-
- Also as backend storage for Loki the following bucket need to be configured
-
- - Loki Storage: `k3s-loki`
-
- Buckets can be created using Minio's CLI (`mc`)
-
- ```shell
- mc mb /
- ```
- Where: `` is the mc's alias connection to Minio Server using admin user credentials, created in step 10.
-
-- Step 11. Configure Minio Users and ACLs
-
- Following users will be created to grant access to Minio S3 buckets:
-
- - `longhorn` with read-write access to `k3s-longhorn` bucket.
- - `velero` with read-write access to `k3s-velero` bucket.
- - `restic` with read-write access to `restic` bucket
- - `loki` with read-write access to `k3s-loki` bucket
-
- Users can be created usinng Minio's CLI
- ```shell
- mc admin user add
- ```
- Access policies to the different buckets can be assigned to the different users using the command:
-
- ```shell
- mc admin policy add user_policy.json
- ```
- Where `user_policy.json`, contains AWS access policies definition like:
+The Minio S3 server, installed as a stand-alone service and configured as described in [Pi Cluster S3 Object Storage Service](/docs/minio), will be used as the backup backend.
- ```json
- {
- "Version": "2012-10-17",
- "Statement": [
- {
- "Effect": "Allow",
- "Action": [
- "s3:DeleteObject",
- "s3:GetObject",
- "s3:ListBucket",
- "s3:PutObject"
- ],
- "Resource": [
- "arn:aws:s3:::bucket_name",
- "arn:aws:s3:::bucket_name/*"
- ]
- }
- ]
- }
- ```
- This policy grants read-write access to `bucket_name`. For each user a different json need to be created, granting access to dedicated bucket. Those json files can be stored in `/etc/minio/policy` directory.
+{{site.data.alerts.end}}
## OS Filesystem backup with Restic
@@ -480,7 +207,7 @@ The folling directories are backed-up from the cluster nodes
| /home/oss | .cache |
| /root | .cache |
| /home/ansible | .cache .ansible |
-{: .table }
+{: .table .table-white .border-dark }
Backup policies scheduling
@@ -804,6 +531,7 @@ Installation using `Helm` (Release 3):
```shell
kubectl create namespace velero
```
+
- Step 4: Create values.yml for Velero helm chart deployment
```yml
@@ -978,6 +706,35 @@ Installation using `Helm` (Release 3):
In case of using a self-signed certificate for Minio server, custom CA certificate must be passed as `configuration.backupStorageLocation.caCert` parameter (base64 encoded and removing any '\n' character)
{{site.data.alerts.end}}
+#### GitOps installation (ArgoCD)
+
+As an alternative, for GitOps deployment (ArgoCD), instead of putting Minio credentials into helm values in plain text, a Secret can be used to store the credentials.
+
+```yml
+apiVersion: v1
+kind: Secret
+metadata:
+ name: velero-secret
+ namespace: velero
+type: Opaque
+data:
+  cloud: <velero-credentials-file-base64-encoded>
+```
+Where `cloud` is the base64-encoded content of the following credentials file:
+
+```
+[default]
+aws_access_key_id: <minio_velero_user> # Not encoded
+aws_secret_access_key: <minio_velero_password> # Not encoded
+```
+
+And the following helm values need to be provided, instead of `credentials.secretContents`:
+
+```yml
+credentials:
+ existingSecret: velero-secret
+```
+
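+The Secret can also be generated imperatively from a local credentials file (file name and placeholder values are illustrative), letting kubectl do the base64 encoding:
+
+```shell
+# Create the credentials file locally
+cat > credentials-velero <<EOF
+[default]
+aws_access_key_id: <minio_velero_user>
+aws_secret_access_key: <minio_velero_password>
+EOF
+
+# Create the secret; the file content is stored base64 encoded under the "cloud" key
+kubectl create secret generic velero-secret -n velero --from-file=cloud=credentials-velero
+```
+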
### Testing Velero installation
- Step 1: Deploy a testing application (nginx), which uses a Longhorn's Volume for storing its logs (`/var/logs/nginx`)
diff --git a/docs/_docs/certmanager.md b/docs/_docs/certmanager.md
index 10487ba0..d6572e68 100644
--- a/docs/_docs/certmanager.md
+++ b/docs/_docs/certmanager.md
@@ -1,19 +1,19 @@
---
-title: SSL Certificates (Cert-Manager)
+title: TLS Certificates (Cert-Manager)
permalink: /docs/certmanager/
-description: How to deploy a centralized SSL certification management solution based on Cert-manager in our Raspberry Pi Kuberentes cluster.
-last_modified_at: "02-10-2022"
+description: How to deploy a centralized TLS certificates management solution based on Cert-manager in our Raspberry Pi Kuberentes cluster.
+last_modified_at: "17-01-2023"
---
In the Kubernetes cluster, [Cert-Manager](https://cert-manager.io/docs/) can be used to automate the certificate management tasks (issue certificate request, renewals, etc.). Cert-manager adds certificates and certificate issuers as resource types in Kubernetes clusters, and simplifies the process of obtaining, renewing and using those certificates.
-It can issue certificates from a variety of supported sources, including support for auto-signed certificates or use [Let's Encrypt](https://letsencrypt.org/) service to obtain validated SSL certificates. It will ensure certificates are valid and up to date, and attempt to renew certificates at a configured time before expiry. It also keep up to date the associated Kuberentes Secrets storing key pairs used by Ingress resources when securing the incoming communications.
+It can issue certificates from a variety of supported sources, including support for auto-signed certificates or use [Let's Encrypt](https://letsencrypt.org/) service to obtain validated TLS certificates. It will ensure certificates are valid and up to date, and attempt to renew certificates at a configured time before expiry. It also keep up to date the associated Kuberentes Secrets storing key pairs used by Ingress resources when securing the incoming communications.
![picluster-certmanager](/assets/img/cert-manager.png)
## Cert-Manager certificates issuers
-In cert-manager different kind of certificate issuer can be configured to generate signed SSL certificates
+In cert-manager different kind of certificate issuer can be configured to generate signed TLS certificates
### Self-signed Issuer
@@ -55,7 +55,7 @@ Cert-manager add a set of Kubernetes custom resource (CRD):
- `Certificate`, resources that represent a human readable definition of a certificate request that need to be generated and keep up to date by an issuer.
-In order to generate new SSL certificates a `Certificate` resource can be created.
+In order to generate new TLS certificates a `Certificate` resource can be created.
```yml
apiVersion: cert-manager.io/v1
@@ -214,9 +214,25 @@ Algorithm used for creating private keys is ECDSA P-256. The use of this algorit
{{site.data.alerts.end}}
+## Trust Manager Installation
+
+[Trust-manager](https://cert-manager.io/docs/projects/trust-manager/) is an operator for distributing trust bundles across a Kubernetes cluster. trust-manager is designed to complement cert-manager by enabling services to trust X.509 certificates signed by Issuers, distributing data from the trust namespace (cert-manager).
+
+trust-manager ships with a single cluster-scoped `Bundle` resource. A `Bundle` represents a set of data (ConfigMap, Secret) from the trust namespace that should be distributed and made available across the cluster.
+
+To install Trust-Manager from its Helm chart, execute the following command:
+```shell
+helm install trust-manager jetstack/trust-manager --namespace certmanager
+```
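+
+Once installed, a `Bundle` resource can be created to distribute a CA certificate across the cluster. A minimal sketch (resource and key names are illustrative):
+
+```yml
+apiVersion: trust.cert-manager.io/v1alpha1
+kind: Bundle
+metadata:
+  name: ca-bundle
+spec:
+  sources:
+    # CA certificate stored by cert-manager in a Secret within the trust namespace
+    - secret:
+        name: root-secret
+        key: ca.crt
+  target:
+    # Distribute the bundle as a ConfigMap key in every namespace
+    configMap:
+      key: ca-bundle.crt
+```
+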
+{{site.data.alerts.note}}
+
+Trust Manager can be used to automatically share the CA certificate created by Cert-Manager during Linkerd installation.
+
+{{site.data.alerts.end}}
+
## Lets Encrypt Certificates
-Lets Encrypt provide publicly validated TLS certificates for free. Not need to generate auto-signed SSL Certificates for the websites that are not automatic validated by HTTP browsers.
+Lets Encrypt provides publicly validated TLS certificates for free, so there is no need to generate self-signed TLS certificates for the websites, which are not automatically trusted by HTTP browsers.
The process is the following, we issue a request for a certificate to Let's Encrypt for a domain name that we own. Let's Encrypt verifies that we own that domain by using an ACME DNS or HTTP validation mechanism. If the verification is successful, Let's Encrypt provides us with certificates that cert-manager installs in our website (or other TLS encrypted endpoint). These certificates are good for 90 days before the process needs to be repeated. Cert-manager, however, will automatically keep the certificates up-to-date for us.
@@ -224,7 +240,7 @@ For details see cert-manager [ACME issuer type documentation](https://cert-manag
### Let's Encrypt DNS validation method
-DNS validation method requires to expose a "challenge DNS" record within the DNS domain associated to the SSL certificate.
+DNS validation method requires exposing a "challenge DNS" record within the DNS domain associated with the TLS certificate.
This method do not require to expose to the Public Internet the web services hosted within my K3S cluster and so it would be the preferred method to use Let's Encrypt.
1. Cert-manager issues a certifate request to Let's Encrypt
@@ -429,7 +445,7 @@ Execute all the following commands from $HOME directory.
### Lets Encrypt HTTP validation method
-HTTP validation method requires to actually expose a "challenge URL" in the Public Internet using the DNS domain associated to the SSL certificate.
+HTTP validation method requires actually exposing a "challenge URL" on the public Internet using the DNS domain associated with the TLS certificate.
HTTP validation method is as follows:
1. Cert-manager issues a certificate request to Let's Encrypt.
@@ -483,7 +499,7 @@ Enable port forwarding for TCP ports 80/443 to `gateway` node.
|----------|--------|----------|
| 80 | `gateway` | 8080 |
| 443 | `gateway`| 4430 |
-{: .table }
+{: .table .table-white .border-dark }
#### Configure Pi cluster Gateway
diff --git a/docs/_docs/gateway.md b/docs/_docs/gateway.md
index 3beabecc..cfa00878 100644
--- a/docs/_docs/gateway.md
+++ b/docs/_docs/gateway.md
@@ -75,7 +75,7 @@ The installation procedure followed is the described in ["Ubuntu OS Installation
|--------------------| ------------- |-------------|
| Dedicated Disks |[user-data]({{ site.git_edit_address }}/cloud-init/dedicated_disks/gateway/user-data) | [network-config]({{ site.git_edit_address }}/cloud-init/dedicated_disks/gateway/network-config)|
| Centralized SAN | [user-data]({{ site.git_edit_address }}/cloud-init/centralized_san/gateway/user-data) | [network-config]({{ site.git_edit_address }}/cloud-init/centralized_san/gateway/network-config) |
-{: .table }
+{: .table .table-white .border-dark }
## Ubuntu OS Initital Configuration
@@ -377,7 +377,7 @@ Rules are stored in the following location:
### Configuring Ansible Role
-nftables default rules establish by the role can be updated by changing roles variables for `gateway` host (see `gateway` host variables in [`host_vars\gateway.yml`]({{ site.git_edit_address }}/host_vars/gateway.yml) file)
+nftables default rules established by the role can be updated by changing role variables for `gateway` host (see `gateway` host variables in [`ansible/host_vars/gateway.yml`]({{ site.git_edit_address }}/ansible/host_vars/gateway.yml) file)
The rules configured for `gateway` allow incoming traffic (icmp, http, https, iscsi, ssh, dns, dhcp, ntp and snmp) and forward http, https, ssh, dns and ntp traffic.
@@ -526,7 +526,7 @@ For automating configuration tasks, ansible role [**ricsanfre.dnsmasq**](https:/
### Configuring Ansible Role
-DHCP static IP leases and DNS records are taken automatically from ansible inventory file for those hosts with `ip`, `hostname` and `mac` variables are defined. See [`inventory.yml`]({{ site.git_edit_address }}/inventory.yml) file.
+DHCP static IP leases and DNS records are taken automatically from the ansible inventory file for those hosts where `ip`, `hostname` and `mac` variables are defined. See [`ansible/inventory.yml`]({{ site.git_edit_address }}/ansible/inventory.yml) file.
```yml
...
@@ -547,7 +547,7 @@ DHCP static IP leases and DNS records are taken automatically from ansible inven
Additional DHCP static IP leases and DNS records can be added using `dnsmasq_additional_dhcp_hosts` and `dnsmasq_additional_dns_hosts` role variables.
-DNS/DHCP specific configuration, dnsmasq role variables for `gateway` host, are located in [`host_vars\gateway.yml`]({{ site.git_edit_address }}/host_vars/gateway.yml) file.
+DNS/DHCP specific configuration, dnsmasq role variables for `gateway` host, are located in [`ansible/host_vars/gateway.yml`]({{ site.git_edit_address }}/ansible/host_vars/gateway.yml) file.
### Useful Commands
@@ -699,7 +699,7 @@ Check time synchronization with Chronyc
iSCSI configuration in `gateway` has been automated developing a couple of ansible roles: **ricsanfre.storage** for managing LVM and **ricsanfre.iscsi_target** for configuring a iSCSI target.
-Specific `gateway` ansible variables to be used by these roles are stored in [`vars/centralized_san/centralized_san_target.yml`]({{ site.git_edit_address }}/vars/centralized_san/centralized_san_target.yml)
+Specific `gateway` ansible variables to be used by these roles are stored in [`ansible/vars/centralized_san/centralized_san_target.yml`]({{ site.git_edit_address }}/ansible/vars/centralized_san/centralized_san_target.yml)
Further details about iSCSI configurations and step-by-step manual instructions are defined in ["Cluster SAN installation"](/docs/san/).
diff --git a/docs/_docs/hardware.md b/docs/_docs/hardware.md
index 03d72079..ca84454a 100644
--- a/docs/_docs/hardware.md
+++ b/docs/_docs/hardware.md
@@ -115,13 +115,15 @@ Tools used `fio` and `iozone`.
Average-metrics obtained during the tests removing the worst and the best result can be found in the next table and the following graphs:
+
| | Disk Read (MB/s) | Cache Disk Read (MB/s) | Disk Write (MB/s) | 4K Random Read (IOPS) | 4K Random Read (KB/s) | 4K Random Write (IOPS) | 4K Random Write (KB/s) | 4k read (KB/s) | 4k write (KB/s) | 4k random read (KB/s) | 4k random write (KB/s) | Global Score |
| --------- | ---------------- | ---------------------- | ----------------- | --------------------- | --------------------- | ---------------------- | ---------------------- | -------------- | --------------- | --------------------- | ---------------------- | ------- |
| SDCard | 41.89 | 39.02 | 19.23 | 2767.33 | 11071.00 | 974.33 | 3899.33 | 8846.33 | 2230.33 | 7368.67 | 3442.33 | 1169.67 |
| FlashDisk | 55.39 | 50.51 | 21.30 | 3168.40 | 12675.00 | 2700.20 | 10802.40 | 14842.20 | 11561.80 | 11429.60 | 10780.60 | 2413.60 |
| SSD | 335.10 | 304.67 | 125.67 | 22025.67 | 88103.33 | 18731.33 | 74927.00 | 31834.33 | 26213.33 | 17064.33 | 29884.00 | 8295.67 |
| iSCSI | 70.99 | 71.46 | 54.07 | 5104.00 | 20417.00 | 5349.67 | 21400.00 | 7954.33 | 7421.33 | 6177.00 | 7788.33 | 2473.00 |
-{: .table }
+{: .table .table-white .table-bordered .border-dark }
+
- Sequential I/O
diff --git a/docs/_docs/index.md b/docs/_docs/index.md
index 2056f55b..7b970dc6 100644
--- a/docs/_docs/index.md
+++ b/docs/_docs/index.md
@@ -2,22 +2,15 @@
title: What is this project about?
permalink: /docs/home/
redirect_from: /docs/index.html
-description: The scope of this project is to create a kubernetes cluster at home using Raspberry Pis and to use Ansible to automate the deployment and configuration. How to automatically deploy K3s baesed kubernetes cluster, Longhorn as distributed block storage for PODs' persisten volumes, Prometheus as monitoring solution, EFK+Loki stack as centralized log management solution, Velero and Restic as backup solution and Linkerd as service mesh architecture.
-last_modified_at: "30-10-2022"
+description: The scope of this project is to create a kubernetes cluster at home using Raspberry Pis and to automate its deployment and configuration applying IaC (infrastructure as code) and GitOps methodologies with tools like Ansible and ArgoCD. How to automatically deploy K3s based kubernetes cluster, Longhorn as distributed block storage for PODs' persistent volumes, Prometheus as monitoring solution, EFK+Loki stack as centralized log management solution, Velero and Restic as backup solution and Linkerd as service mesh architecture.
+last_modified_at: "25-01-2023"
---
## Scope
-The scope of this project is to create a kubernetes cluster at home using **Raspberry Pis** and to use **Ansible** to automate the deployment and configuration.
+The scope of this project is to create a kubernetes cluster at home using **Raspberry Pis** and to automate its deployment and configuration applying **IaC (infrastructure as code)** and **GitOps** methodologies with tools like [Ansible](https://docs.ansible.com/), [cloud-init](https://cloudinit.readthedocs.io/en/latest/) and [Argo CD](https://argo-cd.readthedocs.io/en/stable/).
-This is an educational project to explore kubernetes cluster configurations using an ARM architecture and its automation using Ansible.
-
-As part of the project the goal is to use a lightweight Kubernetes flavor based on [K3S](https://k3s.io/) and deploy cluster basic services such as: 1) distributed block storage for POD's persistent volumes, [LongHorn](https://longhorn.io/), 2) backup/restore solution for the cluster, [Velero](https://velero.io/) and [Restic](https://restic.net/), 3) service mesh architecture, [Linkerd](https://linkerd.io/), and 4) observability platform based on metrics monitoring solution, [Prometheus](https://prometheus.io/), logging and analytics solution, EFḰ+LG stack ([Elasticsearch](https://www.elastic.co/elasticsearch/)-[Fluentd](https://www.fluentd.org/)/[Fluentbit](https://fluentbit.io/)-[Kibana](https://www.elastic.co/kibana/) + [Loki](https://grafana.com/oss/loki/)-[Grafana](https://grafana.com/oss/grafana/)), and distributed tracing solution, [Tempo](https://grafana.com/oss/tempo/).
-
-
-The following picture shows the set of opensource solutions used for building this cluster:
-
-![Cluster-Icons](/assets/img/pi-cluster-icons.png)
+As part of the project, the goal is to use a lightweight Kubernetes flavor based on [K3S](https://k3s.io/) and deploy cluster basic services such as: 1) distributed block storage for POD's persistent volumes, [LongHorn](https://longhorn.io/), 2) backup/restore solution for the cluster, [Velero](https://velero.io/) and [Restic](https://restic.net/), 3) service mesh architecture, [Linkerd](https://linkerd.io/), and 4) observability platform based on metrics monitoring solution, [Prometheus](https://prometheus.io/), logging and analytics solution, EFK+LG stack ([Elasticsearch](https://www.elastic.co/elasticsearch/)-[Fluentd](https://www.fluentd.org/)/[Fluentbit](https://fluentbit.io/)-[Kibana](https://www.elastic.co/kibana/) + [Loki](https://grafana.com/oss/loki/)-[Grafana](https://grafana.com/oss/grafana/)), and distributed tracing solution, [Tempo](https://grafana.com/oss/tempo/).
## Design Principles
@@ -27,7 +20,196 @@ The following picture shows the set of opensource solutions used for building th
- Use of distributed storage block technology, instead of centralized NFS system, for pod persistent storage. Kubernetes block distributed storage solutions, like Rook/Ceph or Longhorn, in their latest versions have included ARM 64 bits support.
- Use of opensource projects under the [CNCF: Cloud Native Computing Foundation](https://www.cncf.io/) umbrella
- Use latest versions of each opensource project to be able to test the latest Kubernetes capabilities.
-- Use of [Ansible](https://docs.ansible.com/) for automating the configuration of the cluster and [cloud-init](https://cloudinit.readthedocs.io/en/latest/) to automate the initial installation of the Raspberry Pis.
+- Use of [cloud-init](https://cloudinit.readthedocs.io/en/latest/) to automate the initial OS installation.
+- Use of [Ansible](https://docs.ansible.com/) for automating the configuration of the cluster nodes, installation of kubernetes and external services, and triggering cluster bootstrap (ArgoCD bootstrap).
+- Use of [Argo CD](https://argo-cd.readthedocs.io/en/stable/) to automatically provision Kubernetes applications from git repository.
+
+
+## Technology Stack
+
+The following picture shows the set of opensource solutions used for building this cluster:
+
+![Cluster-Icons](/assets/img/pi-cluster-icons.png)
+
+
+
+
+## External Resources and Services
+
+Even when the premise is to deploy all services in the kubernetes cluster, there is still a need for a few external services/resources. Below is a list of external resources/services and why we need them.
+
+### Cloud external services
+
+{{site.data.alerts.note}}
+ These resources are optional, the homelab still works without them but it won't have trusted certificates.
+{{site.data.alerts.end}}
+
+| |Provider | Resource | Purpose |
+| --- | --- | --- | --- |
+| | [Letsencrypt](https://letsencrypt.org/) | TLS CA Authority | Signed valid TLS certificates |
+| |[IONOS](https://www.ionos.es/) | DNS | DNS and [DNS-01 challenge](https://letsencrypt.org/docs/challenge-types/#dns-01-challenge) for certificates |
+{: .table .table-white .border-dark }
+
+**Alternatives:**
+
+1. Use a private PKI (custom CA to sign certificates).
+
+ Currently supported. Only minor changes are required. See details in [Doc: Quick Start instructions](/docs/ansible).
+
+2. Use other DNS provider.
+
+ Cert-manager / Certbot used to automatically obtain certificates from Let's Encrypt can be used with other DNS providers. This will need further modifications in the way cert-manager application is deployed (new providers and/or webhooks/plugins might be required).
+
+   Currently only the acme issuer (letsencrypt) using IONOS as dns-01 challenge provider is configured. Check the list of [supported dns01 providers](https://cert-manager.io/docs/configuration/acme/dns01/#supported-dns01-providers).
+
+### Self-hosted external services
+
+There is another list of services that I have decided to run outside the kubernetes cluster, but without using any public cloud service.
+
+| |External Service | Resource | Purpose |
+| --- | --- | --- | --- |
+| |[Minio](https://min.io) | S3 Object Store | Cluster Backup |
+| |[Hashicorp Vault](https://www.vaultproject.io/) | Secrets Management | Cluster secrets management |
+{: .table .table-white .border-dark .align-middle }
+
## What I have built so far
@@ -42,23 +224,26 @@ From hardware perspective I built two different versions of the cluster
![!Cluster-2.0](/assets/img/pi-cluster-2.0.png)
-
## What I have developed so far
-From software perspective I have develop the following: Ansible playbooks and roles
+{{site.data.alerts.important}}
+All source code can be found in the project's github repository [{{site.data.icons.github}}]({{site.git_address}}).
-1. `cloud-init` config files and Ansible playbooks/roles for automatizing the installation and deployment of Pi-Cluster.
+{{site.data.alerts.end}}
- All source code can be found in the following github repository
+From a software perspective, I have developed the following:
- | Repo | Description | Github |
- | ---| --- | --- |
- | pi-cluster | PI Cluster Ansible | [{{site.data.icons.github}}]({{site.git_address}})|
- {: .table }
+1. **Cloud-init** template files for initial OS installation
+
+ Source code can be found in Pi Cluster Git repository under [`/cloud-init`]({{site.git_address}}/tree/master/cloud-init) directory.
+
+
+2. **Ansible** playbooks and roles for configuring cluster nodes and installing and bootstrapping the K3S cluster
+ Source code can be found in Pi Cluster Git repository under [`/ansible`]({{site.git_address}}/tree/master/ansible) directory.
-2. Aditionally several ansible roles have been developed to automate different configuration tasks on Ubuntu-based servers that can be reused in other projects. These roles are used by Pi-Cluster Ansible Playbooks
+   Additionally, several ansible roles have been developed to automate different configuration tasks on Ubuntu-based servers that can be reused in other projects. These roles are used by Pi-Cluster Ansible Playbooks
Each ansible role source code can be found in its dedicated Github repository and is published in Ansible-Galaxy to facilitate its installation with `ansible-galaxy` command.
@@ -75,14 +260,18 @@ From software perspective I have develop the following: Ansible playbooks and ro
| [ricsanfre.fluentbit](https://galaxy.ansible.com/ricsanfre/fluentbit)| Configure fluentbit | [{{site.data.icons.github}}](https://github.com/ricsanfre/ansible-role-fluentbit) |
| [ricsanfre.minio](https://galaxy.ansible.com/ricsanfre/minio)| Configure Minio S3 server | [{{site.data.icons.github}}](https://github.com/ricsanfre/ansible-role-minio) |
| [ricsanfre.backup](https://galaxy.ansible.com/ricsanfre/backup)| Configure Restic | [{{site.data.icons.github}}](https://github.com/ricsanfre/ansible-role-backup) |
- {: .table }
+ | [ricsanfre.vault](https://galaxy.ansible.com/ricsanfre/vault)| Configure Hashicorp Vault | [{{site.data.icons.github}}](https://github.com/ricsanfre/ansible-role-vault) |
+ {: .table .table-white .border-dark }
+
+3. **Packaged Kubernetes applications** (Helm, Kustomize, manifest files) to be deployed using ArgoCD
+ Source code can be found in Pi Cluster Git repository under [`/argocd`]({{site.git_address}}/tree/master/argocd) directory.
-3. This documentation website [picluster.ricsanfre.com](https://picluster.ricsanfre.com), hosted in Github pages.
+4. This **documentation website** [picluster.ricsanfre.com](https://picluster.ricsanfre.com), hosted in Github pages.
Static website generated with [Jekyll](https://jekyllrb.com/).
- Source code can be found in the Pi-cluster repository under [`docs` directory]({{site.git_address}}/tree/master/docs)
+ Source code can be found in the Pi-cluster repository under [`/docs`]({{site.git_address}}/tree/master/docs) directory.
## Software used and latest version tested
@@ -104,21 +293,24 @@ The software used and the latest version tested of each component
| Service Mesh | Linkerd | v2.12.2 | Helm chart version: linkerd-control-plane-1.9.4 |
| Service Proxy | Traefik | v2.9.1 | Helm chart version: 18.1.0 |
| Storage | Longhorn | v1.3.2 | Helm chart version: 1.3.2 |
-| SSL Certificates | Certmanager | v1.10.0 | Helm chart version: v1.10.0 |
+| TLS Certificates | Certmanager | v1.10.0 | Helm chart version: v1.10.0 |
| Logging | ECK Operator | 2.4.0 | Helm chart version: 2.4.0 |
| Logging | Elastic Search | 8.1.2 | Deployed with ECK Operator |
| Logging | Kibana | 8.1.2 | Deployed with ECK Operator |
| Logging | Fluentbit | 2.0.4 | Helm chart version: 0.21.0 |
| Logging | Fluentd | 1.15.2 | Helm chart version: 0.3.9. [Custom docker image](https://github.com/ricsanfre/fluentd-aggregator) from official v1.15.2|
| Logging | Loki | 2.6.1 | Helm chart grafana/loki version: 3.3.0 |
-| Monitoring | Kube Prometheus Stack | 0.60.1 | Helm chart version: 41.6.1 |
-| Monitoring | Prometheus Operator | 0.60.1 | Installed by Kube Prometheus Stack. Helm chart version: 41.6.1 |
-| Monitoring | Prometheus | 2.39.1 | Installed by Kube Prometheus Stack. Helm chart version: 41.6.1 |
-| Monitoring | AlertManager | 0.24.0 | Installed by Kube Prometheus Stack. Helm chart version: 41.6.1 |
-| Monitoring | Grafana | 9.2.1 | Helm chart version grafana-6.43.0. Installed as dependency of Kube Prometheus Stack chart. Helm chart version: 41.6.1 |
-| Monitoring | Prometheus Node Exporter | 1.3.1 | Helm chart version: prometheus-node-exporter-4.3.1. Installed as dependency of Kube Prometheus Stack chart. Helm chart version: 41.6.1 |
+| Monitoring | Kube Prometheus Stack | 0.61.1 | Helm chart version: 43.3.1 |
+| Monitoring | Prometheus Operator | 0.61.1 | Installed by Kube Prometheus Stack. Helm chart version: 43.3.1 |
+| Monitoring | Prometheus | 2.40.5 | Installed by Kube Prometheus Stack. Helm chart version: 43.3.1 |
+| Monitoring | AlertManager | 0.25.0 | Installed by Kube Prometheus Stack. Helm chart version: 43.3.1 |
+| Monitoring | Grafana | 9.3.1 | Helm chart version grafana-6.48.2. Installed as dependency of Kube Prometheus Stack chart. Helm chart version: 43.3.1 |
+| Monitoring | Prometheus Node Exporter | 1.5.0 | Helm chart version: prometheus-node-exporter-4.8.2. Installed as dependency of Kube Prometheus Stack chart. Helm chart version: 43.3.1 |
| Monitoring | Prometheus Elasticsearch Exporter | 1.5.0 | Helm chart version: prometheus-elasticsearch-exporter-4.15.1 |
| Backup | Minio | RELEASE.2022-09-22T18-57-27Z | |
| Backup | Restic | 0.12.1 | |
| Backup | Velero | 1.9.3 | Helm chart version: 2.32.1 |
-{: .table }
+| Secrets | Hashicorp Vault | 1.12.2 | |
+| Secrets| External Secret Operator | 0.7.1 | Helm chart version: 0.7.1 |
+| GitOps | Argo CD | v2.5.6 | Helm chart version: 5.17.1 |
+{: .table .table-white .border-dark }
diff --git a/docs/_docs/ingress-controller.md b/docs/_docs/ingress-controller.md
index d5194b02..9b4c07be 100644
--- a/docs/_docs/ingress-controller.md
+++ b/docs/_docs/ingress-controller.md
@@ -2,7 +2,7 @@
title: Ingress Controller (Traefik)
permalink: /docs/traefik/
description: How to configure Ingress Contoller based on Traefik in our Raspberry Pi Kuberentes cluster.
-last_modified_at: "10-09-2022"
+last_modified_at: "09-01-2023"
---
All HTTP/HTTPS traffic comming to K3S exposed services should be handled by a Ingress Controller.
@@ -75,11 +75,16 @@ Installation using `Helm` (Release 3):
spec:
# Set load balancer external IP
loadBalancerIP: 10.0.0.100
- # Enable cross namespace references
+
providers:
+ # Enable cross namespace references
kubernetesCRD:
enabled: true
- allowCrossNamespace: true
+ allowCrossNamespace: true
+ # Enable published service
+ kubernetesIngress:
+ publishedService:
+ enabled: true
```
- Step 5: Install Traefik
@@ -161,8 +166,8 @@ This configuration enables Traefik access log writing to `/data/acess.log` file
As alternative to standard `Ingress` kuberentes resources, Traefik's specific CRD, `IngressRoute` can be used to define access to cluster services. This CRD allows advanced routing configurations not possible to do with `Ingress` available Traefik's annotations.
-`IngressRoute` resources only can reference other Traefik's resources, i.e: `Middleware` located in the same namespace.
-To change this, and allow IngresRoute access resources defined in other namespaces, [`allowCrossNamespace`](https://doc.traefik.io/traefik/providers/kubernetes-crd/#allowcrossnamespace) Traefik helm chart value must be set to true.
+`IngressRoute` and `Ingress` resources can only reference other Traefik resources (e.g. `Middleware`) located in the same namespace.
+To change this, and allow `Ingress`/`IngressRoute` resources to reference resources defined in other namespaces, the [`allowCrossNamespace`](https://doc.traefik.io/traefik/providers/kubernetes-crd/#allowcrossnamespace) Traefik helm chart value must be set to true.
The following values need to be specified within helm chart configuration.
@@ -175,6 +180,26 @@ providers:
allowCrossNamespace: true
```
+#### Enabling Published service
+
+By default, when using an external load balancer (Metal LB), Traefik does not update the `status.loadBalancer` field in Ingress resources. See [Traefik issue #3377](https://github.com/traefik/traefik/issues/3377).
+
+In Argo CD, this field is used to obtain the Ingress object's health status. Without it, Ingress resources never report a health status and the application gets stuck.
+
+Traefik needs to be configured [enabling published service](https://doc.traefik.io/traefik/providers/kubernetes-ingress/#publishedservice), so that Traefik copies its service's `status.loadBalancer` (containing the service's external IPs, allocated by Metal-LB) to the ingresses.
+
+See more details in [Argo CD issue #968](https://github.com/argoproj/argo-cd/issues/968)
+
+The following values need to be specified within helm chart configuration.
+
+```yml
+providers:
+ # Enable published service
+ kubernetesIngress:
+ publishedService:
+ enabled: true
+```
+
### Creating Traefik-metric Service
A Kuberentes Service must be created for enabling the access to Prometheus metrics
@@ -209,6 +234,21 @@ A Kuberentes Service must be created for enabling the access to Prometheus metri
```shell
curl http://:9100/metrics
```
+{{site.data.alerts.note}}
+
+Latest versions of the Traefik helm chart automatically create this metrics service (tested with chart version 20.6.0).
+The following additional values need to be provided:
+
+```yml
+# Enable prometheus metric service
+metrics:
+ prometheus:
+ service:
+ enabled: true
+```
+
+{{site.data.alerts.end}}
+
### Enabling access to Traefik-Dashboard
@@ -307,6 +347,53 @@ A Kuberentes Service must be created for enabling the access to UI Dashboard
- Acces UI through configured dns: `https://traefik.picluster.ricsanfre.com/dashboard/`
+{{site.data.alerts.note}}
+
+Instead of defining a Service and an Ingress resource, a Traefik `IngressRoute` object can be created to access Traefik's internal service. There is no need to expose the Traefik dashboard as a Service.
+
+```yml
+# IngressRoute https
+apiVersion: traefik.containo.us/v1alpha1
+kind: IngressRoute
+metadata:
+ name: traefik-dashboard
+ namespace: traefik
+spec:
+ entryPoints:
+ - websecure
+ routes:
+ - kind: Rule
+      match: Host(`traefik.picluster.ricsanfre.com`) && (PathPrefix(`/dashboard`) || PathPrefix(`/api`))
+ services:
+ - kind: TraefikService
+ name: api@internal
+ tls:
+ secretName: traefik-secret
+```
+
+For generating the TLS secret `traefik-secret`, containing the certificate, cert-manager can be used:
+
+```yml
+# Create certificate
+apiVersion: cert-manager.io/v1
+kind: Certificate
+metadata:
+ name: traefik-cert
+ namespace: traefik
+spec:
+ secretName: traefik-secret
+ issuerRef:
+ name: ca-issuer
+ kind: ClusterIssuer
+ commonName: traefik.picluster.ricsanfre.com
+ dnsNames:
+ - traefik.picluster.ricsanfre.com
+ privateKey:
+ algorithm: ECDSA
+```
+
+{{site.data.alerts.end}}
+
## Configuring access to cluster services with Traefik
@@ -379,7 +466,7 @@ data:
type: kubernetes.io/tls
```
-This manual step can be avoided using Cert-manager and annotating the Ingress resource: `cert-manager.io/cluster-issuer: `. See further details in [SSL certification management documentation](/docs/certmanager/).
+This manual step can be avoided using Cert-manager and annotating the Ingress resource: `cert-manager.io/cluster-issuer: `. See further details in [TLS certification management documentation](/docs/certmanager/).
#### Redirecting HTTP traffic to HTTPS
@@ -434,6 +521,29 @@ spec:
number: 80
```
+A global Traefik ingress route can be created for redirecting all incoming HTTP traffic to HTTPS:
+
+```yml
+apiVersion: traefik.containo.us/v1alpha1
+kind: IngressRoute
+metadata:
+ name: http-to-https-redirect
+ namespace: traefik
+spec:
+ entryPoints:
+ - web
+ routes:
+ - kind: Rule
+ match: PathPrefix(`/`)
+ priority: 1
+ middlewares:
+ - name: redirect-to-https
+ services:
+ - kind: TraefikService
+ name: noop@internal
+```
+This route has priority 1, the lowest possible priority, so it acts as a catch-all: any other, more specific routing rule takes precedence over it.
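+
+If the referenced `redirect-to-https` middleware is not already defined earlier in this section, a minimal definition could be (sketch):
+
+```yml
+apiVersion: traefik.containo.us/v1alpha1
+kind: Middleware
+metadata:
+  name: redirect-to-https
+  namespace: traefik
+spec:
+  # Redirect any HTTP request to its HTTPS equivalent
+  redirectScheme:
+    scheme: https
+    permanent: true
+```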
+
### Providing HTTP basic authentication
In case that the backend does not provide authentication/autherization functionality (i.e: longhorn ui), Traefik can be configured to provide HTTP authentication mechanism (basic authentication, digest and forward authentication).
diff --git a/docs/_docs/installing-k3s.md b/docs/_docs/installing-k3s.md
index 12bf3b1d..b934e05a 100644
--- a/docs/_docs/installing-k3s.md
+++ b/docs/_docs/installing-k3s.md
@@ -85,14 +85,15 @@ Enable `cgroup` via boot commandline, if not already enabled, for Ubuntu on a Ra
For installing the master node execute the following command:
```shell
- curl -sfL https://get.k3s.io | K3S_TOKEN= sh -s - server --write-kubeconfig-mode '0644' --node-taint 'node-role.kubernetes.io/master=true:NoSchedule' --disable 'servicelb' --disable 'traefik' --kube-controller-manager-arg 'bind-address=0.0.0.0' --kube-proxy-arg 'metrics-bind-address=0.0.0.0' --kube-scheduler-arg 'bind-address=0.0.0.0' --kubelet-arg 'config=/etc/rancher/k3s/kubelet.config' --kube-controller-manager-arg 'terminated-pod-gc-threshold=10'
+  curl -sfL https://get.k3s.io | K3S_TOKEN=<server_token> sh -s - server --write-kubeconfig-mode '0644' --node-taint 'node-role.kubernetes.io/master=true:NoSchedule' --disable 'servicelb' --disable 'traefik' --disable 'local-storage' --kube-controller-manager-arg 'bind-address=0.0.0.0' --kube-proxy-arg 'metrics-bind-address=0.0.0.0' --kube-scheduler-arg 'bind-address=0.0.0.0' --kubelet-arg 'config=/etc/rancher/k3s/kubelet.config' --kube-controller-manager-arg 'terminated-pod-gc-threshold=10'
```
Where:
- `server_token` is shared secret within the cluster for allowing connection of worker nodes
- `--write-kubeconfig-mode '0644'` gives read permissions to kubeconfig file located in `/etc/rancher/k3s/k3s.yaml`
- `--node-taint 'node-role.kubernetes.io/master=true:NoSchedule'` makes master node not schedulable to run any pod. Only pods marked with specific tolerance will be scheduled on master node.
- - `--disable servicelb` to disable default service load balancer installed by K3S (Klipper Load Balancer)
- - `--disable traefik` to disable default ingress controller installed by K3S (Traefik)
+ - `--disable servicelb` to disable default service load balancer installed by K3S (Klipper Load Balancer). Metallb will be used instead.
+ - `--disable local-storage` to disable local storage persistent volumes provider installed by K3S (local-path-provisioner). Longhorn will be used instead
+ - `--disable traefik` to disable default ingress controller installed by K3S (Traefik). Traefik will be installed from helm chart.
 - `--kube-controller-manager-arg`, `--kube-scheduler-arg` and `--kube-proxy-arg` to bind those components not only to 127.0.0.1 and enable metrics scraping from an external node.
 - `--kubelet-arg 'config=/etc/rancher/k3s/kubelet.config'` provides kubelet configuration parameters. See [Kubernetes Doc: Kubelet Config File](https://kubernetes.io/docs/tasks/administer-cluster/kubelet-config-file/)
- `--kube-controller-manager-arg 'terminated-pod-gc-threshold=10'`. Setting limit to 10 terminated pods that can exist before the terminated pod garbage collector starts deleting terminated pods. See [Kubernetes Doc: Pod Garbage collection](https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-garbage-collection)
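+
+Once the installation finishes, the node registration and the master taint can be verified. A minimal sketch, assuming the command is executed on the master node (the kubeconfig is world-readable thanks to `--write-kubeconfig-mode '0644'`; `<master_node>` is a placeholder):
+
+```shell
+# Check the master node reaches Ready status
+kubectl --kubeconfig /etc/rancher/k3s/k3s.yaml get nodes -o wide
+
+# Confirm the NoSchedule taint applied to the master node
+kubectl --kubeconfig /etc/rancher/k3s/k3s.yaml describe node <master_node> | grep Taints
+```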
diff --git a/docs/_docs/k3s-networking.md b/docs/_docs/k3s-networking.md
index 84fe89d6..0d162b4f 100644
--- a/docs/_docs/k3s-networking.md
+++ b/docs/_docs/k3s-networking.md
@@ -1,8 +1,8 @@
---
title: K3S Networking
permalink: /docs/k3s-networking/
-description: How to configure K3S networking inour Raspberry Pi Kubernetes cluster. How to disable default K3s load balancer and configure Metal LB.
-last_modified_at: "21-07-2022"
+description: Description of K3S default networking components and how they can be configured.
+last_modified_at: "17-01-2023"
---
{{site.data.alerts.note}}
@@ -26,10 +26,10 @@ k3s server installation options can be provided in order to configure Network CI
| k3s server option | default value | Description |
| ----- | ---- |---- |
-| `--cluster-cidr value` | “10.42.0.0/16” | Network CIDR to use for pod IPs
-| `--service-cidr value` | “10.43.0.0/16” | Network CIDR to use for services IPs
-| `--flannel-backend value` | “vxlan” | ‘none’ to disable or ‘vxlan’, ‘ipsec’, ‘host-gw’, or ‘wireguard’
-{: .table }
+| `--cluster-cidr` | "10.42.0.0/16" | Network CIDR to use for pod IPs
+| `--service-cidr` | "10.43.0.0/16" | Network CIDR to use for services IPs
+| `--flannel-backend` | "vxlan" | "none" to disable or "vxlan", "ipsec", "host-gw", or "wireguard"
+{: .table .table-white .border-dark }
By default, flannel will have a 10.42.X.0/24 subnet allocated to each node (X=0, 1, 2, 3, etc.), K3S Pod will use IP address from subnet's address space.
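+
+The subnet allocated to each node can be checked directly on the node objects. A small sketch, assuming `kubectl` access to the cluster:
+
+```shell
+# Show the pod CIDR assigned by flannel to every node
+kubectl get nodes -o custom-columns=NAME:.metadata.name,POD_CIDR:.spec.podCIDR
+```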
@@ -69,164 +69,24 @@ k3s server installation options can be provided in order to configure coreDNS
| k3s server option | default value | Description |
| ----- | ---- |---- |
-| `--cluster-dns value` | “10.43.0.10” | Cluster IP for coredns service. Should be in your service-cidr range
-| `--cluster-domain value` | “cluster.local” | Cluster Domain
-{: .table }
+| `--cluster-dns` | "10.43.0.10" | Cluster IP for coredns service. Should be in your service-cidr range
+| `--cluster-domain` | "cluster.local" | Cluster Domain
+{: .table .table-white .border-dark }
## Traefik as Ingress Controller
-Traefik is a modern HTTP reverse proxy and load balancer made to deploy microservices with ease. It simplifies networking complexity while designing, deploying, and running applications.
+[Traefik](https://traefik.io/) is a modern HTTP reverse proxy and load balancer made to deploy microservices with ease. It is embedded in the K3s installation and deployed by default when starting a K3s cluster.
-## Metal LB as Cluster Load Balancer
+The Traefik K3s add-on is disabled during K3s installation, so it can be installed manually afterwards to have full control over the version and its initial configuration.
-Instead of using the embeded service load balancer that only comes with K3S, a more generic kubernetes load balancer like [Metal LB](https://metallb.universe.tf/) will be used. This load balancer can be used with almost any distribution of kubernetes.
+To disable embedded Traefik, install K3s with `--disable traefik` option.
-In order to use Metal LB, K3S embedded Klipper Load Balancer must be disabled: K3s server installation option `--disable servicelb`.
+Further details about how to configure Traefik can be found in ["Ingress-Controller Traefik documentation"](/docs/traefik).
-K3S fresh installation (disabling embedded service load balanced) the following pods and services are started by default:
+## Klipper-LB as Load Balancer
-```shell
-kubectl get pods --all-namespaces
-NAMESPACE NAME READY STATUS RESTARTS AGE
-kube-system metrics-server-86cbb8457f-k52mz 1/1 Running 0 7m45s
-kube-system local-path-provisioner-5ff76fc89d-qzfpp 1/1 Running 0 7m45s
-kube-system coredns-7448499f4d-wk4sd 1/1 Running 0 7m45s
-kube-system helm-install-traefik-crd-5r72x 0/1 Completed 0 7m46s
-kube-system helm-install-traefik-86kpb 0/1 Completed 2 7m46s
-kube-system traefik-97b44b794-vtj7x 1/1 Running 0 5m24s
+[Klipper Load Balancer](https://github.com/k3s-io/klipper-lb) is deployed by default when starting the k3s cluster.
+In this cluster, the Metal LB load balancer will be used instead, so Klipper-LB needs to be disabled first.
+To disable the embedded LB, configure all servers in the cluster with the `--disable servicelb` option.
-kubectl get services --all-namespaces
-NAMESPACE NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
-default kubernetes ClusterIP 10.43.0.1 443/TCP 8m32s
-kube-system kube-dns ClusterIP 10.43.0.10 53/UDP,53/TCP,9153/TCP 8m29s
-kube-system metrics-server ClusterIP 10.43.169.140 443/TCP 8m28s
-kube-system traefik LoadBalancer 10.43.50.56 80:30582/TCP,443:30123/TCP 5m53s
-```
-
-### Why Metal LB
-
-Kubernetes does not offer an implementation of network load balancers (Services of type LoadBalancer) for bare-metal clusters. The implementations of network load balancers that Kubernetes does ship with are all glue code that calls out to various IaaS platforms (GCP, AWS, Azure…). In bare-metal kubernetes clusters, like the one I am building, "LoadBalancer" services will remain in the “pending” state indefinitely when created.
-(see in previous output of `kubectl get services` command how `traefik` LoadBAlancer service "External IP" is "pending")
-
-For Bare-metal cluster only two optios remain availale for managing incoming traffic to the cluster: “NodePort” and “externalIPs” services. Both of these options have significant downsides for production use, which makes bare-metal clusters second-class citizens in the Kubernetes ecosystem.
-
-MetalLB provides a network load balacer that can be integrated with standard network equipment, so that external services on bare-metal clusters can be accesible using a pool of "external" ip addresses.
-
-### How Metal LB works
-
-MetalLB can work in two modes, BGP and Layer 2. The major advantage of the layer 2 mode is its universality: it will work on any Ethernet network. In BGP mode specific routers are needed to deploy the solution.
-
-In [layer 2 mode](https://metallb.universe.tf/concepts/layer2/), one node assumes the responsibility of advertising a particular kuberentes service (LoadBalance type) to the local network, this is call the `leader` node. From the network’s perspective, it simply looks like that node has multiple IP addresses assigned to its network interface and it just responds to ARP requests for IPv4 services, and NDP requests for IPv6.
-
-When configuring MetalLB in layer 2 mode, all traffic for a service IP goes to the leader node. From there, kube-proxy spreads the traffic to all the service’s pods. Thus MetalLB layer 2 really does not implement a load balancer. Rather, it implements a failover mechanism so that a different node can take over should the current leader node fail for some reason.
-
-MetalLB consists of two different pods:
-
-- Controller: resposible for handling IP address assigments from a configured Pool.
-- Speaker: DaemonSet pod running on each worker node, resposible for announcing the allocated IPs.
-
-
-![metal-lb-architecture](/assets/img/metallb_architecture.png)
-
-
-### Requesting Specific IPs
-
-MetalLB respects the Kubernetes service `spec.loadBalancerIP` parameter, so if a static IP address from the available pool need to be set up for a specific service, it can be requested by setting that parameter. If MetalLB does not own the requested address, or if the address is already in use by another service, assignment will fail and MetalLB will log a warning event visible in `kubectl describe service `.
-
-
-### Install Metal Load Balancer
-
-
-Installation using `Helm` (Release 3):
-
-- Step 1: Add the Metal LB Helm repository:
-
- ```shell
- helm repo add metallb https://metallb.github.io/metallb
- ```
-
-- Step 2: Fetch the latest charts from the repository:
-
- ```shell
- helm repo update
- ```
-
-- Step 3: Create namespace
-
- ```shell
- kubectl create namespace metallb
- ```
-
-- Step 4: Install Metallb in the `metallb` namespace.
-
- ```shell
- helm install metallb metallb/metallb --namespace metallb
- ```
-
-
-- Step 5: Confirm that the deployment succeeded, run:
-
- ```shell
- kubectl -n metallb get pod
- ```
-
-- Step 6: Configure IP addess pool and the announcement method (L2 configuration)
-
- Create the following manifest file: `metallb-config.yaml`
- ```yml
- ---
- # Metallb address pool
- apiVersion: metallb.io/v1beta1
- kind: IPAddressPool
- metadata:
- name: picluster-pool
- namespace: metallb
- spec:
- addresses:
- - 10.0.0.100-10.0.0.200
-
- ---
- # L2 configuration
- apiVersion: metallb.io/v1beta1
- kind: L2Advertisement
- metadata:
- name: example
- namespace: metallb
- spec:
- ipAddressPools:
- - picluster-pool
-
- ```
-
- Apply the manifest file
-
- ```shell
- kubectl apply -f metallb-config.yaml
- ```
-
- After a while, metallb is deployed and traefik LoadBalancer service gets its externa-ip from the configured pool and is accessible from outside the cluster
-
- ```shell
- kubectl get services --all-namespaces
- NAMESPACE NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
- default kubernetes ClusterIP 10.43.0.1 443/TCP 63m
- kube-system kube-dns ClusterIP 10.43.0.10 53/UDP,53/TCP,9153/TCP 63m
- kube-system metrics-server ClusterIP 10.43.169.140 443/TCP 63m
- kube-system traefik LoadBalancer 10.43.50.56 10.0.0.100 80:30582/TCP,443:30123/TCP 60m
- ```
-{{site.data.alerts.important}}
-
- In previous chart releases there was a way to configure MetallB in deployment time providing the following values.yaml:
-
- ```yml
- configInline:
- address-pools:
- - name: default
- protocol: layer2
- addresses:
- - 10.0.0.100-10.0.0.200
- ```
- Helm chart `configInline` in `values.yaml` has been deprecated since MetalLB 0.13.
- Configuration must be done creating the corresponding MetalLB Kubernets CRD (`IPAddressPool` and `L2Advertisement`). See [MetalLB configuration documentation](https://metallb.universe.tf/configuration/).
-
-{{site.data.alerts.end}}
\ No newline at end of file
+Further details about how to install Metal LB can be found in ["Load Balancer (Metal LB) documentation"](/docs/metallb/).
diff --git a/docs/_docs/k8s-commands.md b/docs/_docs/k8s-commands.md
index 571e4fa5..6bfd5bf0 100644
--- a/docs/_docs/k8s-commands.md
+++ b/docs/_docs/k8s-commands.md
@@ -176,3 +176,42 @@ Based on procedure described [in this post](https://alysivji.github.io/helm-post
Ansible does not support yet --post-rendering option to helm module. There is [open issue in kubernetes core asible collection](https://github.com/ansible-collections/kubernetes.core/issues/30) for providing this functionallity.
{{site.data.alerts.end}}
+
+
+## Move pods from one node to another
+
+Sometimes a pod needs to be moved to a different node, for example because it is pushing the current node to its resource limits while another node is less loaded.
+
+The procedure is the following:
+
+- Step 1: Get information about the node where the pod is running
+
+ ```shell
+ kubectl get pod <pod_name> -n <namespace> -o wide
+ ```
+
+- Step 2: Cordon the node where the pod is currently running, so Kubernetes scheduler cannot use it to schedule new PODs
+
+ ```shell
+ kubectl cordon <node_name>
+ ```
+
+ {{site.data.alerts.note}}
+
+ Kubernetes cordon is an operation that marks or taints a node in your existing node pool as unschedulable. By using it on a node, you can be sure that no new pods will be scheduled for this node. The command prevents the Kubernetes scheduler from placing new pods onto that node, but it doesn’t affect existing pods on that node.
+
+ {{site.data.alerts.end}}
+
+- Step 3: Delete the pod. Assuming the pod is controlled by a ReplicaSet, Deployment or StatefulSet, Kubernetes will automatically reschedule it on any node that is not cordoned
+
+ ```shell
+ kubectl delete pod <pod_name> -n <namespace>
+ ```
+
+- Step 4: Check that the pod has been started on another node
+
+- Step 5: Uncordon the node, so it can be used again to schedule pods.
+
+ ```shell
+ kubectl uncordon <node_name>
+ ```
\ No newline at end of file
diff --git a/docs/_docs/loki.md b/docs/_docs/loki.md
index 6ed613e6..7a44fb7d 100644
--- a/docs/_docs/loki.md
+++ b/docs/_docs/loki.md
@@ -114,7 +114,6 @@ Installation from helm chart. There are two alternatives:
This is the helm chart we will use to deploy Loki in HA (simple scalable deployment mode).
-
- Step 1: Add the Grafana repository:
```shell
helm repo add grafana https://grafana.github.io/helm-charts
@@ -219,7 +218,127 @@ Installation from helm chart. There are two alternatives:
```shell
kubectl get pods -l app.kubernetes.io/name=loki -n logging
```
-
+
+### GitOps installation (ArgoCD)
+
+As an alternative, for GitOps deployments (using ArgoCD), instead of hardcoding minio credentials within Helm chart values, an external secret can be configured leveraging [Loki's capability of using environment variables in the configuration file](https://grafana.com/docs/loki/latest/configuration/#use-environment-variables-in-the-configuration).
+
+
+The following secret needs to be created:
+```yml
+apiVersion: v1
+kind: Secret
+metadata:
+ name: loki-minio-secret
+ namespace: logging
+type: Opaque
+data:
+ MINIO_ACCESS_KEY_ID: < minio_loki_user | b64encode >
+ MINIO_SECRET_ACCESS_KEY: < minio_loki_key | b64encode >
+```
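+
+The same secret can also be generated imperatively with `kubectl`, which takes care of the base64 encoding (the values shown are placeholders):
+
+```shell
+kubectl create secret generic loki-minio-secret -n logging \
+  --from-literal=MINIO_ACCESS_KEY_ID=<minio_loki_user> \
+  --from-literal=MINIO_SECRET_ACCESS_KEY=<minio_loki_key>
+```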
+
+And the following Helm values have to be provided:
+
+```yml
+loki:
+ # Disable multi-tenant support
+ auth_enabled: false
+
+ # S3 backend storage configuration
+ storage:
+ bucketNames:
+ chunks: k3s-loki
+ ruler: k3s-loki
+ type: s3
+ s3:
+ endpoint: s3.picluster.ricsanfre.com:9091
+ region: eu-west-1
+ secretAccessKey: ${MINIO_SECRET_ACCESS_KEY}
+ accessKeyId: ${MINIO_ACCESS_KEY_ID}
+ s3ForcePathStyle: true
+ insecure: false
+ http_config:
+ idle_conn_timeout: 90s
+ response_header_timeout: 0s
+ insecure_skip_verify: false
+
+# Configuration for the write
+write:
+ # Number of replicas for the write
+ replicas: 2
+ persistence:
+ # -- Size of persistent disk
+ size: 10Gi
+ # -- Storage class to be used.
+ storageClass: longhorn
+
+ # Enable environment variables in config file
+ # https://grafana.com/docs/loki/latest/configuration/#use-environment-variables-in-the-configuration
+ extraArgs:
+ - '-config.expand-env=true'
+ extraEnv:
+ - name: MINIO_ACCESS_KEY_ID
+ valueFrom:
+ secretKeyRef:
+ name: loki-minio-secret
+ key: MINIO_ACCESS_KEY_ID
+ - name: MINIO_SECRET_ACCESS_KEY
+ valueFrom:
+ secretKeyRef:
+ name: loki-minio-secret
+ key: MINIO_SECRET_ACCESS_KEY
+
+# Configuration for the read
+read:
+ # Number of replicas for the read
+ replicas: 2
+ persistence:
+ # -- Size of persistent disk
+ size: 10Gi
+ # -- Storage class to be used.
+ storageClass: longhorn
+
+ # Enable environment variables in config file
+ # https://grafana.com/docs/loki/latest/configuration/#use-environment-variables-in-the-configuration
+ extraArgs:
+ - '-config.expand-env=true'
+ extraEnv:
+ - name: MINIO_ACCESS_KEY_ID
+ valueFrom:
+ secretKeyRef:
+ name: loki-minio-secret
+ key: MINIO_ACCESS_KEY_ID
+ - name: MINIO_SECRET_ACCESS_KEY
+ valueFrom:
+ secretKeyRef:
+ name: loki-minio-secret
+ key: MINIO_SECRET_ACCESS_KEY
+
+# Configuration for the gateway
+gateway:
+ # -- Specifies whether the gateway should be enabled
+ enabled: true
+ # -- Number of replicas for the gateway
+ replicas: 1
+
+# Disable minio installation
+minio:
+ enabled: false
+
+# Disable self-monitoring
+monitoring:
+ selfMonitoring:
+ enabled: false
+ grafanaAgent:
+ installOperator: false
+ lokiCanary:
+ enabled: false
+
+# Disable helm-test
+test:
+ enabled: false
+
+```
## Grafana Configuration
diff --git a/docs/_docs/longhorn.md b/docs/_docs/longhorn.md
index 73e0c430..1c45bad2 100644
--- a/docs/_docs/longhorn.md
+++ b/docs/_docs/longhorn.md
@@ -1,5 +1,5 @@
---
-title: Distributed Storage (Longhorn)
+title: Distributed Block Storage (Longhorn)
permalink: /docs/longhorn/
description: How to deploy distributed block storage solution based on Longhorn in our Raspberry Pi Kubernetes Cluster.
last_modified_at: "11-10-2022"
@@ -259,6 +259,15 @@ Ansible playbook has been developed for automatically create this testing POD `r
## Setting Longhorn as default Kubernetes StorageClass
+
+{{site.data.alerts.note}}
+
+This step is not needed if K3s is installed disabling Local Path Provisioner (installation option: `--disable local-storage`).
+
+In case this parameter is not configured, the following procedure needs to be applied.
+
+{{site.data.alerts.end}}
+
By default K3S comes with Rancher’s Local Path Provisioner and this enables the ability to create persistent volume claims out of the box using local storage on the respective node.
In order to use Longhorn as default storageClass whenever a new Helm is installed, Local Path Provisioner need to be removed from default storage class.
@@ -275,7 +284,7 @@ local-path (default) rancher.io/local-path Delete WaitForFirstConsu
longhorn (default) driver.longhorn.io Delete Immediate true 3m27s
```
-Both Local-Path and longhor are defined as default storage classes:
+Both Local-Path and longhorn are defined as default storage classes:
Remove Local path from default storage classes with the command:
@@ -284,3 +293,5 @@ kubectl patch storageclass local-path -p '{"metadata": {"annotations":{"storagec
```
Procedure is explained in kubernetes documentation: ["Change default Storage Class"](https://kubernetes.io/docs/tasks/administer-cluster/change-default-storage-class/).
+
+
diff --git a/docs/_docs/metallb.md b/docs/_docs/metallb.md
new file mode 100644
index 00000000..52fb1e8f
--- /dev/null
+++ b/docs/_docs/metallb.md
@@ -0,0 +1,159 @@
+---
+title: Load Balancer (Metal LB)
+permalink: /docs/metallb/
+description: How to configure Metal LB as load balancer in our Raspberry Pi Kubernetes cluster. How to disable default K3s load balancer and configure Metal LB.
+last_modified_at: "17-01-2023"
+---
+
+Instead of using the embedded service load balancer that comes with K3S (Klipper-LB), a more generic Kubernetes load balancer like [Metal LB](https://metallb.universe.tf/) will be used. This load balancer can be used with almost any distribution of Kubernetes.
+
+In order to use Metal LB, K3S embedded Klipper Load Balancer must be disabled: K3s server installation option `--disable servicelb`.
+
+After a fresh K3S installation with the embedded service load balancer disabled, the following pods and services are started by default:
+
+```shell
+kubectl get pods --all-namespaces
+NAMESPACE NAME READY STATUS RESTARTS AGE
+kube-system metrics-server-86cbb8457f-k52mz 1/1 Running 0 7m45s
+kube-system local-path-provisioner-5ff76fc89d-qzfpp 1/1 Running 0 7m45s
+kube-system coredns-7448499f4d-wk4sd 1/1 Running 0 7m45s
+kube-system helm-install-traefik-crd-5r72x 0/1 Completed 0 7m46s
+kube-system helm-install-traefik-86kpb 0/1 Completed 2 7m46s
+kube-system traefik-97b44b794-vtj7x 1/1 Running 0 5m24s
+
+kubectl get services --all-namespaces
+NAMESPACE NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
+default kubernetes ClusterIP 10.43.0.1 443/TCP 8m32s
+kube-system kube-dns ClusterIP 10.43.0.10 53/UDP,53/TCP,9153/TCP 8m29s
+kube-system metrics-server ClusterIP 10.43.169.140 443/TCP 8m28s
+kube-system traefik LoadBalancer 10.43.50.56 80:30582/TCP,443:30123/TCP 5m53s
+```
+
+## Why Metal LB
+
+Kubernetes does not offer an implementation of network load balancers (Services of type LoadBalancer) for bare-metal clusters. The implementations of network load balancers that Kubernetes does ship with are all glue code that calls out to various IaaS platforms (GCP, AWS, Azure…).
+
+In bare-metal kubernetes clusters, like the one I am building, "LoadBalancer" services will remain in the "pending" state indefinitely when created. See in the previous output of the `kubectl get services` command how the `traefik` LoadBalancer service EXTERNAL-IP remains "pending".
+
+For bare-metal clusters, only two options remain available for managing incoming traffic to the cluster: "NodePort" and "externalIPs" services. Both of these options have significant downsides for production use, which makes bare-metal clusters second-class citizens in the Kubernetes ecosystem.
+
+MetalLB provides a network load balancer that can be integrated with standard network equipment, so that external services on bare-metal clusters can be accessible using a pool of "external" IP addresses.
+
+## How Metal LB works
+
+MetalLB can work in two modes, BGP and Layer 2. The major advantage of the layer 2 mode is its universality: it will work on any Ethernet network. In BGP mode specific routers are needed to deploy the solution.
+
+In [layer 2 mode](https://metallb.universe.tf/concepts/layer2/), one node assumes the responsibility of advertising a particular Kubernetes service (LoadBalancer type) to the local network; this is called the `leader` node. From the network's perspective, it simply looks like that node has multiple IP addresses assigned to its network interface: it responds to ARP requests for IPv4 services and NDP requests for IPv6.
+
+When configuring MetalLB in layer 2 mode, all traffic for a service IP goes to the leader node. From there, kube-proxy spreads the traffic to all the service’s pods. Thus MetalLB layer 2 really does not implement a load balancer. Rather, it implements a failover mechanism so that a different node can take over should the current leader node fail for some reason.
+
+MetalLB consists of two different pods:
+
+- Controller: responsible for handling IP address assignments from a configured pool.
+- Speaker: DaemonSet pod running on each worker node, responsible for announcing the allocated IPs.
+
+
+![metal-lb-architecture](/assets/img/metallb_architecture.png)
+
+
+## Requesting Specific IPs
+
+MetalLB respects the Kubernetes service `spec.loadBalancerIP` parameter, so if a static IP address from the available pool needs to be set up for a specific service, it can be requested by setting that parameter. If MetalLB does not own the requested address, or if the address is already in use by another service, the assignment will fail and MetalLB will log a warning event visible in `kubectl describe service <service_name>`.
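+
+As an illustrative sketch, once the address pool described below is configured, an existing LoadBalancer service (for example the `traefik` service shown in the previous output) could be pinned to a fixed address from the pool by patching its spec:
+
+```shell
+# Request a specific external IP from the MetalLB pool for an existing service
+kubectl patch service traefik -n kube-system \
+  --type merge -p '{"spec": {"loadBalancerIP": "10.0.0.100"}}'
+```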
+
+
+## Install Metal Load Balancer
+
+
+Installation using `Helm` (Release 3):
+
+- Step 1: Add the Metal LB Helm repository:
+
+ ```shell
+ helm repo add metallb https://metallb.github.io/metallb
+ ```
+
+- Step 2: Fetch the latest charts from the repository:
+
+ ```shell
+ helm repo update
+ ```
+
+- Step 3: Create namespace
+
+ ```shell
+ kubectl create namespace metallb
+ ```
+
+- Step 4: Install Metallb in the `metallb` namespace.
+
+ ```shell
+ helm install metallb metallb/metallb --namespace metallb
+ ```
+
+
+- Step 5: Confirm that the deployment succeeded, run:
+
+ ```shell
+ kubectl -n metallb get pod
+ ```
+
+- Step 6: Configure IP address pool and the announcement method (L2 configuration)
+
+ Create the following manifest file: `metallb-config.yaml`
+ ```yml
+ ---
+ # Metallb address pool
+ apiVersion: metallb.io/v1beta1
+ kind: IPAddressPool
+ metadata:
+ name: picluster-pool
+ namespace: metallb
+ spec:
+ addresses:
+ - 10.0.0.100-10.0.0.200
+
+ ---
+ # L2 configuration
+ apiVersion: metallb.io/v1beta1
+ kind: L2Advertisement
+ metadata:
+ name: example
+ namespace: metallb
+ spec:
+ ipAddressPools:
+ - picluster-pool
+
+ ```
+
+ Apply the manifest file
+
+ ```shell
+ kubectl apply -f metallb-config.yaml
+ ```
+
+ After a while, MetalLB is deployed and the traefik LoadBalancer service gets its external IP from the configured pool, becoming accessible from outside the cluster:
+
+ ```shell
+ kubectl get services --all-namespaces
+ NAMESPACE NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
+ default kubernetes ClusterIP 10.43.0.1 443/TCP 63m
+ kube-system kube-dns ClusterIP 10.43.0.10 53/UDP,53/TCP,9153/TCP 63m
+ kube-system metrics-server ClusterIP 10.43.169.140 443/TCP 63m
+ kube-system traefik LoadBalancer 10.43.50.56 10.0.0.100 80:30582/TCP,443:30123/TCP 60m
+ ```
+{{site.data.alerts.important}}
+
+ In previous chart releases there was a way to configure MetalLB at deployment time by providing the following values.yaml:
+
+ ```yml
+ configInline:
+ address-pools:
+ - name: default
+ protocol: layer2
+ addresses:
+ - 10.0.0.100-10.0.0.200
+ ```
+ The Helm chart `configInline` value in `values.yaml` has been deprecated since MetalLB 0.13.
+ Configuration must be done by creating the corresponding MetalLB Kubernetes CRDs (`IPAddressPool` and `L2Advertisement`). See [MetalLB configuration documentation](https://metallb.universe.tf/configuration/).
+
+{{site.data.alerts.end}}
\ No newline at end of file
diff --git a/docs/_docs/minio.md b/docs/_docs/minio.md
new file mode 100644
index 00000000..c676a966
--- /dev/null
+++ b/docs/_docs/minio.md
@@ -0,0 +1,281 @@
+---
+title: Minio S3 Object Storage Service
+permalink: /docs/minio/
+description: How to deploy a Minio S3 object storage server in our Raspberry Pi Kubernetes Cluster.
+last_modified_at: "27-12-2022"
+---
+
+Minio can be deployed as a Kubernetes service or as a stand-alone setup in a bare-metal environment. Since I want to use Minio Server for backing-up/restoring the cluster itself, I will go with a bare-metal installation, considering Minio as an external service in Kubernetes.
+
+Official [documentation](https://docs.min.io/minio/baremetal/installation/deploy-minio-standalone.html) can be used for installing stand-alone Minio Server in bare-metal environment.
+
+For a more secure and multi-user Minio installation, the instructions of this [post](https://www.civo.com/learn/create-a-multi-user-minio-server-for-s3-compatible-object-hosting) can be used.
+
+For installing the Minio S3 storage server, `node1` will be used. `node1` has an attached 480 GB SSD disk that is not being used by the Longhorn distributed storage solution: Longhorn is not deployed on the k3s master node, so storage replicas only use the storage resources of `node2`, `node3` and `node4`.
+
+Minio installation and configuration tasks have been automated with Ansible by developing a role: **ricsanfre.minio**. This role installs Minio Server and Minio Client, and automatically creates S3 buckets and configures users and ACLs to secure the access.
+
+## Minio installation (baremetal server)
+
+- Step 1. Create minio's UNIX user/group
+
+ ```shell
+ sudo groupadd minio
+ sudo useradd minio -g minio
+ ```
+- Step 2. Create minio's S3 storage directory
+
+ ```shell
+ sudo mkdir /storage/minio
+ chown -R minio:minio /storage/minio
+ chmod -R 750 /storage/minio
+ ```
+
+- Step 3. Create minio's config directories
+
+ ```shell
+ sudo mkdir -p /etc/minio
+ sudo mkdir -p /etc/minio/ssl
+ sudo mkdir -p /etc/minio/policy
+ chown -R minio:minio /etc/minio
+ chmod -R 750 /etc/minio
+ ```
+
+- Step 4. Download server binary (`minio`) and minio client (`mc`) and copy them to `/usr/local/bin`
+
+ ```shell
+ wget https://dl.min.io/server/minio/release/linux-<arch>/minio
+ wget https://dl.minio.io/client/mc/release/linux-<arch>/mc
+ chmod +x minio
+ chmod +x mc
+ sudo mv minio /usr/local/bin/minio
+ sudo mv mc /usr/local/bin/mc
+ ```
+ where `<arch>` is amd64 or arm64.
+
+- Step 5: Create minio Config file `/etc/minio/minio.conf`
+
+ This file contains environment variables that will be used by minio server.
+ ```
+ # Minio local volumes.
+ MINIO_VOLUMES="/storage/minio"
+
+ # Minio cli options.
+ MINIO_OPTS="--address :9091 --console-address :9092 --certs-dir /etc/minio/ssl"
+
+ # Access Key of the server.
+ MINIO_ROOT_USER=""
+ # Secret key of the server.
+ MINIO_ROOT_PASSWORD=""
+ # Minio server region
+ MINIO_SITE_REGION="eu-west-1"
+ # Minio server URL
+ MINIO_SERVER_URL="https://s3.picluster.ricsanfre.com:9091"
+ ```
+
+ Minio is configured with the following parameters:
+
+ - Minio Server API Port 9091 (`MINIO_OPTS`="--address :9091")
+ - Minio Console Port: 9092 (`MINIO_OPTS`="--console-address :9092")
+ - Minio Storage data dir (`MINIO_VOLUMES`): `/storage/minio`
+ - Minio Site Region (`MINIO_SITE_REGION`): `eu-west-1`
+ - SSL certificates stored in (`MINIO_OPTS`="--certs-dir /etc/minio/ssl"): `/etc/minio/ssl`.
+ - Minio server URL (`MINIO_SERVER_URL`): URL used to connect to the Minio Server API
+
+- Step 6. Create systemd minio service file `/etc/systemd/system/minio.service`
+
+ ```
+ [Unit]
+ Description=MinIO
+ Documentation=https://docs.min.io
+ Wants=network-online.target
+ After=network-online.target
+ AssertFileIsExecutable=/usr/local/bin/minio
+
+ [Service]
+ WorkingDirectory=/usr/local/
+
+ User=minio
+ Group=minio
+ ProtectProc=invisible
+
+ EnvironmentFile=/etc/minio/minio.conf
+ ExecStartPre=/bin/bash -c "if [ -z \"${MINIO_VOLUMES}\" ]; then echo \"Variable MINIO_VOLUMES not set in /etc/minio/minio.conf\"; exit 1; fi"
+
+ ExecStart=/usr/local/bin/minio server $MINIO_OPTS $MINIO_VOLUMES
+
+ # Let systemd restart this service always
+ Restart=always
+
+ # Specifies the maximum file descriptor number that can be opened by this process
+ LimitNOFILE=65536
+
+ # Specifies the maximum number of threads this process can create
+ TasksMax=infinity
+
+ # Disable timeout logic and wait until process is stopped
+ TimeoutStopSec=infinity
+ SendSIGKILL=no
+
+ [Install]
+ WantedBy=multi-user.target
+ ```
+ This service starts the minio server using the minio UNIX user/group, loading environment variables located in `/etc/minio/minio.conf` and executing the following startup command:
+
+ ```shell
+ /usr/local/bin/minio server $MINIO_OPTS $MINIO_VOLUMES
+ ```
+
+- Step 7. Enable minio systemd service
+
+ ```shell
+ sudo systemctl enable minio.service
+ ```
+
+- Step 8. Create Minio SSL certificate
+
+ In case you have your own domain, a valid SSL certificate signed by [Letsencrypt](https://letsencrypt.org/) can be obtained for Minio server, using [Certbot](https://certbot.eff.org/).
+
+ See certbot installation instructions in [CertManager - Letsencrypt Certificates Section](/docs/certmanager/#installing-certbot-ionos). Those instructions indicate how to install certbot using DNS challenge with IONOS DNS provider (my DNS provider). Similar procedures can be followed for other DNS providers.
+
+ Letsencrypt using HTTP challenge is avoided for security reasons (cluster services are not exposed to public internet).
+
+ If generating a valid SSL certificate is not possible, self-signed certificates with a custom CA can be used instead.
+
+ {{site.data.alerts.important}}
+
+ `restic` backup to a S3 Object Storage backend using self-signed certificates does not work (See issue [#26](https://github.com/ricsanfre/pi-cluster/issues/26)). However, it works if SSL certificates are signed using a custom CA.
+
+ {{site.data.alerts.end}}
+
+ Follow this procedure for creating a self-signed certificate for Minio Server
+
+ 1. Create a self-signed CA key and self-signed certificate
+
+ ```shell
+ openssl req -x509 \
+ -sha256 \
+ -nodes \
+ -newkey rsa:4096 \
+ -subj "/CN=Ricsanfre CA" \
+ -keyout rootCA.key -out rootCA.crt
+ ```
+ 2. Create a SSL certificate for Minio server signed using the custom CA
+
+ ```shell
+ openssl req -new -nodes -newkey rsa:4096 \
+ -keyout minio.key \
+ -out minio.csr \
+ -batch \
+ -subj "/C=ES/ST=Madrid/L=Madrid/O=Ricsanfre CA/OU=picluster/CN=s3.picluster.ricsanfre.com"
+
+ openssl x509 -req -days 365000 -set_serial 01 \
+ -extfile <(printf "subjectAltName=DNS:s3.picluster.ricsanfre.com") \
+ -in minio.csr \
+ -out minio.crt \
+ -CA rootCA.crt \
+ -CAkey rootCA.key
+ ```
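+
+ Optionally, before installing it, the generated certificate can be validated against the custom CA:
+
+ ```shell
+ # Check the certificate chains to the custom CA and contains the expected SAN
+ openssl verify -CAfile rootCA.crt minio.crt
+ openssl x509 -in minio.crt -noout -text | grep -A1 "Subject Alternative Name"
+ ```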
+
+ Once the certificate is created, public certificate and private key need to be installed in Minio server following this procedure:
+
+
+ 1. Copy public certificate `minio.crt` as `/etc/minio/ssl/public.crt`
+
+ ```shell
+ sudo cp minio.crt /etc/minio/ssl/public.crt
+ sudo chown minio:minio /etc/minio/ssl/public.crt
+ ```
+ 2. Copy private key `minio.key` as `/etc/minio/ssl/private.key`
+
+ ```shell
+ cp minio.key /etc/minio/ssl/private.key
+ sudo chown minio:minio /etc/minio/ssl/private.key
+ ```
+ 3. Restart minio server.
+
+ ```shell
+ sudo systemctl restart minio.service
+ ```
+
+ {{site.data.alerts.note}}
+
+ The certificate must be created for the DNS name associated with the Minio S3 service, i.e. `s3.picluster.ricsanfre.com`.
+
+ The `MINIO_SERVER_URL` environment variable needs to be configured to avoid issues with TLS certificates without IP Subject Alternative Names.
+
+ {{site.data.alerts.end}}
+
+ To connect to Minio console use the URL https://s3.picluster.ricsanfre.com:9091
+
+- Step 9. Configure minio client: `mc`
+
+ Configure connection alias to minio server.
+
+ ```shell
+ mc alias set minio_alias <minio_url> <admin_user> <admin_password>
+ ```
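+
+ The connection can then be tested against the server (alias name as used above):
+
+ ```shell
+ mc admin info minio_alias
+ ```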
+
+## Minio Configuration
+
+### Buckets
+
+The following buckets need to be created for backing-up different cluster components:
+
+- Longhorn Backup: `k3s-longhorn`
+- Velero Backup: `k3s-velero`
+- OS backup: `restic`
+
+Also as backend storage for Loki and Tempo, the following buckets need to be configured
+
+- Loki Storage: `k3s-loki`
+- Tempo Storage: `k3s-tempo`
+
+Buckets can be created using Minio's CLI (`mc`)
+
+```shell
+mc mb <minio_alias>/<bucket_name>
+```
+Where `<minio_alias>` is the mc alias of the connection to the Minio Server using admin user credentials, created during Minio installation in step 9.
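+
+For instance, using the alias created in step 9 (`minio_alias`), the buckets listed above would be created with:
+
+```shell
+mc mb minio_alias/k3s-longhorn
+mc mb minio_alias/k3s-velero
+mc mb minio_alias/restic
+mc mb minio_alias/k3s-loki
+mc mb minio_alias/k3s-tempo
+```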
+
+### Users and ACLs
+
+Following users will be created to grant access to Minio S3 buckets:
+
+- `longhorn` with read-write access to `k3s-longhorn` bucket.
+- `velero` with read-write access to `k3s-velero` bucket.
+- `restic` with read-write access to `restic` bucket
+- `loki` with read-write access to `k3s-loki` bucket
+
+Users can be created using Minio's CLI:
+```shell
+mc admin user add <minio_alias> <user_name> <user_password>
+```
+Access policies to the different buckets can be assigned to the different users using the command:
+
+```shell
+mc admin policy add <minio_alias> <policy_name> user_policy.json
+```
+Where `user_policy.json` contains an AWS access policy definition like:
+
+```json
+{
+"Version": "2012-10-17",
+"Statement": [
+ {
+ "Effect": "Allow",
+ "Action": [
+ "s3:DeleteObject",
+ "s3:GetObject",
+ "s3:ListBucket",
+ "s3:PutObject"
+ ],
+ "Resource": [
+ "arn:aws:s3:::bucket_name",
+ "arn:aws:s3:::bucket_name/*"
+ ]
+ }
+]
+}
+```
+This policy grants read-write access to `bucket_name`. For each user, a different JSON file needs to be created, granting access to its dedicated bucket. Those JSON files can be stored in the `/etc/minio/policy` directory.
\ No newline at end of file
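+
+As an illustrative sketch, assuming a policy file `/etc/minio/policy/longhorn_policy.json` granting access to the `k3s-longhorn` bucket, the policy could be registered and attached to the `longhorn` user (the `mc admin policy set` form applies to the mc release used here; newer releases rename these subcommands to `create`/`attach`):
+
+```shell
+mc admin policy add minio_alias longhorn /etc/minio/policy/longhorn_policy.json
+mc admin policy set minio_alias longhorn user=longhorn
+```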
diff --git a/docs/_docs/monitoring.md b/docs/_docs/monitoring.md
index e05b0c95..d4b79c41 100644
--- a/docs/_docs/monitoring.md
+++ b/docs/_docs/monitoring.md
@@ -2,7 +2,7 @@
title: Monitoring (Prometheus)
permalink: /docs/prometheus/
description: How to deploy kuberentes cluster monitoring solution based on Prometheus. Installation based on Prometheus Operator using kube-prometheus-stack project.
-last_modified_at: "01-11-2022"
+last_modified_at: "22-01-2023"
---
Prometheus stack installation for kubernetes using Prometheus Operator can be streamlined using [kube-prometheus](https://github.com/prometheus-operator/kube-prometheus) project maintaned by the community.
@@ -158,6 +158,13 @@ Kube-prometheus stack can be installed using helm [kube-prometheus-stack](https:
- name: Loki
type: loki
url: http://loki-gateway.logging.svc.cluster.local
+
+ # Additional configuration to grafana dashboards sidecar
+ # Search in all namespaces for configMaps containing label `grafana_dashboard`
+ sidecar:
+ dashboards:
+ searchNamespace: ALL
+
# Disabling monitoring of K8s services.
# Monitoring of K3S components will be configured out of kube-prometheus-stack
kubelet:
@@ -212,6 +219,8 @@ Kube-prometheus stack can be installed using helm [kube-prometheus-stack](https:
{{site.data.alerts.end}}
+ - Configure Grafana to discover ConfigMaps containing dashboard definitions in all namespaces (`grafana.sidecar.dashboards.searchNamespace`)
+
- Disables monitoring of kubernetes components (apiserver, etcd, kube-scheduler, kube-controller-manager, kube-proxy and kubelet): `kubeApiServer.enabled`, `kubeControllerManager.enabled`, `kubeScheduler.enabled`, `kubeProxy.enabled` , `kubelet.enabled` and `kubeEtcd.enabled`.
Monitoring of K3s components will be configured outside kube-prometheus-stack. See explanation in section [K3S components monitoring](#k3s-components-monitoring) below.
@@ -633,6 +642,7 @@ The following chart configuration is provided:
- Additional plugin(`grafana.plugins`), `grafana-piechart-panel` needed in by Traefik's dashboard is installed.
- Loki data source is added (`grafana.additionalDataSource`)
- Grafana ServiceMonitor label and job label is configured (`serviceMonitor`)
+- Grafana sidecar dashboard provisioner additional configuration (on top of the one added by kube-prometheus-stack) to search in all namespaces (`sidecar.dashboards.searchNamespace`)
```yml
grafana:
@@ -663,6 +673,45 @@ grafana:
- name: Loki
type: loki
url: http://loki-gateway.logging.svc.cluster.local
+ # Additional configuration to grafana dashboards sidecar
+ # Search in all namespaces for configMaps containing label `grafana_dashboard`
+ sidecar:
+ dashboards:
+ searchNamespace: ALL
+```
+
+#### GitOps installation (ArgoCD)
+
+As an alternative, for GitOps deployments (using ArgoCD), instead of hardcoding Grafana's admin password within Helm chart values, admin credentials can be stored in an existing Secret.
+
+The following secret needs to be created:
+```yml
+apiVersion: v1
+kind: Secret
+metadata:
+ name: grafana
+ namespace: grafana
+type: Opaque
+data:
+ admin-user: < grafana_admin_user | b64encode>
+ admin-password: < grafana_admin_password | b64encode>
+```
+For encoding the admin user and password, execute the following commands:
+```shell
+echo -n "" | base64
+echo -n "" | base64
+```
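+
+Alternatively, the secret can be created directly with `kubectl`, which handles the encoding automatically (values are placeholders):
+
+```shell
+kubectl create secret generic grafana -n grafana \
+  --from-literal=admin-user=<grafana_admin_user> \
+  --from-literal=admin-password=<grafana_admin_password>
+```
+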
+And the following Helm values have to be provided:
+
+```yml
+grafana:
+ # Use an existing secret for the admin user.
+ adminUser: ""
+ adminPassword: ""
+ admin:
+ existingSecret: grafana
+ userKey: admin-user
+ passwordKey: admin-password
```
#### Provisioning Dashboards automatically
@@ -693,7 +742,7 @@ Check out ["Grafana chart documentation: Sidecar for Dashboards"](https://github
`kube-prometheus-stack` configure by default grafana provisioning sidecar to check only for new ConfigMaps containing label `grafana_dashboard`
-This are the default helm chart values configuring the sidecar:
+These are the kube-prometheus-stack default helm chart values configuring the sidecar:
```yml
grafana:
@@ -724,6 +773,15 @@ data:
```
+Additional helm chart configuration is required to enable the search for ConfigMaps in all namespaces (by default the search is limited to Grafana's namespace).
+
+```yaml
+grafana:
+ sidecar:
+ dashboards:
+ searchNamespace: ALL
+```
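+
+With that option enabled, the sidecar picks up any ConfigMap labelled with `grafana_dashboard`, whatever its namespace. A minimal sketch, assuming a dashboard exported to a local `my-dashboard.json` file (file name and namespace are placeholders):
+
+```shell
+kubectl create configmap my-dashboard -n <app_namespace> --from-file=my-dashboard.json
+kubectl label configmap my-dashboard -n <app_namespace> grafana_dashboard=1
+```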
+
Following this procedure kube-prometheus-stack helm chart automatically deploy a set of Dashboards for monitoring metrics coming from Kubernetes processes and from Node Exporter. The list of [kube-prometheus-stack grafana dashboards](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack/templates/grafana/dashboards-1.14)
For each dashboard a ConfigMap containing the json definition is created.
diff --git a/docs/_docs/node.md b/docs/_docs/node.md
index 9d45ebf6..c51f4686 100644
--- a/docs/_docs/node.md
+++ b/docs/_docs/node.md
@@ -43,7 +43,7 @@ For `node1-node5`, the new partition created in boot time, `/dev/sda2`, uses mos
LVM partition and formatting tasks have been automated with Ansible developing the ansible role: **ricsanfre.storage** for managing LVM.
-Specific `node1-node5` ansible variables to be used by this role are stored in [`vars/dedicated_disks/local_storage.yml`]({{ site.git_edit_address }}/vars/dedicated_disks/local_storage.yml)
+Specific `node1-node5` ansible variables to be used by this role are stored in [`ansible/vars/dedicated_disks/local_storage.yml`]({{ site.git_edit_address }}/ansible/vars/dedicated_disks/local_storage.yml)
## Network Configuration
@@ -60,7 +60,7 @@ Follow the procedure indicated in ["Ubuntu OS Installation"](/docs/ubuntu/) usin
|-----------| ------- |-------|-------|--------|
| Dedicated Disks | [user-data]({{ site.git_edit_address }}/cloud-init/dedicated_disks/node1/user-data) | [user-data]({{ site.git_edit_address }}/cloud-init/dedicated_disks/node2/user-data)| [user-data]({{ site.git_edit_address }}/cloud-init/dedicated_disks/node3/user-data) | [user-data]({{ site.git_edit_address }}/cloud-init/dedicated_disks/node4/user-data) |
| Centralized SAN | [user-data]({{ site.git_edit_address }}/cloud-init/centralized_san/node1/user-data) | [user-data]({{ site.git_edit_address }}/cloud-init/centralized_san/node2/user-data)| [user-data]({{ site.git_edit_address }}/cloud-init/centralized_san/node3/user-data) | [user-data]({{ site.git_edit_address }}/cloud-init/centralized_san/node4/user-data) |
-{: .table }
+{: .table .table-white .border-dark }
## Ubuntu OS Initital Configuration
@@ -93,7 +93,7 @@ Further details about iSCSI configurations and step-by-step manual instructions
Each node add the iSCSI LUN exposed by `gateway` to a LVM Volume Group and create a unique Logical Volume which formatted (ext4) and mounted as `/storage`.
-Specific `node1-node5` ansible variables to be used by these roles are stored in [`vars/centralized_san/centralized_san_initiator.yml`]({{ site.git_edit_address }}/vars/centralized_san/centralized_san_initiator.yml)
+Specific `node1-node5` ansible variables to be used by these roles are stored in [`ansible/vars/centralized_san/centralized_san_initiator.yml`]({{ site.git_edit_address }}/ansible/vars/centralized_san/centralized_san_initiator.yml)
{{site.data.alerts.important}}
diff --git a/docs/_docs/pimaster.md b/docs/_docs/pimaster.md
index 7f6f1d9c..19054049 100644
--- a/docs/_docs/pimaster.md
+++ b/docs/_docs/pimaster.md
@@ -283,3 +283,19 @@ We will create SSH keys for two different users:
cat id_rsa.pub
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDsVSvxBitgaOiqeX4foCfhIe4yZj+OOaWP+wFuoUOBCZMWQ3cW188nSyXhXKfwYK50oo44O6UVEb2GZiU9bLOoy1fjfiGMOnmp3AUVG+e6Vh5aXOeLCEKKxV3I8LjMXr4ack6vtOqOVFBGFSN0ThaRTZwKpoxQ+pEzh+Q4cMJTXBHXYH0eP7WEuQlPIM/hmhGa4kIw/A92Rm0ZlF2H6L2QzxdLV/2LmnLAkt9C+6tH62hepcMCIQFPvHVUqj93hpmNm9MQI4hM7uK5qyH8wGi3nmPuX311km3hkd5O6XT5KNZq9Nk1HTC2GHqYzwha/cAka5pRUfZmWkJrEuV3sNAl ansible@pimaster
```
+
+## Install additional packages
+
+The following packages are also needed:
+
+- Install the GnuPG package, needed for Ansible files encryption
+
+ ```shell
+ sudo apt install gnupg
+ ```
+
+- Install pwgen to generate random passwords
+
+ ```shell
+ sudo apt install pwgen
+ ```
\ No newline at end of file
diff --git a/docs/_docs/service-mesh.md b/docs/_docs/service-mesh.md
index f4714d9f..692d65d7 100644
--- a/docs/_docs/service-mesh.md
+++ b/docs/_docs/service-mesh.md
@@ -2,7 +2,7 @@
title: Service Mesh (Linkerd)
permalink: /docs/service-mesh/
description: How to deploy service-mesh architecture based on Linkerd. Adding observability, traffic management and security to our Kubernetes cluster.
-last_modified_at: "27-11-2022"
+last_modified_at: "09-01-2023"
---
@@ -162,7 +162,7 @@ Installation using `Helm` (Release 3):
Certificate is created as a CA (isCA:true) because it will be used by linkerd to issue mTLS certificates.
-- Step 5: Command certmanger to create the `Certificate` and the associated `Secret`.
+- Step 5: Command cert-manager to create the `Certificate` and the associated `Secret`.
```shell
kubectl apply -f linkerd-identity-issuer.yml
@@ -223,6 +223,59 @@ Installation using `Helm` (Release 3):
-----END CERTIFICATE-----
```
+#### GitOps installation (ArgoCD)
+
+As an alternative, for GitOps deployments (using ArgoCD), instead of hardcoding the CA certificate within Helm chart values, an external configMap, `linkerd-identity-trust-roots`, can be created containing the CA certificate (`ca-bundle.crt` key). If the external configMap is provided, the helm value `identity.externalCA=true` needs to be set during installation.
+
+[Trust Manager](https://cert-manager.io/docs/projects/trust-manager/), installed jointly with Cert-Manager, can be used to automate the generation of that configMap containing the information about the ca secret.
+See detailed procedure described in [linkerd issue #7345](https://github.com/linkerd/linkerd2/issues/7345#issuecomment-979207861).
+
+See Trust-Manager installation procedure in [TLS certification management documentation](/docs/certmanager/).
+
+In the previous installation procedure, step 6 and step 8 can be replaced by the following:
+
+- Step 6: Create a Trust-Manager `Bundle` resource to distribute the CA certificate in the linkerd namespace as a configMap (the source is taken from the namespace trust-manager was installed in, i.e. cert-manager)
+
+ Create Trust Manager bundle resource to share `ca.crt` stored in `root-secret` within a configMap (`linkerd-identity-trust-roots`) in linkerd namespace.
+
+ ```yml
+ apiVersion: trust.cert-manager.io/v1alpha1
+ kind: Bundle
+ metadata:
+ name: linkerd-identity-trust-roots
+ spec:
+ sources:
+ - secret:
+ name: "root-secret"
+ key: "ca.crt"
+ target:
+ configMap:
+ key: "ca-bundle.crt"
+ namespaceSelector:
+ matchLabels:
+ kubernetes.io/metadata.name: linkerd
+ ```
+
+ Apply this resource using the `kubectl apply -f` command.
+
+ Check that the configMap is created and its content is the expected one:
+
+ ```shell
+ kubectl get cm linkerd-identity-trust-roots -o jsonpath="{.data.ca-bundle\.crt}" -n linkerd
+ ```
+
+- Step 8: Install the Linkerd control plane Helm chart (`identity.externalCA` needs to be set to true)
+
+ ```shell
+ helm install linkerd-control-plane \
+ --set identity.externalCA=true \
+ --set identity.issuer.scheme=kubernetes.io/tls \
+ --set installNamespace=false \
+ linkerd/linkerd-control-plane \
+ -n linkerd
+ ```
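+
+ After the installation, the control plane health can be verified with the `linkerd` CLI (assuming it is installed on the workstation):
+
+ ```shell
+ linkerd check
+ ```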
+
+
### Linkerd Viz extension installation
Linkerd provides a full on-cluster metrics stack, a web dashboard, and pre-configured Grafana dashboards. This is the linkerd viz extension.
diff --git a/docs/_docs/tracing.md b/docs/_docs/tracing.md
index af454673..29aa4a6f 100644
--- a/docs/_docs/tracing.md
+++ b/docs/_docs/tracing.md
@@ -187,6 +187,148 @@ Where `user_policy.json`, contains the following AWS access policies definition:
```shell
kubectl get pods -l app.kubernetes.io/name=loki -n logging
```
+
+### GitOps installation (ArgoCD)
+
+As an alternative, for GitOps deployments (using ArgoCD), instead of hardcoding minio credentials within Helm chart values, an external secret can be configured leveraging [Tempo's capability of using environment variables in the configuration file](https://grafana.com/docs/tempo/latest/configuration/#use-environment-variables-in-the-configuration).
+
+The following secret needs to be created:
+```yml
+apiVersion: v1
+kind: Secret
+metadata:
+ name: tempo-minio-secret
+ namespace: tracing
+type: Opaque
+data:
+ MINIO_ACCESS_KEY_ID: < minio_tempo_user | b64encode >
+ MINIO_SECRET_ACCESS_KEY: < minio_tempo_key | b64encode >
+```
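+
+As with Loki, the secret can be generated imperatively with `kubectl` instead of encoding the values manually (placeholders shown):
+
+```shell
+kubectl create secret generic tempo-minio-secret -n tracing \
+  --from-literal=MINIO_ACCESS_KEY_ID=<minio_tempo_user> \
+  --from-literal=MINIO_SECRET_ACCESS_KEY=<minio_tempo_key>
+```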
+
+And the following Helm values have to be provided:
+
+```yml
+# Enable trace ingestion
+traces:
+ otlp:
+ grpc:
+ enabled: true
+ http:
+ enabled: true
+ zipkin:
+ enabled: true
+ jaeger:
+ thriftCompact:
+ enabled: true
+ thriftHttp:
+ enabled: true
+ opencensus:
+ enabled: true
+
+# Configure S3 backend
+storage:
+ trace:
+ backend: s3
+ s3:
+ bucket: k3s-tempo
+ endpoint: s3.picluster.ricsanfre.com:9091
+ region: eu-west-1
+ access_key: ${MINIO_ACCESS_KEY_ID}
+ secret_key: ${MINIO_SECRET_ACCESS_KEY}
+ insecure: false
+
+# Configure distributor
+distributor:
+ config:
+ log_received_spans:
+ enabled: true
+ # Enable environment variables in config file
+ # https://grafana.com/docs/tempo/latest/configuration/#use-environment-variables-in-the-configuration
+ extraArgs:
+ - '-config.expand-env=true'
+ extraEnv:
+ - name: MINIO_ACCESS_KEY_ID
+ valueFrom:
+ secretKeyRef:
+ name: tempo-minio-secret
+ key: MINIO_ACCESS_KEY_ID
+ - name: MINIO_SECRET_ACCESS_KEY
+ valueFrom:
+ secretKeyRef:
+ name: tempo-minio-secret
+ key: MINIO_SECRET_ACCESS_KEY
+# Configure ingester
+ingester:
+ # Enable environment variables in config file
+ # https://grafana.com/docs/tempo/latest/configuration/#use-environment-variables-in-the-configuration
+ extraArgs:
+ - '-config.expand-env=true'
+ extraEnv:
+ - name: MINIO_ACCESS_KEY_ID
+ valueFrom:
+ secretKeyRef:
+ name: tempo-minio-secret
+ key: MINIO_ACCESS_KEY_ID
+ - name: MINIO_SECRET_ACCESS_KEY
+ valueFrom:
+ secretKeyRef:
+ name: tempo-minio-secret
+ key: MINIO_SECRET_ACCESS_KEY
+# Configure compactor
+compactor:
+ # Enable environment variables in config file
+ # https://grafana.com/docs/tempo/latest/configuration/#use-environment-variables-in-the-configuration
+ extraArgs:
+ - '-config.expand-env=true'
+ extraEnv:
+ - name: MINIO_ACCESS_KEY_ID
+ valueFrom:
+ secretKeyRef:
+ name: tempo-minio-secret
+ key: MINIO_ACCESS_KEY_ID
+ - name: MINIO_SECRET_ACCESS_KEY
+ valueFrom:
+ secretKeyRef:
+ name: tempo-minio-secret
+ key: MINIO_SECRET_ACCESS_KEY
+# Configure querier
+querier:
+ # Enable environment variables in config file
+ # https://grafana.com/docs/tempo/latest/configuration/#use-environment-variables-in-the-configuration
+ extraArgs:
+ - '-config.expand-env=true'
+ extraEnv:
+ - name: MINIO_ACCESS_KEY_ID
+ valueFrom:
+ secretKeyRef:
+ name: tempo-minio-secret
+ key: MINIO_ACCESS_KEY_ID
+ - name: MINIO_SECRET_ACCESS_KEY
+ valueFrom:
+ secretKeyRef:
+ name: tempo-minio-secret
+ key: MINIO_SECRET_ACCESS_KEY
+# Configure query-frontend
+queryFrontend:
+ # Enable environment variables in config file
+ # https://grafana.com/docs/tempo/latest/configuration/#use-environment-variables-in-the-configuration
+ extraArgs:
+ - '-config.expand-env=true'
+ extraEnv:
+ - name: MINIO_ACCESS_KEY_ID
+ valueFrom:
+ secretKeyRef:
+ name: tempo-minio-secret
+ key: MINIO_ACCESS_KEY_ID
+ - name: MINIO_SECRET_ACCESS_KEY
+ valueFrom:
+ secretKeyRef:
+ name: tempo-minio-secret
+ key: MINIO_SECRET_ACCESS_KEY
+# Disable Minio server installation
+minio:
+ enabled: false
+```
## Linkerd traces integration
diff --git a/docs/_docs/vault.md b/docs/_docs/vault.md
new file mode 100644
index 00000000..597ff0b1
--- /dev/null
+++ b/docs/_docs/vault.md
@@ -0,0 +1,820 @@
+---
+title: Secret Management (Vault)
+permalink: /docs/vault/
+description: How to deploy Hashicorp Vault as a Secret Manager for our Raspberry Pi Kubernetes Cluster.
+last_modified_at: "27-12-2022"
+---
+
+[HashiCorp Vault](https://www.vaultproject.io/) is used as Secret Management solution for Raspberry PI cluster. All cluster secrets (users, passwords, api tokens, etc) will be securely encrypted and stored in Vault.
+
+Vault will be deployed as an external service, not running as a Kubernetes service, so it can be used by the GitOps solution, ArgoCD, to automatically deploy all cluster services.
+
+Vault could be installed as a Kubernetes service, deploying it using the official Helm Chart or a community operator like [Banzai Bank-Vault](https://banzaicloud.com/products/bank-vaults/).
+
+Installing Vault as a Kubernetes service would lead to a chicken-and-egg situation if we want to use Vault as the only source of secrets/credentials for all deployed Kubernetes services: Vault requires the block storage solution (Longhorn) to be deployed first since its POD needs Persistent Volumes, and to install Longhorn a few secrets need to be provided to configure its backup (Minio credentials).
+
+[External Secrets Operator](https://external-secrets.io/) will be used to automatically generate the Kubernetes Secrets from Vault data that is needed to deploy the different services using ArgoCD.
+
+![picluster-secretsmanagement-architecture](/assets/img/vault-externalsecrets.png)
+
+## Vault installation
+
+Vault installation and configuration tasks have been automated with Ansible by developing a role: **ricsanfre.vault**. This role installs Vault Server, initializes it and installs a systemd service to automatically unseal it whenever the vault server is restarted.
+
+### Vault installation from binaries
+
+Instead of installing Vault using official Ubuntu packages, installation will be done manually from binaries, so the version to be installed can be decided.
+
+- Step 1. Create vault's UNIX user/group
+
+ The vault user is a system user, with no login allowed.
+ ```shell
+ sudo groupadd vault
+ sudo useradd vault -g vault -r -s /sbin/nologin
+ ```
+- Step 2. Create vault's storage directory
+
+ ```shell
+ sudo mkdir /var/lib/vault
+ chown -R vault:vault /var/lib/vault
+ chmod -R 750 /var/lib/vault
+ ```
+
+- Step 3. Create vault's config directories
+
+ ```shell
+ sudo mkdir -p /etc/vault
+ sudo mkdir -p /etc/vault/tls
+ sudo mkdir -p /etc/vault/policy
+ sudo mkdir -p /etc/vault/plugin
+ chown -R vault:vault /etc/vault
+ chmod -R 750 /etc/vault
+ ```
+
+- Step 4: Create vault's log directory
+
+ ```shell
+ sudo mkdir /var/log/vault
+ chown -R vault:vault /var/log/vault
+ chmod -R 750 /var/log/vault
+ ```
+- Step 5. Download server binary (`vault`) and copy them to `/usr/local/bin`
+
+ ```shell
+ wget https://releases.hashicorp.com/vault/<vault_version>/vault_<vault_version>_linux_<arch>.zip
+ unzip vault_<vault_version>_linux_<arch>.zip
+ chmod +x vault
+ sudo mv vault /usr/local/bin/.
+ ```
+ where `<arch>` is amd64 or arm64, and `<vault_version>` is the vault version (for example: 1.12.2).
+
+
+- Step 6. Create Vault TLS certificate
+
+ In case you have your own domain, a valid TLS certificate signed by [Letsencrypt](https://letsencrypt.org/) can be obtained for the Vault server, using [Certbot](https://certbot.eff.org/).
+
+ See certbot installation instructions in [CertManager - Letsencrypt Certificates Section](/docs/certmanager/#installing-certbot-ionos). Those instructions indicate how to install certbot using DNS challenge with IONOS DNS provider (my DNS provider). Similar procedures can be followed for other DNS providers.
+
+ Letsencrypt using HTTP challenge is avoided for security reasons (cluster services are not exposed to public internet).
+
+ If generating a valid TLS certificate is not possible, self-signed certificates with a custom CA can be used instead.
+
+ Follow this procedure for creating a self-signed certificate for Vault Server
+
+ 1. Create a self-signed CA key and self-signed certificate
+
+ ```shell
+ openssl req -x509 \
+ -sha256 \
+ -nodes \
+ -newkey rsa:4096 \
+ -subj "/CN=Ricsanfre CA" \
+ -keyout rootCA.key -out rootCA.crt
+ ```
+
+ {{site.data.alerts.note}}
+
+ The one created during Minio installation can be re-used.
+
+ {{site.data.alerts.end}}
+
+ 2. Create a TLS certificate for Vault server signed using the custom CA
+
+ ```shell
+ openssl req -new -nodes -newkey rsa:4096 \
+ -keyout vault.key \
+ -out vault.csr \
+ -batch \
+ -subj "/C=ES/ST=Madrid/L=Madrid/O=Ricsanfre CA/OU=picluster/CN=vault.picluster.ricsanfre.com"
+
+ openssl x509 -req -days 365000 -set_serial 01 \
+ -extfile <(printf "subjectAltName=DNS:vault.picluster.ricsanfre.com") \
+ -in vault.csr \
+ -out vault.crt \
+ -CA rootCA.crt \
+ -CAkey rootCA.key
+ ```
+
+ Once the certificate is created, public certificate and private key need to be installed in Vault server following this procedure:
+
+
+ 1. Copy public certificate `vault.crt` as `/etc/vault/tls/vault.crt`
+
+ ```shell
+ sudo cp vault.crt /etc/vault/tls/vault.crt
+ sudo chown vault:vault /etc/vault/tls/vault.crt
+ ```
+ 2. Copy private key `vault.key` as `/etc/vault/tls/vault.key`
+
+ ```shell
+ cp vault.key /etc/vault/tls/vault.key
+ sudo chown vault:vault /etc/vault/tls/vault.key
+ ```
+ 3. Copy CA certificate `rootCA.crt` as `/etc/vault/tls/vault-ca.crt`
+
+ {{site.data.alerts.note}}
+
+ This step is only needed if using selfsigned certificate.
+
+ {{site.data.alerts.end}}
+
+ ```shell
+ cp rootCA.crt /etc/vault/tls/vault-ca.crt
+ sudo chown vault:vault /etc/vault/tls/vault-ca.crt
+ ```
+
+- Step 7: Create vault config file `/etc/vault/vault_main.hcl`
+
+ ```
+ cluster_addr = "https://:8201"
+ api_addr = "https://:8200"
+
+ plugin_directory = "/etc/vault/plugin"
+
+ disable_mlock = true
+
+ listener "tcp" {
+ address = "0.0.0.0:8200"
+ tls_cert_file = "/etc/vault/tl/vault.crt"
+ tls_key_file = "/etc/vault/tls/vault.key"
+
+ tls_disable_client_certs = true
+
+ }
+
+ storage "raft" {
+ path = "/var/lib/vault"
+
+ }
+ ```
+
+ Vault is configured, as a single node of HA cluster, with the following parameters:
+
+ - Node's URL address to be used in internal communications between nodes of the cluster. (`cluster_addr` and `api_addr`)
+ - Vault server API listening in all node's addresses at port 8200: (`listener "tcp" address=0.0.0.0:8200`)
+ - TLS certificates are stored in `/etc/vault/tls`
+ - Client TLS certificates validation is disabled (`tls_disable_client_certs`)
+ - Vault is configured to use integrated storage [Raft](https://developer.hashicorp.com/vault/docs/configuration/storage/raft) data dir `/var/lib/vault`
+ - Disables the server from executing the mlock syscall (`disable_mlock`), which is recommended when using Raft storage
+
+
+- Step 8. Create systemd vault service file `/etc/systemd/system/vault.service`
+
+ ```
+ [Unit]
+ Description="HashiCorp Vault - A tool for managing secrets"
+ Documentation=https://www.vaultproject.io/docs/
+ Requires=network-online.target
+ After=network-online.target
+ ConditionPathExists=/etc/vault/vault_main.hcl
+
+ [Service]
+ User=vault
+ Group=vault
+ ProtectSystem=full
+ ProtectHome=read-only
+ PrivateTmp=yes
+ PrivateDevices=yes
+ SecureBits=keep-caps
+ Capabilities=CAP_IPC_LOCK+ep
+ AmbientCapabilities=CAP_SYSLOG CAP_IPC_LOCK
+ CapabilityBoundingSet=CAP_SYSLOG CAP_IPC_LOCK
+ NoNewPrivileges=yes
+ ExecStart=/bin/sh -c 'exec {{ vault_bin_path }}/vault server -config=/etc/vault/vault_main.hcl -log-level=info'
+ ExecReload=/bin/kill --signal HUP $MAINPID
+ KillMode=process
+ KillSignal=SIGINT
+ Restart=on-failure
+ RestartSec=5
+ TimeoutStopSec=30
+ StartLimitInterval=60
+ StartLimitBurst=3
+ LimitNOFILE=524288
+ LimitNPROC=524288
+ LimitMEMLOCK=infinity
+ LimitCORE=0
+
+ [Install]
+ WantedBy=multi-user.target
+ ```
+
+ {{site.data.alerts.note}}
+
+ This systemd configuration is the same one installed by the official Vault Ubuntu package.
+
+ {{site.data.alerts.end}}
+
+ This service starts the Vault server as the `vault` UNIX user and group, executing the following startup command:
+
+ ```shell
+ /usr/local/vault server -config=/etc/vault/vault_main.hcl -log-level=info
+ ```
+
+- Step 9. Enable vault systemd service and start it
+
+ ```shell
+ sudo systemctl enable vault.service
+ sudo systemctl start vault.service
+ ```
+
+- Step 10. Check vault server status
+
+ ```shell
+ export VAULT_ADDR=https://vault.picluster.ricsanfre.com:8200
+ export VAULT_CACERT=/etc/vault/tls/vault-ca.crt
+
+ vault status
+ ```
+
+ The output should look like the following:
+
+ ```shell
+ Key Value
+ --- -----
+ Seal Type shamir
+ Initialized false
+ Sealed true
+ Total Shares 0
+ Threshold 0
+ Unseal Progress 0/0
+ Unseal Nonce n/a
+ Version 1.12.2
+ Build Date 2022-11-23T12:53:46Z
+ Storage Type raft
+ HA Enabled true
+ ```
+
+ It shows Vault server status as not initialized (Initialized = false) and sealed (Sealed = true).
+
+ {{site.data.alerts.note}}
+
+ The VAULT_CACERT variable is only needed if Vault's TLS certificate is signed by a custom CA. It will be used by the vault client to validate Vault's certificate.
+
+ {{site.data.alerts.end}}
+
+
+### Vault initialization and unseal
+
+During initialization, Vault generates a root key, which is stored in the storage backend alongside all other Vault data. The root key itself is encrypted and requires an unseal key to decrypt it.
+
+The unseal process, where unseal keys are provided to rebuild the root key, needs to be completed every time the vault server is started.
+
+The default Vault configuration uses [Shamir's Secret Sharing](https://en.wikipedia.org/wiki/Shamir%27s_Secret_Sharing) to split the root key into a configured number of shards (referred to as key shares, or unseal keys). A certain threshold of shards is required to reconstruct the root key, which is then used to decrypt Vault's encryption key.
+
+To initialize vault, the [`vault operator init`](https://developer.hashicorp.com/vault/docs/commands/operator/init) command must be used.
+
+```shell
+vault operator init -key-shares=1 -key-threshold=1 -format=json > /etc/vault/unseal.json
+```
+where the number of key shares (`-key-shares`) and the threshold (`-key-threshold`) are set to 1, so only one key is needed to unseal vault.
+
+The command output is redirected to a file (`/etc/vault/unseal.json`) containing the unseal key values and the root token needed to connect to vault.
+
+```json
+{
+ "unseal_keys_b64": [
+ "UEDYFGa/oVUehw5eflXt2mdoE8zJD3QVub8b++rNCm8="
+ ],
+ "unseal_keys_hex": [
+ "5040d81466bfa1551e870e5e7e55edda676813ccc90f7415b9bf1bfbeacd0a6f"
+ ],
+ "unseal_shares": 1,
+ "unseal_threshold": 1,
+ "recovery_keys_b64": [],
+ "recovery_keys_hex": [],
+ "recovery_keys_shares": 0,
+ "recovery_keys_threshold": 0,
+ "root_token": "hvs.AJxt0CgXT9BcVe5dMNeI0Unm"
+}
+```
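+
+For reference, a production-style initialization would typically split the root key into several shares with a higher threshold; the values below are purely illustrative:
+
+```shell
+# Illustrative values: 5 key shares, any 3 of which are required to unseal Vault
+vault operator init -key-shares=5 -key-threshold=3 -format=json > /etc/vault/unseal.json
+```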
+
+`vault status` now shows the Vault server as initialized but still sealed:
+
+```shell
+vault status
+Key Value
+--- -----
+Seal Type shamir
+Initialized true
+Sealed true
+Total Shares 1
+Threshold 1
+Unseal Progress 0/1
+Unseal Nonce n/a
+Version 1.12.2
+Build Date 2022-11-23T12:53:46Z
+Storage Type raft
+HA Enabled true
+```
+
+To unseal vault, the `vault operator unseal` command needs to be executed, providing the unseal keys generated during the initialization process.
+
+
+Using the key stored in the `unseal.json` file, the following command can be executed:
+
+```shell
+vault operator unseal $(jq -r '.unseal_keys_b64[0]' /etc/vault/unseal.json)
+```
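+
+The same operation can also be performed through Vault's HTTP API, which is what the automatic unseal script shown later relies on; a minimal sketch, assuming `VAULT_ADDR` is exported as above:
+
+```shell
+# Submit the unseal key to the sys/unseal endpoint (-k skips TLS verification)
+curl -sk --request PUT \
+  --data "{\"key\": \"$(jq -r '.unseal_keys_b64[0]' /etc/vault/unseal.json)\"}" \
+  "$VAULT_ADDR/v1/sys/unseal"
+```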
+
+`vault status` now shows the Vault server as initialized and unsealed:
+
+```shell
+vault status
+Key Value
+--- -----
+Seal Type shamir
+Initialized true
+Sealed false
+Total Shares 1
+Threshold 1
+Version 1.12.2
+Build Date 2022-11-23T12:53:46Z
+Storage Type raft
+HA Enabled true
+```
+
+### Vault automatic unseal
+
+A systemd service can be created to automatically unseal vault every time it is started.
+
+
+- Step 1: Create a script (`/etc/vault/vault-unseal.sh`) for automating the unseal process using the keys stored in `/etc/vault/unseal.json`
+
+ ```shell
+ #!/usr/bin/env sh
+
+ #Define a timestamp function
+ timestamp() {
+ date "+%b %d %Y %T %Z"
+ }
+
+
+ URL=https://<node_ip>:8200
+ KEYS_FILE=/etc/vault/unseal.json
+
+ LOG=info
+
+ SKIP_TLS_VERIFY=true
+
+ if [ true = "$SKIP_TLS_VERIFY" ]
+ then
+ CURL_PARAMS="-sk"
+ else
+ CURL_PARAMS="-s"
+ fi
+
+ # Add timestamp
+ echo "$(timestamp): Vault-useal started" | tee -a $LOG
+ echo "-------------------------------------------------------------------------------" | tee -a $LOG
+
+ initialized=$(curl $CURL_PARAMS $URL/v1/sys/health | jq '.initialized')
+
+ if [ true = "$initialized" ]
+ then
+ echo "$(timestamp): Vault already initialized" | tee -a $LOG
+ while true
+ do
+ status=$(curl $CURL_PARAMS $URL/v1/sys/health | jq '.sealed')
+ if [ true = "$status" ]
+ then
+ echo "$(timestamp): Vault Sealed. Trying to unseal" | tee -a $LOG
+ # Get keys from json file
+ for i in $(jq -r '.unseal_keys_b64[]' $KEYS_FILE)
+ do curl $CURL_PARAMS --request PUT --data "{\"key\": \"$i\"}" $URL/v1/sys/unseal
+ done
+ sleep 10
+ else
+ echo "$(timestamp): Vault unsealed" | tee -a $LOG
+ break
+ fi
+ done
+ else
+ echo "$(timestamp): Vault not initialized yet"
+ fi
+ ```
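+
+ The script needs to be readable and executable by the `vault` user; a minimal sketch, following the same ownership convention used for the TLS files:
+
+ ```shell
+ # Make the unseal script owned and executable by the vault user
+ sudo chown vault:vault /etc/vault/vault-unseal.sh
+ sudo chmod 750 /etc/vault/vault-unseal.sh
+ ```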
+
+- Step 2: Create systemd vault service file `/etc/systemd/system/vault-unseal.service`
+
+ ```
+ [Unit]
+ Description=Vault Unseal
+ After=vault.service
+ Requires=vault.service
+ PartOf=vault.service
+
+ [Service]
+ Type=oneshot
+ User=vault
+ Group=vault
+ ExecStartPre=/bin/sleep 10
+ ExecStart=/bin/sh -c '/etc/vault/vault-unseal.sh'
+ RemainAfterExit=false
+
+ [Install]
+ WantedBy=multi-user.target vault.service
+ ```
+
+ This service is defined as part of `vault.service` (`PartOf`), so stop and restart actions on `vault.service` are propagated to this service.
+
+- Step 3. Enable the vault-unseal systemd service and start it
+
+ ```shell
+ sudo systemctl enable vault-unseal.service
+ sudo systemctl start vault-unseal.service
+ ```
+
+
+## Vault configuration
+
+
+Once vault is unsealed, the following configuration steps require providing vault's root token, generated during the initialization process. See `root_token` in the `unseal.json` output.
+
+```shell
+export VAULT_TOKEN=$(jq -r '.root_token' /etc/vault/unseal.json)
+```
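+
+As an optional check, the token can be validated before continuing:
+
+```shell
+# Displays the token's accessor, attached policies (root) and TTL
+vault token lookup
+```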
+
+{{site.data.alerts.note}}
+
+As an alternative to `vault` commands, the HTTP API can be used. See [Vault API documentation](https://developer.hashicorp.com/vault/api-docs)
+
+The `curl` command can be used. The Vault token needs to be provided as the HTTP header `X-Vault-Token`.
+
+GET request:
+```shell
+curl -k -H "X-Vault-Token: $VAULT_TOKEN" $VAULT_ADDR/v1/<api_path>
+```
+
+POST request:
+
+```shell
+curl -k -X POST -H "X-Vault-Token: $VAULT_TOKEN" -d '{"key1":"value1", "key2":"value2"}' $VAULT_ADDR/v1/<api_path>
+```
+
+{{site.data.alerts.end}}
+
+### Enabling KV secrets
+
+Enable [KV (KeyValue) secrets engine](https://developer.hashicorp.com/vault/docs/secrets/kv) to manage static secrets.
+
+```shell
+vault secrets enable -version=2 -path=secret kv
+```
+
+This command enables KV version 2 at path `/secret`.
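+
+The mounted engines can be listed to verify it:
+
+```shell
+# The 'secret/' mount should appear with type 'kv'
+vault secrets list
+```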
+
+### Vault policies
+
+Create vault policies for read-only and read-write access to KV secrets:
+
+- Read-write policy
+
+ Create file `/etc/vault/policy/secrets-write.hcl`
+
+ ```
+ path "secret/*" {
+ capabilities = [ "create", "read", "update", "delete", "list", "patch" ]
+ }
+ ```
+ Add policy to vault
+
+ ```shell
+ vault policy write readwrite /etc/vault/policy/secrets-write.hcl
+ ```
+
+- Read-only policy
+
+ Create file `/etc/vault/policy/secrets-read.hcl`
+ ```
+ path "secret/*" {
+ capabilities = [ "read" ]
+ }
+ ```
+
+ Add policy to vault
+
+ ```shell
+ vault policy write readonly /etc/vault/policy/secrets-read.hcl
+ ```
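+
+Both policies can be verified once created:
+
+```shell
+# List registered policies and print their definitions
+vault policy list
+vault policy read readonly
+vault policy read readwrite
+```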
+
+Testing policies:
+
+- Generate tokens for read and write policies
+
+ ```shell
+ READ_TOKEN=$(vault token create -policy="readonly" -field=token)
+ WRITE_TOKEN=$(vault token create -policy="readwrite" -field=token)
+ ```
+
+- Try to write a secret using the read-only token
+
+ ```shell
+ export VAULT_TOKEN=$READ_TOKEN
+ vault kv put secret/secret1 user="user1" password="s1cret0"
+ ```
+
+ A permission denied error is returned:
+
+ ```
+ Code: 403. Errors:
+
+ * 1 error occurred:
+ * permission denied
+ ```
+
+- Try to write a secret using the read-write token
+
+ ```shell
+ export VAULT_TOKEN=$WRITE_TOKEN
+ vault kv put secret/secret1 user="user1" password="s1cret0"
+ ```
+ The secret is stored successfully:
+ ```
+ === Secret Path ===
+ secret/data/secret1
+
+ ======= Metadata =======
+ Key Value
+ --- -----
+ created_time 2023-01-02T11:04:21.01853116Z
+ custom_metadata
+ deletion_time n/a
+ destroyed false
+ version 1
+ ```
+
+- The secret can be read using either token
+
+ ```shell
+ vault kv get secret/secret1
+ ```
+
+ ```
+ === Secret Path ===
+ secret/data/secret1
+
+ ======= Metadata =======
+ Key Value
+ --- -----
+ created_time 2023-01-02T11:04:21.01853116Z
+ custom_metadata
+ deletion_time n/a
+ destroyed false
+ version 1
+
+ ====== Data ======
+ Key Value
+ --- -----
+ password s1cret0
+ user user1
+ ```
+
+### Kubernetes Auth Method
+
+Enable the [Vault kubernetes auth method](https://developer.hashicorp.com/vault/docs/auth/kubernetes) to authenticate with Vault using a Kubernetes Service Account Token. This method of authentication makes it easy to introduce a Vault token into a Kubernetes Pod.
+
+
+
+- Step 1. Create `vault` namespace
+
+ ```shell
+ kubectl create namespace vault
+ ```
+
+- Step 2. Create service account `vault-auth` to be used by Vault kubernetes authentication
+
+ ```yml
+ ---
+ apiVersion: v1
+ kind: ServiceAccount
+ metadata:
+ name: vault-auth
+ namespace: vault
+ ```
+
+- Step 3. Add proper permissions to service account
+
+ The Vault kubernetes authentication method accesses the Kubernetes TokenReview API to validate that the provided JWT is still valid. Service Accounts used in this auth method need access to the TokenReview API. If Kubernetes is configured to use RBAC roles, the Service Account should be granted permissions to access this API.
+ See more details in [Vault - Kubernetes Auth Method](https://developer.hashicorp.com/vault/docs/auth/kubernetes#configuring-kubernetes)
+
+ ```yml
+ ---
+ apiVersion: rbac.authorization.k8s.io/v1
+ kind: ClusterRoleBinding
+ metadata:
+ name: role-tokenreview-binding
+ namespace: vault
+ roleRef:
+ apiGroup: rbac.authorization.k8s.io
+ kind: ClusterRole
+ name: system:auth-delegator
+ subjects:
+ - kind: ServiceAccount
+ name: vault-auth
+ namespace: vault
+ ```
+
+- Step 4. Create a long-lived token for the `vault-auth` service account.
+
+ Since Kubernetes v1.24, secrets containing long-lived tokens associated with service accounts are no longer created automatically.
+ See how to create one in the [Kubernetes documentation](https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/#manually-create-a-long-lived-api-token-for-a-serviceaccount)
+
+ ```yml
+ apiVersion: v1
+ kind: Secret
+ type: kubernetes.io/service-account-token
+ metadata:
+ name: vault-auth-secret
+ namespace: vault
+ annotations:
+ kubernetes.io/service-account.name: vault-auth
+ ```
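+
+ The manifests from steps 2 to 4 can be applied with `kubectl`, assuming they have been saved to a file (the file name here is just an example):
+
+ ```shell
+ kubectl apply -f vault-auth.yaml
+ ```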
+
+- Step 5. Get Service Account token
+
+ ```shell
+ KUBERNETES_SA_SECRET_NAME=$(kubectl get secrets --output=json -n vault | jq -r '.items[].metadata | select(.name|startswith("vault-auth")).name')
+ TOKEN_REVIEW_JWT=$(kubectl get secret $KUBERNETES_SA_SECRET_NAME -n vault -o jsonpath='{.data.token}' | base64 --decode)
+ ```
+
+- Step 6. Get Kubernetes CA cert and API URL
+
+ ```shell
+ # Get Kubernetes CA
+ kubectl config view --raw --minify --flatten --output='jsonpath={.clusters[].cluster.certificate-authority-data}' | base64 --decode > k3s_ca.crt
+
+ # Get Kubernetes Url
+ KUBERNETES_HOST=$(kubectl config view -o jsonpath='{.clusters[].cluster.server}')
+ ```
+
+- Step 7. Enable Kubernetes auth method
+
+ ```shell
+ vault auth enable kubernetes
+ ```
+
+ Or using Vault API
+
+ ```shell
+ curl -k --header "X-Vault-Token:$VAULT_TOKEN" --request POST\
+ --data '{"type":"kubernetes","description":"kubernetes auth"}' \
+ https://vault.picluster.ricsanfre.com:8200/v1/sys/auth/kubernetes
+ ```
+
+- Step 8. Configure Vault kubernetes auth method
+
+ ```shell
+ vault write auth/kubernetes/config \
+ token_reviewer_jwt="${TOKEN_REVIEW_JWT}" \
+ kubernetes_host="${KUBERNETES_HOST}" \
+ kubernetes_ca_cert=@k3s_ca.crt \
+ disable_iss_validation=true
+ ```
+
+ Or using Vault API:
+
+ ```shell
+ KUBERNETES_CA_CERT=$(kubectl config view --raw --minify --flatten --output='jsonpath={.clusters[].cluster.certificate-authority-data}' | base64 --decode | awk 'NF {sub(/\r/, ""); printf "%s\\n",$0;}')
+
+ curl --cacert /etc/vault/tls/vault-ca.crt --header "X-Vault-Token:$VAULT_TOKEN" --request POST \
+ --data '{"kubernetes_host": "'"$KUBERNETES_HOST"'", "kubernetes_ca_cert":"'"$KUBERNETES_CA_CERT"'", "token_reviewer_jwt":"'"$TOKEN_REVIEW_JWT"'"}' \
+ https://vault.picluster.ricsanfre.com:8200/v1/auth/kubernetes/config
+ ```
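+
+ The resulting auth method configuration can be checked with:
+
+ ```shell
+ vault read auth/kubernetes/config
+ ```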
+
+## External Secrets Operator installation
+
+External Secrets Operator is installed through its helm chart.
+
+- Step 1: Add External secrets repository:
+ ```shell
+ helm repo add external-secrets https://charts.external-secrets.io
+ ```
+- Step 2: Fetch the latest charts from the repository:
+ ```shell
+ helm repo update
+ ```
+- Step 3: Create namespace
+ ```shell
+ kubectl create namespace external-secrets
+ ```
+- Step 4: Install helm chart
+ ```shell
+ helm install external-secrets \
+ external-secrets/external-secrets \
+ -n external-secrets \
+ --set installCRDs=true
+ ```
+- Step 5: Create the external-secrets vault role, applying the read-only policy
+
+ ```shell
+ vault write auth/kubernetes/role/external-secrets \
+ bound_service_account_names=external-secrets \
+ bound_service_account_namespaces=external-secrets \
+ policies=readonly \
+ ttl=24h
+ ```
+
+ Or using the Vault API
+
+ ```shell
+ curl -k --header "X-Vault-Token:$VAULT_TOKEN" --request POST \
+ --data '{ "bound_service_account_names": "external-secrets", "bound_service_account_namespaces": "external-secrets", "policies": ["readonly"], "ttl" : "24h"}' \
+ https://vault.picluster.ricsanfre.com:8200/v1/auth/kubernetes/role/external-secrets
+ ```
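+
+ The role can be verified with:
+
+ ```shell
+ vault read auth/kubernetes/role/external-secrets
+ ```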
+
+
+- Step 6: Create Cluster Secret Store
+
+ ```yml
+ apiVersion: external-secrets.io/v1beta1
+ kind: ClusterSecretStore
+ metadata:
+ name: vault-backend
+ namespace: external-secrets
+ spec:
+ provider:
+ vault:
+ server: "https://vault.picluster.ricsanfre.com:8200"
+ # caBundle needed if vault TLS is signed using a custom CA.
+ # If Vault TLS certificate is signed by Letsencrypt, this is not needed.
+ # CA cert base64 encoded with '\n' characters removed:
+ # caBundle=`cat vault-ca.pem | base64 | tr -d "\n"`
+ caBundle:
+ path: "secret"
+ version: "v2"
+ auth:
+ kubernetes:
+ mountPath: "kubernetes"
+ role: "external-secrets"
+ ```
+
+ Check ClusterSecretStore status
+
+ ```shell
+ kubectl get clustersecretstore -n external-secrets
+ NAME AGE STATUS CAPABILITIES READY
+ vault-backend 10m Valid ReadWrite True
+ ```
+
+- Step 7: Create External secret
+
+ ```yml
+ apiVersion: external-secrets.io/v1beta1
+ kind: ExternalSecret
+ metadata:
+ name: vault-example
+ spec:
+ secretStoreRef:
+ name: vault-backend
+ kind: ClusterSecretStore
+ target:
+ name: mysecret
+ data:
+ - secretKey: password
+ remoteRef:
+ key: secret1
+ property: password
+ - secretKey: user
+ remoteRef:
+ key: secret1
+ property: user
+ ```
+
+ Check ExternalSecret status
+
+ ```shell
+ kubectl get externalsecret
+ NAME STORE REFRESH INTERVAL STATUS READY
+ vault-example vault-backend 1h SecretSynced True
+ ```
+
+ Check Secret created
+
+ ```shell
+ kubectl get secret mysecret -o yaml
+ ```
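+
+ To confirm that the synced values match what is stored in Vault, individual keys can be decoded (key names taken from the example secret above):
+
+ ```shell
+ kubectl get secret mysecret -o jsonpath='{.data.user}' | base64 -d
+ kubectl get secret mysecret -o jsonpath='{.data.password}' | base64 -d
+ ```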
+
+## References
+
+- [Vault - Kubernetes Auth Method](https://developer.hashicorp.com/vault/docs/auth/kubernetes)
+- [External Vault configuration guide](https://developer.hashicorp.com/vault/tutorials/kubernetes/kubernetes-external-vault)
+
+- [Tutorial: How to Set External-Secrets with Hashicorp Vault](https://blog.container-solutions.com/tutorialexternal-secrets-with-hashicorp-vault)
\ No newline at end of file
diff --git a/docs/_layouts/post.html b/docs/_layouts/post.html
index 89a4e758..dd46c82d 100644
--- a/docs/_layouts/post.html
+++ b/docs/_layouts/post.html
@@ -4,7 +4,7 @@
-
+
Recent News
@@ -23,16 +23,16 @@ {{ page.title }}
{{ page.date | date: "%b %-d, %Y" }}{% if page.author %} • {{ page.author }}{% endif %}{% if page.meta %} • {{ page.meta }}{% endif %}
{{ content }}
-