Skip to content

Commit

Permalink
Merge pull request #27 from ricsanfre/feature/backup
Browse files Browse the repository at this point in the history
Adding cluster backup capability
  • Loading branch information
ricsanfre authored Dec 29, 2021
2 parents c8ea299 + 59dc85d commit 727eeb4
Show file tree
Hide file tree
Showing 42 changed files with 1,571 additions and 26 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
ansible/roles/ricsanfre.*
ansible_collections
certificates
12 changes: 11 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ As part of the project the goal is to deploy on the Kuberenets cluster basic ser
- [K3S Distributed Storage](documentation/longhorn.md). Installing LongHorn as cluster distributed storage solution for providing Persistent Volumes to pods.
- [K3S centralized logging monitoring](documentation/logging.md). Installing a centralized log monitoring tool based on the EFK stack. Real-time processing of logs from Kubernetes pods and services and from homelab servers.
- [K3S centralized monitoring](documentation/monitoring.md). Installing Kube Prometheus Stack for monitoring the Kubernetes cluster.
5) [Cluster backup and restore](documentation/backup.md). Deployment of a backup server (Minio S3 Object Store) and backup policies implementation at 3 levels: 1) OS filesystem level, using Restic, 2) Kubernetes configuration, using Velero, 3) POD's Persistent Volumes, using Longhorn backup/snapshots and Velero-restic.

## Automatic deployment instructions using Ansible

Expand Down Expand Up @@ -86,6 +87,14 @@ As part of the project the goal is to deploy on the Kuberenets cluster basic ser
```
ansible-playbook setup_picluster.yml --tags "node"
```
- Configure the backup server (S3) on `node1` and configure OS backups with restic on all nodes (`node1-node4` and `gateway`)
Run the playbook:
```
ansible-playbook backup_configuration.yml
```
- Install K3S cluster
Run the playbook:
Expand All @@ -94,7 +103,7 @@ As part of the project the goal is to deploy on the Kuberenets cluster basic ser
ansible-playbook k3s_install.yml
```
- Deploy and configure basic services (metallb, traefik, certmanager, longhorn, EFK and Prometheus )
- Deploy and configure basic services (metallb, traefik, certmanager, longhorn, EFK, Prometheus, Velero )
Run the playbook:
Expand All @@ -116,6 +125,7 @@ As part of the project the goal is to deploy on the Kuberenets cluster basic ser
| `longhorn` | Longhorn |
| `logging` | EFK Stack |
| `monitoring` | Prometheus Stack |
| `backup` | Velero |
### Resetting K3s
Expand Down
55 changes: 55 additions & 0 deletions ansible/backup_configuration.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
---

# Play 1: configure the S3 backup server on node1.
# The pre_tasks run the certificate task files delegated to localhost and load
# the resulting key/certificate files into facts; the tasks section then loads
# the Minio variables and includes the ricsanfre.minio role.
- name: Configure Backup Server - S3 Storage
  hosts: node1
  gather_facts: true
  tags:
    - backup
  vars:
    ansible_become: true
    server_hostname: "{{ minio_hostname }}"
    # Certificate parameters (presumably consumed by the task files under
    # tasks/ included below -- confirm against those files).
    ssl_key_size: 4096
    ssl_certificate_provider: selfsigned
    key_type: RSA
    country_name: ES
    # NOTE(review): this value looks like a redacted/obfuscated e-mail address.
    # As written, YAML reads it as a one-item flow list -- confirm the intended
    # address and quote it.
    email_address: [email protected]
    organization_name: Ricsanfre

  pre_tasks:
    # Both certificate steps are applied with delegate_to localhost and
    # become disabled.
    - name: Generate custom CA
      include_tasks:
        file: tasks/generate_custom_ca.yml
        apply:
          delegate_to: localhost
          become: false

    - name: Generate customCA-signed SSL certificates for minio
      include_tasks:
        file: tasks/generate_ca_signed_cert.yml
        apply:
          delegate_to: localhost
          become: false

    # Read <server_hostname>.key / .pem from the local certificates directory.
    - name: Load tls key and cert
      set_fact:
        minio_key: "{{ lookup('file','certificates/' + server_hostname + '.key') }}"
        minio_cert: "{{ lookup('file','certificates/' + server_hostname + '.pem') }}"

  tasks:
    - name: Include S3 configuration variables
      include_vars:
        file: vars/backup/s3_minio.yml

    - name: Configure Minio S3 server
      include_role:
        name: ricsanfre.minio

# Play 2: apply the ricsanfre.backup role to every host in the raspberrypi
# group, after loading the CA certificate file into the restic_ca_cert fact.
- name: Configure Pi-cluster nodes backup
  hosts: raspberrypi
  gather_facts: true
  tags:
    - backup
  vars:
    ansible_become: true

  pre_tasks:
    # The lookup reads certificates/CA.pem on the control node.
    - name: Load CA certificate for restic
      set_fact:
        restic_ca_cert: "{{ lookup('file','certificates/CA.pem') }}"

  roles:
    - role: ricsanfre.backup
      tags:
        - backup
36 changes: 36 additions & 0 deletions ansible/group_vars/all.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,39 @@ ansible_user: ansible
# Cluster Lab Architecture
# Whether to use centralized SAN architecture or not
centralized_san: false

#######################
# DNS configuration
#######################
# DNS server address and the DNS domain used for the cluster host names
dns_server: "10.0.0.1"
dns_domain: "picluster.ricsanfre.com"

#######################
# backup configuration
#######################

# Minio S3 Server: host name, HTTPS endpoint (port 9091) and site region
minio_hostname: "s3.picluster.ricsanfre.com"
minio_url: "https://{{ minio_hostname }}:9091"
minio_site_region: "eu-west-1"

# Restic repository: the "restic" bucket path on the Minio endpoint above
restic_repository: "s3:{{ minio_url }}/restic"
restic_aws_access_key_id: "restic"
# NOTE(review): credential stored in plain text in version control; consider
# moving it to an encrypted store (e.g. Ansible Vault).
restic_aws_secret_access_key: "supers1cret0"
restic_use_ca_cert: true
# Deactivate clean service. Only enabled in one node
restic_clean_service: false
# Directories to back up; each entry may carry a list of exclude patterns
restic_backups_dirs:
  - path: "/etc"
  - path: "/root"
    exclude:
      - pattern: ".cache"
  - path: "/home/ansible"
    exclude:
      - pattern: ".cache"
      - pattern: ".ansible"
  - path: "/home/oss"
    exclude:
      - pattern: ".cache"
12 changes: 12 additions & 0 deletions ansible/group_vars/k3s_cluster.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ k3s_longhorn_namespace: longhorn-system
k3s_certmanager_namespace: certmanager-system
k3s_logging_namespace: k3s-logging
k3s_monitoring_namespace: k3s-monitoring
k3s_velero_namespace: velero-system

# DNS service end-points

Expand All @@ -45,3 +46,14 @@ k3s_external_ip_range: "10.0.0.100-10.0.0.200"
# HTTP Basic auth credentials
traefik_basic_auth_user: admin
traefik_basic_auth_passwd: s1cret0

# Velero: Minio user, key and bucket
# NOTE(review): the keys below are stored in plain text in version control;
# consider moving them to an encrypted store (e.g. Ansible Vault).
minio_velero_user: "velero"
minio_velero_key: "supers1cret0"
minio_velero_bucket: "k3s-velero"

# Enabling longhorn backup: flag plus Minio user, key and bucket
longhorn_s3_backup: true
minio_longhorn_user: "longhorn"
minio_longhorn_key: "supers1cret0"
minio_longhorn_bucket: "k3s-longhorn"
11 changes: 11 additions & 0 deletions ansible/host_vars/gateway.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ dnsmasq_additional_dns_hosts:
desc: "DNS Server"
hostname: dns
ip: 10.0.0.1
s3_server:
desc: "S3 Server"
hostname: s3
ip: 10.0.0.11

####################
# ntp role variables
Expand Down Expand Up @@ -94,7 +98,14 @@ nft_forward_host_rules:
- iifname $wan_interface oifname $lan_interface ip daddr $lan_network tcp dport ssh ct state new accept
230 http from wan:
- iifname $wan_interface oifname $lan_interface ip daddr $lan_network tcp dport {http, https} ct state new accept
240 s3 from wan:
- iifname $wan_interface oifname $lan_interface ip daddr 10.0.0.11 tcp dport {9091, 9092} ct state new accept
# NAT Post-routing
nft_nat_host_postrouting_rules:
005 masquerade lan to wan:
- ip saddr $lan_network oifname $wan_interface masquerade

############################
# restic backup role variables
############################
# Turn the restic clean service on for this host (gateway).
restic_clean_service: true
2 changes: 2 additions & 0 deletions ansible/k3s_deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@
tags: ['monitoring']
- role: logging/k3s
tags: ['logging']
- role: backup/velero
tags: ['backup']

- name: Deploy fluentbit on control nodes (gateway and pimaster)
hosts: control
Expand Down
4 changes: 4 additions & 0 deletions ansible/requirements.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ roles:
version: v1.0.0
- name: ricsanfre.fluentbit
version: v1.0.3
- name: ricsanfre.minio
version: v1.0.7
- name: ricsanfre.backup
version: v1.0.2
collections:
- name: kubernetes.core
version: 2.1.1
3 changes: 3 additions & 0 deletions ansible/roles/backup/velero/defaults/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
---
# Velero release tag and CPU architecture; both are interpolated into the
# download URL and archive name of the Velero CLI.
velero_version: "v1.7.1"
velero_arch: "arm64"
30 changes: 30 additions & 0 deletions ansible/roles/backup/velero/tasks/configure_velero_cli.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
---
# Query the namespace stored in the velero client configuration.
# The query never reports a change, and a failure does not abort the play.
- name: Get CLI configured namespace
  command:
    cmd: "velero client config get namespace"
  register: velero_cli_namespace
  ignore_errors: true
  changed_when: false

# Set the namespace only when the query succeeded and its output does not
# already show k3s_velero_namespace (or shows it as not set).
- name: Configure velero CLI namespace
  command:
    cmd: "velero client config set namespace={{ k3s_velero_namespace }}"
  when: >-
    velero_cli_namespace.rc == 0 and
    ("namespace: <NOT SET>" in velero_cli_namespace.stdout or
    "namespace: " + k3s_velero_namespace not in velero_cli_namespace.stdout)
  changed_when: true

# Query the "colored" setting stored in the velero client configuration.
# The query never reports a change, and a failure does not abort the play.
- name: Get CLI configured colored
  command:
    cmd: "velero client config get colored"
  register: velero_cli_colored
  ignore_errors: true
  changed_when: false

# Enable colored output only when the query succeeded and its output does not
# already show "colored: true" (or shows it as not set).
- name: Configure velero CLI colored output
  command:
    cmd: "velero client config set colored=true"
  when: >-
    velero_cli_colored.rc == 0 and
    ("colored: <NOT SET>" in velero_cli_colored.stdout or
    "colored: true" not in velero_cli_colored.stdout)
  changed_when: true
20 changes: 20 additions & 0 deletions ansible/roles/backup/velero/tasks/install_velero_cli.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
---

# Download the Velero release archive for velero_version / velero_arch.
# The archive is staged in /tmp and its binary is later installed into
# /usr/local/bin, so it must not be writable by group/others: mode 0644
# (instead of the previous world-writable 0766).
- name: Download Velero CLI
  get_url:
    url: https://github.com/vmware-tanzu/velero/releases/download/{{ velero_version }}/velero-{{ velero_version }}-linux-{{ velero_arch }}.tar.gz
    dest: /tmp/velero-{{ velero_version }}-linux-{{ velero_arch }}.tar.gz
    mode: '0644'

# Unpack on the managed host itself (remote_src), next to the archive.
- name: Extract archives
  unarchive:
    src: /tmp/velero-{{ velero_version }}-linux-{{ velero_arch }}.tar.gz
    dest: /tmp
    remote_src: true

# Install the velero binary from the unpacked directory as an executable.
- name: Copy binary to /usr/local/bin
  copy:
    src: /tmp/velero-{{ velero_version }}-linux-{{ velero_arch }}/velero
    dest: /usr/local/bin/velero
    mode: '0755'
    remote_src: true
79 changes: 79 additions & 0 deletions ansible/roles/backup/velero/tasks/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
---

# Install the velero binary; every task of the included file runs with become.
- name: Install velero client
  include_tasks:
    file: install_velero_cli.yml
    apply:
      become: true

# Ensure the namespace named by k3s_velero_namespace exists.
- name: Create velero namespace.
  kubernetes.core.k8s:
    state: present
    api_version: v1
    kind: Namespace
    name: "{{ k3s_velero_namespace }}"

# Point the velero client configuration at that namespace.
- name: Configure velero CLI
  include_tasks: configure_velero_cli.yml

# Register the Helm repository that hosts the velero chart.
- name: Add vmware-tanzu chart repo.
  kubernetes.core.helm_repository:
    name: vmware-tanzu
    repo_url: "https://vmware-tanzu.github.io/helm-charts"

# Install the vmware-tanzu/velero chart into k3s_velero_namespace, using the
# Minio S3 endpoint (minio_url) as an AWS-compatible backup storage location.
- name: Deploy Velero with Minio storage provider
  kubernetes.core.helm:
    name: velero
    chart_ref: vmware-tanzu/velero
    update_repo_cache: true
    release_namespace: "{{ k3s_velero_namespace }}"
    state: present
    release_values:
      # AWS backend plugin configuration
      initContainers:
        - name: velero-plugin-for-aws
          image: velero/velero-plugin-for-aws:v1.3.0
          imagePullPolicy: IfNotPresent
          volumeMounts:
            - mountPath: /target
              name: plugins
      # Upgrading CRDs is causing issues
      upgradeCRDs: false
      # Use a kubectl image supporting ARM64
      # bitnami default is not supporting it
      # kubectl:
      #   image:
      #     repository: rancher/kubectl
      #     tag: v1.21.5
      # Disable volume snapshots. Longhorn deals with them
      snapshotsEnabled: false
      # Deploy restic
      deployRestic: true
      # Minio storage configuration
      configuration:
        # Cloud provider being used
        provider: aws
        backupStorageLocation:
          provider: aws
          bucket: "{{ minio_velero_bucket }}"
          # Base64-encoded custom CA bundle (newlines stripped) read from the
          # control node; used to validate the Minio TLS certificate.
          caCert: "{{ lookup('file','certificates/CA.pem') | b64encode | replace('\n', '') }}"
          config:
            region: "{{ minio_site_region }}"
            s3ForcePathStyle: true
            s3Url: "{{ minio_url }}"
            # Keep TLS verification on: the CA bundle is supplied through
            # caCert above, so skipping verification would defeat it.
            insecureSkipTLSVerify: false
      # Credentials file content for the Minio velero user
      credentials:
        secretContents:
          cloud: |
            [default]
            aws_access_key_id: "{{ minio_velero_user }}"
            aws_secret_access_key: "{{ minio_velero_key }}"
# Render each listed template and apply the resulting manifest to the cluster.
- name: Create Full backup Schedule policy
  kubernetes.core.k8s:
    state: present
    definition: "{{ lookup('template', 'templates/' + item ) }}"
  loop:
    - velero_full_schedule.yml.j2
20 changes: 20 additions & 0 deletions ansible/roles/backup/velero/templates/velero_full_schedule.yml.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
---
# Velero Schedule "full": backs up every namespace and every resource,
# including cluster-scoped resources.
apiVersion: velero.io/v1
kind: Schedule
metadata:
  name: full
  # Same namespace the role deploys Velero into; falls back to the previous
  # hard-coded value when the variable is not defined.
  namespace: "{{ k3s_velero_namespace | default('velero-system') }}"
spec:
  # Cron expression: minute 0, hour 4, every day
  schedule: '0 4 * * *'
  template:
    hooks: {}
    includedNamespaces:
      - '*'
    # Velero's field name is camelCase (includedResources); the former
    # snake_case key is not part of the Backup spec.
    includedResources:
      - '*'
    includeClusterResources: true
    # Labels set under the backup template's metadata
    metadata:
      labels:
        type: 'full'
        schedule: 'daily'
    # Backup time-to-live: 720 hours
    ttl: 720h0m0s
11 changes: 11 additions & 0 deletions ansible/roles/backup/velero/tests/cleaning_testing.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
---
# Test cleanup playbook: deletes the resources defined in the listed manifest
# files from the cluster (state: absent).
- name: Cleaning velero testing pod.
  hosts: k3s_master

  tasks:
    - name: Cleaning testing
      kubernetes.core.k8s:
        state: absent
        definition: "{{ lookup('file', 'files/' + item ) }}"
      loop:
        - nginx_test_application.yml
Loading

0 comments on commit 727eeb4

Please sign in to comment.