diff --git a/.envrc b/.envrc
index a4b55848..0eb5c961 100644
--- a/.envrc
+++ b/.envrc
@@ -1,5 +1,6 @@
#shellcheck disable=SC2148,SC2155
export KUBECONFIG="$(expand_path ./kubeconfig)"
export SOPS_AGE_KEY_FILE="$(expand_path ./age.key)"
+export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES
export TALOSCONFIG="$(expand_path ./kubernetes/talos/clusterconfig/talosconfig)"
use flake
diff --git a/.sops.yaml b/.sops.yaml
index ef98b50f..a2bd7ee9 100644
--- a/.sops.yaml
+++ b/.sops.yaml
@@ -10,3 +10,7 @@ creation_rules:
key_groups:
- age:
- "age1k5xl02aujw4rsgghnnd0sdymmwd095w5nqgjvf76warwrdc0uqpqsm2x8m"
+ - path_regex: .*\.sops\.ya?ml
+ key_groups:
+ - age:
+ - "age1k5xl02aujw4rsgghnnd0sdymmwd095w5nqgjvf76warwrdc0uqpqsm2x8m"
diff --git a/.taskfiles/Ansible/Taskfile.yaml b/.taskfiles/Ansible/Taskfile.yaml
new file mode 100644
index 00000000..41ba250c
--- /dev/null
+++ b/.taskfiles/Ansible/Taskfile.yaml
@@ -0,0 +1,18 @@
+---
+# yaml-language-server: $schema=https://taskfile.dev/schema.json
+version: "3"
+
+vars:
+ ANSIBLE_INVENTORY_DIR: "{{.ANSIBLE_DIR}}/inventory"
+ ANSIBLE_PLAYBOOK_DIR: "{{.ANSIBLE_DIR}}/playbooks"
+
+tasks:
+ proxmox-setup:
+ desc: Run Ansible setup playbook on the nodes
+ cmds:
+ - ansible-playbook -i {{.ANSIBLE_INVENTORY_DIR}}/hosts.yaml {{.ANSIBLE_PLAYBOOK_DIR}}/proxmox-setup.yaml -v
+
+ proxmox-update:
+    desc: Update Proxmox packages
+ cmds:
+ - ansible-playbook -i {{.ANSIBLE_INVENTORY_DIR}}/hosts.yaml {{.ANSIBLE_PLAYBOOK_DIR}}/proxmox-apt-upgrade.yaml
diff --git a/README.md b/README.md
index 439c18ec..26643d5b 100644
--- a/README.md
+++ b/README.md
@@ -29,16 +29,30 @@ exploring Kubernetes and Infrastructure as Code (IaC) practices using tools like
## 📋 Table of contents
- [💼 Overview](#-overview)
- - [📋 Table of contents](#-table-of-contents)
- - [📚 Documentation](#-documentation)
- - [🖥️ Technological Stack](#-technological-stack)
- - [🔧 Hardware](#-hardware)
- - [☁️ External Dependencies](#-external-dependencies)
- - [🤖 Automation](#-automation)
- - [🤝 Thanks](#-thanks)
+- [📋 Table of contents](#-table-of-contents)
+- [📚 Documentation](#-documentation)
+- [🖥️ Technological Stack](#-technological-stack)
+- [🔧 Hardware](#-hardware)
+- [☁️ External Dependencies](#-external-dependencies)
+- [🤖 Automation](#-automation)
+- [🤝 Thanks](#-thanks)
## 📚 Documentation
+1. [Prerequisites](docs/prerequisites.md)
+ - [Cloudflare](docs/prerequisites.md#1-set-up-cloudflare)
+ - [Secrets store](docs/prerequisites.md#2-set-up-secrets-store)
+ - [UDM](docs/prerequisites.md#3-set-up-udm)
+ - [Discord](docs/prerequisites.md#4-get-discord-token)
+ - [PiHole](docs/prerequisites.md#5-set-up-pihole-and-generate-token-for-homepage)
+ - [NAS and Minio](docs/prerequisites.md#6-nas-set-up)
+2. [Setup Guide](docs/set-up.md)
+ - [Install and Configure Proxmox](docs/set-up.md#install-and-configure-proxmox)
+ - [Create and Install Talos Images](docs/set-up.md#create-and-install-talos-images)
+ - [Bootstrap Kubernetes Cluster](docs/set-up.md#bootstrap-kubernetes-cluster)
+ - [Install Flux](docs/set-up.md#install-flux)
+3. [How To](docs/howto.md)
+
## 🖥️ Technological Stack
| | Name | Description |
@@ -74,17 +88,17 @@ exploring Kubernetes and Infrastructure as Code (IaC) practices using tools like
-| Device | Count | Disk Size | RAM | OS | Purpose |
-|----------------------------|-------|-----------|------|---------|-------------------------|
-| Lenovo M910Q Tiny i5-6500T | 3 | 256G | 32GB | Talos | Kubernetes Master Nodes |
-| Raspberry Pi 5 | 1 | | 8GB | RpiOS | DNS, SmartHome |
-| Synology RS422+ | 1 | 4x16TB | 2GB | DSM | NAS |
-| UPS 5UTRA91227 | 1 | | | | UPS |
-| UniFi UDM Pro | 1 | | | UnifiOS | Router |
-| UniFi USW PRO 24 Gen2 | 1 | | | | Switch |
-| UniFi USW Lite 8 | 1 | | | | Switch |
-| UniFi U6 In-Wall | 1 | | | | Access Point |
-| UniFi U6 Mesh | 1 | | | | Access Point |
+| Device | Count | Disk Size | RAM | OS | Purpose |
+|----------------------------|-------|------------|------|---------|-------------------------|
+| Lenovo M910Q Tiny i5-6500T | 3 | 2x1TB SSD | 32GB | Talos | Kubernetes Master Nodes |
+| Raspberry Pi 5 | 1 | | 8GB | RpiOS | DNS, SmartHome |
+| Synology RS422+ | 1 | 4x16TB HDD | 2GB | DSM | NAS |
+| UPS 5UTRA91227 | 1 | | | | UPS |
+| UniFi UDM Pro | 1 | | | UnifiOS | Router |
+| UniFi USW PRO 24 Gen2 | 1 | | | | Switch |
+| UniFi USW Lite 8 | 1 | | | | Switch |
+| UniFi U6 In-Wall | 1 | | | | Access Point |
+| UniFi U6 Mesh | 1 | | | | Access Point |
## ☁️ External Dependencies
diff --git a/Taskfile.yaml b/Taskfile.yaml
index 0940a1c3..b5c85d94 100644
--- a/Taskfile.yaml
+++ b/Taskfile.yaml
@@ -5,13 +5,17 @@ version: "3"
vars:
# Directories
KUBERNETES_DIR: "{{.ROOT_DIR}}/kubernetes"
+ INFRA_DIR: "{{.ROOT_DIR}}/infrastructure"
+ ANSIBLE_DIR: "{{.INFRA_DIR}}/ansible"
# Files
AGE_FILE: "{{.ROOT_DIR}}/age.key"
KUBECONFIG_FILE: "{{.ROOT_DIR}}/kubeconfig"
+ INFRA_SECRETS_FILE: "{{.INFRA_DIR}}/secrets.sops.yaml"
env:
KUBECONFIG: "{{.KUBECONFIG_FILE}}"
SOPS_AGE_KEY_FILE: "{{.AGE_FILE}}"
+ INFRA_SECRETS_FILE: "{{.INFRA_SECRETS_FILE}}"
includes:
kubernetes:
@@ -21,6 +25,7 @@ includes:
talos: .taskfiles/Talos/Taskfile.yaml
sops: .taskfiles/Sops/Taskfile.yaml
volsync: .taskfiles/VolSync/Taskfile.yaml
+ ansible: .taskfiles/Ansible/Taskfile.yaml
secrets: .taskfiles/ExternalSecrets/Taskfile.yaml
tasks:
diff --git a/docs/howto.md b/docs/howto.md
new file mode 100644
index 00000000..27ffbc95
--- /dev/null
+++ b/docs/howto.md
@@ -0,0 +1,72 @@
+## How to
+
+### Reset node ephemeral storage
+
+If some of the local hostpath PVs consume all of the node's storage and fill up the disk, the only way to recover is to
+completely reset the ephemeral disk. This can be done with the following command:
+
+```sh
+talosctl --talosconfig=./kubernetes/bootstrap/talos/clusterconfig/talosconfig --nodes=[NODE_IP] reset --system-labels-to-wipe EPHEMERAL
+```
+
+1. Start the node from the Proxmox UI.
+2. Manually delete all previous PVCs and PVs for a local-hostpath storage class that were hosted on the node.
+3. Manually delete pods so they are recreated
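+
+A minimal cleanup sketch, assuming the PVs use the `local-hostpath` storage class named in this repo (substitute your
+real PVC/PV names for the bracketed placeholders):
+
+```sh
+# Find PVs of the local-hostpath storage class and their claims
+kubectl get pv | grep local-hostpath
+# Delete each affected claim, then any PV left in the Released state
+kubectl delete pvc [pvc-name] -n [namespace]
+kubectl delete pv [pv-name]
+```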
+
+### Upgrade SSD storage
+
+1. Add a new SSD to the machine.
+2. Wipe it from the Proxmox UI and press "Initialize Disk with GPT".
+3. Create a new LVM volume group. LVM allows creating snapshots, though that is probably not needed here.
+4. Add the disk as hardware to the VM. Don't forget to disable backup.
+
+### Replace a node
+
+1. Reset the Talos node
+ ```sh
+   talosctl --talosconfig=./kubernetes/bootstrap/talos/clusterconfig/talosconfig --nodes=[node-ip] reset
+ ```
+2. Delete the node from Kubernetes
+   ```sh
+   kubectl delete node [node-name]
+   ```
+3. Delete the node from the Proxmox cluster. SSH to an existing node and run:
+ ```sh
+ pvecm delnode [node-name]
+ ```
+   where `[node-name]` is the name from the Proxmox cluster configuration.
+4. Delete the information about the node from `/etc/pve/nodes` on the remaining Proxmox machines.
+5. Continue with the [setup guide](./set-up.md) up to the cluster bootstrapping step.
+6. Apply the configuration to the new node:
+ ```sh
+   talosctl apply-config --talosconfig=./clusterconfig/talosconfig --nodes=[node-ip] --file=./clusterconfig/home-kubernetes-k8s-control-1.yaml --insecure
+ ```
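+
+After the new node joins, re-run the health check from the setup guide to confirm the cluster recovered:
+
+```sh
+talosctl --talosconfig=./kubernetes/bootstrap/talos/clusterconfig/talosconfig --nodes=[node-ip] health
+kubectl get nodes -o wide
+```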
+
+### Remove Cluster Info from Proxmox Node
+
+```sh
+systemctl stop pve-cluster corosync
+pmxcfs -l
+rm -rf /etc/corosync/*
+rm /etc/pve/corosync.conf
+killall pmxcfs
+systemctl start pve-cluster
+```
+
+Afterwards, delete the information about the remaining nodes in `/etc/pve/nodes`.
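+
+For example, with a hypothetical node name (list the directory first to see what is left over):
+
+```sh
+ls /etc/pve/nodes
+rm -rf /etc/pve/nodes/[node-name]
+```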
+
+### Set Up GitHub App for a New Repository
+
+1. Create a GitHub app following
+ the [guideline](https://docs.github.com/en/apps/creating-github-apps/registering-a-github-app/registering-a-github-app)
+2. Copy the app ID and save it to a `BOT_APP_ID` repository secret and to the `ACTION_RUNNER_CONTROLLER_GITHUB_APP_ID`
+   property of the `actions-runner-controller` 1Password secret.
+3. Generate a new app private key and add it to a `BOT_APP_PRIVATE_KEY` repository secret and to
+   the `ACTION_RUNNER_CONTROLLER_GITHUB_PRIVATE_KEY` property of the `actions-runner-controller` 1Password secret, in
+   the format:
+ ```
+ -----BEGIN RSA PRIVATE KEY-----
+ ...
+ -----END RSA PRIVATE KEY-----
+ ```
+
diff --git a/docs/prerequisites.md b/docs/prerequisites.md
new file mode 100644
index 00000000..b92784f0
--- /dev/null
+++ b/docs/prerequisites.md
@@ -0,0 +1,114 @@
+## Prerequisites
+
+### 1. Set up Cloudflare
+1. Go to [Cloudflare API Tokens](https://dash.cloudflare.com/profile/api-tokens) and create an API Token.
+2. Under the `API Tokens` section, click the blue `Create Token` button.
+3. Select the `Edit zone DNS` template by clicking the blue `Use template` button.
+4. Under `Permissions`, click `+ Add More` and add the following permissions:
+ - `Zone - DNS - Edit`
+ - `Account - Cloudflare Tunnel - Read`
+5. Limit the permissions to specific account and zone resources.
+6. Click the blue `Continue to Summary` button and then the blue `Create Token` button.
+7. Copy the token and save it to the secrets store under a `CF_API_TOKEN` field.
+
+### 2. Set up secrets store
+I use 1Password as the secrets store for my homelab cluster. To execute the IaC scripts that provision the
+infrastructure, [1Password Connect](https://developer.1password.com/docs/connect/) must be set up separately with
+access to the 1Password vault. Once the cluster setup is complete, 1Password Connect will be hosted inside the cluster.
+
+Ensure you update `OP_CONNECT_HOST` and `OP_CONNECT_TOKEN` in the [env file](../infrastructure/secrets.sops.yaml).
+
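+A quick way to confirm the Connect server is reachable before running any IaC; `/v1/vaults` is a standard 1Password
+Connect API endpoint (this assumes `OP_CONNECT_HOST` includes the scheme, e.g. `http://...`):
+
+```sh
+curl -H "Authorization: Bearer $OP_CONNECT_TOKEN" "$OP_CONNECT_HOST/v1/vaults"
+```
+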
+The 1Password vault should contain the following items:
+
+| Item name | Fields | Description |
+|---------------------------|-------------------------------------------------|-----------------------------------------------------------|
+| minio                     | MINIO_ROOT_USER                                 |                                                           |
+|                           | MINIO_ROOT_PASSWORD                             |                                                           |
+|                           | MINIO_LOKI_BUCKET                               |                                                           |
+|                           | MINIO_LOKI_SECRET_KEY                           |                                                           |
+|                           | MINIO_LOKI_ACCESS_KEY                           |                                                           |
+|                           | MINIO_THANOS_BUCKET                             |                                                           |
+|                           | MINIO_THANOS_SECRET_KEY                         |                                                           |
+|                           | MINIO_THANOS_ACCESS_KEY                         |                                                           |
+| cloudnative-pg | POSTGRESS_SUPER_USER | |
+| | POSTGRESS_SUPER_PASS | |
+| cloudflare | CLOUDFLARE_ACCOUNT_TAG | |
+| | CLOUDFLARE_TUNNEL_SECRET | |
+| | CLUSTER_CLOUDFLARE_TUNNEL_ID | |
+| | CLOUDFLARE_HOMEPAGE_TUNNEL_SECRET | |
+| | CF_API_TOKEN | |
+| proxmox | username | |
+| | password | |
+| | HOMEPAGE_PROXMOX_USERNAME | |
+| | HOMEPAGE_PROXMOX_PASSWORD | |
+| actions-runner-controller | ACTION_RUNNER_CONTROLLER_GITHUB_APP_ID | |
+| | ACTION_RUNNER_CONTROLLER_GITHUB_INSTALLATION_ID | |
+| | ACTION_RUNNER_CONTROLLER_GITHUB_PRIVATE_KEY | In a format starting with -----BEGIN RSA PRIVATE KEY----- |
+| unifipoller | username | |
+| | password | |
+| discord | GATUS_DISCORD_WEBHOOK | |
+| | ALERTMANAGER_DISCORD_WEBHOOK | |
+| gatus | GATUS_POSTGRES_USER | |
+| | GATUS_POSTGRES_PASS | |
+| nodered | CREDENTIAL_SECRET | Used to encrypt nodered secrets |
+| overseerr | OVERSEERR_TOKEN | Used in homepage |
+| pihole | HOMEPAGE_PI_HOLE_TOKEN | |
+| synology | HOMEPAGE_SYNOLOGY_USERNAME | |
+| | HOMEPAGE_SYNOLOGY_PASSWORD | |
+| plex | PLEX_TOKEN | Used in homepage |
+| prowlarr | PROWLARR_API_KEY | Used in homepage |
+| | PROWLARR_POSTGRES_USER | |
+| | PROWLARR_POSTGRES_PASSWORD | |
+| sonarr | SONARR_API_KEY | Used in homepage |
+| | SONARR_POSTGRES_USER | |
+| | SONARR_POSTGRES_PASSWORD | |
+| radarr | RADARR_API_KEY | Used in homepage |
+| | RADARR_POSTGRES_USER | |
+| | RADARR_POSTGRES_PASSWORD | |
+| qbittorrent | username | |
+| | password | |
+| grafana | GRAFANA_POSTGRESS_USER | |
+| | GRAFANA_POSTGRESS_PASS | |
+
+
+### 3. Set up UDM
+
+1. Set up the unifipoller user (TODO docs).
+2. Forward port for qBittorrent (TODO docs).
+
+### 4. Get Discord token
+
+1. Go to Server settings -> Integrations and create two webhooks:
+ - Webhook for Prometheus alerts. Save it to the `ALERTMANAGER_DISCORD_WEBHOOK` item in 1Password.
+ - Webhook for Gatus alerts. Save it to the `GATUS_DISCORD_WEBHOOK` item in 1Password.
+
+### 5. Set up pihole and generate token for Homepage
+
+1. Set up Pi-hole on a separate Raspberry Pi.
+2. Generate a token for the Homepage widget in Pi-hole and save it to the `HOMEPAGE_PI_HOLE_TOKEN` item in 1Password.
+
+### 6. NAS set up
+
+#### Install and Configure Minio on NAS
+
+1. **Install Synology Container Manager:**
+ 1. Install the `Synology Container Manager` package from the Package Center.
+ 2. Open the `Synology Container Manager` and run a Docker container using the `minio/minio` image. Ensure that port `9000` is forwarded.
+
+2. **Create Minio Buckets:**
+   - Manually create the following buckets (one way to script this is shown after this list):
+ - `cloudnative-pg` for PostgreSQL backups.
+ - `loki-bucket` to store logs.
+ - `thanos` to store old metrics data with Thanos.
+ - Update the corresponding 1Password items with the necessary details.
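+
+A sketch using the MinIO client (`mc`); the alias name `nas` and `[NAS_IP]` are placeholders, and the credentials are
+assumed to match the `minio` 1Password item above:
+
+```sh
+mc alias set nas http://[NAS_IP]:9000 "$MINIO_ROOT_USER" "$MINIO_ROOT_PASSWORD"
+mc mb nas/cloudnative-pg nas/loki-bucket nas/thanos
+```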
+
+#### Configure NFS Connections
+
+1. **Create a Shared Folder:**
+ 1. Open the Synology Control Panel and navigate to `Shared Folders`.
+ 2. Create a shared folder for the Kubernetes cluster.
+ 3. Go to the folder settings and select `NFS Permissions`.
+   4. Add the IP addresses of all Kubernetes nodes and set `Squash` to `No`.
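+
+To verify the export from any Linux host, a quick check (hypothetical share path; substitute the folder you created):
+
+```sh
+showmount -e [NAS_IP]
+sudo mount -t nfs [NAS_IP]:/volume1/[shared-folder] /mnt/test
+```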
diff --git a/docs/set-up.md b/docs/set-up.md
index 18ca2ef4..ff37e77a 100644
--- a/docs/set-up.md
+++ b/docs/set-up.md
@@ -1,66 +1,56 @@
-# Set up guide
-
-## Install and configure Proxmox
-
-1. Download official image from an official Proxmox [site](https://www.proxmox.com/en/downloads/proxmox-virtual-environment/iso)
-2. Flush image and install it to the machines. During installation specify and write down static ip address that will be
-used by the machine.
-3. Disable subscription repositories. Go to Repositories setting menu and disable all components marked as `enterprise` and
-`pve-enterprise`
-4. ssh to the node and run `apt get update` following by `apt get upgrade`
-5. Go to Network, select Linux Bridge and check `VLAN aware checkox` in order to be able to assign virtual machines to a
-different VLANs.
-6. Set up a simple proxmox cluster using menu wizard. No need to make it HA since kubernetes will handle the HA.
-
-### Set up GPU passthrough
-1. Edit `/etc/default/grub` with the following changes:
- ```
- GRUB_CMDLINE_LINUX_DEFAULT="quiet intel_iommu=on"
- ```
-2. Run `update-grub` and reboot the node
-3. Verify that IOMMU is enabled
-```
-dmesg | grep -e DMAR -e IOMMU
-```
-There should be a line that looks like `DMAR: IOMMU enabled`
-4. For any troubleshouting check out [this guide](https://3os.org/infrastructure/proxmox/gpu-passthrough/igpu-passthrough-to-vm/#proxmox-configuration-for-igpu-full-passthrough)
-
-## Create and install Talos images
-1. Head over to https://factory.talos.dev and follow the instructions which will eventually lead you to download a Talos
-Linux iso file. Make sure to note the schematic ID you will need this later on. Add following extensions
- - siderolabs/iscsi-tools -- for longhorn
- - siderolabs/util-linux-tools -- for longhorn
- - siderolabs/qemu-guest-agent -- for being able to manage VM from a proxmox UI
-2. Create VM with following configuration:
- - Startup on boot
- - Bios: SeaBios
- - Machine: q35
- - Memory: baloon disabled
- - CPU: type host, cpu units 1024
- - Network: vlan 20, firewall disabled, mac address one of the following: BC:24:11:B5:DD:1F, BC:24:11:0C:FD:22, BC:24:11:A8:19:33
-3. Add PCI device `Inter HD Graphics`
+# Setup Guide
+
+## Install and Configure Proxmox
+
+1. Download the official image from
+ the [Proxmox site](https://www.proxmox.com/en/downloads/proxmox-virtual-environment/iso).
+2. Flash the image and install it on the machines. During installation, specify and write down the static IP address
+ that will be used by the machine.
+3. Go to the machine disks, click on an SSD, and select "Initialize disk with GPT."
+4. Go to the LVM subsection and add a new Volume Group based on the disk, named "SSD."
+5. Inspect the [Ansible inventory file](../infrastructure/ansible/inventory/hosts.yaml) and
+ run `task ansible:proxmox-setup` to configure Proxmox nodes. This will provision the SSH key, update Proxmox to the
+ latest versions, and set up GPU passthrough. For any troubleshooting with GPU, check
+ out [this guide](https://3os.org/infrastructure/proxmox/gpu-passthrough/igpu-passthrough-to-vm/#proxmox-configuration-for-igpu-full-passthrough).
+6. Go to Network, select Linux Bridge, and check the `VLAN aware` checkbox to assign virtual machines to different
+ VLANs.
+7. Set up a simple Proxmox cluster using the menu wizard. No need to make it HA since Kubernetes will handle the HA.
+
+## Create and Install Talos Images
+
+1. Head over to [Talos Factory](https://factory.talos.dev) and follow the instructions to download a Talos Linux ISO
+ file. Note the schematic ID; you will need this later on. Add the following extensions:
+
+   - siderolabs/iscsi-tools -- for Longhorn
+   - siderolabs/util-linux-tools -- for Longhorn
+   - siderolabs/qemu-guest-agent -- for managing VMs from the Proxmox UI
+
+2. Go to `/infrastructure/terraform/proxmox/talos.tf` and update the ISO URL if needed.
+3. Check the Terraform changes with `terraform plan`.
+4. Run Terraform to create VMs with Talos nodes:
+ ```sh
+ terraform apply
+ ```
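+
+   To preview or apply just these VMs, Terraform's `-target` flag can scope the run to the module defined in `talos.tf`:
+   ```sh
+   terraform plan -target=module.talos-controlplanes
+   ```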
## Bootstrap kubernetes cluster
-1. Deploy the talos cluster to machines
-```
-task talos:bootstrap
-```
-
-2. It might take a while for the cluster to be setup (10+ minutes is normal), during which time you will see a variety of
-error messages like: "couldn't get current server API group list," "error: no matching resources found", etc. This is a
-normal. If this step gets interrupted, e.g. by pressing Ctrl + C, you likely will need to nuke the cluster
-before trying again.
-
-This task will create a `talosconfig` in a `/kubernetes/bootstrap/talos/clusterconfig` directory. You can use it to
-get access to a Talos cluster for troubleshooting
-```
-talosctl --talosconfig=./kubernetes/bootstrap/talos/clusterconfig/talosconfig --nodes=192.168.20.51 health
-```
-
-3. The `kubeconfig` for interacting with the cluster will be generated in the root directory.
-
- Verify the nodes are online:
- ```shell
+
+1. Deploy the Talos cluster to machines:
+ ```sh
+ task talos:bootstrap
+ ```
+   It might take a while for the cluster to be set up (10+ minutes is normal), during which time you will see various
+   error messages like "couldn't get current server API group list" or "error: no matching resources found". This is
+   normal. If this step gets interrupted, e.g., by pressing Ctrl + C, you will likely need to nuke the cluster before
+   trying again.
+
+   This task will create a `talosconfig` in the `/kubernetes/bootstrap/talos/clusterconfig` directory. You can use it
+   to get access to the Talos cluster for troubleshooting:
+ ```sh
+ talosctl --talosconfig=./kubernetes/bootstrap/talos/clusterconfig/talosconfig --nodes=192.168.20.51 health
+ ```
+
+2. The `kubeconfig` for interacting with the cluster will be generated in the root directory. Verify the nodes are online:
+ ```sh
kubectl get nodes -o wide
# NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
# k8s-control-1 Ready control-plane 4d21h v1.30.1 192.168.20.51 Talos (v1.7.2) 6.6.30-talos containerd://1.7.16
@@ -68,6 +58,14 @@ talosctl --talosconfig=./kubernetes/bootstrap/talos/clusterconfig/talosconfig --
# k8s-control-3 Ready control-plane 4d21h v1.30.1 192.168.20.53 Talos (v1.7.2) 6.6.30-talos containerd://1.7.16
```
+3. Add Longhorn annotations to each node:
+
+ ```shell
+ kubectl annotate node k8s-control-1 node.longhorn.io/default-disks-config='[{"name": "nvme","path":"/var/lib/longhorn","tags":["nvme"]},{"name": "ssd","path":"/var/mnt/ssd/longhorn","allowScheduling":true,"tags":["ssd"]}]'
+ kubectl annotate node k8s-control-2 node.longhorn.io/default-disks-config='[{"name": "nvme","path":"/var/lib/longhorn","tags":["nvme"]},{"name": "ssd","path":"/var/mnt/ssd/longhorn","allowScheduling":true,"tags":["ssd"]}]'
+ kubectl annotate node k8s-control-3 node.longhorn.io/default-disks-config='[{"name": "nvme","path":"/var/lib/longhorn","tags":["nvme"]},{"name": "ssd","path":"/var/mnt/ssd/longhorn","allowScheduling":true,"tags":["ssd"]}]'
+ ```
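+
+   To confirm the annotation landed on every node:
+
+   ```shell
+   kubectl describe nodes k8s-control-1 k8s-control-2 k8s-control-3 | grep default-disks-config
+   ```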
+
4. Continue with installing flux
## Install Flux
diff --git a/infrastructure/.envrc b/infrastructure/.envrc
new file mode 100644
index 00000000..bcbff56c
--- /dev/null
+++ b/infrastructure/.envrc
@@ -0,0 +1,9 @@
+use_sops() {
+ local path=${1:-$PWD/secrets.sops.yaml}
+ eval "$(sops -d --output-type dotenv "$path" | direnv dotenv bash /dev/stdin)"
+ watch_file "$path"
+}
+
+source_up
+# `use sops` invokes the use_sops function above to load secrets.sops.yaml into the environment
+use sops
diff --git a/infrastructure/ansible/collections/requirements.yml b/infrastructure/ansible/collections/requirements.yml
new file mode 100644
index 00000000..8282a646
--- /dev/null
+++ b/infrastructure/ansible/collections/requirements.yml
@@ -0,0 +1,5 @@
+---
+collections:
+ - name: prometheus.prometheus
+ version: 0.17.0
+ - name: onepassword.connect
diff --git a/infrastructure/ansible/inventory/hosts.yaml b/infrastructure/ansible/inventory/hosts.yaml
new file mode 100644
index 00000000..1f45d623
--- /dev/null
+++ b/infrastructure/ansible/inventory/hosts.yaml
@@ -0,0 +1,15 @@
+---
+all:
+  children:
+ proxmox:
+ hosts:
+ proxmox-1:
+ ansible_host: 192.168.0.41
+ proxmox-2:
+ ansible_host: 192.168.0.42
+ proxmox-3:
+ ansible_host: 192.168.0.43
+ vars:
+ ansible_user: root
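+      # Decrypt the shared SOPS secrets file once and expose its keys to every host as the 'sops' variable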
+ sops: "{{ lookup('community.sops.sops', lookup('env', 'INFRA_SECRETS_FILE')) | ansible.builtin.from_yaml }}"
diff --git a/infrastructure/ansible/playbooks/proxmox-apt-upgrade.yaml b/infrastructure/ansible/playbooks/proxmox-apt-upgrade.yaml
new file mode 100644
index 00000000..5261eeb7
--- /dev/null
+++ b/infrastructure/ansible/playbooks/proxmox-apt-upgrade.yaml
@@ -0,0 +1,18 @@
+---
+- name: Upgrade Proxmox packages
+ hosts:
+ - proxmox
+ become: true
+ gather_facts: true
+ any_errors_fatal: true
+ tasks:
+ - name: Upgrade packages
+ ansible.builtin.apt:
+ upgrade: full
+ update_cache: true
+ cache_valid_time: 3600
+ autoclean: true
+ autoremove: true
+ register: apt_upgrade
+ retries: 5
+ until: apt_upgrade is success
diff --git a/infrastructure/ansible/playbooks/proxmox-setup.yaml b/infrastructure/ansible/playbooks/proxmox-setup.yaml
new file mode 100644
index 00000000..cbedb2e9
--- /dev/null
+++ b/infrastructure/ansible/playbooks/proxmox-setup.yaml
@@ -0,0 +1,188 @@
+---
+- name: Set up Proxmox nodes
+ hosts:
+ - proxmox
+ become: false
+ gather_facts: false
+ any_errors_fatal: true
+
+ tasks:
+ - name: Get proxmox password from 1Password
+ onepassword.connect.field_info:
+ token: "{{ sops.OP_CONNECT_TOKEN }}"
+ hostname: "{{ sops.OP_CONNECT_HOST }}"
+ item: Proxmox root
+ field: password
+ vault: 4ebapsbutjt6t66s3y22ne4l4u
+ no_log: true # Turn off logs to avoid logging sensitive data
+ delegate_to: localhost # this task is only run on localhost
+ register: password
+
+    - name: Set the connection password
+      ansible.builtin.set_fact: # use the registered password to set up the connection
+        ansible_password: "{{ password.field.value }}"
+      no_log: true
+
+    - name: Gather facts
+      ansible.builtin.setup:
+
+ - name: Remove PVE Enterprise repo
+ ansible.builtin.apt_repository:
+ repo: deb https://enterprise.proxmox.com/debian/pve {{ ansible_distribution_release }} pve-enterprise
+ state: absent
+ filename: pve-enterprise
+
+ - name: Remove PVE Enterprise repo ceph
+ ansible.builtin.apt_repository:
+ repo: deb https://enterprise.proxmox.com/debian/ceph-quincy {{ ansible_distribution_release }} enterprise
+ state: absent
+ filename: pve-enterprise
+
+ - name: Add PVE no-subscription repo
+ ansible.builtin.apt_repository:
+ repo: deb http://download.proxmox.com/debian/pve {{ ansible_distribution_release }} pve-no-subscription
+ state: present
+ filename: pve-no-subscription
+
+ - name: Upgrade all system packages
+ ansible.builtin.apt:
+ upgrade: full
+ update_cache: true
+ cache_valid_time: 3600
+ autoclean: true
+ autoremove: true
+ register: apt_upgrade
+ retries: 5
+ until: apt_upgrade is success
+
+    - name: Print the SSH public key that will be provisioned
+ ansible.builtin.debug:
+ msg: "{{ sops.SSH_PUB_KEY | regex_replace(\"[']\", '') }}"
+
+ - name: Provision ssh key
+ ansible.builtin.lineinfile:
+ path: /{{ ansible_user }}/.ssh/authorized_keys
+ line: "{{ sops.SSH_PUB_KEY | regex_replace(\"[']\", '') }}"
+ create: yes
+ state: present
+
+ - name: Install common packages
+ ansible.builtin.apt:
+ name:
+ - vim
+ - htop
+ - linux-cpupower
+ - lm-sensors
+ install_recommends: true
+ update_cache: true
+ cache_valid_time: 3600
+ autoclean: true
+ autoremove: true
+
+ - name: Load lm_sensors modules
+ community.general.modprobe:
+ name: "{{ item }}"
+ state: present
+ loop:
+ - coretemp
+ - drivetemp
+ - vfio
+ - vfio_iommu_type1
+ - vfio_pci
+ - kvmgt
+
+ - name: Enable lm_sensors modules on boot
+ ansible.builtin.copy:
+ mode: "0644"
+ content: "{{ item }}"
+ dest: "/etc/modules-load.d/{{ item }}.conf"
+ loop:
+ - coretemp
+ - drivetemp
+ - vfio
+ - vfio_iommu_type1
+ - vfio_pci
+ - kvmgt
+
+ - name: "Gather installed packages"
+ ansible.builtin.package_facts:
+ manager: auto
+
+ - name: "Install pve fake subscription"
+ when: "'pve-fake-subscription' not in ansible_facts.packages"
+ block:
+ - name: Get newest pve-fake-subscription release
+ ansible.builtin.uri:
+ url: https://api.github.com/repos/Jamesits/pve-fake-subscription/releases/latest
+ return_content: true
+          register: json_response
+
+ - name: Create tmpdir
+ ansible.builtin.tempfile:
+ state: directory
+ register: tmpdir
+
+ - name: Download pve-fake-subscription
+ ansible.builtin.get_url:
+          url: "{{ json_response.json.assets[0].browser_download_url }}"
+ dest: "{{ tmpdir.path }}"
+ mode: "0644"
+ register: download
+
+ - name: Install pve-fake-subscription
+ ansible.builtin.apt:
+ deb: "{{ download.dest }}"
+
+ - name: Install node-exporter
+ ansible.builtin.include_role:
+ name: prometheus.prometheus.node_exporter
+
+ - name: Set up GPU Passthrough
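+      # Enable intel_iommu on the kernel command line, regenerate GRUB, reboot, and verify IOMMU support in dmesg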
+ block:
+ - name: Read /etc/default/grub content
+ ansible.builtin.slurp:
+ path: /etc/default/grub
+ register: grub_file_content
+
+ - name: Check if GRUB_CMDLINE_LINUX_DEFAULT contains intel_iommu=on
+          ansible.builtin.set_fact:
+ grub_cmdline_contains_iommu: "{{ (grub_file_content.content | b64decode).split('\n') | select('search', 'GRUB_CMDLINE_LINUX_DEFAULT=.*intel_iommu=on') | list | length > 0 }}"
+
+ - name: Ensure GRUB_CMDLINE_LINUX_DEFAULT contains intel_iommu=on
+ ansible.builtin.lineinfile:
+ path: /etc/default/grub
+ regexp: '^GRUB_CMDLINE_LINUX_DEFAULT='
+ line: 'GRUB_CMDLINE_LINUX_DEFAULT="quiet intel_iommu=on"'
+ backrefs: yes
+ create: yes
+ register: grub_config
+ when: not grub_cmdline_contains_iommu
+
+ - name: Update grub configuration if /etc/default/grub was modified
+ ansible.builtin.command:
+ cmd: update-grub
+ when: grub_config.changed
+
+ - name: Reboot the machine if /etc/default/grub was modified
+ ansible.builtin.reboot:
+ when: grub_config.changed
+ register: reboot_result
+
+ - name: Wait for the machine to reboot
+ ansible.builtin.wait_for_connection:
+ timeout: 300
+ when: reboot_result is changed
+
+        - name: Check dmesg for DMAR or IOMMU messages
+          ansible.builtin.shell: dmesg | grep -e DMAR -e IOMMU # the pipe requires shell, not command
+          register: dmesg_output
+          changed_when: false
+          failed_when: false # the task below decides whether to fail
+          when: reboot_result is changed
+
+ - name: Fail if IOMMU is not enabled
+ ansible.builtin.fail:
+ msg: "IOMMU is not enabled"
+ when: reboot_result is changed and 'IOMMU enabled' not in dmesg_output.stdout
diff --git a/infrastructure/secrets.sops.yaml b/infrastructure/secrets.sops.yaml
new file mode 100644
index 00000000..514e8299
--- /dev/null
+++ b/infrastructure/secrets.sops.yaml
@@ -0,0 +1,24 @@
+OP_CONNECT_HOST: ENC[AES256_GCM,data:8iBWmj/iPcysoAECfXRoe1qODKTpRgzibAy7BmmOKV75rQlhzPaF+yo=,iv:BrYlRkvczZg/thrTtMfxq1fWw98zlhriK8wSvQ89rL0=,tag:DaKCPb3KslIDb0aqYsf9PQ==,type:str]
+OP_CONNECT_TOKEN: ENC[AES256_GCM,data:AdO8tkxiWIu88UxX3RhqScDWn0xZ4jzTTvvtCRY3fPDF29eXWyxv6+VlMFDW0u5H6Mk7FrjmCBfuF3ask0xgFVYQHAzX5BUXYbOfv3cbwuw5bd/fkTWrI2Oy82Obx0wGgQkYZc43zvMYuYssZizndhmfKolMnC9vTNCW/XZp2ODRveMpwhPaRuZojO5ZmWA14JATDaAI5pIpo8VlG8dy1the7NgboqabFk/ZTDbdlPpsr+B8VtePurezF++Zi3dGHyHla8UtX67oNO9hToynyE4QbLnkQ5LpB7cFOhtCv7NcWg/t66gaoI/78MIlxDxhS9qpazTIeZTEDaV+z2v/IIHCPBwcvOe2DD1nPl9tVUzWdmVQPKtN7z/YkVX85zKUWFL52EE3/KnOa0YQzEmIyWuwavkkYXJW80Od8ZX0xUkAGnAoQkl0P0pb6rzUOgmA4A8CQCA5LLpLoSPBFqcHApdFOGyjkduFHUTd3XnUdrwBQSctMIYVk3yi+DIhz7OURZYnW/pD4VApV8kWlDJkuAmghNbZnnxne4I/tM3FC4g+hhtpMV8IeRnGUOksA+hAMGhtOvZqP/PA2C8heBjsDQqp7+ymHobttttKa+JojTfO57yB3YtHTvHiUO/8Ky8rMcMAm9hfqFlo+iCG2U2N2r2ahydmafbEMcjYfZNI98Gso1bb7NjEpTTOmJB9jajAdnBIWd0muOY6hty6gpxU9P0JXovhgvIR9RR7x2TG800/kipQiLZQbwogyK9HFUVkH4sgGGNRaPZTIwx4BHnOZ+vp27QCO636HEr9ZgGB5Wlr+MxE3AEOtMdIExzoxui1T3tA9vpcox5u1GfpLUnU/cQ8,iv:m2+xlBhkHDFbNSUYKHIWToOd+rD3EDjBeBjouvVpu8w=,tag:H3PPO+xZ/gAZe8nVHTz48A==,type:str]
+PROXMOX_ROOT_PASSWORD: ENC[AES256_GCM,data:LClyIbFR5uCNZX057v0=,iv:961DtGX6oneEuNRJTDoDTNRnvX1iiowvYC/PhqZiyHc=,tag:vxCLP373KcCgQjp3TskoGg==,type:str]
+SSH_PUB_KEY: ENC[AES256_GCM,data:ZAphxKS/0+hWhngvQqPROefI/bSmyxCEJO8N3tT/SQyhC8+kxzArS+09oFVVO7/GRnQe4RFhyuZsbIbZ7JqdVlLRzhrxYmT4wUCNDVHAycWTACkYlneut19b7duigl0RLs0EIlPSStzADvwkmBdpBv5V/tF717QG6FKP15FowrGC+zc0UqKIDFSvztO+zvjdtQwcjcUZ7kMvV50LKunGQk0nrJzNKzPD+zz/3i3aMc6w++ltuqICAOR2SIQNHHGuhLo0IWdGFLivn7n7wz87CtRypjzcdTn4f3aVjaG2LOPFUE71MZxjptqHlci3lu/5i48A9p7VooBNPN53IysEUqZSqULDSXJybBCP1lkuOTBCkeu/+C8T7+HlSXuQW5mhrI0C81z6dgFSL65hx7Yz1WlShAFy5jCcJW72KjMc+oYF7y2otNxyrXRAaweoXVXOq4JlyfXp/MhVeFnoUUCdl5pgerEQJtZt0q8khAfNU984/EioU8o2+//WsLvCATokrbiIZ59jwwN1oNru75XPl4D5rg0T+by/UYHPVJRXJb3KKQ0F/i8WLXG1IanI3S6IJbscWwjCb10NlPEULgzBQSam9+YqxP2pmSQK7WZsoGfGq2asY2PQOn8uHbDPswgwHRWzBUkPwJ95ePylA4GtJbltEV4A61+4ydB2w2uo1zOLSF0p14UKO+8kLyfV9W1r+C+zdXNXa7N/mg3k5gXZ5Pv/SdsphOiRB5KadUs0eftLcXv/0NmCQOZx41HYNLH5hOAdCwRK61P9SDF82X8saCYI1mFSzC6ItJl9ATtLohxro7jZjj11m5hlub/O4SlLZSmYQD5RNXcIkKXUwwsgdANoGz7FzfGNP4YlxNsD0pt03AZqMjLt721Dw+uDH2GSMZ4XveyyijjEIJOkIeDuodOWSKUrCdPnEuBCSkfXqN1EeDs6T2J+e+/pS/QIKyOhLEAnd1DCQKFyoSEkfk82bpkBKq94FuNZok8oFDz1,iv:cP9OMaySJ30N5yycoMia6iC4lCGSg2wdRYpwL/qojFk=,tag:MRbrZqjQcrxLGPFoksSQPQ==,type:str]
+sops:
+ kms: []
+ gcp_kms: []
+ azure_kv: []
+ hc_vault: []
+ age:
+ - recipient: age1k5xl02aujw4rsgghnnd0sdymmwd095w5nqgjvf76warwrdc0uqpqsm2x8m
+ enc: |
+ -----BEGIN AGE ENCRYPTED FILE-----
+ YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSAvcjRRMWIvM3QxUHBrWElK
+ SmRkVFBCdWZvVVdzRi91Q3R1WmR6ZlhSbUIwCmVSaEhCUEhYWlhNS3F5SkJGaCs4
+ S2dJK2ZseHJ6QjRRSUdHa0lraUVsOTAKLS0tIEtXaXdUcXlDZnZ1TWVubHZ1SG1i
+ ZmJuUE5tYWhrMmtFeFFnZ3dWK25saUEKZElOdUu9mUmYvQkrYBzTwDQJwffqjxvp
+ jBBUbAypdjQTP3clZ9QEPtNqsXafIdHNPWoTzqg4r9zi5NVJqUJzQw==
+ -----END AGE ENCRYPTED FILE-----
+ lastmodified: "2024-07-21T22:38:05Z"
+ mac: ENC[AES256_GCM,data:dEdZkoNQPMK+LGYpQnU/wAooBtid/vtPTBk2wQwWNxFyRbP0/SgVf5jR8KG6HtAgnVfKm3+Ia3ftX+3eaH3343fx5cbVLJuhJs57S8PZ4nPIEJLxSVGkgghwByaXZ7u82VUzfVLgnkLrvHW9fGvUdZxk0wdLgPUi6+BOdc0wbvo=,iv:DDurd9ouR6BAKnP+q/BvFehHJIb1NFMZ4S5BOiTairY=,tag:D8jqoIfHDe4+lQrgYsFGEg==,type:str]
+ pgp: []
+ unencrypted_suffix: _unencrypted
+ version: 3.8.1
diff --git a/infrastructure/terraform/proxmox/.terraform.lock.hcl b/infrastructure/terraform/proxmox/.terraform.lock.hcl
new file mode 100644
index 00000000..d4e0f9f3
--- /dev/null
+++ b/infrastructure/terraform/proxmox/.terraform.lock.hcl
@@ -0,0 +1,70 @@
+# This file is maintained automatically by "terraform init".
+# Manual edits may be lost in future updates.
+
+provider "registry.terraform.io/1password/onepassword" {
+ version = "2.1.0"
+ constraints = "2.1.0"
+ hashes = [
+ "h1:WaVFLtfvlx899m6rNsL3qr4aS+v2lIhO8QgfcDgC4NM=",
+ "zh:0d6f803760aa7cae0e841cfca17ef04411231170b2844cc0b30556d5476d9dff",
+ "zh:17badbffb56309f28aee1893a6b93d1cd87ed5157704fb17b93889f0ccf8cc2d",
+ "zh:185e0c7c66cc159769d7b91c37ab51a546efc13fb99eb206481739a521f75236",
+ "zh:19e213f8265445a29d8bb7c7b1f0d4e3c1fdfd538178704f8e8378db2dcdf359",
+ "zh:49929666304f97301f44ee0fdd39f40f63e35ccfb4c81588439bdab6d5bafde0",
+ "zh:4de33f5630350d6a561d5d62994d525beb8849c94287c2658f39242fe3170cf8",
+ "zh:4f212a8fbbbaa7a47f1b31857be3bad2d590f92be845c6b252c9716bb70076d9",
+ "zh:596cc2bd9aaafd2e649aabcff0125afa9d4270f702813c935fbd5694eed002e7",
+ "zh:618e703a43608c502066c5b909ead45b1f4202f7cebc993f447278477d32cda2",
+ "zh:61fde3651bcb2e691ee9d82ce1de03588d006f53b2e8e2516910321da8627228",
+ "zh:890df766e9b839623b1f0437355032a3c006226a6c200cd911e15ee1a9014e9f",
+ "zh:db05022113841a00174bba5e24cfc77195bbc03d24339c5e8ac4346069901e45",
+ "zh:dcc7792a24c74890081a96ba2bc360d90ab71a4d25232ca18046d9868c835e21",
+ "zh:f2e67a298d20bf52cb208611767b420962d3f0d518e89cf41cc432551b1faf63",
+ "zh:f7e587814506c7e74fc1d80b29465c8e4b7bdbf803f7f8c0a8bb498968cdd58d",
+ ]
+}
+
+provider "registry.terraform.io/bpg/proxmox" {
+ version = "0.61.1"
+ constraints = "0.61.1"
+ hashes = [
+ "h1:SQSHTHj2ThcF08cON2gHYcnkS/XLmoF8E4cRIgpagtE=",
+ "zh:27d8b589a2dc1e0a5b0f8ab299b9f3704a2f0b69799d1d4d8845c68056986d1f",
+ "zh:46dfa6b33ddd7007a2144f38090457604eb56a59a303b37bb0ad1be5c84ddaca",
+ "zh:47a1b14a759393c5ecc76f2feb950677c418c910b8c677fde0dd3e4675c41579",
+ "zh:582e49d109d1c2b1f3b1268a7cbc43548f3c6d96a87c92a5428767097a5e383e",
+ "zh:5e98ad6afae5969a4c3ffb14c0484936550c66c8313d7686551c29b633ff32f2",
+ "zh:7b9e24b76f947ab8f1e571cf61beefc983b7d2aa1b85df35c4f015728fe37a38",
+ "zh:8255ca210f279a0f7b8ca2762df26d2ea1a01704298c5e3d5cf601bd39a743f0",
+ "zh:85d7655fdc95dedced9cf8105a0beeb0d7bc8f668c55f62019a7215a76d60300",
+ "zh:8aeea5a1d001b06baaf923b754e1a14d06c75eb8c8b87a7f65a3c8205fc8b079",
+ "zh:a9cfab6c06f613658c5fdd83742cd22c0eb7563778924b1407965ef8c36c1ce0",
+ "zh:ceaab67801d49a92eb5858b1ddae6df2569462e5ffbe31f9dbd79dcb684ea142",
+ "zh:dc25b506d5c55d1d78a335d3ebd03213c99b4b2a5859812349a955c2f746ff7e",
+ "zh:e04b477fd77a0d37a0bdb76a7cf69184dad9e7fbba9b4f3a378a8901b82b75e5",
+ "zh:f1e6838d9141557f73340df9b21fce5a82b41cc16ae36f063a920ccc36bc0758",
+ "zh:f26e0763dbe6a6b2195c94b44696f2110f7f55433dc142839be16b9697fa5597",
+ ]
+}
+
+provider "registry.terraform.io/paultyng/unifi" {
+ version = "0.41.0"
+ constraints = "0.41.0"
+ hashes = [
+ "h1:fc0gUNc7ddxeLKVgVbt2iuYBn0U9GKE9lxK78w8oRF4=",
+ "zh:03ddd3aee05a08e1446f75a7b3f52810181d3307728cba08ce8fb67f109a9c00",
+ "zh:11b14b79ad02b0a55fd6116b10c0eb6fab432dd7d1f3527af0e2055adf292451",
+ "zh:18c0eb19889927f115a1e05d64f59b4e8d530ccdf1a8b574940a86be20973564",
+ "zh:2df9ca0c21830d2757758e574b19d0d4e54965ce80dbbfb3f124db1dac3d7e8f",
+ "zh:36274af3b7e8b08ba69c04a226c63e0dd2ec386c583288ebd7bc2a30e349ee8f",
+ "zh:413eb222ef30889bab33ccbfc46c9fb64307555da34eac4625d51e696ac72e1d",
+ "zh:4839814ff9f405a13397ffadd6f1052c770b88802280a4d8cde066f9a19718c7",
+ "zh:9547b7831852cc5b9c0fd13ab447d48539eae94582c8725ad255af36e31fb5d9",
+ "zh:a855c89b12326eb1c89bbf292a2bb1de3651794e3409d5012076ada89aabdc8a",
+ "zh:aef12a33b90fd77a9bf4e9d397966ccbfa4a037a648a1725074aff2db2d90fb0",
+ "zh:b3c72a6a02e29b4d21aa0d0831a272ca7cb82c3f8c2c3c7f09fcc2d2dcd78752",
+ "zh:c8354eaaab5f526e8e530b098544c7583a0f0b5b27d67500c7b3e9da56a3a7e5",
+ "zh:dc29f1e70f20ce86d3c6a66c7a817616f993a1cf9d941604dfd5222a06992c4c",
+ "zh:e772779333419f34d2c6da333c7f7d235a5a34f21ea47636b548e132aed74f3b",
+ ]
+}
diff --git a/infrastructure/terraform/proxmox/main.tf b/infrastructure/terraform/proxmox/main.tf
new file mode 100644
index 00000000..5bb6c28f
--- /dev/null
+++ b/infrastructure/terraform/proxmox/main.tf
@@ -0,0 +1,49 @@
+terraform {
+# backend "s3" {
+# bucket = "terraform"
+# key = "proxmox/state.tfstate"
+# skip_credentials_validation = true
+# skip_metadata_api_check = true
+# skip_region_validation = true
+# force_path_style = true
+# }
+
+ required_providers {
+ proxmox = {
+ source = "bpg/proxmox"
+ version = "0.61.1"
+ }
+ unifi = {
+ source = "paultyng/unifi"
+ version = "0.41.0"
+ }
+ }
+}
+
+module "secret_pve" {
+ # Remember to export OP_CONNECT_HOST and OP_CONNECT_TOKEN
+ source = "github.com/bjw-s/terraform-1password-item?ref=main"
+ vault = "homelab"
+ item = "Proxmox root"
+}
+
+module "secret_unifi" {
+ # Remember to export OP_CONNECT_HOST and OP_CONNECT_TOKEN
+ source = "github.com/bjw-s/terraform-1password-item?ref=main"
+ vault = "homelab"
+ item = "Unifi"
+}
+
+provider "proxmox" {
+ endpoint = "https://192.168.0.41:8006/"
+ username = "${module.secret_pve.fields.username}@pam"
+ password = module.secret_pve.fields.password
+ insecure = true
+}
+
+provider "unifi" {
+ username = module.secret_unifi.fields.username
+ password = module.secret_unifi.fields.password
+ api_url = "https://192.168.0.1"
+ allow_insecure = true
+}
diff --git a/infrastructure/terraform/proxmox/talos-node/instance.tf b/infrastructure/terraform/proxmox/talos-node/instance.tf
new file mode 100644
index 00000000..31eff9a3
--- /dev/null
+++ b/infrastructure/terraform/proxmox/talos-node/instance.tf
@@ -0,0 +1,80 @@
+resource "proxmox_virtual_environment_vm" "node" {
+ name = var.machine_name
+ node_name = var.target_node
+ vm_id = var.vmid
+
+ on_boot = true
+ tablet_device = false
+ timeout_stop_vm = 600
+ boot_order = ["scsi0", "ide0"]
+
+ operating_system {
+ type = "l26"
+ }
+
+ agent {
+ enabled = true
+ type = "virtio"
+ timeout = "10s"
+ }
+
+ bios = "seabios"
+
+ machine = "q35"
+
+ cpu {
+ cores = var.cpu_cores
+ type = "host"
+ }
+
+ memory {
+ dedicated = var.memory
+ floating = var.memory
+ }
+
+ scsi_hardware = "virtio-scsi-single"
+
+ dynamic "disk" {
+ for_each = var.disks
+ content {
+ datastore_id = disk.value.datastore_id
+ discard = "on"
+ interface = disk.value.interface
+ iothread = true
+ size = disk.value.size
+ file_format = "raw"
+ ssd = true
+ }
+ }
+
+ network_device {
+ model = "virtio"
+ bridge = "vmbr0"
+ mac_address = var.mac_address
+ vlan_id = var.vlan_id
+ }
+
+ cdrom {
+ enabled = true
+ file_id = proxmox_virtual_environment_download_file.talos_img.id
+ interface = "ide0"
+ }
+
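+  # Pass the host's integrated GPU (PCI address 0000:00:02.0) through to the VM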
+ hostpci {
+ device = "hostpci0"
+ id = "0000:00:02.0"
+ }
+
+ lifecycle {
+ ignore_changes = [
+ cpu["architecture"]
+ ]
+ }
+}
+
+resource "proxmox_virtual_environment_download_file" "talos_img" {
+ content_type = "iso"
+ datastore_id = "local"
+ node_name = var.target_node
+ url = var.iso_path
+}
diff --git a/infrastructure/terraform/proxmox/talos-node/main.tf b/infrastructure/terraform/proxmox/talos-node/main.tf
new file mode 100644
index 00000000..e5901a2c
--- /dev/null
+++ b/infrastructure/terraform/proxmox/talos-node/main.tf
@@ -0,0 +1,7 @@
+terraform {
+ required_providers {
+ proxmox = {
+ source = "bpg/proxmox"
+ }
+ }
+}
diff --git a/infrastructure/terraform/proxmox/talos-node/variables.tf b/infrastructure/terraform/proxmox/talos-node/variables.tf
new file mode 100644
index 00000000..bcfc5cc2
--- /dev/null
+++ b/infrastructure/terraform/proxmox/talos-node/variables.tf
@@ -0,0 +1,55 @@
+variable "machine_name" {
+ type = string
+}
+
+variable "mac_address" {
+ type = string
+}
+
+variable "vmid" {
+ type = number
+ default = 0
+}
+
+variable "target_node" {
+ type = string
+}
+
+variable "iso_path" {
+ type = string
+ default = ""
+}
+
+variable "oncreate" {
+ type = bool
+ default = true
+}
+
+variable "startup" {
+ type = string
+ default = ""
+}
+
+variable "cpu_cores" {
+ type = number
+ default = 1
+}
+
+variable "memory" {
+ type = number
+ default = 1024
+}
+
+variable "vlan_id" {
+ type = number
+ default = 0
+}
+
+variable "disks" {
+ type = list(object({
+ datastore_id = string
+ interface = string
+ size = string
+ }))
+ default = []
+}
diff --git a/infrastructure/terraform/proxmox/talos.tf b/infrastructure/terraform/proxmox/talos.tf
new file mode 100644
index 00000000..f5fcd9bb
--- /dev/null
+++ b/infrastructure/terraform/proxmox/talos.tf
@@ -0,0 +1,36 @@
+data "unifi_network" "Servers" {
+ name = "Servers Trusted"
+}
+
+locals {
+  # If you change these, don't forget to change them in the other places they appear; keeping this DRY is too hard.
+ mac_addresses = ["BC:24:11:B5:DD:1F", "BC:24:11:0C:FD:22", "BC:24:11:A8:19:33"]
+ # renovate: datasource=docker depName=ghcr.io/siderolabs/installer
+ talos_version = "v1.7.5"
+}
+
+module "talos-controlplanes" {
+ source = "./talos-node"
+ oncreate = false
+ count = 3
+ machine_name = "k8s-control-${count.index + 1}"
+ vmid = sum([100, count.index])
+ target_node = "proxmox${count.index + 1}"
+ iso_path = "https://factory.talos.dev/image/88d1f7a5c4f1d3aba7df787c448c1d3d008ed29cfb34af53fa0df4336a56040b/${local.talos_version}/nocloud-amd64.iso"
+ cpu_cores = 4
+ memory = 29 * 1024
+ vlan_id = data.unifi_network.Servers.vlan_id
+ mac_address = local.mac_addresses[count.index]
+ disks = [
+ {
+ datastore_id : "local-lvm"
+ interface : "scsi0"
+ size : "900"
+ },
+ {
+ datastore_id : "ssd"
+ interface : "scsi1"
+ size : "900"
+ }
+ ]
+}
diff --git a/kubernetes/apps/cert-manager/cert-manager/issuers/externalsecret.yaml b/kubernetes/apps/cert-manager/cert-manager/issuers/externalsecret.yaml
new file mode 100644
index 00000000..8fa616e1
--- /dev/null
+++ b/kubernetes/apps/cert-manager/cert-manager/issuers/externalsecret.yaml
@@ -0,0 +1,19 @@
+---
+# yaml-language-server: $schema=https://kubernetes-schemas.pages.dev/external-secrets.io/externalsecret_v1beta1.json
+apiVersion: external-secrets.io/v1beta1
+kind: ExternalSecret
+metadata:
+ name: cloudflare
+spec:
+ secretStoreRef:
+ kind: ClusterSecretStore
+ name: onepassword-connect
+ target:
+ name: cloudflare-secret
+ template:
+ engineVersion: v2
+ data:
+ CF_API_TOKEN: "{{ .CF_API_TOKEN }}"
+ dataFrom:
+ - extract:
+ key: cloudflare
diff --git a/kubernetes/apps/cert-manager/cert-manager/issuers/issuers.yaml b/kubernetes/apps/cert-manager/cert-manager/issuers/issuers.yaml
index 1cf7148a..ce09da8b 100644
--- a/kubernetes/apps/cert-manager/cert-manager/issuers/issuers.yaml
+++ b/kubernetes/apps/cert-manager/cert-manager/issuers/issuers.yaml
@@ -12,8 +12,8 @@ spec:
- dns01:
cloudflare:
apiTokenSecretRef:
- name: cert-manager-secret
- key: api-token
+ name: cloudflare-secret
+ key: CF_API_TOKEN
selector:
dnsZones:
- "${SECRET_DOMAIN}"
@@ -32,8 +32,8 @@ spec:
- dns01:
cloudflare:
apiTokenSecretRef:
- name: cert-manager-secret
- key: api-token
+ name: cloudflare-secret
+ key: CF_API_TOKEN
selector:
dnsZones:
- "${SECRET_DOMAIN}"
diff --git a/kubernetes/apps/cert-manager/cert-manager/issuers/kustomization.yaml b/kubernetes/apps/cert-manager/cert-manager/issuers/kustomization.yaml
index fd43d965..d6ac943f 100644
--- a/kubernetes/apps/cert-manager/cert-manager/issuers/kustomization.yaml
+++ b/kubernetes/apps/cert-manager/cert-manager/issuers/kustomization.yaml
@@ -3,5 +3,5 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- - ./secret.sops.yaml
+ - ./externalsecret.yaml
- ./issuers.yaml
diff --git a/kubernetes/apps/cert-manager/cert-manager/issuers/secret.sops.yaml b/kubernetes/apps/cert-manager/cert-manager/issuers/secret.sops.yaml
deleted file mode 100644
index eb1a98c9..00000000
--- a/kubernetes/apps/cert-manager/cert-manager/issuers/secret.sops.yaml
+++ /dev/null
@@ -1,26 +0,0 @@
-apiVersion: v1
-kind: Secret
-metadata:
- name: cert-manager-secret
-stringData:
- api-token: ENC[AES256_GCM,data:V/OeW+bpuNGXDAiNZ2WmawliZ8JakYzZvSqNhuLRCif3e1nXDXXL+Q==,iv:yq3rE8ZsK2ih6FMNtFRvak7xNNTTB/VCz0+Mp8CiJ5M=,tag:2eY19fzMjg99TAlbC44ntw==,type:str]
-sops:
- kms: []
- gcp_kms: []
- azure_kv: []
- hc_vault: []
- age:
- - recipient: age1k5xl02aujw4rsgghnnd0sdymmwd095w5nqgjvf76warwrdc0uqpqsm2x8m
- enc: |
- -----BEGIN AGE ENCRYPTED FILE-----
- YWdlLWVuY3J5cHRpb24ub3JnL3YxCi0+IFgyNTUxOSBnSm52UU95ZVJMUE52cjc3
- THplYTFpbFd4ZDJSV2RIaDNLWVRxZFd6TEVzCi9OSmIvYUhvVUhHQldoalJzMEpX
- dXNPV3AreUowSHBBY1NYUlh5b24wZDAKLS0tIHVwVkViazFmVmhqQjBqNkJiVlN4
- VGFML3ZMZzk4WlF3NjJ6SXpobzJPMlEKheLxsJRKPxsPwGOKZ8kb5viGJ07RT9eq
- id87ugUEST/+c5l0YE4Q5DDRpikoiT3uoDS7X+PfIGHgQWiQUq4uNQ==
- -----END AGE ENCRYPTED FILE-----
- lastmodified: "2024-02-17T21:45:25Z"
- mac: ENC[AES256_GCM,data:bKwwkxj+C5/dPsKsiFi599+d31RpAbcQQ5HugHBNIANGT0nwmYx9Cj8gDGcAeY4OBs9fWzZ2uHVW9ZbgrzyOdsSH2VdurPvOruJZ2kuWZ1BYZm1pbsFXRWhuWxaaJLTK9mP4YlOEQ76uYVMaaXORS7Pt4AHmliDReOyGJF4X+lI=,iv:SVsKQ7xOkLcODOWe7A/IFoQpIMB4Cbb7p8P4st3lZjo=,tag:Wq6M1In3czCDwUXsFBHMGQ==,type:str]
- pgp: []
- encrypted_regex: ^(data|stringData)$
- version: 3.7.3
diff --git a/kubernetes/apps/observability/kustomization.yaml b/kubernetes/apps/observability/kustomization.yaml
index d070a45b..f345c413 100644
--- a/kubernetes/apps/observability/kustomization.yaml
+++ b/kubernetes/apps/observability/kustomization.yaml
@@ -7,7 +7,7 @@ resources:
- ./alert.yaml
- ./portainer/ks.yaml
- ./gatus/ks.yaml
-# - ./thanos/ks.yaml
+ - ./thanos/ks.yaml
- ./kube-prometheus-stack/ks.yaml
- ./prometheus-operator-crds/ks.yaml
- ./grafana/ks.yaml
diff --git a/kubernetes/apps/observability/thanos/app/helmrelease.yaml b/kubernetes/apps/observability/thanos/app/helmrelease.yaml
index 112662b0..82ad8f47 100644
--- a/kubernetes/apps/observability/thanos/app/helmrelease.yaml
+++ b/kubernetes/apps/observability/thanos/app/helmrelease.yaml
@@ -65,8 +65,8 @@ spec:
- --retention.resolution-1h=60d
persistence: &persistence
enabled: true
- storageClass: longhorn-local
- size: 20Gi
+ storageClass: local-hostpath
+ size: 25Gi
query:
replicas: 2
extraArgs: ["--alert.query-url=https://thanos.${SECRET_DOMAIN}"]
diff --git a/kubernetes/apps/storage/local-path-provisioner/app/helmrelease.yaml b/kubernetes/apps/storage/local-path-provisioner/app/helmrelease.yaml
index 2d96e0bb..ac2907d2 100644
--- a/kubernetes/apps/storage/local-path-provisioner/app/helmrelease.yaml
+++ b/kubernetes/apps/storage/local-path-provisioner/app/helmrelease.yaml
@@ -57,7 +57,7 @@ spec:
config:
driver: local-hostpath
local-hostpath:
- shareBasePath: &storagePath /var/democratic-csi/local
+ shareBasePath: &storagePath /var/mnt/ssd/democratic-csi/local
controllerBasePath: *storagePath
dirPermissionsMode: "0770"
dirPermissionsUser: 0
diff --git a/kubernetes/bootstrap/talos/talconfig.yaml b/kubernetes/bootstrap/talos/talconfig.yaml
index 930dc418..cff4fbf0 100644
--- a/kubernetes/bootstrap/talos/talconfig.yaml
+++ b/kubernetes/bootstrap/talos/talconfig.yaml
@@ -27,7 +27,7 @@ nodes:
talosImageURL: factory.talos.dev/installer/88d1f7a5c4f1d3aba7df787c448c1d3d008ed29cfb34af53fa0df4336a56040b
controlPlane: true
nodeLabels:
- "node.longhorn.io/create-default-disk": "true"
+ "node.longhorn.io/create-default-disk": "config"
networkInterfaces:
- deviceSelector:
hardwareAddr: "bc:24:11:b5:dd:1f"
@@ -46,7 +46,7 @@ nodes:
talosImageURL: factory.talos.dev/installer/88d1f7a5c4f1d3aba7df787c448c1d3d008ed29cfb34af53fa0df4336a56040b
controlPlane: true
nodeLabels:
- "node.longhorn.io/create-default-disk": "true"
+ "node.longhorn.io/create-default-disk": "config"
networkInterfaces:
- deviceSelector:
hardwareAddr: "bc:24:11:0c:fd:22"
@@ -65,7 +65,7 @@ nodes:
talosImageURL: factory.talos.dev/installer/88d1f7a5c4f1d3aba7df787c448c1d3d008ed29cfb34af53fa0df4336a56040b
controlPlane: true
nodeLabels:
- "node.longhorn.io/create-default-disk": "true"
+ "node.longhorn.io/create-default-disk": "config"
networkInterfaces:
- deviceSelector:
hardwareAddr: "bc:24:11:a8:19:33"
@@ -139,7 +139,15 @@ patches:
net.core.rmem_max: "2500000"
net.core.wmem_max: "2500000"
- # Mount longhorn in kubelet
+ # Add additional ssd
+ - |-
+ machine:
+ disks:
+ - device: /dev/sdb
+ partitions:
+ - mountpoint: /var/mnt/ssd
+
+ # Mount longhorn and local-hostpath in kubelet
- |-
machine:
kubelet:
@@ -151,9 +159,16 @@ patches:
- bind
- rshared
- rw
- - destination: /var/democratic-csi/local
+ - destination: /var/mnt/ssd/longhorn
+ type: bind
+ source: /var/mnt/ssd/longhorn
+ options:
+ - bind
+ - rshared
+ - rw
+ - destination: /var/mnt/ssd/democratic-csi/local
type: bind
- source: /var/democratic-csi/local
+ source: /var/mnt/ssd/democratic-csi/local
options:
- bind
- rshared
diff --git a/kubernetes/talos/clusterconfig/talosconfig b/kubernetes/talos/clusterconfig/talosconfig
deleted file mode 100644
index e35e5db5..00000000
--- a/kubernetes/talos/clusterconfig/talosconfig
+++ /dev/null
@@ -1,2 +0,0 @@
-context: ""
-contexts: {}