diff --git a/.github/workflows/terraform.yaml b/.github/workflows/terraform.yaml new file mode 100644 index 0000000..e4f9d4d --- /dev/null +++ b/.github/workflows/terraform.yaml @@ -0,0 +1,40 @@ +name: 'Terraform GitHub Actions' + +on: + pull_request: + +jobs: + terraform-fmt: + name: Terraform Format + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v2.3.4 + + - name: Setup Terraform + uses: hashicorp/setup-terraform@v1 + + - name: Terraform Format + id: fmt + run: terraform fmt -diff -check -no-color -recursive + continue-on-error: true + + - uses: actions/github-script@v4 + if: github.event_name == 'pull_request' && steps.fmt.outputs.exitcode != 0 + env: + TF_FMT_STDOUT: "${{ steps.fmt.outputs.stdout }}" + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const output = `#### Terraform Format and Style 🖌 - \`${{ steps.fmt.outcome }}\` + \`\`\`diff + ${process.env.TF_FMT_STDOUT} + \`\`\` + *Pusher: @${{ github.actor }}, Action: \`${{ github.event_name }}\`, Workflow: \`${{ github.workflow }}\`*`; + github.issues.createComment({ + issue_number: context.issue.number, + owner: context.repo.owner, + repo: context.repo.repo, + body: output + }) + throw "failed to run `terraform fmt -check -recursive -diff`" \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..265e719 --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +.terraform.* +.terraform/ +terraform.tfstate* +terraform.tfvars +terraform.txt +.auto.tfvars +creds/ +.vscode/ +debug.log \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..4597018 --- /dev/null +++ b/LICENSE @@ -0,0 +1,7 @@ +Copyright © 2022 AIGIS (Lotus Labs Ltd), Colin Wilson + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, 
modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/agents.tf b/agents.tf new file mode 100644 index 0000000..cb42ad4 --- /dev/null +++ b/agents.tf @@ -0,0 +1,57 @@ +# Create initial K3s server node +resource "hcloud_server" "agent_node" { + + for_each = local.agent_nodes + + name = "k3s-agent-${each.value.nodepool_name}-${each.value.location}-${random_id.agent_node_id[each.value.index].hex}" + image = "ubuntu-22.04" + server_type = each.value.server_type + placement_group_id = element(hcloud_placement_group.k3s_agent_placement_group.*.id, ceil(each.value.index / 10)) + firewall_ids = [hcloud_firewall.k3s.id] + location = each.value.location + ssh_keys = [var.ssh_public_key_name] + labels = { + provisioner = "terraform", + engine = "k3s" + type = "agent" + } + # Prevent destroying the whole cluster if the user changes any of the attributes + # that force to recreate the servers or network ip's/mac addresses. 
+ lifecycle { + ignore_changes = [ + location, + network, + ssh_keys, + user_data, + ] + } + network { + network_id = hcloud_network.k3s_net.id + #ip = "10.1.0.3" + } + user_data = templatefile("${path.module}/user_data/agent/agent.yaml.tftpl", { + k3s_channel = each.value.channel + agent_config = base64gzip(yamlencode({ + server = "https://${hcloud_load_balancer_network.k3s_network.ip}:6443" + token = random_password.k3s_agent_token.result + flannel-iface = "ens10" + kubelet-arg = ["cloud-provider=external"] + node-label = each.value.labels + })) + }) + provisioner "remote-exec" { + inline = [ + "cloud-init status --wait --long > /dev/null" # wait for cloud-init to complete + ] + + connection { + host = self.ipv4_address + type = "ssh" + user = "root" + private_key = var.ssh_private_key + } + } + depends_on = [ + hcloud_server.server_node_init + ] +} \ No newline at end of file diff --git a/code_of_conduct.md b/code_of_conduct.md new file mode 100644 index 0000000..2d09690 --- /dev/null +++ b/code_of_conduct.md @@ -0,0 +1,134 @@ + +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, caste, color, religion, or sexual +identity and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. 
+ +## Our Standards + +Examples of behaviour that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the overall + community + +Examples of unacceptable behaviour include: + +* The use of sexualized language or imagery, and sexual attention or advances of + any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email address, + without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behaviour and will take appropriate and fair corrective action in +response to any behaviour that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official e-mail address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. 
+ +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behaviour may be +reported to the community leaders responsible for enforcement at +contact@aigis.uk. +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behaviour deemed +unprofessional or unwelcome in the community. + +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behaviour was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series of +actions. + +**Consequence**: A warning with consequences for continued behaviour. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or permanent +ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behaviour. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. 
Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behaviour, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within the +community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.1, available at +[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. + +Community Impact Guidelines were inspired by +[Mozilla's code of conduct enforcement ladder][Mozilla CoC]. + +For answers to common questions about this code of conduct, see the FAQ at +[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at +[https://www.contributor-covenant.org/translations][translations]. + +[homepage]: https://www.contributor-covenant.org +[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html +[Mozilla CoC]: https://github.com/mozilla/diversity +[FAQ]: https://www.contributor-covenant.org/faq +[translations]: https://www.contributor-covenant.org/translations + diff --git a/firewall.tf b/firewall.tf new file mode 100644 index 0000000..3604ced --- /dev/null +++ b/firewall.tf @@ -0,0 +1,14 @@ +resource "hcloud_firewall" "k3s" { + name = "k3s-cluster" + + dynamic "rule" { + for_each = concat(local.base_firewall_rules) + content { + direction = rule.value.direction + protocol = rule.value.protocol + port = lookup(rule.value, "port", null) + destination_ips = lookup(rule.value, "destination_ips", []) + source_ips = lookup(rule.value, "source_ips", []) + } + } +} \ No newline at end of file diff --git a/k3s_tokens.tf b/k3s_tokens.tf new file mode 100644 index 0000000..73b29e8 --- /dev/null +++ b/k3s_tokens.tf @@ -0,0 +1,11 @@ +resource "random_password" "k3s_token" { + length = 48 + upper = false + special = false +} + +resource "random_password" "k3s_agent_token" { + 
length = 48 + upper = false + special = false +} \ No newline at end of file diff --git a/kubeconfig.yaml.tftpl b/kubeconfig.yaml.tftpl new file mode 100644 index 0000000..99007f3 --- /dev/null +++ b/kubeconfig.yaml.tftpl @@ -0,0 +1,19 @@ +apiVersion: v1 +clusters: +- cluster: + certificate-authority-data: ${certificate-authority-data} + server: https://${k3s_lb_ip}:6443 + name: default +contexts: +- context: + cluster: default + user: default + name: default +current-context: default +kind: Config +preferences: {} +users: +- name: default + user: + client-certificate-data: ${client-certificate-data} + client-key-data: ${client-key-data} \ No newline at end of file diff --git a/kubeconfig_certs.tf b/kubeconfig_certs.tf new file mode 100644 index 0000000..bde91ba --- /dev/null +++ b/kubeconfig_certs.tf @@ -0,0 +1,58 @@ +# Create Private Keys and Certificates required by k3s and the Default Kubeconfig + +# Generate Server, Client & Request Header CA Keys + Default Client Admin Key +resource "tls_private_key" "keys" { + for_each = setunion(local.ca_names, local.client_names) + algorithm = "ECDSA" # ECDSA key + ecdsa_curve = "P256" # P256 elliptic curve +} + +# Generate Server, Client & Request Header CA Certificates +resource "tls_self_signed_cert" "ca_certs" { + + for_each = local.ca_names + + private_key_pem = tls_private_key.keys[each.key].private_key_pem + + subject { + common_name = "k3s-${each.key}-ca@${formatdate("YYYYMMDDhhmmss", timestamp())}" + } + + is_ca_certificate = true + + validity_period_hours = 87600 # 10 years + + allowed_uses = [ + "cert_signing", + "key_encipherment", + "digital_signature", + ] + + lifecycle { + ignore_changes = [subject] + } +} + +# Default Client Admin User Certificate (Signed by Client CA) +resource "tls_cert_request" "client_admin_user" { + private_key_pem = tls_private_key.keys["client-admin"].private_key_pem + + subject { + common_name = "system:admin" + organization = "system:masters" + } +} + +resource 
"tls_locally_signed_cert" "client_admin_user" { + cert_request_pem = tls_cert_request.client_admin_user.cert_request_pem + ca_private_key_pem = tls_private_key.keys["client"].private_key_pem + ca_cert_pem = tls_self_signed_cert.ca_certs["client"].cert_pem + + validity_period_hours = 8760 # 1 year + + allowed_uses = [ + "key_encipherment", + "digital_signature", + "client_auth", + ] +} \ No newline at end of file diff --git a/kubeconfig_file_output.tf b/kubeconfig_file_output.tf new file mode 100644 index 0000000..ba17ca7 --- /dev/null +++ b/kubeconfig_file_output.tf @@ -0,0 +1,4 @@ +resource "local_file" "k3s_kubeconfig" { + content = local.kubeconfig + filename = "${path.root}/k3s.yaml" +} \ No newline at end of file diff --git a/loadbalancer.tf b/loadbalancer.tf new file mode 100644 index 0000000..7cd282c --- /dev/null +++ b/loadbalancer.tf @@ -0,0 +1,35 @@ +resource "hcloud_load_balancer" "k3s_api_lb" { + name = "k3s-api-lb" + load_balancer_type = "lb11" + location = var.location + algorithm { + type = "least_connections" + } + labels = { + provisioner = "terraform" + } +} + +resource "hcloud_load_balancer_network" "k3s_network" { + load_balancer_id = hcloud_load_balancer.k3s_api_lb.id + subnet_id = hcloud_network_subnet.k3s_net.id + #ip = "10.1.0.1" +} + +resource "hcloud_load_balancer_target" "k3s_api_lb_target" { + type = "label_selector" + load_balancer_id = hcloud_load_balancer.k3s_api_lb.id + label_selector = "type=${local.server_label}" + use_private_ip = true + + depends_on = [ + hcloud_server.server_node_init[0] + ] +} + +resource "hcloud_load_balancer_service" "k3s_api_service" { + load_balancer_id = hcloud_load_balancer.k3s_api_lb.id + protocol = "tcp" + listen_port = 6443 + destination_port = 6443 +} \ No newline at end of file diff --git a/locals.tf b/locals.tf new file mode 100644 index 0000000..fc6e650 --- /dev/null +++ b/locals.tf @@ -0,0 +1,189 @@ +locals { + + flannel_iface = "ens10" # 
https://docs.hetzner.com/cloud/networks/server-configuration/#debian--ubuntu + + server_label = "server" + agent_labels = concat([ + "node.kubernetes.io/provisioner=terraform", + "node.kubernetes.io/type=agent", + "node.kubernetes.io/engine=k3s" + ], var.sys_upgrade_ctrl ? [ + ["system_upgrade=true"] + ] : []) + + critical_addons_only_true = "--node-taint \"CriticalAddonsOnly=true:NoExecute\" \\" + + taint_critical = var.server_taint_criticalonly == true ? local.critical_addons_only_true : "\\" + + # The main network cidr that all subnets will be created upon + network_ipv4_cidr = "10.0.0.0/8" + + agent_nodes = merge([ + for pool_index, nodepool_obj in var.agent_nodepools : { + for node_index in range(nodepool_obj.count) : + format("%s-%s-%s", pool_index, node_index, nodepool_obj.name) => { + nodepool_name : format("%s-%s", node_index, nodepool_obj.name), + server_type : nodepool_obj.server_type, + location : nodepool_obj.location, + channel : nodepool_obj.channel, + labels : concat(local.agent_labels, nodepool_obj.labels), + taints : nodepool_obj.taints, + index : node_index + } + } + ]...) + + agent_count = sum([for v in var.agent_nodepools : v.count]) + + # Manifest templates + + # Kubernetes Dashboard + k8s_dash_yaml_tpl = var.k8s_dashboard == true ? templatefile("${path.module}/manifests/templates/cloud-init/write_k8s_dash.tftpl", { + k8s_dash_yaml = base64gzip(file("${path.module}/manifests/kubernetes_dashboard.yaml")) + }) : "" + + # System Upgrade Controller + sys_upgrade_ctrl_yaml_tpl = var.sys_upgrade_ctrl == true ? templatefile("${path.module}/manifests/templates/cloud-init/write_sys_upgrade.tftpl", { + sys_upgrade_ctrl_yaml = base64gzip(file("${path.module}/manifests/system_upgrade_controller.yaml")) + }) : "" + + # Cert Manager + cert_manager_yaml_tpl = var.install_cert_manager == true ? 
templatefile("${path.module}/manifests/templates/cloud-init/write_cert_manager.tftpl", { + cert_manager_yaml = base64gzip(file("${path.module}/manifests/cert_manager.yaml")) + }) : "" + + # Kubeconfig TLS Resources + ca_names = toset(["server", "client", "request-header"]) + + client_names = toset(["client-admin"]) + + certificate-authority-data = tls_self_signed_cert.ca_certs["server"].cert_pem + + client-certificate-data = trimspace(join("", [ + tls_locally_signed_cert.client_admin_user.cert_pem, + tls_self_signed_cert.ca_certs["client"].cert_pem + ])) + + client-key-data = tls_private_key.keys["client-admin"].private_key_pem + + kubeconfig = templatefile("${path.module}/kubeconfig.yaml.tftpl", { + certificate-authority-data = base64encode(local.certificate-authority-data) + client-certificate-data = base64encode(local.client-certificate-data) + client-key-data = base64encode(local.client-key-data) + k3s_lb_ip = hcloud_load_balancer.k3s_api_lb.ipv4 + }) + + # The following IPs are important to be whitelisted because they communicate with Hetzner services and enable the CCM and CSI to work properly. 
+ # Source https://github.com/hetznercloud/csi-driver/issues/204#issuecomment-848625566 + hetzner_metadata_service_ipv4 = "169.254.169.254/32" + hetzner_cloud_api_ipv4 = "213.239.246.1/32" + + # internal Pod CIDR, used for the controller and currently for calico + cluster_cidr_ipv4 = "10.42.0.0/16" + + whitelisted_ips = [ + local.network_ipv4_cidr, + local.hetzner_metadata_service_ipv4, + local.hetzner_cloud_api_ipv4, + "127.0.0.1/32", + ] + + base_firewall_rules = concat([ + # Allowing internal cluster traffic and Hetzner metadata service and cloud API IPs + { + direction = "in" + protocol = "tcp" + port = "any" + source_ips = local.whitelisted_ips + }, + { + direction = "in" + protocol = "udp" + port = "any" + source_ips = local.whitelisted_ips + }, + { + direction = "in" + protocol = "icmp" + source_ips = local.whitelisted_ips + }, + + # Allow all traffic to the kube api server + { + direction = "in" + protocol = "tcp" + port = "6443" + source_ips = [ + "10.0.0.0/8", + "127.0.0.1/32" + ] + }, + + # Allow all traffic to the ssh port + { + direction = "in" + protocol = "tcp" + port = "22" + source_ips = [ + "0.0.0.0/0" + ] + }, + + # Allow basic out traffic + # ICMP to ping outside services + { + direction = "out" + protocol = "icmp" + destination_ips = [ + "0.0.0.0/0" + ] + }, + + # DNS + { + direction = "out" + protocol = "tcp" + port = "53" + destination_ips = [ + "0.0.0.0/0" + ] + }, + { + direction = "out" + protocol = "udp" + port = "53" + destination_ips = [ + "0.0.0.0/0" + ] + }, + + # HTTP(s) + { + direction = "out" + protocol = "tcp" + port = "80" + destination_ips = [ + "0.0.0.0/0" + ] + }, + { + direction = "out" + protocol = "tcp" + port = "443" + destination_ips = [ + "0.0.0.0/0" + ] + }, + + #NTP + { + direction = "out" + protocol = "udp" + port = "123" + destination_ips = [ + "0.0.0.0/0" + ] + } + ] + ) +} \ No newline at end of file diff --git a/manifests/cert_manager.yaml b/manifests/cert_manager.yaml new file mode 100644 index 0000000..bcaa0c8 
--- /dev/null +++ b/manifests/cert_manager.yaml @@ -0,0 +1,17 @@ +--- +apiVersion: v1 +kind: Namespace +metadata: + name: cert-manager +--- +apiVersion: helm.cattle.io/v1 +kind: HelmChart +metadata: + name: cert-manager + namespace: kube-system +spec: + chart: cert-manager + repo: https://charts.jetstack.io + targetNamespace: cert-manager + valuesContent: |- + installCRDs: true \ No newline at end of file diff --git a/manifests/csi.yaml b/manifests/csi.yaml new file mode 100644 index 0000000..565225d --- /dev/null +++ b/manifests/csi.yaml @@ -0,0 +1,348 @@ +--- +apiVersion: storage.k8s.io/v1 +kind: CSIDriver +metadata: + name: csi.hetzner.cloud +spec: + attachRequired: true + podInfoOnMount: true + volumeLifecycleModes: + - Persistent +--- +kind: StorageClass +apiVersion: storage.k8s.io/v1 +metadata: + namespace: kube-system + name: hcloud-volumes + annotations: + storageclass.kubernetes.io/is-default-class: "true" +provisioner: csi.hetzner.cloud +volumeBindingMode: WaitForFirstConsumer +allowVolumeExpansion: true +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: hcloud-csi + namespace: kube-system +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: hcloud-csi +rules: + # attacher + - apiGroups: [""] + resources: ["persistentvolumes"] + verbs: ["get", "list", "watch", "update", "patch"] + - apiGroups: [""] + resources: ["nodes"] + verbs: ["get", "list", "watch"] + - apiGroups: ["csi.storage.k8s.io"] + resources: ["csinodeinfos"] + verbs: ["get", "list", "watch"] + - apiGroups: ["storage.k8s.io"] + resources: ["csinodes"] + verbs: ["get", "list", "watch"] + - apiGroups: ["storage.k8s.io"] + resources: ["volumeattachments"] + verbs: ["get", "list", "watch", "update", "patch"] + - apiGroups: ["storage.k8s.io"] + resources: ["volumeattachments/status"] + verbs: ["patch"] + # provisioner + - apiGroups: [""] + resources: ["secrets"] + verbs: ["get", "list"] + - apiGroups: [""] + resources: ["persistentvolumes"] + verbs: 
["get", "list", "watch", "create", "delete", "patch"] + - apiGroups: [""] + resources: ["persistentvolumeclaims", "persistentvolumeclaims/status"] + verbs: ["get", "list", "watch", "update", "patch"] + - apiGroups: ["storage.k8s.io"] + resources: ["storageclasses"] + verbs: ["get", "list", "watch"] + - apiGroups: [""] + resources: ["events"] + verbs: ["list", "watch", "create", "update", "patch"] + - apiGroups: ["snapshot.storage.k8s.io"] + resources: ["volumesnapshots"] + verbs: ["get", "list"] + - apiGroups: ["snapshot.storage.k8s.io"] + resources: ["volumesnapshotcontents"] + verbs: ["get", "list"] + # resizer + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "watch"] + # node + - apiGroups: [""] + resources: ["events"] + verbs: ["get", "list", "watch", "create", "update", "patch"] +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: hcloud-csi +subjects: + - kind: ServiceAccount + name: hcloud-csi + namespace: kube-system +roleRef: + kind: ClusterRole + name: hcloud-csi + apiGroup: rbac.authorization.k8s.io +--- +kind: StatefulSet +apiVersion: apps/v1 +metadata: + name: hcloud-csi-controller + namespace: kube-system +spec: + selector: + matchLabels: + app: hcloud-csi-controller + serviceName: hcloud-csi-controller + replicas: 1 + template: + metadata: + labels: + app: hcloud-csi-controller + spec: + serviceAccount: hcloud-csi + containers: + - name: csi-attacher + image: k8s.gcr.io/sig-storage/csi-attacher:v3.2.1 + volumeMounts: + - name: socket-dir + mountPath: /run/csi + securityContext: + privileged: true + capabilities: + add: ["SYS_ADMIN"] + allowPrivilegeEscalation: true + - name: csi-resizer + image: k8s.gcr.io/sig-storage/csi-resizer:v1.2.0 + volumeMounts: + - name: socket-dir + mountPath: /run/csi + securityContext: + privileged: true + capabilities: + add: ["SYS_ADMIN"] + allowPrivilegeEscalation: true + - name: csi-provisioner + image: k8s.gcr.io/sig-storage/csi-provisioner:v2.2.2 + args: + - 
--feature-gates=Topology=true + - --default-fstype=ext4 + volumeMounts: + - name: socket-dir + mountPath: /run/csi + securityContext: + privileged: true + capabilities: + add: ["SYS_ADMIN"] + allowPrivilegeEscalation: true + - name: hcloud-csi-driver + image: hetznercloud/hcloud-csi-driver:1.6.0 + imagePullPolicy: Always + env: + - name: CSI_ENDPOINT + value: unix:///run/csi/socket + - name: METRICS_ENDPOINT + value: 0.0.0.0:9189 + - name: ENABLE_METRICS + value: "true" + - name: KUBE_NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + - name: HCLOUD_TOKEN + valueFrom: + secretKeyRef: + name: hcloud + key: token + volumeMounts: + - name: socket-dir + mountPath: /run/csi + ports: + - containerPort: 9189 + name: metrics + - name: healthz + containerPort: 9808 + protocol: TCP + livenessProbe: + failureThreshold: 5 + httpGet: + path: /healthz + port: healthz + initialDelaySeconds: 10 + timeoutSeconds: 3 + periodSeconds: 2 + securityContext: + privileged: true + capabilities: + add: ["SYS_ADMIN"] + allowPrivilegeEscalation: true + - name: liveness-probe + imagePullPolicy: Always + image: k8s.gcr.io/sig-storage/livenessprobe:v2.3.0 + volumeMounts: + - mountPath: /run/csi + name: socket-dir + volumes: + - name: socket-dir + emptyDir: {} +--- +kind: DaemonSet +apiVersion: apps/v1 +metadata: + name: hcloud-csi-node + namespace: kube-system + labels: + app: hcloud-csi +spec: + selector: + matchLabels: + app: hcloud-csi + template: + metadata: + labels: + app: hcloud-csi + spec: + tolerations: + - effect: NoExecute + operator: Exists + - effect: NoSchedule + operator: Exists + - key: CriticalAddonsOnly + operator: Exists + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: "instance.hetzner.cloud/is-root-server" + operator: NotIn + values: + - "true" + serviceAccount: hcloud-csi + containers: + - name: csi-node-driver-registrar + image: 
k8s.gcr.io/sig-storage/csi-node-driver-registrar:v2.2.0 + args: + - --kubelet-registration-path=/var/lib/kubelet/plugins/csi.hetzner.cloud/socket + env: + - name: KUBE_NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + volumeMounts: + - name: plugin-dir + mountPath: /run/csi + - name: registration-dir + mountPath: /registration + securityContext: + privileged: true + - name: hcloud-csi-driver + image: hetznercloud/hcloud-csi-driver:1.6.0 + imagePullPolicy: Always + env: + - name: CSI_ENDPOINT + value: unix:///run/csi/socket + - name: METRICS_ENDPOINT + value: 0.0.0.0:9189 + - name: ENABLE_METRICS + value: "true" + - name: HCLOUD_TOKEN + valueFrom: + secretKeyRef: + name: hcloud + key: token + - name: KUBE_NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + volumeMounts: + - name: kubelet-dir + mountPath: /var/lib/kubelet + mountPropagation: "Bidirectional" + - name: plugin-dir + mountPath: /run/csi + - name: device-dir + mountPath: /dev + securityContext: + privileged: true + ports: + - containerPort: 9189 + name: metrics + - name: healthz + containerPort: 9808 + protocol: TCP + livenessProbe: + failureThreshold: 5 + httpGet: + path: /healthz + port: healthz + initialDelaySeconds: 10 + timeoutSeconds: 3 + periodSeconds: 2 + - name: liveness-probe + imagePullPolicy: Always + image: k8s.gcr.io/sig-storage/livenessprobe:v2.3.0 + volumeMounts: + - mountPath: /run/csi + name: plugin-dir + volumes: + - name: kubelet-dir + hostPath: + path: /var/lib/kubelet + type: Directory + - name: plugin-dir + hostPath: + path: /var/lib/kubelet/plugins/csi.hetzner.cloud/ + type: DirectoryOrCreate + - name: registration-dir + hostPath: + path: /var/lib/kubelet/plugins_registry/ + type: Directory + - name: device-dir + hostPath: + path: /dev + type: Directory +--- +apiVersion: v1 +kind: Service +metadata: + name: hcloud-csi-controller-metrics + namespace: kube-system + labels: + app: hcloud-csi +spec: + selector: + app: 
hcloud-csi-controller + ports: + - port: 9189 + name: metrics + targetPort: metrics + +--- +apiVersion: v1 +kind: Service +metadata: + name: hcloud-csi-node-metrics + namespace: kube-system + labels: + app: hcloud-csi +spec: + selector: + app: hcloud-csi + ports: + - port: 9189 + name: metrics + targetPort: metrics diff --git a/manifests/kubernetes_dashboard.yaml b/manifests/kubernetes_dashboard.yaml new file mode 100644 index 0000000..6befcfd --- /dev/null +++ b/manifests/kubernetes_dashboard.yaml @@ -0,0 +1,332 @@ +# Copyright 2017 The Kubernetes Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Source: https://raw.githubusercontent.com/kubernetes/dashboard/v2.6.0/aio/deploy/recommended.yaml + +apiVersion: v1 +kind: Namespace +metadata: + name: kubernetes-dashboard + +--- + +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + k8s-app: kubernetes-dashboard + name: kubernetes-dashboard + namespace: kubernetes-dashboard + +--- + +kind: Service +apiVersion: v1 +metadata: + labels: + k8s-app: kubernetes-dashboard + name: kubernetes-dashboard + namespace: kubernetes-dashboard +spec: + ports: + - port: 443 + targetPort: 8443 + selector: + k8s-app: kubernetes-dashboard + +--- + +apiVersion: v1 +kind: Secret +metadata: + labels: + k8s-app: kubernetes-dashboard + name: kubernetes-dashboard-certs + namespace: kubernetes-dashboard +type: Opaque + +--- + +apiVersion: v1 +kind: Secret +metadata: + labels: + k8s-app: kubernetes-dashboard + name: kubernetes-dashboard-csrf + namespace: kubernetes-dashboard +type: Opaque +data: + csrf: "" + +--- + +apiVersion: v1 +kind: Secret +metadata: + labels: + k8s-app: kubernetes-dashboard + name: kubernetes-dashboard-key-holder + namespace: kubernetes-dashboard +type: Opaque + +--- + +kind: ConfigMap +apiVersion: v1 +metadata: + labels: + k8s-app: kubernetes-dashboard + name: kubernetes-dashboard-settings + namespace: kubernetes-dashboard + +--- + +kind: Role +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + labels: + k8s-app: kubernetes-dashboard + name: kubernetes-dashboard + namespace: kubernetes-dashboard +rules: + # Allow Dashboard to get, update and delete Dashboard exclusive secrets. + - apiGroups: [""] + resources: ["secrets"] + resourceNames: ["kubernetes-dashboard-key-holder", "kubernetes-dashboard-certs", "kubernetes-dashboard-csrf"] + verbs: ["get", "update", "delete"] + # Allow Dashboard to get and update 'kubernetes-dashboard-settings' config map. 
+ - apiGroups: [""] + resources: ["configmaps"] + resourceNames: ["kubernetes-dashboard-settings"] + verbs: ["get", "update"] + # Allow Dashboard to get metrics. + - apiGroups: [""] + resources: ["services"] + resourceNames: ["heapster", "dashboard-metrics-scraper"] + verbs: ["proxy"] + - apiGroups: [""] + resources: ["services/proxy"] + resourceNames: ["heapster", "http:heapster:", "https:heapster:", "dashboard-metrics-scraper", "http:dashboard-metrics-scraper"] + verbs: ["get"] + +--- + +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + labels: + k8s-app: kubernetes-dashboard + name: kubernetes-dashboard +rules: + # Allow Metrics Scraper to get metrics from the Metrics server + - apiGroups: ["metrics.k8s.io"] + resources: ["pods", "nodes"] + verbs: ["get", "list", "watch"] + +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + labels: + k8s-app: kubernetes-dashboard + name: kubernetes-dashboard + namespace: kubernetes-dashboard +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: kubernetes-dashboard +subjects: + - kind: ServiceAccount + name: kubernetes-dashboard + namespace: kubernetes-dashboard + +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: kubernetes-dashboard +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kubernetes-dashboard +subjects: + - kind: ServiceAccount + name: kubernetes-dashboard + namespace: kubernetes-dashboard + +--- + +kind: Deployment +apiVersion: apps/v1 +metadata: + labels: + k8s-app: kubernetes-dashboard + name: kubernetes-dashboard + namespace: kubernetes-dashboard +spec: + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + k8s-app: kubernetes-dashboard + template: + metadata: + labels: + k8s-app: kubernetes-dashboard + spec: + securityContext: + seccompProfile: + type: RuntimeDefault + containers: + - name: kubernetes-dashboard + image: kubernetesui/dashboard:v2.6.0 + 
imagePullPolicy: Always + ports: + - containerPort: 8443 + protocol: TCP + args: + - --auto-generate-certificates + - --namespace=kubernetes-dashboard + # Uncomment the following line to manually specify Kubernetes API server Host + # If not specified, Dashboard will attempt to auto discover the API server and connect + # to it. Uncomment only if the default does not work. + # - --apiserver-host=http://my-address:port + volumeMounts: + - name: kubernetes-dashboard-certs + mountPath: /certs + # Create on-disk volume to store exec logs + - mountPath: /tmp + name: tmp-volume + livenessProbe: + httpGet: + scheme: HTTPS + path: / + port: 8443 + initialDelaySeconds: 30 + timeoutSeconds: 30 + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + runAsUser: 1001 + runAsGroup: 2001 + volumes: + - name: kubernetes-dashboard-certs + secret: + secretName: kubernetes-dashboard-certs + - name: tmp-volume + emptyDir: {} + serviceAccountName: kubernetes-dashboard + nodeSelector: + "kubernetes.io/os": linux + # Comment the following tolerations if Dashboard must not be deployed on master + tolerations: + - key: node-role.kubernetes.io/master + effect: NoSchedule + +--- + +kind: Service +apiVersion: v1 +metadata: + labels: + k8s-app: dashboard-metrics-scraper + name: dashboard-metrics-scraper + namespace: kubernetes-dashboard +spec: + ports: + - port: 8000 + targetPort: 8000 + selector: + k8s-app: dashboard-metrics-scraper + +--- + +kind: Deployment +apiVersion: apps/v1 +metadata: + labels: + k8s-app: dashboard-metrics-scraper + name: dashboard-metrics-scraper + namespace: kubernetes-dashboard +spec: + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + k8s-app: dashboard-metrics-scraper + template: + metadata: + labels: + k8s-app: dashboard-metrics-scraper + spec: + securityContext: + seccompProfile: + type: RuntimeDefault + containers: + - name: dashboard-metrics-scraper + image: kubernetesui/metrics-scraper:v1.0.8 + ports: + - 
containerPort: 8000 + protocol: TCP + livenessProbe: + httpGet: + scheme: HTTP + path: / + port: 8000 + initialDelaySeconds: 30 + timeoutSeconds: 30 + volumeMounts: + - mountPath: /tmp + name: tmp-volume + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + runAsUser: 1001 + runAsGroup: 2001 + serviceAccountName: kubernetes-dashboard + nodeSelector: + "kubernetes.io/os": linux + # Comment the following tolerations if Dashboard must not be deployed on master + tolerations: + - key: node-role.kubernetes.io/master + effect: NoSchedule + volumes: + - name: tmp-volume + emptyDir: {} + +--- + + +apiVersion: v1 +kind: ServiceAccount +metadata: + name: admin-user + namespace: kubernetes-dashboard + +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: admin-user +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: cluster-admin +subjects: +- kind: ServiceAccount + name: admin-user + namespace: kubernetes-dashboard \ No newline at end of file diff --git a/manifests/system_upgrade_controller.yaml b/manifests/system_upgrade_controller.yaml new file mode 100644 index 0000000..5bac30e --- /dev/null +++ b/manifests/system_upgrade_controller.yaml @@ -0,0 +1,117 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: system-upgrade +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: system-upgrade + namespace: system-upgrade +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: system-upgrade +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: cluster-admin +subjects: +- kind: ServiceAccount + name: system-upgrade + namespace: system-upgrade +--- +apiVersion: v1 +data: + SYSTEM_UPGRADE_CONTROLLER_DEBUG: "false" + SYSTEM_UPGRADE_CONTROLLER_THREADS: "2" + SYSTEM_UPGRADE_JOB_ACTIVE_DEADLINE_SECONDS: "900" + SYSTEM_UPGRADE_JOB_BACKOFF_LIMIT: "99" + SYSTEM_UPGRADE_JOB_IMAGE_PULL_POLICY: Always + 
SYSTEM_UPGRADE_JOB_KUBECTL_IMAGE: rancher/kubectl:v1.21.9 + SYSTEM_UPGRADE_JOB_PRIVILEGED: "true" + SYSTEM_UPGRADE_JOB_TTL_SECONDS_AFTER_FINISH: "900" + SYSTEM_UPGRADE_PLAN_POLLING_INTERVAL: 15m +kind: ConfigMap +metadata: + name: default-controller-env + namespace: system-upgrade +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: system-upgrade-controller + namespace: system-upgrade +spec: + selector: + matchLabels: + upgrade.cattle.io/controller: system-upgrade-controller + template: + metadata: + labels: + upgrade.cattle.io/controller: system-upgrade-controller + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/master + operator: Exists + containers: + - env: + - name: SYSTEM_UPGRADE_CONTROLLER_NAME + valueFrom: + fieldRef: + fieldPath: metadata.labels['upgrade.cattle.io/controller'] + - name: SYSTEM_UPGRADE_CONTROLLER_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + envFrom: + - configMapRef: + name: default-controller-env + image: rancher/system-upgrade-controller:v0.9.1 + imagePullPolicy: IfNotPresent + name: system-upgrade-controller + volumeMounts: + - mountPath: /etc/ssl + name: etc-ssl + - mountPath: /etc/pki + name: etc-pki + - mountPath: /etc/ca-certificates + name: etc-ca-certificates + - mountPath: /tmp + name: tmp + serviceAccountName: system-upgrade + tolerations: + - key: CriticalAddonsOnly + operator: Exists + - effect: NoSchedule + key: node-role.kubernetes.io/master + operator: Exists + - effect: NoSchedule + key: node-role.kubernetes.io/controlplane + operator: Exists + - effect: NoSchedule + key: node-role.kubernetes.io/control-plane + operator: Exists + - effect: NoExecute + key: node-role.kubernetes.io/etcd + operator: Exists + volumes: + - hostPath: + path: /etc/ssl + type: Directory + name: etc-ssl + - hostPath: + path: /etc/pki + type: DirectoryOrCreate + name: etc-pki + - hostPath: + path: 
/etc/ca-certificates + type: DirectoryOrCreate + name: etc-ca-certificates + - emptyDir: {} + name: tmp diff --git a/manifests/system_upgrade_plans.yaml b/manifests/system_upgrade_plans.yaml new file mode 100644 index 0000000..39fbc33 --- /dev/null +++ b/manifests/system_upgrade_plans.yaml @@ -0,0 +1,52 @@ +# Doc: https://rancher.com/docs/k3s/latest/en/upgrades/automated/ +# agent plan +apiVersion: upgrade.cattle.io/v1 +kind: Plan +metadata: + name: k3s-agent + namespace: system-upgrade + labels: + k3s_upgrade: agent +spec: + concurrency: 1 + channel: https://update.k3s.io/v1-release/channels/${channel} + serviceAccountName: system-upgrade + nodeSelector: + matchExpressions: + - {key: system_upgrade, operator: Exists} + - {key: system_upgrade, operator: NotIn, values: ["disabled", "false"]} + - {key: node-role.kubernetes.io/master, operator: NotIn, values: ["true"]} + tolerations: + - {key: server-usage, effect: NoSchedule, operator: Equal, value: storage} + prepare: + image: rancher/k3s-upgrade + args: ["prepare", "k3s-server"] + drain: + force: true + skipWaitForDeleteTimeout: 60 + upgrade: + image: rancher/k3s-upgrade +--- +# server plan +apiVersion: upgrade.cattle.io/v1 +kind: Plan +metadata: + name: k3s-server + namespace: system-upgrade + labels: + k3s_upgrade: server +spec: + concurrency: 1 + channel: https://update.k3s.io/v1-release/channels/${channel} + serviceAccountName: system-upgrade + nodeSelector: + matchExpressions: + - {key: system_upgrade, operator: Exists} + - {key: system_upgrade, operator: NotIn, values: ["disabled", "false"]} + - {key: node-role.kubernetes.io/master, operator: In, values: ["true"]} + tolerations: + - {key: node-role.kubernetes.io/master, effect: NoSchedule, operator: Exists} + - {key: CriticalAddonsOnly, effect: NoExecute, operator: Exists} + cordon: true + upgrade: + image: rancher/k3s-upgrade \ No newline at end of file diff --git a/manifests/templates/ccm.yaml.tftpl b/manifests/templates/ccm.yaml.tftpl new file mode 100644 
index 0000000..df518cc --- /dev/null +++ b/manifests/templates/ccm.yaml.tftpl @@ -0,0 +1,88 @@ +# NOTE: this release was tested against kubernetes v1.18.x +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: cloud-controller-manager + namespace: kube-system +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: system:cloud-controller-manager +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: cluster-admin +subjects: + - kind: ServiceAccount + name: cloud-controller-manager + namespace: kube-system +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: hcloud-cloud-controller-manager + namespace: kube-system +spec: + replicas: 1 + revisionHistoryLimit: 2 + selector: + matchLabels: + app: hcloud-cloud-controller-manager + template: + metadata: + labels: + app: hcloud-cloud-controller-manager + annotations: + scheduler.alpha.kubernetes.io/critical-pod: '' + spec: + serviceAccountName: cloud-controller-manager + dnsPolicy: Default + tolerations: + # this taint is set by all kubelets running `--cloud-provider=external` + # so we should tolerate it to schedule the cloud controller manager + - key: "node.cloudprovider.kubernetes.io/uninitialized" + value: "true" + effect: "NoSchedule" + - key: "CriticalAddonsOnly" + operator: "Exists" + # cloud controller manages should be able to run on masters + - key: "node-role.kubernetes.io/master" + effect: NoSchedule + operator: Exists + - key: "node-role.kubernetes.io/control-plane" + effect: NoSchedule + operator: Exists + - key: "node.kubernetes.io/not-ready" + effect: "NoSchedule" + hostNetwork: true + containers: + - image: hetznercloud/hcloud-cloud-controller-manager:v1.12.1 + name: hcloud-cloud-controller-manager + command: + - "/bin/hcloud-cloud-controller-manager" + - "--cloud-provider=hcloud" + - "--leader-elect=false" + - "--allow-untagged-cloud" + - "--allocate-node-cidrs=true" + - "--cluster-cidr=${cluster_cidr_ipv4}" + resources: + 
requests: + cpu: 100m + memory: 50Mi + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: HCLOUD_TOKEN + valueFrom: + secretKeyRef: + name: hcloud + key: token + - name: HCLOUD_NETWORK + valueFrom: + secretKeyRef: + name: hcloud + key: network diff --git a/manifests/templates/cloud-init/write_cert_manager.tftpl b/manifests/templates/cloud-init/write_cert_manager.tftpl new file mode 100644 index 0000000..d1105d2 --- /dev/null +++ b/manifests/templates/cloud-init/write_cert_manager.tftpl @@ -0,0 +1,4 @@ +- content: ${cert_manager_yaml} + encoding: gzip+base64 + path: /root/post_install/cert_manager.yaml + permissions: '0600' \ No newline at end of file diff --git a/manifests/templates/cloud-init/write_k8s_dash.tftpl b/manifests/templates/cloud-init/write_k8s_dash.tftpl new file mode 100644 index 0000000..9e99c0d --- /dev/null +++ b/manifests/templates/cloud-init/write_k8s_dash.tftpl @@ -0,0 +1,4 @@ +- content: ${k8s_dash_yaml} + encoding: gzip+base64 + path: /root/manifests/kubernetes_dashboard.yaml + permissions: '0600' \ No newline at end of file diff --git a/manifests/templates/cloud-init/write_sys_upgrade.tftpl b/manifests/templates/cloud-init/write_sys_upgrade.tftpl new file mode 100644 index 0000000..0a44c3d --- /dev/null +++ b/manifests/templates/cloud-init/write_sys_upgrade.tftpl @@ -0,0 +1,4 @@ +- content: ${sys_upgrade_ctrl_yaml} + encoding: gzip+base64 + path: /root/manifests/system_upgrade_controller.yaml + permissions: '0600' \ No newline at end of file diff --git a/network.tf b/network.tf new file mode 100644 index 0000000..2eee762 --- /dev/null +++ b/network.tf @@ -0,0 +1,14 @@ +resource "hcloud_network" "k3s_net" { + name = "k3s-net-01" + ip_range = var.k3s_network_range + labels = { + "type" = "cluster" + } +} + +resource "hcloud_network_subnet" "k3s_net" { + network_id = hcloud_network.k3s_net.id + type = "cloud" + network_zone = "eu-central" + ip_range = "10.1.0.0/16" +} \ No newline at end of file diff --git 
a/outputs.tf b/outputs.tf new file mode 100644 index 0000000..76b8097 --- /dev/null +++ b/outputs.tf @@ -0,0 +1,3 @@ +output "local_agent_nodepools" { + value = local.agent_nodes +} \ No newline at end of file diff --git a/placement_groups.tf b/placement_groups.tf new file mode 100644 index 0000000..cbdad4e --- /dev/null +++ b/placement_groups.tf @@ -0,0 +1,11 @@ +resource "hcloud_placement_group" "k3s_server_placement_group" { + count = ceil(var.server_count / 10) + name = "k3s-server-group-${count.index + 1}" + type = "spread" +} + +resource "hcloud_placement_group" "k3s_agent_placement_group" { + count = ceil(local.agent_count / 10) + name = "k3s-agent-group-${count.index + 1}" + type = "spread" +} \ No newline at end of file diff --git a/random_ids.tf b/random_ids.tf new file mode 100644 index 0000000..0fee61a --- /dev/null +++ b/random_ids.tf @@ -0,0 +1,9 @@ +resource "random_id" "server_node_id" { + byte_length = 2 + count = var.server_count +} + +resource "random_id" "agent_node_id" { + count = local.agent_count + byte_length = 2 +} \ No newline at end of file diff --git a/server.tf b/server.tf new file mode 100644 index 0000000..765ad4a --- /dev/null +++ b/server.tf @@ -0,0 +1,56 @@ +# Create the additional K3s server nodes (nodes 2..n; the initial node is created in server_init.tf) +resource "hcloud_server" "server_node" { + count = var.server_count - 1 + name = "k3s-server-${count.index + 2}-${var.location}-${random_id.server_node_id[count.index + 1].hex}" + image = "ubuntu-22.04" + server_type = "cx11" + placement_group_id = element(hcloud_placement_group.k3s_server_placement_group.*.id, ceil(count.index / 10)) + firewall_ids = [hcloud_firewall.k3s.id] + location = var.location + ssh_keys = [var.ssh_public_key_name] + labels = { + provisioner = "terraform", + engine = "k3s" + type = local.server_label + } + # Prevent destroying the whole cluster if the user changes any of the attributes + # that force to recreate the servers or network ip's/mac addresses. 
+ lifecycle { + ignore_changes = [ + location, + network, + ssh_keys, + user_data, + ] + } + network { + network_id = hcloud_network.k3s_net.id + #ip = "10.1.0.3" + } + user_data = templatefile("${path.module}/user_data/server/server.yaml.tftpl", { + install_script = base64gzip(templatefile("${path.module}/user_data/server/server_install.sh", { + sleep_period = (30 * count.index) # Server nodes cannot join the cluster (etcd) simultaneously. Sleep workaround (unreliable) avoids a join failure. + server_init_private_ip = hcloud_server.server_node_init[0].network.*.ip[0] + k3s_channel = var.k3s_channel + k3s_token = random_password.k3s_token.result + k3s_agent_token = random_password.k3s_agent_token.result + critical_taint = local.taint_critical + flannel_backend = var.flannel_backend + })) + }) + provisioner "remote-exec" { + inline = [ + "cloud-init status --wait --long > /dev/null" # wait for cloud-init to complete + ] + + connection { + host = self.ipv4_address + type = "ssh" + user = "root" + private_key = var.ssh_private_key + } + } + depends_on = [ + hcloud_server.server_node_init + ] +} \ No newline at end of file diff --git a/server_init.tf b/server_init.tf new file mode 100644 index 0000000..4942f31 --- /dev/null +++ b/server_init.tf @@ -0,0 +1,63 @@ +# Create initial K3s server node +resource "hcloud_server" "server_node_init" { + count = 1 + name = "k3s-server-1-${var.location}-${random_id.server_node_id[count.index].hex}" + image = "ubuntu-22.04" + server_type = "cx11" + placement_group_id = element(hcloud_placement_group.k3s_server_placement_group.*.id, ceil(count.index / 10)) + firewall_ids = [hcloud_firewall.k3s.id] + location = var.location + ssh_keys = [var.ssh_public_key_name] + labels = { + provisioner = "terraform", + engine = "k3s" + type = local.server_label + } + network { + network_id = hcloud_network.k3s_net.id + ip = "10.1.0.2" + } + user_data = templatefile("${path.module}/user_data/server/server_init.yaml.tftpl", { + install_script = 
base64gzip(templatefile("${path.module}/user_data/server/server_init_install.sh", { + k3s_channel = var.k3s_channel + k3s_token = random_password.k3s_token.result + k3s_agent_token = random_password.k3s_agent_token.result + critical_taint = local.taint_critical + flannel_backend = var.flannel_backend + k3s_lb_ip = hcloud_load_balancer.k3s_api_lb.ipv4 + })) + ccm_tpl = base64gzip(templatefile("${path.module}/manifests/templates/ccm.yaml.tftpl", { + cluster_cidr_ipv4 = local.cluster_cidr_ipv4 + })) + csi_tpl = base64gzip(file("${path.module}/manifests/csi.yaml")) + # --- START k3s generated CA keys & certs --- + ca_keys = { for ca_name, key in tls_private_key.keys : ca_name => base64gzip(key.private_key_pem) if contains(local.ca_names, ca_name) } + ca_certs = { for ca_name, cert in tls_self_signed_cert.ca_certs : ca_name => base64gzip(cert.cert_pem) } + # --- END k3s generated keys & certs --- + hcloud_token = var.hcloud_token + k3s_net_id = hcloud_network.k3s_net.id + flannel_backend = var.flannel_backend + k8s_dashboard = local.k8s_dash_yaml_tpl + sys_upgrade_ctrl = local.sys_upgrade_ctrl_yaml_tpl + cert_manager = local.cert_manager_yaml_tpl + }) + provisioner "remote-exec" { + inline = [ + "cloud-init status --wait --long > /dev/null" # wait for cloud-init to complete + ] + + connection { + host = self.ipv4_address + type = "ssh" + user = "root" + private_key = var.ssh_private_key + } + } + # **Note**: the depends_on is important when directly attaching the + # server to a network. Otherwise Terraform will attempt to create + # server and sub-network in parallel. This may result in the server + # creation failing randomly. 
+ depends_on = [ + hcloud_network_subnet.k3s_net + ] +} \ No newline at end of file diff --git a/ssh_key.tf b/ssh_key.tf new file mode 100644 index 0000000..108d557 --- /dev/null +++ b/ssh_key.tf @@ -0,0 +1,4 @@ +resource "hcloud_ssh_key" "default" { + name = var.ssh_public_key_name + public_key = var.ssh_public_key +} \ No newline at end of file diff --git a/user_data/agent/agent.yaml.tftpl b/user_data/agent/agent.yaml.tftpl new file mode 100644 index 0000000..e08755c --- /dev/null +++ b/user_data/agent/agent.yaml.tftpl @@ -0,0 +1,29 @@ +#cloud-config +package_update: true +package_upgrade: true +package_reboot_if_required: false + +# Disable password authentication for root user +ssh_pwauth: false + +# Write out base64 encoded k3s agent config to /etc/rancher/k3s/config.yaml +write_files: +- encoding: gzip+base64 + content: ${agent_config} + owner: root:root + path: /etc/rancher/k3s/config.yaml + permissions: '0644' + #defer: true + +# Install required packages +packages: + - ca-certificates + - curl + - ntp + - wireguard + +# Install K3s +runcmd: + # run k3s install script + #- bash /root/agent_install_k3s.sh + - curl -sfL https://get.k3s.io | INSTALL_K3S_EXEC=agent INSTALL_K3S_CHANNEL=${k3s_channel} sh - diff --git a/user_data/server/server.yaml.tftpl b/user_data/server/server.yaml.tftpl new file mode 100644 index 0000000..bdbb7fb --- /dev/null +++ b/user_data/server/server.yaml.tftpl @@ -0,0 +1,28 @@ +#cloud-config +package_update: true +package_upgrade: true +package_reboot_if_required: false + +# Disable password authentication for root user +ssh_pwauth: false + +# Write out base64 encoded k3s install script to /root/server_install_k3s.sh +write_files: +- encoding: gzip+base64 + content: ${install_script} + owner: root:root + path: /root/server_install_k3s.sh + permissions: '0755' + #defer: true + +# Install required packages +packages: + - ca-certificates + - curl + - ntp + - wireguard + +# Install K3s +runcmd: + # run k3s install script + - bash 
/root/server_install_k3s.sh diff --git a/user_data/server/server_init.yaml.tftpl b/user_data/server/server_init.yaml.tftpl new file mode 100644 index 0000000..2a462a6 --- /dev/null +++ b/user_data/server/server_init.yaml.tftpl @@ -0,0 +1,59 @@ +#cloud-config +package_update: true +package_upgrade: true +package_reboot_if_required: false + +# Disable password authentication for root user +ssh_pwauth: false + +# Write out base64 encoded k3s install script to /root/install_k3s.sh +write_files: +- encoding: gzip+base64 + content: ${install_script} + owner: root:root + path: /root/install_k3s.sh + permissions: '0755' + #defer: true +- content: ${ccm_tpl} + encoding: gzip+base64 + path: /root/manifests/hcloud_ccm.yaml + permissions: '0600' +- content: ${csi_tpl} + encoding: gzip+base64 + path: /root/manifests/hcloud_csi.yaml + permissions: '0600' +${k8s_dashboard} +${sys_upgrade_ctrl} +${cert_manager} + # write k3s keys +%{ for ca_keys_key, ca_keys_value in ca_keys ~} +- content: ${ca_keys_value} + encoding: gzip+base64 + path: /var/lib/rancher/k3s/server/tls/${ca_keys_key}-ca.key + permissions: '0600' +%{ endfor ~} + # write k3s certificates +%{ for ca_certs_key, ca_certs_value in ca_certs ~} +- content: ${ca_certs_value} + encoding: gzip+base64 + path: /var/lib/rancher/k3s/server/tls/${ca_certs_key}-ca.crt + permissions: '0644' +%{ endfor ~} + + +# Install required packages +packages: + - ca-certificates + - curl + - ntp + - wireguard + +# Install K3s +runcmd: + # run k3s install script + - bash /root/install_k3s.sh + # Create hcloud token secret + - kubectl -n kube-system create secret generic hcloud --from-literal=token=${hcloud_token} --from-literal=network=${k3s_net_id} + - kubectl apply -f /root/manifests/ + - echo "WAITING" && sleep 10 + - kubectl apply -f /root/post_install/ \ No newline at end of file diff --git a/user_data/server/server_init_install.sh b/user_data/server/server_init_install.sh new file mode 100644 index 0000000..6cf9c4d --- /dev/null +++ 
b/user_data/server/server_init_install.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +# Set Private & Public IP variables +NODE_PUBLIC_IP=$(/sbin/ip -o -4 addr list eth0 | awk '{print $4}' | cut -d/ -f1) +NODE_PRIVATE_IP=$(/sbin/ip -o -4 addr list ens10 | awk '{print $4}' | cut -d/ -f1) + +# install k3s +curl -sfL https://get.k3s.io | INSTALL_K3S_CHANNEL=${k3s_channel} K3S_TOKEN=${k3s_token} K3S_AGENT_TOKEN=${k3s_agent_token} sh -s - server --cluster-init \ + --node-ip $${NODE_PRIVATE_IP} \ + --advertise-address $${NODE_PRIVATE_IP} \ + ${critical_taint} + --tls-san ${k3s_lb_ip} \ + --flannel-backend=${flannel_backend} \ + --flannel-iface=ens10 \ + --node-label="node.kubernetes.io/created-by=terraform" \ + --disable local-storage \ + --disable-cloud-controller \ + --disable traefik \ + --disable servicelb \ + --kubelet-arg 'cloud-provider=external' \ No newline at end of file diff --git a/user_data/server/server_install.sh b/user_data/server/server_install.sh new file mode 100644 index 0000000..13dab98 --- /dev/null +++ b/user_data/server/server_install.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +# Set Private & Public IP variables +NODE_PUBLIC_IP=$(/sbin/ip -o -4 addr list eth0 | awk '{print $4}' | cut -d/ -f1) +NODE_PRIVATE_IP=$(/sbin/ip -o -4 addr list ens10 | awk '{print $4}' | cut -d/ -f1) + +sleep ${sleep_period} + +# install k3s +curl -sfL https://get.k3s.io | INSTALL_K3S_CHANNEL=${k3s_channel} K3S_TOKEN=${k3s_token} K3S_AGENT_TOKEN=${k3s_agent_token} K3S_URL=https://${server_init_private_ip}:6443 sh -s - server \ + --node-ip $${NODE_PRIVATE_IP} \ + --advertise-address $${NODE_PRIVATE_IP} \ + ${critical_taint} + --flannel-backend=${flannel_backend} \ + --flannel-iface=ens10 \ + --node-label="node.kubernetes.io/created-by=terraform" \ + --disable local-storage \ + --disable-cloud-controller \ + --disable traefik \ + --disable servicelb \ + --kubelet-arg 'cloud-provider=external' \ No newline at end of file diff --git a/variables.tf b/variables.tf new file mode 100644 index 
0000000..ae56c11 --- /dev/null +++ b/variables.tf @@ -0,0 +1,97 @@
+variable "hcloud_token" {
+  description = "Hetzner Cloud API Token"
+  type        = string
+  sensitive   = true
+}
+
+variable "ssh_public_key" {
+  type        = string
+  description = "SSH Public Key"
+}
+
+variable "ssh_public_key_name" {
+  type        = string
+  description = "SSH Public Key Name"
+  default     = "default"
+}
+
+variable "ssh_private_key" {
+  type        = string
+  description = "SSH private Key"
+  sensitive   = true
+}
+
+variable "location" {
+  type        = string
+  description = "Location in which to provision the cluster. Default is nbg1 (Nuremberg, Germany)"
+  default     = "nbg1"
+  validation {
+    condition     = length(regexall("^(nbg1|fsn1|hel1|ash)$", var.location)) > 0 # group the alternation so ^/$ anchor every option, not just the first/last
+    error_message = "Invalid location. Valid locations include nbg1 (default), fsn1, hel1, ash."
+  }
+}
+
+variable "k3s_network_range" {
+  type        = string
+  description = "Range of IP addresses for the network in CIDR notation. Must be one of the private ipv4 ranges of RFC1918"
+  default     = "10.0.0.0/8"
+}
+
+variable "k3s_channel" {
+  type        = string
+  description = "K3s release channel. 'stable', 'latest', 'testing' or a specific channel or version e.g. 'v1.20', 'v1.21.0+k3s1'"
+  default     = "stable"
+}
+
+variable "flannel_backend" {
+  type        = string
+  description = "Flannel Backend Type. Valid options include vxlan (default), ipsec or wireguard"
+  default     = "vxlan"
+  validation {
+    condition     = length(regexall("^(ipsec|vxlan|wireguard|wireguard-native)$", var.flannel_backend)) > 0 # grouped alternation; also accepts wireguard-native
+    error_message = "Invalid Flannel backend value. Valid backend types are vxlan, ipsec, wireguard & wireguard-native."
+  }
+}
+
+variable "server_count" {
+  type        = number
+  description = "Number of server (master) nodes to provision"
+  default     = 2
+}
+
+variable "agent_nodepools" {
+  description = "Number of agent nodes to provision"
+  type        = list(any)
+  default     = []
+}
+
+
+variable "server_taint_criticalonly" {
+  type        = bool
+  description = "Allow only critical addons to be scheduled on servers?
(thus preventing workloads from being launched on them)" + default = true +} + +variable "k8s_dashboard" { + type = bool + description = "Pre-install the Kubernetes Dashboard? (Default is false)" + default = false +} + +variable "k8s_dashboard_version" { + type = string + description = "Kubernetes Dashboard version" + default = "2.4.0" # https://github.com/kubernetes/dashboard/releases +} + +variable "sys_upgrade_ctrl" { + type = bool + description = "Pre-install the System Upgrade Controller?" + default = false +} + +variable "install_cert_manager" { + type = bool + description = "Pre-install cert-manager?" + default = false +} \ No newline at end of file diff --git a/versions.tf b/versions.tf new file mode 100644 index 0000000..a3e31a4 --- /dev/null +++ b/versions.tf @@ -0,0 +1,17 @@ +terraform { + required_version = ">= 1.2.0" + required_providers { + hcloud = { + source = "hetznercloud/hcloud" + version = ">= 1.3.0" + } + tls = { + source = "hashicorp/tls" + version = "3.4.0" + } + local = { + source = "hashicorp/local" + version = "2.2.3" + } + } +} \ No newline at end of file