diff --git a/pipelines/e2e/scripts/longhorn-setup.sh b/pipelines/e2e/scripts/longhorn-setup.sh
index 28dcc15ca1..091da9f15b 100755
--- a/pipelines/e2e/scripts/longhorn-setup.sh
+++ b/pipelines/e2e/scripts/longhorn-setup.sh
@@ -25,6 +25,17 @@ create_instance_mapping_configmap(){
   kubectl create configmap instance-mapping --from-file=/tmp/instance_mapping
 }
 
+longhornctl_check(){
+  curl -L https://github.com/longhorn/cli/releases/download/v1.7.1-rc2/longhornctl-linux-amd64 -o longhornctl
+  chmod +x longhornctl
+  ./longhornctl install preflight
+  ./longhornctl check preflight
+  # rerun the check and fail the job if its output reports any error
+  if [[ -n $(./longhornctl check preflight 2>&1 | grep error) ]]; then
+    exit 1
+  fi
+}
+
 main(){
   set_kubeconfig
 
@@ -44,8 +54,11 @@ main(){
   install_backupstores
   install_csi_snapshotter
 
-  install_litmus
-  install_experiments
+  # msg="failed to get package manager" error="operating systems (amzn, sl-micro) are not supported"
+  if [[ "${TF_VAR_k8s_distro_name}" != "eks" ]] && \
+     [[ "${DISTRO}" != "sle-micro" ]] && [[ "${DISTRO}" != "talos" ]]; then
+    longhornctl_check
+  fi
 
   generate_longhorn_yaml_manifest
   install_longhorn_by_manifest
diff --git a/pipelines/utilities/create_longhorn_namespace.sh b/pipelines/utilities/create_longhorn_namespace.sh
index 76d5c8b0b7..7249e3877b 100755
--- a/pipelines/utilities/create_longhorn_namespace.sh
+++ b/pipelines/utilities/create_longhorn_namespace.sh
@@ -1,6 +1,6 @@
 create_longhorn_namespace(){
   kubectl create ns "${LONGHORN_NAMESPACE}"
-  if [[ "${TF_VAR_cis_hardening}" == true ]]; then
+  if [[ "${TF_VAR_cis_hardening}" == true ]] || [[ "${DISTRO}" == "talos" ]]; then
     kubectl label ns default "${LONGHORN_NAMESPACE}" pod-security.kubernetes.io/enforce=privileged
     kubectl label ns default "${LONGHORN_NAMESPACE}" pod-security.kubernetes.io/enforce-version=latest
     kubectl label ns default "${LONGHORN_NAMESPACE}" pod-security.kubernetes.io/audit=privileged
diff --git a/pipelines/utilities/install_litmus.sh b/pipelines/utilities/install_litmus.sh
deleted file mode 100755
index b250a19ee6..0000000000
--- a/pipelines/utilities/install_litmus.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-install_litmus(){
-  kubectl apply -f https://litmuschaos.github.io/litmus/litmus-operator-v1.13.8.yaml
-}
-
-install_experiments(){
-  kubectl apply -f https://raw.githubusercontent.com/litmuschaos/chaos-charts/v1.13.x/charts/kube-aws/ec2-terminate-by-id/experiment.yaml
-  kubectl apply -f https://raw.githubusercontent.com/litmuschaos/chaos-charts/v1.13.x/charts/kube-aws/ec2-terminate-by-id/rbac.yaml
-}
-
-create_cloud_secret(){
-  sed -i "s%aws_access_key_id =%aws_access_key_id = ${TF_VAR_lh_aws_access_key}%g" "pipelines/templates/cloud_secret.yml"
-  sed -i "s%aws_secret_access_key =%aws_secret_access_key = ${TF_VAR_lh_aws_secret_key}%g" "pipelines/templates/cloud_secret.yml"
-
-  kubectl apply -f "pipelines/templates/cloud_secret.yml"
-}
\ No newline at end of file
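
Talos enables the Pod Security admission controller with baseline enforcement by default, which is why the labeling branch above now also fires for DISTRO=talos: without the privileged override, Longhorn's privileged pods would be rejected. A quick spot-check after namespace creation, assuming LONGHORN_NAMESPACE is exported as in the surrounding scripts:

  # the pod-security.kubernetes.io/enforce label should read "privileged"
  kubectl get ns "${LONGHORN_NAMESPACE}" --show-labels
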
diff --git a/pipelines/utilities/kubeconfig.sh b/pipelines/utilities/kubeconfig.sh
index 2035ba75a4..b66ee418f1 100755
--- a/pipelines/utilities/kubeconfig.sh
+++ b/pipelines/utilities/kubeconfig.sh
@@ -1,7 +1,14 @@
 set_kubeconfig(){
   # rke2, rke and k3s all support amd64
   # but only k3s supports arm64
-  if [[ "${LONGHORN_TEST_CLOUDPROVIDER}" == "harvester" ]]; then
+  if [[ "${DISTRO}" == "talos" ]]; then
+    export KUBECONFIG="${PWD}/test_framework/terraform/${LONGHORN_TEST_CLOUDPROVIDER}/${DISTRO}/kubeconfig"
+    # wait for all 4 nodes (1 controlplane + 3 workers, the terraform defaults) to report Ready
+    until [ $(kubectl get node -o jsonpath='{.items[*].status.conditions}' | jq '.[] | select(.type == "Ready").status' | grep -ci true) -eq 4 ]; do
+      echo "waiting for talos cluster nodes to be running";
+      sleep 30;
+    done
+  elif [[ "${LONGHORN_TEST_CLOUDPROVIDER}" == "harvester" ]]; then
     export KUBECONFIG="${PWD}/test_framework/kube_config.yaml"
   elif [[ "${TF_VAR_arch}" == "amd64" ]]; then
     if [[ "${TF_VAR_k8s_distro_name}" == "rke" ]]; then
diff --git a/test_framework/scripts/kubeconfig.sh b/test_framework/scripts/kubeconfig.sh
index cc519a4ca4..a528f22aaa 100755
--- a/test_framework/scripts/kubeconfig.sh
+++ b/test_framework/scripts/kubeconfig.sh
@@ -1,5 +1,14 @@
 set_kubeconfig(){
-  if [[ "${TF_VAR_k8s_distro_name}" == "rke" ]]; then
+  if [[ "${DISTRO}" == "talos" ]]; then
+    export KUBECONFIG="test_framework/terraform/${LONGHORN_TEST_CLOUDPROVIDER}/${DISTRO}/kubeconfig"
+    # wait for all 4 nodes (1 controlplane + 3 workers, the terraform defaults) to report Ready
+    until [ $(kubectl get node -o jsonpath='{.items[*].status.conditions}' | jq '.[] | select(.type == "Ready").status' | grep -ci true) -eq 4 ]; do
+      echo "waiting for talos cluster nodes to be running";
+      sleep 30;
+    done
+  elif [[ "${LONGHORN_TEST_CLOUDPROVIDER}" == "harvester" ]]; then
+    export KUBECONFIG="test_framework/kube_config.yaml"
+  elif [[ "${TF_VAR_k8s_distro_name}" == "rke" ]]; then
     export KUBECONFIG="test_framework/kube_config_rke.yml"
   elif [[ "${TF_VAR_k8s_distro_name}" == "rke2" ]]; then
     export KUBECONFIG="test_framework/terraform/${LONGHORN_TEST_CLOUDPROVIDER}/${DISTRO}/rke2.yaml"
diff --git a/test_framework/scripts/longhorn-setup.sh b/test_framework/scripts/longhorn-setup.sh
index efc5b29a82..d9c5761ff2 100755
--- a/test_framework/scripts/longhorn-setup.sh
+++ b/test_framework/scripts/longhorn-setup.sh
@@ -318,7 +318,7 @@ install_longhorn_stable(){
 
 create_longhorn_namespace(){
   kubectl create ns ${LONGHORN_NAMESPACE}
-  if [[ "${TF_VAR_cis_hardening}" == true ]]; then
+  if [[ "${TF_VAR_cis_hardening}" == true ]] || [[ "${DISTRO}" == "talos" ]]; then
     kubectl label ns default ${LONGHORN_NAMESPACE} pod-security.kubernetes.io/enforce=privileged
     kubectl label ns default ${LONGHORN_NAMESPACE} pod-security.kubernetes.io/enforce-version=latest
     kubectl label ns default ${LONGHORN_NAMESPACE} pod-security.kubernetes.io/audit=privileged
@@ -352,7 +352,7 @@ create_aws_secret(){
 }
 
 longhornctl_check(){
-  curl -L https://github.com/longhorn/cli/releases/download/v1.7.0-rc2/longhornctl-linux-amd64 -o longhornctl
+  curl -L https://github.com/longhorn/cli/releases/download/v1.7.1-rc2/longhornctl-linux-amd64 -o longhornctl
   chmod +x longhornctl
   ./longhornctl install preflight
   ./longhornctl check preflight
@@ -523,7 +523,7 @@ main(){
 
   # msg="failed to get package manager" error="operating systems (amzn, sl-micro) are not supported"
   if [[ "${TF_VAR_k8s_distro_name}" != "eks" ]] && \
-     [[ "${DISTRO}" != "sle-micro" ]]; then
+     [[ "${DISTRO}" != "sle-micro" ]] && [[ "${DISTRO}" != "talos" ]]; then
     longhornctl_check
   fi
 
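
Both set_kubeconfig() variants above hard-code the expected Ready count to 4, which silently couples the wait loop to the default cluster shape of 1 controlplane plus 3 workers. A count-agnostic variant is a small change; the hypothetical wait_for_talos_nodes() helper below is a sketch that assumes the pipeline exports the same TF_VAR_lh_aws_instance_count_* values the Terraform module consumes:

  wait_for_talos_nodes(){
    # fall back to the terraform defaults (1 controlplane + 3 workers) when unset
    local expected=$(( ${TF_VAR_lh_aws_instance_count_controlplane:-1} + ${TF_VAR_lh_aws_instance_count_worker:-3} ))
    # same readiness probe as above: count nodes whose Ready condition is "True"
    until [ "$(kubectl get node -o jsonpath='{.items[*].status.conditions}' | jq '.[] | select(.type == "Ready").status' | grep -ci true)" -eq "${expected}" ]; do
      echo "waiting for talos cluster nodes to be running"
      sleep 30
    done
  }
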
diff --git a/test_framework/terraform/aws/talos/aws.yaml b/test_framework/terraform/aws/talos/aws.yaml
new file mode 100644
index 0000000000..9a5804296f
--- /dev/null
+++ b/test_framework/terraform/aws/talos/aws.yaml
@@ -0,0 +1,9 @@
+# create custom ami following
+# https://www.talos.dev/v1.6/talos-guides/install/cloud-platforms/aws/#create-your-own-amis
+# and
+# https://www.talos.dev/v1.7/talos-guides/install/boot-assets/#example-aws-with-image-factory
+customization:
+  systemExtensions:
+    officialExtensions:
+      - siderolabs/iscsi-tools
+      - siderolabs/util-linux-tools
diff --git a/test_framework/terraform/aws/talos/data.tf b/test_framework/terraform/aws/talos/data.tf
new file mode 100644
index 0000000000..dea03aa12a
--- /dev/null
+++ b/test_framework/terraform/aws/talos/data.tf
@@ -0,0 +1,8 @@
+data "aws_ami" "talos" {
+  most_recent = true
+  filter {
+    name   = "name"
+    values = ["talos-v${var.os_distro_version}-${var.arch}"]
+  }
+  owners = [var.aws_ami_talos_account_number]
+}
diff --git a/test_framework/terraform/aws/talos/main.tf b/test_framework/terraform/aws/talos/main.tf
new file mode 100644
index 0000000000..3fd6611482
--- /dev/null
+++ b/test_framework/terraform/aws/talos/main.tf
@@ -0,0 +1,250 @@
+terraform {
+  required_providers {
+    aws = {
+      source  = "hashicorp/aws"
+      version = ">= 4.0"
+    }
+    talos = {
+      source  = "siderolabs/talos"
+      version = ">= 0.4.0"
+    }
+  }
+}
+
+provider "aws" {
+  region     = var.aws_region
+  access_key = var.lh_aws_access_key
+  secret_key = var.lh_aws_secret_key
+}
+
+# Create a random string suffix for instance names
+resource "random_string" "random_suffix" {
+  length  = 8
+  special = false
+  lower   = true
+  upper   = false
+}
+
+# Create a VPC
+resource "aws_vpc" "lh_aws_vpc" {
+  cidr_block = "10.0.0.0/16"
+
+  tags = {
+    Name  = "${var.lh_aws_vpc_name}-${random_string.random_suffix.id}"
+    Owner = var.resources_owner
+  }
+}
+
+# Create security group
+resource "aws_security_group" "lh_aws_secgrp" {
+  name        = "lh_aws_secgrp"
+  description = "Allow all inbound traffic"
+  vpc_id      = aws_vpc.lh_aws_vpc.id
+
+  egress {
+    from_port   = 0
+    to_port     = 0
+    protocol    = "-1"
+    description = "Egress everywhere"
+    cidr_blocks = ["0.0.0.0/0"]
+  }
+
+  ingress {
+    from_port   = 0
+    to_port     = 0
+    protocol    = "-1"
+    description = "Ingress everywhere"
+    cidr_blocks = ["0.0.0.0/0"]
+  }
+
+  tags = {
+    Name  = "lh_aws_sec_grp-${random_string.random_suffix.id}"
+    Owner = var.resources_owner
+  }
+}
+
+# Create subnet
+resource "aws_subnet" "lh_aws_subnet" {
+  vpc_id            = aws_vpc.lh_aws_vpc.id
+  availability_zone = var.aws_availability_zone
+  cidr_block        = "10.0.1.0/24"
+
+  tags = {
+    Name  = "lh_subnet-${random_string.random_suffix.id}"
+    Owner = var.resources_owner
+  }
+}
+
+# Create internet gateway
+resource "aws_internet_gateway" "lh_aws_igw" {
+  vpc_id = aws_vpc.lh_aws_vpc.id
+
+  tags = {
+    Name  = "lh_igw-${random_string.random_suffix.id}"
+    Owner = var.resources_owner
+  }
+}
+
+# Create route table for subnet
+resource "aws_route_table" "lh_aws_rt" {
+  depends_on = [
+    aws_internet_gateway.lh_aws_igw,
+  ]
+
+  vpc_id = aws_vpc.lh_aws_vpc.id
+
+  route {
+    cidr_block = "0.0.0.0/0"
+    gateway_id = aws_internet_gateway.lh_aws_igw.id
+  }
+
+  tags = {
+    Name  = "lh_aws_rt-${random_string.random_suffix.id}"
+    Owner = var.resources_owner
+  }
+}
+
+# Associate subnet to route table
+resource "aws_route_table_association" "lh_aws_subnet_rt_association" {
+  depends_on = [
+    aws_subnet.lh_aws_subnet,
+    aws_route_table.lh_aws_rt
+  ]
+
+  subnet_id      = aws_subnet.lh_aws_subnet.id
+  route_table_id = aws_route_table.lh_aws_rt.id
+}
+
+resource "aws_instance" "lh_aws_instance_controlplane" {
+  count = var.lh_aws_instance_count_controlplane
+
+  ami           = data.aws_ami.talos.id
+  instance_type = var.lh_aws_instance_type_controlplane
+
+  subnet_id                   = aws_subnet.lh_aws_subnet.id
+  associate_public_ip_address = true
+  vpc_security_group_ids      = [aws_security_group.lh_aws_secgrp.id]
+
+  root_block_device {
+    delete_on_termination = true
+    volume_size           = var.lh_aws_instance_root_block_device_size_controlplane
+  }
+
+  tags = {
+    Name        = "${var.lh_aws_instance_name_controlplane}-${count.index}-${random_string.random_suffix.id}"
+    DoNotDelete = "true"
+    Owner       = var.resources_owner
+  }
+}
+
+resource "aws_instance" "lh_aws_instance_worker" {
+  count = var.lh_aws_instance_count_worker
+
+  ami           = data.aws_ami.talos.id
+  instance_type = var.lh_aws_instance_type_worker
+
+  subnet_id                   = aws_subnet.lh_aws_subnet.id
+  associate_public_ip_address = true
+  vpc_security_group_ids      = [aws_security_group.lh_aws_secgrp.id]
+
+  root_block_device {
+    delete_on_termination = true
+    volume_size           = var.lh_aws_instance_root_block_device_size_worker
+  }
+
+  tags = {
+    Name        = "${var.lh_aws_instance_name_worker}-${count.index}-${random_string.random_suffix.id}"
+    DoNotDelete = "true"
+    Owner       = var.resources_owner
+  }
+}
+
+resource "talos_machine_secrets" "machine_secrets" {}
+
+data "talos_machine_configuration" "controlplane" {
+
+  depends_on = [ aws_instance.lh_aws_instance_controlplane ]
+
+  count = var.lh_aws_instance_count_controlplane
+
+  cluster_name       = "lh-tests-cluster"
+  cluster_endpoint   = "https://${aws_instance.lh_aws_instance_controlplane[0].public_ip}:6443"
+  machine_type       = "controlplane"
+  machine_secrets    = talos_machine_secrets.machine_secrets.machine_secrets
+  kubernetes_version = var.k8s_distro_version
+  talos_version      = "v${var.os_distro_version}"
+  docs               = false
+  examples           = false
+  config_patches = [
+    file("${path.module}/talos-patch.yaml")
+  ]
+}
+
+data "talos_machine_configuration" "worker" {
+
+  depends_on = [ aws_instance.lh_aws_instance_controlplane ]
+
+  count = var.lh_aws_instance_count_worker
+
+  cluster_name       = "lh-tests-cluster"
+  cluster_endpoint   = "https://${aws_instance.lh_aws_instance_controlplane[0].public_ip}:6443"
+  machine_type       = "worker"
+  machine_secrets    = talos_machine_secrets.machine_secrets.machine_secrets
+  kubernetes_version = var.k8s_distro_version
+  talos_version      = "v${var.os_distro_version}"
+  docs               = false
+  examples           = false
+  config_patches = [
+    file("${path.module}/talos-patch-worker.yaml")
+  ]
+}
+
+resource "talos_machine_configuration_apply" "controlplane" {
+  count = var.lh_aws_instance_count_controlplane
+
+  client_configuration        = talos_machine_secrets.machine_secrets.client_configuration
+  machine_configuration_input = data.talos_machine_configuration.controlplane[count.index].machine_configuration
+  endpoint                    = aws_instance.lh_aws_instance_controlplane[count.index].public_ip
+  node                        = aws_instance.lh_aws_instance_controlplane[count.index].private_ip
+}
+
+resource "talos_machine_configuration_apply" "worker" {
+  count = var.lh_aws_instance_count_worker
+
+  client_configuration        = talos_machine_secrets.machine_secrets.client_configuration
+  machine_configuration_input = data.talos_machine_configuration.worker[count.index].machine_configuration
+  endpoint                    = aws_instance.lh_aws_instance_worker[count.index].public_ip
+  node                        = aws_instance.lh_aws_instance_worker[count.index].private_ip
+}
+
+resource "talos_machine_bootstrap" "this" {
+  depends_on = [talos_machine_configuration_apply.controlplane]
+
+  client_configuration = talos_machine_secrets.machine_secrets.client_configuration
+  endpoint             = aws_instance.lh_aws_instance_controlplane[0].public_ip
+  node                 = aws_instance.lh_aws_instance_controlplane[0].private_ip
+}
+
+data "talos_client_configuration" "this" {
+  cluster_name         = "lh-tests-cluster"
+  client_configuration = talos_machine_secrets.machine_secrets.client_configuration
+  endpoints            = aws_instance.lh_aws_instance_controlplane[*].public_ip
+}
+
+resource "local_file" "talosconfig" {
+  content  = nonsensitive(data.talos_client_configuration.this.talos_config)
+  filename = "talos_k8s_config"
+}
+
+data "talos_cluster_kubeconfig" "this" {
+  depends_on = [talos_machine_bootstrap.this]
+
+  client_configuration = talos_machine_secrets.machine_secrets.client_configuration
+  endpoint             = aws_instance.lh_aws_instance_controlplane[0].public_ip
+  node                 = aws_instance.lh_aws_instance_controlplane[0].private_ip
+}
+
+resource "local_file" "kubeconfig" {
+  content  = nonsensitive(data.talos_cluster_kubeconfig.this.kubeconfig_raw)
+  filename = "kubeconfig"
+}
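
A single terraform apply of the module above provisions the EC2 instances, pushes the machine configs, bootstraps etcd, and writes talos_k8s_config and kubeconfig into the working directory via the local_file resources. A manual smoke test of a fresh cluster, sketched against the module's own outputs:

  CONTROLPLANE_IP="$(terraform output -raw controlplane_public_ip)"
  # Talos-level health check against the controlplane node
  talosctl --talosconfig ./talos_k8s_config -n "${CONTROLPLANE_IP}" health
  # Kubernetes-level view through the generated kubeconfig
  KUBECONFIG=./kubeconfig kubectl get nodes -o wide
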
diff --git a/test_framework/terraform/aws/talos/output.tf b/test_framework/terraform/aws/talos/output.tf
new file mode 100644
index 0000000000..e45ef4a816
--- /dev/null
+++ b/test_framework/terraform/aws/talos/output.tf
@@ -0,0 +1,39 @@
+output "instance_mapping" {
+  depends_on = [
+    aws_instance.lh_aws_instance_controlplane,
+    aws_instance.lh_aws_instance_worker,
+  ]
+
+  value = jsonencode(
+    concat(
+      [
+        for controlplane_instance in aws_instance.lh_aws_instance_controlplane : {
+          "name": controlplane_instance.private_dns,
+          "id": controlplane_instance.id
+        }
+
+      ],
+      [
+        for worker_instance in aws_instance.lh_aws_instance_worker : {
+          "name": worker_instance.private_dns,
+          "id": worker_instance.id
+        }
+      ]
+    )
+  )
+}
+
+output "controlplane_public_ip" {
+  depends_on = [
+    aws_instance.lh_aws_instance_controlplane
+  ]
+  value = aws_instance.lh_aws_instance_controlplane[0].public_ip
+}
+
+output "resource_suffix" {
+  depends_on = [
+    random_string.random_suffix
+  ]
+
+  value = random_string.random_suffix.id
+}
diff --git a/test_framework/terraform/aws/talos/talos-patch-worker.yaml b/test_framework/terraform/aws/talos/talos-patch-worker.yaml
new file mode 100644
index 0000000000..9c26d7f6a6
--- /dev/null
+++ b/test_framework/terraform/aws/talos/talos-patch-worker.yaml
@@ -0,0 +1,10 @@
+machine:
+  kubelet:
+    extraMounts:
+      - destination: /var/lib/longhorn
+        type: bind
+        source: /var/lib/longhorn
+        options:
+          - bind
+          - rshared
+          - rw
diff --git a/test_framework/terraform/aws/talos/talos-patch.yaml b/test_framework/terraform/aws/talos/talos-patch.yaml
new file mode 100644
index 0000000000..8d21db65c4
--- /dev/null
+++ b/test_framework/terraform/aws/talos/talos-patch.yaml
@@ -0,0 +1,13 @@
+machine:
+  type: init
+  kubelet:
+    extraMounts:
+      - destination: /var/lib/longhorn
+        type: bind
+        source: /var/lib/longhorn
+        options:
+          - bind
+          - rshared
+          - rw
+cluster:
+  allowSchedulingOnControlPlanes: false
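
These two patches are what make the nodes Longhorn-capable: /var/lib/longhorn is bind-mounted into the kubelet with rshared propagation, and the AMI built from aws.yaml bakes in the iscsi-tools and util-linux-tools system extensions that Longhorn needs for iSCSI and related disk utilities. Whether the extensions actually landed in the image can be verified per node; a sketch, with WORKER_IP standing in as a placeholder for any worker node's address:

  WORKER_IP="..."  # placeholder: fill in a worker address
  # expect siderolabs/iscsi-tools and siderolabs/util-linux-tools in the list
  talosctl --talosconfig ./talos_k8s_config -n "${WORKER_IP}" get extensions
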
diff --git a/test_framework/terraform/aws/talos/variables.tf b/test_framework/terraform/aws/talos/variables.tf
new file mode 100644
index 0000000000..da1c35eb04
--- /dev/null
+++ b/test_framework/terraform/aws/talos/variables.tf
@@ -0,0 +1,102 @@
+variable "lh_aws_access_key" {
+  type        = string
+  description = "AWS ACCESS_KEY"
+}
+
+variable "lh_aws_secret_key" {
+  type        = string
+  description = "AWS SECRET_KEY"
+}
+
+variable "aws_region" {
+  type    = string
+  default = "us-east-1"
+}
+
+variable "aws_availability_zone" {
+  type    = string
+  default = "us-east-1a"
+}
+
+variable "lh_aws_vpc_name" {
+  type    = string
+  default = "vpc-lh-tests"
+}
+
+variable "arch" {
+  type        = string
+  description = "available values (amd64, arm64)"
+  default     = "amd64"
+}
+
+variable "os_distro_version" {
+  type    = string
+  default = "1.7.6"
+}
+
+variable "aws_ami_talos_account_number" {
+  type    = string
+  default = "641769369267"
+}
+
+variable "lh_aws_instance_count_controlplane" {
+  type    = number
+  default = 1
+}
+
+variable "lh_aws_instance_count_worker" {
+  type    = number
+  default = 3
+}
+
+variable "lh_aws_instance_name_controlplane" {
+  type    = string
+  default = "lh-tests-controlplane"
+}
+
+variable "lh_aws_instance_name_worker" {
+  type    = string
+  default = "lh-tests-worker"
+}
+
+variable "lh_aws_instance_type_controlplane" {
+  type        = string
+  description = "Recommended instance types t2.xlarge for amd64 & a1.xlarge for arm64"
+  default     = "t2.xlarge"
+}
+
+variable "lh_aws_instance_type_worker" {
+  type        = string
+  description = "Recommended instance types t2.xlarge for amd64 & a1.xlarge for arm64"
+  default     = "t2.xlarge"
+}
+
+variable "lh_aws_instance_root_block_device_size_controlplane" {
+  type    = number
+  default = 40
+}
+
+variable "lh_aws_instance_root_block_device_size_worker" {
+  type    = number
+  default = 40
+}
+
+variable "k8s_distro_version" {
+  type    = string
+  default = "v1.30.0"
+}
+
+variable "use_hdd" {
+  type    = bool
+  default = false
+}
+
+variable "create_load_balancer" {
+  type    = bool
+  default = false
+}
+
+variable "resources_owner" {
+  type    = string
+  default = "longhorn-infra"
+}
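
Taken together, a Talos pipeline run reduces to the flow below; this is a sketch that assumes the AWS credentials are already exported as TF_VAR_lh_aws_access_key / TF_VAR_lh_aws_secret_key, the way the other distros are driven:

  cd test_framework/terraform/aws/talos
  terraform init && terraform apply -auto-approve

  # the patched set_kubeconfig() resolves the same file when DISTRO=talos
  export DISTRO=talos LONGHORN_TEST_CLOUDPROVIDER=aws
  export KUBECONFIG="${PWD}/kubeconfig"
  kubectl get nodes -o wide
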