diff --git a/.github/workflows/terraform.yml b/.github/workflows/terraform.yml
index 55ca2b65..e48f20c5 100644
--- a/.github/workflows/terraform.yml
+++ b/.github/workflows/terraform.yml
@@ -4,7 +4,7 @@ on:
   pull_request:

 permissions:
-  contents: read
+  contents: write

 concurrency:
   group: project-e00pjzzrtk1fs3yavy
@@ -22,10 +22,11 @@ jobs:
       max-parallel: 2
       matrix:
         solution:
+          - name: compute-testing
+          - name: slurm
           - name: k8s-inference
           - name: k8s-training
-          - name: slurm
-          - name: compute-testing
+          - name: wireguard

     defaults:
       run:
@@ -61,6 +62,9 @@ jobs:
           > ~/.s3cfg
           mkdir -p tests/reports

+      - name: Install XMLStarlet
+        run: sudo apt-get update && sudo apt-get install -y xmlstarlet
+
       - name: Install Nebius CLI
         run: |
           curl -sSL https://storage.ai.nebius.cloud/nebius/install.sh | bash
@@ -107,18 +111,50 @@ jobs:

       # Run Terraform Tests
       - name: Terraform Test
-        run: terraform test -verbose -junit-xml=tests/reports/TEST-result-${{ github.run_id }}.xml
+        run: terraform test -junit-xml=tests/reports/TEST-result-${{ github.run_id }}.xml

-      - name: Test Summary
-        uses: test-summary/action@v2
-        with:
-          paths: ${{ matrix.solution.name }}/tests/reports/TEST-result-${{ github.run_id }}.xml
-        if: always()
+      # Stamp every suite and test case with one shared timestamp. Runs even
+      # when the tests fail, so the uploaded and published report is stamped too.
+      - name: Set date in report
+        if: always()
+        run: |
+          timestamp="$(date --iso-8601=seconds)"
+          xmlstarlet ed \
+            --inplace \
+            -i '/testsuites' -t attr -n timestamp -v "$timestamp" \
+            -i '/testsuites/testsuite[*]' -t attr -n timestamp -v "$timestamp" \
+            -i '/testsuites/testsuite[*]/testcase' -t attr -n timestamp -v "$timestamp" \
+            tests/reports/TEST-result-${{ github.run_id }}.xml

       - name: Upload test results
         run: s3cmd sync tests/reports s3://terraform-test-reports/${{ matrix.solution.name }}/
         if: always()

+      - name: Load test report history
+        uses: actions/checkout@v4
+        if: always()
+        continue-on-error: true
+        with:
+          ref: gh-pages
+          path: gh-pages
+
+      - name: Build test report
+        uses: simple-elf/allure-report-action@v1.9
+        if: always()
+        with:
+          gh_pages: gh-pages
+          subfolder: ${{ matrix.solution.name }}
+          allure_results: ${{ matrix.solution.name }}/tests/reports
+
+      - name: Publish test report
+        uses: peaceiris/actions-gh-pages@v4
+        if: always()
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          publish_branch: gh-pages
+          publish_dir: allure-history
+          keep_files: true
+
   cleanup-infra:
     name: 'Cleanup Infra'
     environment:
diff --git a/compute-testing/tests/main.tftest.hcl b/compute-testing/tests/main.tftest.hcl
index 35a1861e..4a0b1e8e 100644
--- a/compute-testing/tests/main.tftest.hcl
+++ b/compute-testing/tests/main.tftest.hcl
@@ -1,3 +1,3 @@
-run "create_cluster" {
+run "compute_testing_plan" {
   command = plan
 }
diff --git a/k8s-inference/tests/main.tftest.hcl b/k8s-inference/tests/main.tftest.hcl
index 523066e7..18844ab4 100644
--- a/k8s-inference/tests/main.tftest.hcl
+++ b/k8s-inference/tests/main.tftest.hcl
@@ -1,4 +1,27 @@
-run "create_cluster" {
+run "k8s_inference_apply" {
+  command = apply
+  plan_options {
+    target = [
+      nebius_mk8s_v1_cluster.k8s-cluster
+    ]
+  }
+}
+
+run "k8s_node_groups_inference_apply" {
+  command = apply
+  plan_options {
+    target = [
+      nebius_mk8s_v1_node_group.cpu-only,
+      nebius_mk8s_v1_node_group.gpu
+    ]
+  }
+}
+
+run "full_inference_apply" {
+  command = apply
+}
+
+run "test_mode_k8s_inference_apply" {
   command = apply

   variables {
diff --git a/k8s-training/tests/main.tftest.hcl b/k8s-training/tests/main.tftest.hcl
index 6238e7bb..1f204bd3 100644
--- a/k8s-training/tests/main.tftest.hcl
+++ b/k8s-training/tests/main.tftest.hcl
@@ -1,4 +1,31 @@
-run "create_cluster" {
+run "k8s_training_apply" {
+  command = apply
+  plan_options {
+    target = [
+      nebius_mk8s_v1_cluster.k8s-cluster
+    ]
+  }
+}
+
+run "k8s_node_groups_training_apply" {
+  command = apply
+  plan_options {
+    target = [
+      nebius_mk8s_v1_node_group.cpu-only,
+      nebius_mk8s_v1_node_group.gpu
+    ]
+  }
+}
+
+run "full_training_apply" {
+  command = apply
+
+  variables {
+    enable_loki = false # TODO: Disabling Loki since not possible to delete non-empty storage bucket
+  }
+}
+
+run "test_mode_k8s_training_apply" {
   command = apply

   variables {
diff --git a/slurm/tests/main.tftest.hcl b/slurm/tests/main.tftest.hcl
index 66d45c39..6847e79c 100644
--- a/slurm/tests/main.tftest.hcl
+++ b/slurm/tests/main.tftest.hcl
@@ -1,4 +1,26 @@
-run "create_cluster" {
+run "slurm_master_apply" {
+  command = apply
+
+  variables {
+    cluster_workers_count = 2
+  }
+
+  plan_options {
+    target = [
+      nebius_compute_v1_instance.master
+    ]
+  }
+}
+
+run "slurm_full_apply" {
+  command = apply
+
+  variables {
+    cluster_workers_count = 2
+  }
+}
+
+run "test_mode_slurm_apply" {
   command = apply

   variables {
diff --git a/tmp-tests/pv-pvc.yaml b/tmp-tests/pv-pvc.yaml
deleted file mode 100644
index b9ab7a02..00000000
--- a/tmp-tests/pv-pvc.yaml
+++ /dev/null
@@ -1,26 +0,0 @@
-kind: PersistentVolume
-apiVersion: v1
-metadata:
-  name: store-persistent-volume
-spec:
-  storageClassName: hostpath
-  capacity:
-    storage: 2Gi
-  accessModes:
-    - ReadWriteMany
-  hostPath:
-    path: "/mnt/filestore/pvc"
-
----
-
-kind: PersistentVolumeClaim
-apiVersion: v1
-metadata:
-  name: store-persistent-volume-claim
-spec:
-  storageClassName: hostpath
-  accessModes:
-    - ReadWriteMany
-  resources:
-    requests:
-      storage: 2Gi
\ No newline at end of file
diff --git a/tmp-tests/pvc-users.yaml b/tmp-tests/pvc-users.yaml
deleted file mode 100644
index 8b24ef72..00000000
--- a/tmp-tests/pvc-users.yaml
+++ /dev/null
@@ -1,50 +0,0 @@
-apiVersion: v1
-kind: Pod
-metadata:
-  name: ubuntu-pod-1
-spec:
-  containers:
-    - name: ubuntu
-      image: ubuntu:22.04
-      command: [ "sh", "-c", "sleep infinity" ]
-      volumeMounts:
-        - mountPath: /data
-          name: store-volume
-  volumes:
-    - name: store-volume
-      persistentVolumeClaim:
-        claimName: store-persistent-volume-claim
----
-apiVersion: v1
-kind: Pod
-metadata:
-  name: ubuntu-pod-2
-spec:
-  containers:
-    - name: ubuntu
-      image: ubuntu:22.04
-      command: [ "sh", "-c", "sleep infinity" ]
-      volumeMounts:
-        - mountPath: /data
-          name: store-volume
-  volumes:
-    - name: store-volume
-      persistentVolumeClaim:
-        claimName: store-persistent-volume-claim
----
-apiVersion: v1
-kind: Pod
-metadata:
-  name: ubuntu-pod-3
-spec:
-  containers:
-    - name: ubuntu
-      image: ubuntu:22.04
-      command: [ "sh", "-c", "sleep infinity" ]
-      volumeMounts:
-        - mountPath: /data
-          name: store-volume
-  volumes:
-    - name: store-volume
-      persistentVolumeClaim:
-        claimName: store-persistent-volume-claim
diff --git a/wireguard/main.tf b/wireguard/main.tf
index 5a3750b1..4bc7ba4d 100644
--- a/wireguard/main.tf
+++ b/wireguard/main.tf
@@ -1,6 +1,6 @@
-resource "nebius_compute_v1_instance" "wireguard-instanse" {
+resource "nebius_compute_v1_instance" "wireguard_instance" {
   parent_id = var.parent_id
-  name      = "wireguard-instanse"
+  name      = "wireguard-instance"

   boot_disk = {
     attach_mode = "READ_WRITE"
@@ -9,16 +9,18 @@ resource "nebius_compute_v1_instance" "wireguard-instanse" {

   network_interfaces = [
     {
-      name              = "eth0"
-      subnet_id         = var.subnet_id
-      ip_address        = {}
-      public_ip_address = var.public_ip_allocation_id != null ? { allocation_id = var.public_ip_allocation_id } : {}
+      name       = "eth0"
+      subnet_id  = var.subnet_id
+      ip_address = {}
+      public_ip_address = {
+        allocation_id = var.public_ip_allocation_id
+      }
     }
   ]

   resources = {
     platform = "cpu-e2"
-    preset   = "16vcpu-64gb"
+    preset   = "4vcpu-16gb"
   }


diff --git a/wireguard/output.tf b/wireguard/output.tf
new file mode 100644
index 00000000..95e4555f
--- /dev/null
+++ b/wireguard/output.tf
@@ -0,0 +1,4 @@
+output "wg_instance_pib" {
+  description = "Public IP address of the WireGuard instance with the \"/32\" suffix removed."
+  value = trimsuffix(nebius_compute_v1_instance.wireguard_instance.status.network_interfaces[0].public_ip_address.address, "/32")
+}
diff --git a/wireguard/terraform.tfvars b/wireguard/terraform.tfvars
index efdfc882..38986d48 100644
--- a/wireguard/terraform.tfvars
+++ b/wireguard/terraform.tfvars
@@ -1,5 +1,8 @@
-#parent_id = ""
-#subnet_id = ""
-#ssh_user_name = ""
-#ssh_public_key = ""
-#public_ip_allocation_id = ""
+# parent_id = ""
+# subnet_id = ""
+# ssh_user_name = "ubuntu"
+# ssh_public_key = {
+# key = "put your public ssh key here"
+# path = "put path to ssh key here"
+# }
+# public_ip_allocation_id = ""
diff --git a/wireguard/test-resource.tf b/wireguard/test-resource.tf
new file mode 100644
index 00000000..7743604a
--- /dev/null
+++ b/wireguard/test-resource.tf
@@ -0,0 +1,37 @@
+# Public IP address of the WireGuard instance with the "/32" suffix removed.
+locals {
+  test_wg_host = trimsuffix(nebius_compute_v1_instance.wireguard_instance.status.network_interfaces[0].public_ip_address.address, "/32")
+}
+
+# Test mode only: connects to the instance as "ubuntu", waits for cloud-init,
+# then checks the wg0 interface and the wg-quick@wg0 service.
+resource "null_resource" "check_wireguard_instance" {
+  count = var.test_mode ? 1 : 0
+
+  connection {
+    user = "ubuntu"
+    host = local.test_wg_host
+  }
+
+  provisioner "remote-exec" {
+    inline = [
+      "set -eu",
+      "cloud-init status --wait",
+      "ip link show wg0",
+      "systemctl -q status wg-quick@wg0.service > /dev/null",
+    ]
+  }
+}
+
+
+# Test mode only: requests port 5000 on the instance after the check above.
+resource "null_resource" "check_wireguard_web_ui" {
+  depends_on = [null_resource.check_wireguard_instance]
+  count      = var.test_mode ? 1 : 0
+
+  provisioner "local-exec" {
+    interpreter = ["bash", "-c"]
+    # --fail makes curl exit non-zero on HTTP error responses (>= 400).
+    command     = "sleep 15 && curl --fail ${local.test_wg_host}:5000"
+  }
+}
diff --git a/wireguard/tests/main.tftest.hcl b/wireguard/tests/main.tftest.hcl
new file mode 100644
index 00000000..f8ebc7af
--- /dev/null
+++ b/wireguard/tests/main.tftest.hcl
@@ -0,0 +1,11 @@
+run "wireguard_apply" {
+  command = apply
+}
+
+run "test_mode_wireguard_apply" {
+  command = apply
+
+  variables {
+    test_mode = true
+  }
+}
diff --git a/wireguard/variables.tf b/wireguard/variables.tf
index 388d440f..8d14ea3b 100644
--- a/wireguard/variables.tf
+++ b/wireguard/variables.tf
@@ -17,7 +17,7 @@ variable "ssh_user_name" {
 }

 variable "ssh_public_key" {
-  description = "SSH Public Key to access the cluster nodes"
+  description = "SSH Public Key to access the cluster nodes."
   type = object({
     key  = optional(string),
     path = optional(string, "~/.ssh/id_rsa.pub")
@@ -29,10 +29,15 @@ variable "ssh_public_key" {
   }
 }

-
 # Access By IP
 variable "public_ip_allocation_id" {
   description = "Id of a manually created public_ip_allocation."
   type        = string
   default     = null
 }
+
+variable "test_mode" {
+  description = "Switch between real usage and testing."
+  type        = bool
+  default     = false
+}