From 6d767dc483bca2d7c9ce3b7762a2e6916bc99b3a Mon Sep 17 00:00:00 2001 From: Chin-Ya Huang Date: Mon, 16 Dec 2024 12:54:04 +0800 Subject: [PATCH 1/5] chore(robot): cleanup - Fix test case names to follow a consistent format. - Remove redundant spaces. Signed-off-by: Chin-Ya Huang --- e2e/tests/negative/component_resilience.robot | 24 +++++++++---------- e2e/tests/negative/node_drain.robot | 8 +++---- .../pull_backup_from_another_longhorn.robot | 6 ++--- e2e/tests/negative/replica_rebuilding.robot | 4 ++-- e2e/tests/negative/test_backup_listing.robot | 4 ++-- .../test_persistentvolumeclaim.robot | 2 +- e2e/tests/regression/test_volume.robot | 4 ++-- 7 files changed, 26 insertions(+), 26 deletions(-) diff --git a/e2e/tests/negative/component_resilience.robot b/e2e/tests/negative/component_resilience.robot index 3d959ed4d..ed07b9853 100644 --- a/e2e/tests/negative/component_resilience.robot +++ b/e2e/tests/negative/component_resilience.robot @@ -29,12 +29,12 @@ Delete instance-manager of volume ${volume_id} and wait for recover Delete instance-manager of deployment ${deployment_id} volume and wait for recover When Delete instance-manager of deployment ${deployment_id} volume And Wait for volume of deployment ${deployment_id} attached and degraded - And Wait for volume of deployment ${deployment_id} healthy + And Wait for volume of deployment ${deployment_id} healthy And Wait for deployment ${deployment_id} pods stable And Check deployment ${deployment_id} data in file data.txt is intact *** Test Cases *** -Test Longhorn components recovery +Test Longhorn Components Recovery [Documentation] -- Manual test plan -- ... Test data setup: ... Deploy Longhorn on a 3 nodes cluster. 
@@ -64,19 +64,19 @@ Test Longhorn components recovery And Attach volume 1 And Wait for volume 1 healthy And Write data to volume 1 - + When Create storageclass longhorn-test-1 with dataEngine=${DATA_ENGINE} And Create persistentvolumeclaim 1 using RWX volume with longhorn-test-1 storageclass And Create deployment 1 with persistentvolumeclaim 1 And Write 100 MB data to file data.txt in deployment 1 END - When Delete Longhorn DaemonSet longhorn-csi-plugin pod on node 1 + When Delete Longhorn DaemonSet longhorn-csi-plugin pod on node 1 And Delete Longhorn Deployment csi-attacher pod on node 1 And Delete Longhorn Deployment csi-provisioner pod on node 1 And Delete Longhorn Deployment csi-resizer pod on node 1 And Delete Longhorn Deployment csi-snapshotter pod on node 1 - And Delete Longhorn DaemonSet longhorn-manager pod on node 1 + And Delete Longhorn DaemonSet longhorn-manager pod on node 1 And Delete Longhorn DaemonSet engine-image pod on node 1 And Delete Longhorn component instance-manager pod on node 1 And Delete Longhorn Deployment longhorn-ui pod @@ -93,7 +93,7 @@ Test Longhorn components recovery And Check deployment 1 data in file data.txt is intact END -Test Longhorn volume recovery +Test Longhorn Volume Recovery [Documentation] -- Manual test plan -- ... Test data setup: ... Deploy Longhorn on a 3 nodes cluster. @@ -115,7 +115,7 @@ Test Longhorn volume recovery And Wait until volume 0 replica rebuilding started on replica node Then Delete instance-manager of volume 0 and wait for recover -Test Longhorn backing image volume recovery +Test Longhorn Backing Image Volume Recovery [Documentation] -- Manual test plan -- ... Test data setup: ... Deploy Longhorn on a 3 nodes cluster. @@ -127,7 +127,7 @@ Test Longhorn backing image volume recovery ... Test steps: ... Delete the IM of the volume and make sure volume recovers. Check the data as well. ... Start replica rebuilding for the aforementioned volume, and delete the IM-e while it is rebuilding. 
Verify the recovered volume. - ... Delete the backing image manager pod and verify the pod gets recreated. + ... Delete the backing image manager pod and verify the pod gets recreated. IF '${DATA_ENGINE}' == 'v1' When Create backing image bi with url=https://longhorn-backing-image.s3-us-west-1.amazonaws.com/parrot.qcow2 And Create volume 0 with backingImage=bi dataEngine=${DATA_ENGINE} @@ -135,7 +135,7 @@ Test Longhorn backing image volume recovery And Wait for volume 0 healthy And Write data to volume 0 Then Delete instance-manager of volume 0 and wait for recover - + When Delete volume 0 replica on replica node And Wait until volume 0 replica rebuilding started on replica node Then Delete instance-manager of volume 0 and wait for recover @@ -144,7 +144,7 @@ Test Longhorn backing image volume recovery Then Wait backing image managers running END -Test Longhorn dynamic provisioned RWX volume recovery +Test Longhorn Dynamic Provisioned RWX Volume Recovery [Documentation] -- Manual test plan -- ... Test data setup: ... Deploy Longhorn on a 3 nodes cluster. @@ -174,7 +174,7 @@ Test Longhorn dynamic provisioned RWX volume recovery And Check deployment 0 data in file data.txt is intact END -Test Longhorn dynamic provisioned RWO volume recovery +Test Longhorn Dynamic Provisioned RWO Volume Recovery [Documentation] -- Manual test plan -- ... Test data setup: ... Deploy Longhorn on a 3 nodes cluster. 
@@ -191,7 +191,7 @@ Test Longhorn dynamic provisioned RWO volume recovery And Create deployment 0 with persistentvolumeclaim 0 And Write 500 MB data to file data.txt in deployment 0 Then Delete instance-manager of deployment 0 volume and wait for recover - + When Delete replica of deployment 0 volume on replica node And Wait until volume of deployment 0 replica rebuilding started on replica node Then Delete instance-manager of deployment 0 volume and wait for recover diff --git a/e2e/tests/negative/node_drain.robot b/e2e/tests/negative/node_drain.robot index bdd1d5c45..bffcdc165 100644 --- a/e2e/tests/negative/node_drain.robot +++ b/e2e/tests/negative/node_drain.robot @@ -88,7 +88,7 @@ Force Drain Replica Node While Replica Rebuilding And Check deployment 1 data in file data.txt is intact END -Drain node with force +Drain Node With Force [Documentation] Drain node with force ... 1. Deploy a cluster contains 3 worker nodes N1, N2, N3. ... 2. Deploy Longhorn. @@ -117,7 +117,7 @@ Drain node with force And Check instance-manager pod is not running on drained node Then Check deployment 0 data in file data.txt is intact -Drain node without force +Drain Node Without Force [Documentation] Drain node without force ... 1. Cordon the node. Longhorn will automatically disable the node scheduling when a Kubernetes node is cordoned. ... 2. Evict all the replicas from the node. @@ -139,7 +139,7 @@ Drain node without force And Check instance-manager pod is not running on drained node Then Check deployment 0 data in file data.txt is intact -Test kubectl drain nodes for PVC/PV/LHV is created through Longhorn API +Test Kubectl Drain Nodes For PVC/PV/LHV Is Created Through Longhorn API [Documentation] Test kubectl drain nodes for PVC/PV/LHV is created through Longhorn API ... Given 1 PVC/PV/LHV created through Longhorn API And LHV is not yet attached/replicated. ... When kubectl drain nodes. 
@@ -153,7 +153,7 @@ Test kubectl drain nodes for PVC/PV/LHV is created through Longhorn API And Create persistentvolumeclaim for volume 0 And Force drain all nodes -Stopped replicas on deleted nodes should not be counted as healthy replicas when draining nodes +Stopped Replicas On Deleted Nodes Should Not Be Counted As Healthy Replicas When Draining Nodes [Documentation] Stopped replicas on deleted nodes should not be counted as healthy replicas when draining nodes ... When draining a node, the node will be set as unscheduled and all pods should be evicted. ... By Longhorn’s default settings, the replica will only be evicted if there is another healthy replica on the running node. diff --git a/e2e/tests/negative/pull_backup_from_another_longhorn.robot b/e2e/tests/negative/pull_backup_from_another_longhorn.robot index 5b2de7b8b..c067264cd 100644 --- a/e2e/tests/negative/pull_backup_from_another_longhorn.robot +++ b/e2e/tests/negative/pull_backup_from_another_longhorn.robot @@ -20,7 +20,7 @@ Test Setup Set test environment Test Teardown Cleanup test resources *** Test Cases *** -Pull backup created by another Longhorn system +Pull Backup Created By Another Longhorn System [Documentation] Pull backup created by another Longhorn system ... 1. Install test version of Longhorn. ... 2. Create volume, write data, and take backup. @@ -32,7 +32,7 @@ Pull backup created by another Longhorn system ... 8. Create volume, write data, and take backup. ... 9. Uninstall Longhorn. ... 10. Install test version of Longhorn. - ... 11. Restore the backup create in step 8 and verify the data. + ... 11. Restore the backup create in step 8 and verify the data. ... ... Important ... 
- This test case need have set environment variable manually first if not run on Jenkins @@ -49,7 +49,7 @@ Pull backup created by another Longhorn system And Attach volume 0 And Wait for volume 0 healthy And Write data 0 300 MB to volume 0 - When Create backup 0 for volume 0 + When Create backup 0 for volume 0 Then Verify backup list contains no error for volume 0 And Verify backup list contains backup 0 of volume 0 Then Uninstall Longhorn diff --git a/e2e/tests/negative/replica_rebuilding.robot b/e2e/tests/negative/replica_rebuilding.robot index 1a6cbf9ed..4d5d323ee 100644 --- a/e2e/tests/negative/replica_rebuilding.robot +++ b/e2e/tests/negative/replica_rebuilding.robot @@ -66,7 +66,7 @@ Reboot Replica Node While Replica Rebuilding And Check volume 0 data is intact END -Delete replicas one by one after the volume is healthy +Delete Replicas One By One After The Volume Is Healthy Given Create storageclass longhorn-test with dataEngine=${DATA_ENGINE} And Create persistentvolumeclaim 0 using RWO volume with longhorn-test storageclass And Create deployment 0 with persistentvolumeclaim 0 @@ -90,7 +90,7 @@ Delete replicas one by one after the volume is healthy Then Check deployment 0 data in file data.txt is intact END -Delete replicas one by one regardless of the volume health +Delete Replicas One By One Regardless Of The Volume Health [Documentation] Currently v2 data engine have a chance to hit ... https://github.com/longhorn/longhorn/issues/9216 and will be fixed ... in v1.9.0 diff --git a/e2e/tests/negative/test_backup_listing.robot b/e2e/tests/negative/test_backup_listing.robot index 58c2661aa..9849f71ca 100644 --- a/e2e/tests/negative/test_backup_listing.robot +++ b/e2e/tests/negative/test_backup_listing.robot @@ -127,7 +127,7 @@ Pod ${pod_id} data should same as volume ${source_volume_id} backup ${backup_id} ... msg="expected ${expected_checksum}, got ${current_checksum}!" 
*** Test Cases *** -Backup listing with more than 1000 backups +Backup Listing With More Than 1000 Backups [Tags] manual longhorn-8355 [Documentation] Test backup listing Given Create persistentvolumeclaim 0 using RWO volume @@ -139,7 +139,7 @@ Backup listing with more than 1000 backups Then Get deployment 1 volume data in file data And Volume 1 data should same as deployment 0 volume -Backup listing of volume bigger than 200 Gi +Backup Listing Of Volume Bigger Than 200 Gi [Tags] manual longhorn-8355 large-size [Documentation] Test backup bigger than 200 Gi Given Create persistentvolumeclaim 0 using RWO volume diff --git a/e2e/tests/regression/test_persistentvolumeclaim.robot b/e2e/tests/regression/test_persistentvolumeclaim.robot index d1e146b2b..a6755d913 100644 --- a/e2e/tests/regression/test_persistentvolumeclaim.robot +++ b/e2e/tests/regression/test_persistentvolumeclaim.robot @@ -20,7 +20,7 @@ ${RETRY_INTERVAL} 1 *** Test Cases *** -Test persistentvolumeclaim expand more than storage maximum size should fail +Test PersistentVolumeClaim Expand More Than Storage Maximum Size Should Fail [Tags] volume expansion [Documentation] Verify that a PersistentVolumeClaim cannot be expanded beyond ... the storage maximum size. diff --git a/e2e/tests/regression/test_volume.robot b/e2e/tests/regression/test_volume.robot index c06df66dd..71e3ca4e8 100644 --- a/e2e/tests/regression/test_volume.robot +++ b/e2e/tests/regression/test_volume.robot @@ -24,7 +24,7 @@ Create volume with invalid name should fail *** Test Cases *** -Test RWX volume data integrity after CSI plugin pod restart +Test RWX Volume Data Integrity After CSI Plugin Pod Restart [Tags] volume rwx storage-network [Documentation] Test RWX volume data directory is accessible after Longhorn CSI plugin pod restart. ... 
@@ -41,7 +41,7 @@ Test RWX volume data integrity after CSI plugin pod restart Then Check deployment 0 data in file data.txt is intact -Test detached volume should not reattach after node eviction +Test Detached Volume Should Not Reattach After Node Eviction [Tags] volume node-eviction [Documentation] Test detached volume should not reattach after node eviction. ... From 92b772a3842451a39cd5960eb10ccb80e2cf2000 Mon Sep 17 00:00:00 2001 From: Yang Chiu Date: Mon, 16 Dec 2024 13:39:37 +0800 Subject: [PATCH 2/5] test(robot): fix pull backup created by another longhorn system test case for v2 volumes by re-setting up v2 environment after reinstallation Signed-off-by: Yang Chiu --- e2e/keywords/common.resource | 13 +++++++------ .../pull_backup_from_another_longhorn.robot | 3 +++ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/e2e/keywords/common.resource b/e2e/keywords/common.resource index e38dfa544..89fabbbeb 100644 --- a/e2e/keywords/common.resource +++ b/e2e/keywords/common.resource @@ -24,12 +24,7 @@ Library ../libs/keywords/sharemanager_keywords.py Library ../libs/keywords/k8s_keywords.py *** Keywords *** -Set test environment - init_k8s_api_client - - setup_control_plane_network_latency - set_backupstore - +Set up v2 environment update_setting v2-data-engine true ${worker_nodes}= get_worker_nodes ${host_provider}= Get Environment Variable HOST_PROVIDER @@ -38,6 +33,12 @@ Set test environment add_disk block-disk ${worker_node} block ${disk_path} END +Set test environment + init_k8s_api_client + setup_control_plane_network_latency + set_backupstore + set_up_v2_environment + Cleanup test resources FOR ${powered_off_node} IN @{powered_off_nodes} Run keyword And Ignore Error power_on_node_by_name ${powered_off_node} diff --git a/e2e/tests/negative/pull_backup_from_another_longhorn.robot b/e2e/tests/negative/pull_backup_from_another_longhorn.robot index c067264cd..8911af635 100644 --- a/e2e/tests/negative/pull_backup_from_another_longhorn.robot +++ 
b/e2e/tests/negative/pull_backup_from_another_longhorn.robot @@ -59,6 +59,7 @@ Pull Backup Created By Another Longhorn System Then Install Longhorn And Set setting deleting-confirmation-flag to true And Set backupstore + And Set up v2 environment And Check backup synced from backupstore And Create volume 1 from backup 0 in another cluster And Wait for volume 1 detached @@ -72,6 +73,7 @@ Pull Backup Created By Another Longhorn System Then Install Longhorn stable version And Set setting deleting-confirmation-flag to true And Set backupstore + And Set up v2 environment And Create volume 2 with dataEngine=${DATA_ENGINE} And Attach volume 2 And Wait for volume 2 healthy @@ -85,6 +87,7 @@ Pull Backup Created By Another Longhorn System # Install current version then pull backup and verify data Then Install Longhorn And Set backupstore + And Set up v2 environment And Check backup synced from backupstore And Create volume 3 from backup 1 in another cluster And Wait for volume 3 detached From 06bd90fce7ca1038aa17c3df40db1889a803e114 Mon Sep 17 00:00:00 2001 From: Yang Chiu Date: Sun, 15 Dec 2024 19:17:18 +0800 Subject: [PATCH 3/5] test(robot): add missing scale_up_coredns function Signed-off-by: Yang Chiu --- pipelines/e2e/scripts/longhorn-setup.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/pipelines/e2e/scripts/longhorn-setup.sh b/pipelines/e2e/scripts/longhorn-setup.sh index 591840330..9d070154e 100755 --- a/pipelines/e2e/scripts/longhorn-setup.sh +++ b/pipelines/e2e/scripts/longhorn-setup.sh @@ -13,6 +13,7 @@ source pipelines/utilities/create_longhorn_namespace.sh source pipelines/utilities/longhorn_manifest.sh source pipelines/utilities/longhorn_ui.sh source pipelines/utilities/run_longhorn_e2e_test.sh +source pipelines/utilities/coredns.sh # create and clean tmpdir TMPDIR="/tmp/longhorn" From eba6f0c3fd986c10baf743fc59c7b2b6cabdaf15 Mon Sep 17 00:00:00 2001 From: Yang Chiu Date: Sun, 15 Dec 2024 16:16:21 +0800 Subject: [PATCH 4/5] ci: prevent eks from 
auto-scaling down the node count to < 3 when the cluster starts Signed-off-by: Yang Chiu --- test_framework/scripts/longhorn-setup.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_framework/scripts/longhorn-setup.sh b/test_framework/scripts/longhorn-setup.sh index f83f85cd9..fbfb87ca4 100755 --- a/test_framework/scripts/longhorn-setup.sh +++ b/test_framework/scripts/longhorn-setup.sh @@ -52,7 +52,7 @@ install_cluster_autoscaler(){ yq -i 'select(.kind == "Deployment").spec.template.spec.containers[0].env += [{"name": "AWS_ACCESS_KEY_ID", "valueFrom": {"secretKeyRef": {"name": "aws-cred-secret", "key": "AWS_ACCESS_KEY_ID"}}}]' "${TF_VAR_tf_workspace}/templates/cluster_autoscaler.yaml" yq -i 'select(.kind == "Deployment").spec.template.spec.containers[0].env += [{"name": "AWS_SECRET_ACCESS_KEY", "valueFrom": {"secretKeyRef": {"name": "aws-cred-secret", "key": "AWS_SECRET_ACCESS_KEY"}}}]' "${TF_VAR_tf_workspace}/templates/cluster_autoscaler.yaml" yq -i 'select(.kind == "Deployment").spec.template.spec.containers[0].env += [{"name": "AWS_REGION", "valueFrom": {"secretKeyRef": {"name": "aws-cred-secret", "key": "AWS_DEFAULT_REGION"}}}]' "${TF_VAR_tf_workspace}/templates/cluster_autoscaler.yaml" - yq -i 'select(.kind == "Deployment").spec.template.spec.containers[0].command += "--scale-down-unneeded-time=1m"' "${TF_VAR_tf_workspace}/templates/cluster_autoscaler.yaml" + yq -i 'select(.kind == "Deployment").spec.template.spec.containers[0].command += "--scale-down-unneeded-time=3m"' "${TF_VAR_tf_workspace}/templates/cluster_autoscaler.yaml" yq -i 'select(.kind == "Deployment").spec.template.spec.containers[0].command += "--scale-down-delay-after-add=1m"' "${TF_VAR_tf_workspace}/templates/cluster_autoscaler.yaml" kubectl apply -f "${TF_VAR_tf_workspace}/templates/cluster_autoscaler.yaml" } From c073759109fd2d47315a3dd4bb236044595c4b14 Mon Sep 17 00:00:00 2001 From: Yang Chiu Date: Mon, 16 Dec 2024 10:22:54 +0800 Subject: [PATCH 5/5] test(robot): fix
migration confirmation after migration node down test case for v2 volumes. Since v2 volumes delete stopped replicas on down nodes, replica name checks will fail for v2 volumes. Signed-off-by: Yang Chiu --- e2e/tests/negative/live_migration.robot | 2 -- 1 file changed, 2 deletions(-) diff --git a/e2e/tests/negative/live_migration.robot b/e2e/tests/negative/live_migration.robot index 9753214f9..e1466164a 100644 --- a/e2e/tests/negative/live_migration.robot +++ b/e2e/tests/negative/live_migration.robot @@ -19,7 +19,6 @@ Migration Confirmation After Migration Node Down And Attach volume 0 to node 0 And Wait for volume 0 healthy And Write data to volume 0 - And Get volume 0 engine and replica names And Attach volume 0 to node 1 And Wait for volume 0 migration to be ready @@ -31,7 +30,6 @@ Migration Confirmation After Migration Node Down # volume stuck in attaching status and waiting for migration node to come back Then Check volume 0 kept in attaching - And Volume 0 migration should fail or rollback # power on migration node When Power on off nodes