Merge branch 'master' into asyncio-2
yangchiu authored Dec 16, 2024
2 parents 6920a7e + 8a2ea04 commit 5036c96
Showing 13 changed files with 61 additions and 42 deletions.
13 changes: 7 additions & 6 deletions e2e/keywords/common.resource
@@ -24,12 +24,7 @@ Library ../libs/keywords/sharemanager_keywords.py
Library ../libs/keywords/k8s_keywords.py

*** Keywords ***
Set test environment
init_k8s_api_client

setup_control_plane_network_latency
set_backupstore

Set up v2 environment
update_setting v2-data-engine true
${worker_nodes}= get_worker_nodes
${host_provider}= Get Environment Variable HOST_PROVIDER
@@ -38,6 +33,12 @@ Set test environment
add_disk block-disk ${worker_node} block ${disk_path}
END

Set test environment
init_k8s_api_client
setup_control_plane_network_latency
set_backupstore
set_up_v2_environment

Cleanup test resources
FOR ${powered_off_node} IN @{powered_off_nodes}
Run keyword And Ignore Error power_on_node_by_name ${powered_off_node}
4 changes: 2 additions & 2 deletions e2e/libs/recurringjob/rest.py
@@ -130,8 +130,8 @@ def _check_snapshot_created(self, volume_name, job_name):
# but job_name is in spec.labels.RecurringJob
# and crd doesn't support field selector
# so need to filter by ourselves
if 'RecurringJob' in item['status']['labels'] and \
item['status']['labels']['RecurringJob'] == job_name and \
if 'RecurringJob' in item['spec']['labels'] and \
item['spec']['labels']['RecurringJob'] == job_name and \
item['status']['readyToUse'] == True:
snapshot_time = item['metadata']['creationTimestamp']
snapshot_time = datetime.strptime(snapshot_time, '%Y-%m-%dT%H:%M:%SZ')
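For context, a minimal standalone sketch of the client-side filtering this hunk corrects: the RecurringJob label is read from each snapshot item's spec.labels (not status.labels), while readiness is still checked under status. The item layout and helper names below are illustrative assumptions, not code from the repository.

```python
from datetime import datetime

def snapshot_matches_job(item, job_name):
    # The CRD list API cannot field-select on spec.labels, so filter client-side.
    labels = item.get('spec', {}).get('labels') or {}
    if labels.get('RecurringJob') != job_name:
        return False
    return item.get('status', {}).get('readyToUse') is True

def snapshot_creation_time(item):
    # creationTimestamp is an RFC 3339 string such as "2024-12-16T00:00:00Z".
    return datetime.strptime(item['metadata']['creationTimestamp'], '%Y-%m-%dT%H:%M:%SZ')

# Example usage with a fabricated item:
item = {
    'metadata': {'creationTimestamp': '2024-12-16T00:00:00Z'},
    'spec': {'labels': {'RecurringJob': 'job-1'}},
    'status': {'readyToUse': True},
}
assert snapshot_matches_job(item, 'job-1')
print(snapshot_creation_time(item))
```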
24 changes: 12 additions & 12 deletions e2e/tests/negative/component_resilience.robot
@@ -29,12 +29,12 @@ Delete instance-manager of volume ${volume_id} and wait for recover
Delete instance-manager of deployment ${deployment_id} volume and wait for recover
When Delete instance-manager of deployment ${deployment_id} volume
And Wait for volume of deployment ${deployment_id} attached and degraded
And Wait for volume of deployment ${deployment_id} healthy
And Wait for volume of deployment ${deployment_id} healthy
And Wait for deployment ${deployment_id} pods stable
And Check deployment ${deployment_id} data in file data.txt is intact

*** Test Cases ***
Test Longhorn components recovery
Test Longhorn Components Recovery
[Documentation] -- Manual test plan --
... Test data setup:
... Deploy Longhorn on a 3 nodes cluster.
@@ -64,19 +64,19 @@ Test Longhorn components recovery
And Attach volume 1
And Wait for volume 1 healthy
And Write data to volume 1

When Create storageclass longhorn-test-1 with dataEngine=${DATA_ENGINE}
And Create persistentvolumeclaim 1 using RWX volume with longhorn-test-1 storageclass
And Create deployment 1 with persistentvolumeclaim 1
And Write 100 MB data to file data.txt in deployment 1
END

When Delete Longhorn DaemonSet longhorn-csi-plugin pod on node 1
When Delete Longhorn DaemonSet longhorn-csi-plugin pod on node 1
And Delete Longhorn Deployment csi-attacher pod on node 1
And Delete Longhorn Deployment csi-provisioner pod on node 1
And Delete Longhorn Deployment csi-resizer pod on node 1
And Delete Longhorn Deployment csi-snapshotter pod on node 1
And Delete Longhorn DaemonSet longhorn-manager pod on node 1
And Delete Longhorn DaemonSet longhorn-manager pod on node 1
And Delete Longhorn DaemonSet engine-image pod on node 1
And Delete Longhorn component instance-manager pod on node 1
And Delete Longhorn Deployment longhorn-ui pod
@@ -93,7 +93,7 @@ Test Longhorn components recovery
And Check deployment 1 data in file data.txt is intact
END

Test Longhorn volume recovery
Test Longhorn Volume Recovery
[Documentation] -- Manual test plan --
... Test data setup:
... Deploy Longhorn on a 3 nodes cluster.
@@ -115,7 +115,7 @@ Test Longhorn volume recovery
And Wait until volume 0 replica rebuilding started on replica node
Then Delete instance-manager of volume 0 and wait for recover

Test Longhorn backing image volume recovery
Test Longhorn Backing Image Volume Recovery
[Documentation] -- Manual test plan --
... Test data setup:
... Deploy Longhorn on a 3 nodes cluster.
@@ -127,15 +127,15 @@ Test Longhorn backing image volume recovery
... Test steps:
... Delete the IM of the volume and make sure volume recovers. Check the data as well.
... Start replica rebuilding for the aforementioned volume, and delete the IM-e while it is rebuilding. Verify the recovered volume.
... Delete the backing image manager pod and verify the pod gets recreated.
... Delete the backing image manager pod and verify the pod gets recreated.
IF '${DATA_ENGINE}' == 'v1'
When Create backing image bi with url=https://longhorn-backing-image.s3-us-west-1.amazonaws.com/parrot.qcow2
And Create volume 0 with backingImage=bi dataEngine=${DATA_ENGINE}
And Attach volume 0
And Wait for volume 0 healthy
And Write data to volume 0
Then Delete instance-manager of volume 0 and wait for recover

When Delete volume 0 replica on replica node
And Wait until volume 0 replica rebuilding started on replica node
Then Delete instance-manager of volume 0 and wait for recover
@@ -144,7 +144,7 @@ Test Longhorn backing image volume recovery
Then Wait backing image managers running
END

Test Longhorn dynamic provisioned RWX volume recovery
Test Longhorn Dynamic Provisioned RWX Volume Recovery
[Documentation] -- Manual test plan --
... Test data setup:
... Deploy Longhorn on a 3 nodes cluster.
@@ -174,7 +174,7 @@ Test Longhorn dynamic provisioned RWX volume recovery
And Check deployment 0 data in file data.txt is intact
END

Test Longhorn dynamic provisioned RWO volume recovery
Test Longhorn Dynamic Provisioned RWO Volume Recovery
[Documentation] -- Manual test plan --
... Test data setup:
... Deploy Longhorn on a 3 nodes cluster.
@@ -191,7 +191,7 @@ Test Longhorn dynamic provisioned RWO volume recovery
And Create deployment 0 with persistentvolumeclaim 0
And Write 500 MB data to file data.txt in deployment 0
Then Delete instance-manager of deployment 0 volume and wait for recover

When Delete replica of deployment 0 volume on replica node
And Wait until volume of deployment 0 replica rebuilding started on replica node
Then Delete instance-manager of deployment 0 volume and wait for recover
2 changes: 0 additions & 2 deletions e2e/tests/negative/live_migration.robot
@@ -19,7 +19,6 @@ Migration Confirmation After Migration Node Down
And Attach volume 0 to node 0
And Wait for volume 0 healthy
And Write data to volume 0
And Get volume 0 engine and replica names

And Attach volume 0 to node 1
And Wait for volume 0 migration to be ready
@@ -31,7 +30,6 @@ Migration Confirmation After Migration Node Down

# volume stuck in attaching status and waiting for migration node to come back
Then Check volume 0 kept in attaching
And Volume 0 migration should fail or rollback

# power on migration node
When Power on off nodes
8 changes: 4 additions & 4 deletions e2e/tests/negative/node_drain.robot
@@ -88,7 +88,7 @@ Force Drain Replica Node While Replica Rebuilding
And Check deployment 1 data in file data.txt is intact
END

Drain node with force
Drain Node With Force
[Documentation] Drain node with force
... 1. Deploy a cluster contains 3 worker nodes N1, N2, N3.
... 2. Deploy Longhorn.
@@ -117,7 +117,7 @@ Drain node with force
And Check instance-manager pod is not running on drained node
Then Check deployment 0 data in file data.txt is intact

Drain node without force
Drain Node Without Force
[Documentation] Drain node without force
... 1. Cordon the node. Longhorn will automatically disable the node scheduling when a Kubernetes node is cordoned.
... 2. Evict all the replicas from the node.
@@ -139,7 +139,7 @@ Drain node without force
And Check instance-manager pod is not running on drained node
Then Check deployment 0 data in file data.txt is intact

Test kubectl drain nodes for PVC/PV/LHV is created through Longhorn API
Test Kubectl Drain Nodes For PVC/PV/LHV Is Created Through Longhorn API
[Documentation] Test kubectl drain nodes for PVC/PV/LHV is created through Longhorn API
... Given 1 PVC/PV/LHV created through Longhorn API And LHV is not yet attached/replicated.
... When kubectl drain nodes.
@@ -153,7 +153,7 @@ Test kubectl drain nodes for PVC/PV/LHV is created through Longhorn API
And Create persistentvolumeclaim for volume 0
And Force drain all nodes

Stopped replicas on deleted nodes should not be counted as healthy replicas when draining nodes
Stopped Replicas On Deleted Nodes Should Not Be Counted As Healthy Replicas When Draining Nodes
[Documentation] Stopped replicas on deleted nodes should not be counted as healthy replicas when draining nodes
... When draining a node, the node will be set as unscheduled and all pods should be evicted.
... By Longhorn’s default settings, the replica will only be evicted if there is another healthy replica on the running node.
9 changes: 6 additions & 3 deletions e2e/tests/negative/pull_backup_from_another_longhorn.robot
@@ -20,7 +20,7 @@ Test Setup Set test environment
Test Teardown Cleanup test resources

*** Test Cases ***
Pull backup created by another Longhorn system
Pull Backup Created By Another Longhorn System
[Documentation] Pull backup created by another Longhorn system
... 1. Install test version of Longhorn.
... 2. Create volume, write data, and take backup.
@@ -32,7 +32,7 @@ Pull backup created by another Longhorn system
... 8. Create volume, write data, and take backup.
... 9. Uninstall Longhorn.
... 10. Install test version of Longhorn.
... 11. Restore the backup create in step 8 and verify the data.
... 11. Restore the backup create in step 8 and verify the data.
...
... Important
... - This test case need have set environment variable manually first if not run on Jenkins
@@ -49,7 +49,7 @@ Pull backup created by another Longhorn system
And Attach volume 0
And Wait for volume 0 healthy
And Write data 0 300 MB to volume 0
When Create backup 0 for volume 0
When Create backup 0 for volume 0
Then Verify backup list contains no error for volume 0
And Verify backup list contains backup 0 of volume 0
Then Uninstall Longhorn
@@ -59,6 +59,7 @@ Pull backup created by another Longhorn system
Then Install Longhorn
And Set setting deleting-confirmation-flag to true
And Set backupstore
And Set up v2 environment
And Check backup synced from backupstore
And Create volume 1 from backup 0 in another cluster
And Wait for volume 1 detached
@@ -72,6 +73,7 @@ Pull backup created by another Longhorn system
Then Install Longhorn stable version
And Set setting deleting-confirmation-flag to true
And Set backupstore
And Set up v2 environment
And Create volume 2 with dataEngine=${DATA_ENGINE}
And Attach volume 2
And Wait for volume 2 healthy
@@ -85,6 +87,7 @@ Pull backup created by another Longhorn system
# Install current version then pull backup and verify data
Then Install Longhorn
And Set backupstore
And Set up v2 environment
And Check backup synced from backupstore
And Create volume 3 from backup 1 in another cluster
And Wait for volume 3 detached
4 changes: 2 additions & 2 deletions e2e/tests/negative/replica_rebuilding.robot
@@ -66,7 +66,7 @@ Reboot Replica Node While Replica Rebuilding
And Check volume 0 data is intact
END

Delete replicas one by one after the volume is healthy
Delete Replicas One By One After The Volume Is Healthy
Given Create storageclass longhorn-test with dataEngine=${DATA_ENGINE}
And Create persistentvolumeclaim 0 using RWO volume with longhorn-test storageclass
And Create deployment 0 with persistentvolumeclaim 0
@@ -90,7 +90,7 @@ Delete replicas one by one after the volume is healthy
Then Check deployment 0 data in file data.txt is intact
END

Delete replicas one by one regardless of the volume health
Delete Replicas One By One Regardless Of The Volume Health
[Documentation] Currently v2 data engine have a chance to hit
... https://github.com/longhorn/longhorn/issues/9216 and will be fixed
... in v1.9.0
4 changes: 2 additions & 2 deletions e2e/tests/negative/test_backup_listing.robot
@@ -127,7 +127,7 @@ Pod ${pod_id} data should same as volume ${source_volume_id} backup ${backup_id}
... msg="expected ${expected_checksum}, got ${current_checksum}!"

*** Test Cases ***
Backup listing with more than 1000 backups
Backup Listing With More Than 1000 Backups
[Tags] manual longhorn-8355
[Documentation] Test backup listing
Given Create persistentvolumeclaim 0 using RWO volume
@@ -139,7 +139,7 @@ Backup listing with more than 1000 backups
Then Get deployment 1 volume data in file data
And Volume 1 data should same as deployment 0 volume

Backup listing of volume bigger than 200 Gi
Backup Listing Of Volume Bigger Than 200 Gi
[Tags] manual longhorn-8355 large-size
[Documentation] Test backup bigger than 200 Gi
Given Create persistentvolumeclaim 0 using RWO volume
2 changes: 1 addition & 1 deletion e2e/tests/regression/test_persistentvolumeclaim.robot
@@ -20,7 +20,7 @@ ${RETRY_INTERVAL} 1

*** Test Cases ***

Test persistentvolumeclaim expand more than storage maximum size should fail
Test PersistentVolumeClaim Expand More Than Storage Maximum Size Should Fail
[Tags] volume expansion
[Documentation] Verify that a PersistentVolumeClaim cannot be expanded beyond
... the storage maximum size.
4 changes: 2 additions & 2 deletions e2e/tests/regression/test_volume.robot
@@ -24,7 +24,7 @@ Create volume with invalid name should fail

*** Test Cases ***

Test RWX volume data integrity after CSI plugin pod restart
Test RWX Volume Data Integrity After CSI Plugin Pod Restart
[Tags] volume rwx storage-network
[Documentation] Test RWX volume data directory is accessible after Longhorn CSI plugin pod restart.
...
@@ -41,7 +41,7 @@ Test RWX volume data integrity after CSI plugin pod restart

Then Check deployment 0 data in file data.txt is intact

Test detached volume should not reattach after node eviction
Test Detached Volume Should Not Reattach After Node Eviction
[Tags] volume node-eviction
[Documentation] Test detached volume should not reattach after node eviction.
...
1 change: 1 addition & 0 deletions pipelines/e2e/scripts/longhorn-setup.sh
@@ -13,6 +13,7 @@ source pipelines/utilities/create_longhorn_namespace.sh
source pipelines/utilities/longhorn_manifest.sh
source pipelines/utilities/longhorn_ui.sh
source pipelines/utilities/run_longhorn_e2e_test.sh
source pipelines/utilities/coredns.sh

# create and clean tmpdir
TMPDIR="/tmp/longhorn"
4 changes: 2 additions & 2 deletions pipelines/utilities/install_backupstores.sh
@@ -7,10 +7,10 @@ install_backupstores(){
-f ${NFS_BACKUPSTORE_URL} \
-f ${CIFS_BACKUPSTORE_URL} \
-f ${AZURITE_BACKUPSTORE_URL}
setup_azuitize_backup_store
setup_azurite_backup_store
}

setup_azuitize_backup_store(){
setup_azurite_backup_store(){
RETRY=0
MAX_RETRY=60
until (kubectl get pods | grep 'longhorn-test-azblob' | grep 'Running'); do
24 changes: 20 additions & 4 deletions test_framework/scripts/longhorn-setup.sh
@@ -52,7 +52,7 @@ install_cluster_autoscaler(){
yq -i 'select(.kind == "Deployment").spec.template.spec.containers[0].env += [{"name": "AWS_ACCESS_KEY_ID", "valueFrom": {"secretKeyRef": {"name": "aws-cred-secret", "key": "AWS_ACCESS_KEY_ID"}}}]' "${TF_VAR_tf_workspace}/templates/cluster_autoscaler.yaml"
yq -i 'select(.kind == "Deployment").spec.template.spec.containers[0].env += [{"name": "AWS_SECRET_ACCESS_KEY", "valueFrom": {"secretKeyRef": {"name": "aws-cred-secret", "key": "AWS_SECRET_ACCESS_KEY"}}}]' "${TF_VAR_tf_workspace}/templates/cluster_autoscaler.yaml"
yq -i 'select(.kind == "Deployment").spec.template.spec.containers[0].env += [{"name": "AWS_REGION", "valueFrom": {"secretKeyRef": {"name": "aws-cred-secret", "key": "AWS_DEFAULT_REGION"}}}]' "${TF_VAR_tf_workspace}/templates/cluster_autoscaler.yaml"
yq -i 'select(.kind == "Deployment").spec.template.spec.containers[0].command += "--scale-down-unneeded-time=1m"' "${TF_VAR_tf_workspace}/templates/cluster_autoscaler.yaml"
yq -i 'select(.kind == "Deployment").spec.template.spec.containers[0].command += "--scale-down-unneeded-time=3m"' "${TF_VAR_tf_workspace}/templates/cluster_autoscaler.yaml"
yq -i 'select(.kind == "Deployment").spec.template.spec.containers[0].command += "--scale-down-delay-after-add=1m"' "${TF_VAR_tf_workspace}/templates/cluster_autoscaler.yaml"
kubectl apply -f "${TF_VAR_tf_workspace}/templates/cluster_autoscaler.yaml"
}
@@ -341,10 +341,22 @@ install_backupstores(){
-f ${NFS_BACKUPSTORE_URL} \
-f ${CIFS_BACKUPSTORE_URL} \
-f ${AZURITE_BACKUPSTORE_URL}
setup_azuitize_backup_store
setup_azurite_backup_store
}

setup_azuitize_backup_store(){
install_backupstores_from_lh_repo(){
MINIO_BACKUPSTORE_URL="https://raw.githubusercontent.com/longhorn/longhorn/master/deploy/backupstores/minio-backupstore.yaml"
NFS_BACKUPSTORE_URL="https://raw.githubusercontent.com/longhorn/longhorn/master/deploy/backupstores/nfs-backupstore.yaml"
CIFS_BACKUPSTORE_URL="https://raw.githubusercontent.com/longhorn/longhorn/master/deploy/backupstores/cifs-backupstore.yaml"
AZURITE_BACKUPSTORE_URL="https://raw.githubusercontent.com/longhorn/longhorn/master/deploy/backupstores/azurite-backupstore.yaml"
kubectl create -f ${MINIO_BACKUPSTORE_URL} \
-f ${NFS_BACKUPSTORE_URL} \
-f ${CIFS_BACKUPSTORE_URL} \
-f ${AZURITE_BACKUPSTORE_URL}
setup_azurite_backup_store
}

setup_azurite_backup_store(){
RETRY=0
MAX_RETRY=60
until (kubectl get pods | grep 'longhorn-test-azblob' | grep 'Running'); do
@@ -557,7 +569,11 @@ main(){
install_cluster_autoscaler
fi
if [[ ${PYTEST_CUSTOM_OPTIONS} != *"--include-cluster-autoscaler-test"* ]]; then
install_backupstores
if [[ "${TF_VAR_k8s_distro_name}" == "eks" || "${TF_VAR_k8s_distro_name}" == "aks" ]]; then
install_backupstores_from_lh_repo
else
install_backupstores
fi
fi
install_csi_snapshotter_crds
if [[ "${TF_VAR_enable_mtls}" == true ]]; then
