Commit 4eb6a4b

test

chriscchien committed Feb 21, 2024
1 parent 57546b9 commit 4eb6a4b

Showing 3 changed files with 133 additions and 129 deletions.
2 changes: 1 addition & 1 deletion manager/integration/Dockerfile
@@ -1,6 +1,6 @@
FROM registry.suse.com/bci/python:3.9

ARG KUBECTL_VERSION=v1.17.0
ARG KUBECTL_VERSION=v1.28.4
ARG YQ_VERSION=v4.24.2
ARG TERRAFORM_VERSION=1.3.5
ARG ARCH=amd64
10 changes: 8 additions & 2 deletions manager/integration/tests/common.py
@@ -6125,8 +6125,14 @@ def wait_for_instance_manager_count(client, number, retry_counts=120):
return len(ims)


def create_deployment_and_write_data(client, core_api, make_deployment_with_pvc, volume_name, size, replica_count, data_size, attach_node_id=None): # NOQA
print(volume_name)
def create_deployment_and_write_data(client, # NOQA
core_api, # NOQA
make_deployment_with_pvc, # NOQA
volume_name, # NOQA
size, # NOQA
replica_count, # NOQA
data_size, # NOQA
attach_node_id=None): # NOQA
apps_api = get_apps_api_client()
volume = client.create_volume(name=volume_name,
size=size,
250 changes: 124 additions & 126 deletions manager/integration/tests/test_node.py
@@ -50,9 +50,6 @@
from common import update_setting
from common import SETTING_NODE_DRAIN_POLICY, DATA_SIZE_IN_MB_3
from common import make_deployment_with_pvc # NOQA
from common import create_pv_for_volume
from common import create_pvc_for_volume, create_and_wait_deployment
from common import get_apps_api_client, write_pod_volume_random_data
from common import prepare_host_disk, wait_for_volume_degraded
from common import create_deployment_and_write_data

@@ -2693,7 +2690,15 @@ def finalizer():
def drain_node(core_api, node): # NOQA
set_node_cordon(core_api, node.id, True)

command = ["kubectl", "drain", node.id, "--ignore-daemonsets"]
command = [
"kubectl",
"drain",
node.id,
"--ignore-daemonsets",
"--delete-emptydir-data",
"--grace-period=-1"
]

subprocess.run(command, check=True)
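
A note on what this helper executes: with check=True, subprocess.run raises CalledProcessError when kubectl exits non-zero, so a failed drain fails the test immediately. A minimal sketch of the equivalent call, using a hypothetical node name:

# What drain_node(core_api, node) effectively runs for a node whose id is
# "node-1" (the node name here is illustrative only).
import subprocess

subprocess.run(
    ["kubectl", "drain", "node-1",
     "--ignore-daemonsets", "--delete-emptydir-data", "--grace-period=-1"],
    check=True,  # raise CalledProcessError if the drain fails
)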


@@ -2713,8 +2718,84 @@ def get_replica_detail(replica_name):
return replica_info


def check_node_auto_evict_state(client, target_node, expect_state): # NOQA
def get_specific_node(client, target_node):
nodes = client.list_node()
for node in nodes:
if node.id == target_node.id:
return node

for i in range(RETRY_COUNTS):
node = get_specific_node(client, target_node)
if node.autoEvicting is expect_state:
break
time.sleep(RETRY_INTERVAL)
assert node.autoEvicting is expect_state


def check_replica_evict_state(client, volume_name, node, expect_state): # NOQA
volume = client.by_id_volume(volume_name)
for replica in volume.replicas:
if replica.hostId == node.id:
replica_name = replica.name
break

replica_info = get_replica_detail(replica_name)
eviction_requested = replica_info["spec"]["evictionRequested"]
assert eviction_requested is expect_state


def wait_drain_complete(future, timeout):
"""
Wait for a concurrent.futures object to complete within the given duration
"""
thread_timeout = timeout
try:
future.result(timeout=thread_timeout)
drain_complete = True
except TimeoutError:
print("drain node thread exceed timeout ({})s".format(thread_timeout))
drain_complete = False
future.cancel()
finally:
assert drain_complete is True
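
A minimal usage sketch, mirroring how the tests below pair this helper with drain_node (the executor setup is an assumption; the tests construct their own executor):

# Run the blocking kubectl drain in a worker thread, then require it to
# finish within 60 seconds.
from concurrent.futures import ThreadPoolExecutor

executor = ThreadPoolExecutor(max_workers=1)  # worker count is illustrative
future = executor.submit(drain_node, core_api, evict_source_node)
wait_drain_complete(future, 60)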


def make_replica_on_specific_node(client, volume_name, node): # NOQA
volume = client.by_id_volume(volume_name)
volume.updateReplicaCount(replicaCount=1)
for replica in volume.replicas:
if replica.hostId != node.id:
volume.replicaRemove(name=replica.name)
wait_for_volume_replica_count(client, volume_name, 1)


def get_all_replica_name(client, volume_name): # NOQA
volume_replicas = []
volume = client.by_id_volume(volume_name)
for replica in volume.replicas:
volume_replicas.append(replica.name)

return volume_replicas


def check_all_replicas_evict_state(client, volume_name, expect_state): # NOQA
volume = client.by_id_volume(volume_name)
for replica in volume.replicas:
replica_info = get_replica_detail(replica.name)
eviction_requested = replica_info["spec"]["evictionRequested"]
assert eviction_requested is expect_state
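
The check_* helpers above consolidate the eviction-state assertions that were previously inlined in each test. During a drain (step 6 of the tests below) they are invoked roughly as in this sketch; the volume and node names are reused from the tests purely for illustration:

# The replica on the drained node should be marked for eviction, the node
# should report autoEvicting, and the replicas of a volume that still has
# redundancy elsewhere (volume2 in the second test) should not be marked.
check_replica_evict_state(client, volume_name, evict_source_node, True)
check_node_auto_evict_state(client, evict_source_node, True)
check_all_replicas_evict_state(client, volume2_name, False)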


@pytest.mark.skip(reason="Can not run when in-cluster backup store pod exist") # NOQA
def test_drain_with_block_for_eviction_success(client, core_api, volume_name, make_deployment_with_pvc): # NOQA
"""
Test case has the potential to drain a node where backup store pods are
located.
In that case, the test case will fail because backup store pods can only be
forcibly drained.
---
Test drain completes after evicting replica with node-drain-policy
block-for-eviction
@@ -2746,33 +2827,13 @@ def test_drain_with_block_for_eviction_success(client, core_api, volume_name, ma
client.update(setting, value="block-for-eviction")

# Step 2, 3, 4
volume = client.create_volume(name=volume_name,
size=str(1 * Gi),
numberOfReplicas=3)
volume = common.wait_for_volume_detached(client, volume_name)

pvc_name = volume_name + "-pvc"
create_pv_for_volume(client, core_api, volume, volume_name)
create_pvc_for_volume(client, core_api, volume, pvc_name)
deployment_name = volume_name + "-dep"
deployment = make_deployment_with_pvc(deployment_name, pvc_name)
deployment["spec"]["template"]["spec"]["nodeSelector"] \
= {"kubernetes.io/hostname": host_id}

apps_api = get_apps_api_client()
create_and_wait_deployment(apps_api, deployment)

pod_names = common.get_deployment_pod_names(core_api, deployment)
data_path = '/data/test'
write_pod_volume_random_data(core_api,
pod_names[0],
data_path,
DATA_SIZE_IN_MB_3)
expected_test_data_checksum = get_pod_data_md5sum(core_api,
pod_names[0],
data_path)

volume = wait_for_volume_healthy(client, volume_name)
volume, pod, checksum = create_deployment_and_write_data(client,
core_api,
make_deployment_with_pvc, # NOQA
volume_name,
str(1 * Gi),
3,
DATA_SIZE_IN_MB_3, host_id) # NOQA

# Make replica not locate on eviction target node
volume.updateReplicaCount(replicaCount=2)
@@ -2789,33 +2850,11 @@ def test_drain_with_block_for_eviction_success(client, core_api, volume_name, ma
future = executor.submit(drain_node, core_api, evict_source_node)

# Step 6
volume = client.by_id_volume(volume_name)
for replica in volume.replicas:
if replica.hostId == evict_source_node.id:
replica_name = replica.name
break

replica_info = get_replica_detail(replica_name)
eviction_requested = replica_info["spec"]["evictionRequested"]
assert eviction_requested is True

nodes = client.list_node()
for node in nodes:
if node.id == evict_source_node.id:
assert node.autoEvicting is True
check_replica_evict_state(client, volume_name, evict_source_node, True)
check_node_auto_evict_state(client, evict_source_node, True)

# Step 7
thread_timeout = 60
try:
future.result(timeout=thread_timeout)
drain_complete = True
except TimeoutError:
print("drain node thread exceed timeout ({})s".format(thread_timeout))
drain_complete = False
future.cancel()
finally:
assert drain_complete is True

wait_drain_complete(future, 60)
wait_for_volume_replica_count(client, volume_name, 2)

# Step 8
@@ -2828,33 +2867,29 @@ def test_drain_with_block_for_eviction_success(client, core_api, volume_name, ma
assert replica.hostId != evict_source_node.id

# Step 10
nodes = client.list_node()
for node in nodes:
assert node.autoEvicting is False
check_node_auto_evict_state(client, evict_source_node, False)

# Step 11
volume = client.by_id_volume(volume_name)
for replica in volume.replicas:
if replica.hostId == evict_target_node.id:
replica_name = replica.name
break

replica_info = get_replica_detail(replica_name)
eviction_requested = replica_info["spec"]["evictionRequested"]
assert eviction_requested is False
check_replica_evict_state(client, volume_name, evict_target_node, False)

# Step 12
data_path = '/data/test'
test_data_checksum = get_pod_data_md5sum(core_api,
pod_names[0],
pod,
data_path)

assert expected_test_data_checksum == test_data_checksum
assert checksum == test_data_checksum


@pytest.mark.skip(reason="Can not run when in-cluster backup store pod exist") # NOQA
def test_drain_with_block_for_eviction_if_contains_last_replica_success(client, # NOQA
core_api, # NOQA
make_deployment_with_pvc): # NOQA
"""
Test case has the potential to drain a node where backup store pods are
located.
In that case, the test case will fail because backup store pods can only be
forcibly drained.
---
Test drain completes after evicting replicas with node-drain-policy
block-for-eviction-if-contains-last-replica
@@ -2882,52 +2917,10 @@ def test_drain_with_block_for_eviction_if_contains_last_replica_success(client,
12. Verify that `replica.spec.evictionRequested == false` on all replicas.
13. Verify the data in both volumes.
"""
def make_replica_on_specific_node(client, volume_name, node): # NOQA
volume = client.by_id_volume(volume_name)
volume.updateReplicaCount(replicaCount=1)
for replica in volume.replicas:
if replica.hostId != node.id:
volume.replicaRemove(name=replica.name)

def get_all_replica_name(client, volume_name): # NOQA
volume_replicas = []
volume = client.by_id_volume(volume_name)
for replica in volume.replicas:
volume_replicas.append(replica.name)

return volume_replicas

def check_replica_evict_state(client, volume_name, node, expect_state): # NOQA
volume = client.by_id_volume(volume_name)
for replica in volume.replicas:
if replica.hostId == node.id:
replica_name = replica.name
break

replica_info = get_replica_detail(replica_name)
eviction_requested = replica_info["spec"]["evictionRequested"]
assert eviction_requested is expect_state

def check_all_replicas_evict_state(client, volume_name, expect_state): # NOQA
volume = client.by_id_volume(volume_name)
for replica in volume.replicas:
replica_info = get_replica_detail(replica.name)
eviction_requested = replica_info["spec"]["evictionRequested"]
assert eviction_requested is expect_state

def check_node_auto_evict_state(client, target_node, expect_state):
nodes = client.list_node()
for node in nodes:
if node.id == target_node.id:
assert node.autoEvicting is expect_state

host_id = get_self_host_id()
nodes = client.list_node()
evict_nodes = [node for node in nodes if node.id != host_id][:2]
evict_source_node = evict_nodes[0]
print()
print(host_id)
print(evict_source_node.id)

# Create extra disk on current node
node = client.by_id_node(host_id)
@@ -2961,8 +2954,22 @@ def check_node_auto_evict_state(client, target_node, expect_state):
# Step 2, 3
volume1_name = "vol-1"
volume2_name = "vol-2"
volume1, pod1, checksum1 = create_deployment_and_write_data(client, core_api, make_deployment_with_pvc, volume1_name, str(1 * Gi), 3, DATA_SIZE_IN_MB_3, host_id) # NOQA
volume2, pod2, checksum2 = create_deployment_and_write_data(client, core_api, make_deployment_with_pvc, volume2_name, str(1 * Gi), 3, DATA_SIZE_IN_MB_3, host_id) # NOQA
volume1, pod1, checksum1 = create_deployment_and_write_data(client,
core_api,
make_deployment_with_pvc, # NOQA
volume1_name,
str(1 * Gi),
3,
DATA_SIZE_IN_MB_3, # NOQA
host_id) # NOQA
volume2, pod2, checksum2 = create_deployment_and_write_data(client,
core_api,
make_deployment_with_pvc, # NOQA
volume2_name,
str(1 * Gi),
3,
DATA_SIZE_IN_MB_3, # NOQA
host_id) # NOQA
# Make volume 1 replica only located on evict_source_node
make_replica_on_specific_node(client, volume1_name, evict_source_node)
volume2_replicas = get_all_replica_name(client, volume2_name)
@@ -2979,16 +2986,7 @@ def check_node_auto_evict_state(client, target_node, expect_state):
check_all_replicas_evict_state(client, volume2_name, False)

# Step 7
thread_timeout = 60
try:
future.result(timeout=thread_timeout)
drain_complete = True
except TimeoutError:
print("drain node thread exceed timeout ({})s".format(thread_timeout))
drain_complete = False
future.cancel()
finally:
assert drain_complete is True
wait_drain_complete(future, 60)

# Step 8
set_node_cordon(core_api, evict_source_node.id, False)
@@ -3001,7 +2999,7 @@ def check_node_auto_evict_state(client, target_node, expect_state):

# Step 10
# Verify volume2 replicas not moved by check replica name
# stored before node drain
# stored before the node drain
volume2 = wait_for_volume_healthy(client, volume2_name)
for replica in volume2.replicas:
assert replica.name in volume2_replicas
