From d25e1932dba1ea415425ed863cef39d75116098b Mon Sep 17 00:00:00 2001
From: Chris
Date: Tue, 20 Feb 2024 16:48:23 +0800
Subject: [PATCH] Add test case
 test_drain_with_block_for_eviction_if_contains_last_replica_success

ref: 7521

Signed-off-by: Chris
---
 manager/integration/tests/common.py | 26 ++
 manager/integration/tests/test_node.py | 172 ++++++++++++++++++++++++-
 2 files changed, 195 insertions(+), 3 deletions(-)

diff --git a/manager/integration/tests/common.py b/manager/integration/tests/common.py
index c9c36e0aa0..854005e020 100644
--- a/manager/integration/tests/common.py
+++ b/manager/integration/tests/common.py
@@ -6123,3 +6123,29 @@ def wait_for_instance_manager_count(client, number, retry_counts=120):
         time.sleep(RETRY_INTERVAL_LONG)

     return len(ims)
+
+
+def create_disk_on_current_node(client, host_id, data_locality):
+    # Create extra disk on current node
+    node = client.by_id_node(host_id)
+    disks = node.disks
+
+    disk_volume_name = 'vol-disk'
+    disk_volume = client.create_volume(name=disk_volume_name,
+                                       size=str(2 * Gi),
+                                       numberOfReplicas=1,
+                                       dataLocality=data_locality)
+    disk_volume = wait_for_volume_detached(client, disk_volume_name)
+
+    disk_volume.attach(hostId=host_id)
+    disk_volume = wait_for_volume_healthy(client, disk_volume_name)
+    disk_path = prepare_host_disk(get_volume_endpoint(disk_volume),
+                                  disk_volume_name)
+    disk = {"path": disk_path, "allowScheduling": True}
+
+    update_disk = get_update_disks(disks)
+    update_disk["disk1"] = disk
+
+    node = update_node_disks(client, node.name, disks=update_disk, retry=True)
+    node = wait_for_disk_update(client, host_id, len(update_disk))
+    assert len(node.disks) == len(update_disk)
diff --git a/manager/integration/tests/test_node.py b/manager/integration/tests/test_node.py
index f54b0e5335..e35db27a94 100644
--- a/manager/integration/tests/test_node.py
+++ b/manager/integration/tests/test_node.py
@@ -53,6 +53,8 @@ from common import create_pv_for_volume
 from common import create_pvc_for_volume,
create_and_wait_deployment from common import get_apps_api_client, write_pod_volume_random_data +from common import wait_for_volume_degraded +from common import create_disk_on_current_node from backupstore import set_random_backupstore # NOQA from concurrent.futures import ThreadPoolExecutor, TimeoutError @@ -2849,8 +2851,22 @@ def test_drain_with_block_for_eviction_success(client, core_api, volume_name, ma assert expected_test_data_checksum == test_data_checksum -@pytest.mark.skip(reason="TODO") # NOQA -def test_drain_with_block_for_eviction_if_contains_last_replica_success(): +def write_deployment_and_get_md5(core_api, data_path, deployment, size): # NOQA + deployment_pod_names = common.get_deployment_pod_names(core_api, + deployment) + write_pod_volume_random_data(core_api, + deployment_pod_names[0], + data_path, + size) + expected_test_data_checksum = get_pod_data_md5sum(core_api, + deployment_pod_names[0], + data_path) + return expected_test_data_checksum, deployment_pod_names[0] + + +def test_drain_with_block_for_eviction_if_contains_last_replica_success(client, # NOQA + core_api, # NOQA + make_deployment_with_pvc): # NOQA """ Test drain completes after evicting replicas with node-drain-policy block-for-eviction-if-contains-last-replica @@ -2864,7 +2880,6 @@ def test_drain_with_block_for_eviction_if_contains_last_replica_success(): 4. Write data to the volumes. 5. Drain a node both volumes have a replica scheduled to. 6. While the drain is ongoing: - - Verify that the volume with one replica never becomes degraded. - Verify that the volume with three replicas becomes degraded. - Verify that `node.status.autoEvicting == true`. - Optionally verify that `replica.spec.evictionRequested == true` on the @@ -2880,6 +2895,157 @@ def test_drain_with_block_for_eviction_if_contains_last_replica_success(): 12. Verify that `replica.spec.evictionRequested == false` on all replicas. 13. Verify the the data in both volumes. 
""" + apps_api = get_apps_api_client() + host_id = get_self_host_id() + nodes = client.list_node() + evict_nodes = [node for node in nodes if node.id != host_id][:2] + evict_source_node = evict_nodes[0] + + create_disk_on_current_node(client, host_id, "strict-local") + # Step 1 + setting = client.by_id_setting( + SETTING_NODE_DRAIN_POLICY) + client.update(setting, value="block-for-eviction-if-contains-last-replica") + + # Step 2, 3 + def create_volume_with_deployment(client, volume_name, on_current_node=False): # NOQA + volume = client.create_volume(name=volume_name, + size=str(1 * Gi), + numberOfReplicas=3) + volume = common.wait_for_volume_detached(client, volume_name) + + pvc_name = volume_name + "-pvc" + create_pv_for_volume(client, core_api, volume, volume_name) + create_pvc_for_volume(client, core_api, volume, pvc_name) + deployment_name = volume_name + "-dep" + deployment = make_deployment_with_pvc(deployment_name, pvc_name) + if on_current_node is True: + deployment["spec"]["template"]["spec"]["nodeSelector"] \ + = {"kubernetes.io/hostname": get_self_host_id()} + + create_and_wait_deployment(apps_api, deployment) + + volume = wait_for_volume_healthy(client, volume_name) + return volume, deployment + + volume1_name = "vol-1" + volume1, deployment1 = create_volume_with_deployment(client, + volume1_name, + True) + + volume1.updateReplicaCount(replicaCount=1) + for replica in volume1.replicas: + if replica.hostId != evict_source_node.id: + volume1.replicaRemove(name=replica.name) + + volume2_name = "vol-2" + volume2, deployment2 = create_volume_with_deployment(client, + volume2_name, + True) + + volume2_replicas = [] + volume2 = client.by_id_volume(volume2_name) + for replica in volume2.replicas: + volume2_replicas.append(replica.name) + + # Step 4 + data_path = '/data/test' + expected_test_data_checksum1, deployment_pod1 = \ + write_deployment_and_get_md5(core_api, + data_path, + deployment1, + DATA_SIZE_IN_MB_3) + expected_test_data_checksum2, deployment_pod2 
= \
+        write_deployment_and_get_md5(core_api,
+                                     data_path,
+                                     deployment2,
+                                     DATA_SIZE_IN_MB_3)
+
+    # Step 5
+    executor = ThreadPoolExecutor(max_workers=5)
+    future = executor.submit(drain_node, core_api, evict_source_node)
+
+    # Step 6
+    volume1 = client.by_id_volume(volume1_name)
+    for replica in volume1.replicas:
+        if replica.hostId == evict_source_node.id:
+            replica_name = replica.name
+            break
+
+    replica_info = get_replica_detail(replica_name)
+    eviction_requested = replica_info["spec"]["evictionRequested"]
+    assert eviction_requested is True
+
+    nodes = client.list_node()
+    for node in nodes:
+        if node.id == evict_source_node.id:
+            assert node.autoEvicting is True
+
+    volume2 = wait_for_volume_degraded(client, volume2_name)
+
+    for replica in volume2.replicas:
+        replica_info = get_replica_detail(replica.name)
+        eviction_requested = replica_info["spec"]["evictionRequested"]
+        assert eviction_requested is False
+
+    # Step 7
+    thread_timeout = 60
+    try:
+        future.result(timeout=thread_timeout)
+        drain_complete = True
+    except TimeoutError:
+        print("drain node thread exceed timeout ({})s".format(thread_timeout))
+        drain_complete = False
+        future.cancel()
+    finally:
+        assert drain_complete is True
+
+    # Step 8
+    set_node_cordon(core_api, evict_source_node.id, False)
+
+    # Step 9
+    volume1 = client.by_id_volume(volume1_name)
+    assert len(volume1.replicas) == 1
+    for replica in volume1.replicas:
+        assert replica.hostId != evict_source_node.id
+
+    # Step 10
+    volume2 = wait_for_volume_healthy(client, volume2_name)
+    for replica in volume2.replicas:
+        assert replica.name in volume2_replicas
+
+    # Step 11
+    nodes = client.list_node()
+    for node in nodes:
+        if node.id == evict_source_node.id:
+            assert node.autoEvicting is False
+
+    # Step 12
+    def check_all_replica_eviction_request(client, volume_name, expect_result): # NOQA
+        volume = client.by_id_volume(volume_name)
+        for replica in volume.replicas:
+            replica_info = get_replica_detail(replica.name)
+            eviction_requested = replica_info["spec"]["evictionRequested"]
+            assert eviction_requested is expect_result
+
+    check_all_replica_eviction_request(client,
+                                       volume1_name,
+                                       expect_result=False)
+    check_all_replica_eviction_request(client,
+                                       volume2_name,
+                                       expect_result=False)
+
+    # Step 13
+    test_data_checksum1 = get_pod_data_md5sum(core_api,
+                                              deployment_pod1,
+                                              data_path)
+    assert expected_test_data_checksum1 == test_data_checksum1
+
+    test_data_checksum2 = get_pod_data_md5sum(core_api,
+                                              deployment_pod2,
+                                              data_path)
+    assert expected_test_data_checksum2 == test_data_checksum2
+
 @pytest.mark.skip(reason="TODO") # NOQA
 def test_drain_with_block_for_eviction_failure():