From acda9e35f567c9d78469d9ed9c68ae7cd86b8025 Mon Sep 17 00:00:00 2001
From: Chris <chris.chien@suse.com>
Date: Tue, 20 Feb 2024 16:48:23 +0800
Subject: [PATCH] Add test case
 test_drain_with_block_for_eviction_if_contains_last_replica_success

ref: 7521

Signed-off-by: Chris <chris.chien@suse.com>
---
 manager/integration/tests/test_node.py | 201 ++++++++++++++++++++++++-
 1 file changed, 198 insertions(+), 3 deletions(-)

diff --git a/manager/integration/tests/test_node.py b/manager/integration/tests/test_node.py
index f54b0e5335..dca1d1784b 100644
--- a/manager/integration/tests/test_node.py
+++ b/manager/integration/tests/test_node.py
@@ -53,6 +53,7 @@
 from common import create_pv_for_volume
 from common import create_pvc_for_volume, create_and_wait_deployment
 from common import get_apps_api_client, write_pod_volume_random_data
+from common import prepare_host_disk, wait_for_volume_degraded
 from backupstore import set_random_backupstore  # NOQA
 
 from concurrent.futures import ThreadPoolExecutor, TimeoutError
@@ -2849,8 +2850,22 @@ def test_drain_with_block_for_eviction_success(client, core_api, volume_name, ma
     assert expected_test_data_checksum == test_data_checksum
 
 
-@pytest.mark.skip(reason="TODO")  # NOQA
-def test_drain_with_block_for_eviction_if_contains_last_replica_success():
+def write_deployment_and_get_md5(core_api, data_path, deployment, size):  # NOQA
+    deployment_pod_names = common.get_deployment_pod_names(core_api,
+                                                           deployment)
+    write_pod_volume_random_data(core_api,
+                                 deployment_pod_names[0],
+                                 data_path,
+                                 size)
+    expected_test_data_checksum = get_pod_data_md5sum(core_api,
+                                                      deployment_pod_names[0],
+                                                      data_path)
+    return expected_test_data_checksum, deployment_pod_names[0]
+
+
+def test_drain_with_block_for_eviction_if_contains_last_replica_success(client,  # NOQA
+                                                                        core_api,  # NOQA
+                                                                        make_deployment_with_pvc):  # NOQA
     """
     Test drain completes after evicting replicas with node-drain-policy
     block-for-eviction-if-contains-last-replica
@@ -2864,7 +2879,6 @@ def test_drain_with_block_for_eviction_if_contains_last_replica_success():
     4. Write data to the volumes.
     5. Drain a node both volumes have a replica scheduled to.
     6. While the drain is ongoing:
-       - Verify that the volume with one replica never becomes degraded.
        - Verify that the volume with three replicas becomes degraded.
       - Verify that `node.status.autoEvicting == true`.
       - Optionally verify that `replica.spec.evictionRequested == true` on the
@@ -2880,6 +2894,187 @@ def test_drain_with_block_for_eviction_if_contains_last_replica_success():
     12. Verify that `replica.spec.evictionRequested == false` on all replicas.
     13. Verify the data in both volumes.
""" + apps_api = get_apps_api_client() + host_id = get_self_host_id() + nodes = client.list_node() + evict_nodes = [node for node in nodes if node.id != host_id][:2] + evict_source_node = evict_nodes[0] + + # Create extra disk on current node + node = client.by_id_node(host_id) + disks = node.disks + + disk_volume_name = 'vol-disk' + disk_volume = client.create_volume(name=disk_volume_name, + size=str(2 * Gi), + numberOfReplicas=1, + dataLocality="strict-local") + disk_volume = wait_for_volume_detached(client, disk_volume_name) + + disk_volume.attach(hostId=host_id) + disk_volume = wait_for_volume_healthy(client, disk_volume_name) + disk_path = prepare_host_disk(get_volume_endpoint(disk_volume), + disk_volume_name) + disk = {"path": disk_path, "allowScheduling": True} + + update_disk = get_update_disks(disks) + update_disk["disk1"] = disk + + node = update_node_disks(client, node.name, disks=update_disk, retry=True) + node = wait_for_disk_update(client, host_id, len(update_disk)) + assert len(node.disks) == len(update_disk) + + # Step 1 + setting = client.by_id_setting( + SETTING_NODE_DRAIN_POLICY) + client.update(setting, value="block-for-eviction-if-contains-last-replica") + + # Step 2, 3 + volume1_name = "vol-1" + volume1 = client.create_volume(name=volume1_name, + size=str(1 * Gi), + numberOfReplicas=3) + volume1 = common.wait_for_volume_detached(client, volume1_name) + + pvc1_name = volume1_name + "-pvc" + create_pv_for_volume(client, core_api, volume1, volume1_name) + create_pvc_for_volume(client, core_api, volume1, pvc1_name) + deployment1_name = volume1_name + "-dep" + deployment1 = make_deployment_with_pvc(deployment1_name, pvc1_name) + deployment1["spec"]["template"]["spec"]["nodeSelector"] \ + = {"kubernetes.io/hostname": host_id} + + create_and_wait_deployment(apps_api, deployment1) + + volume1 = wait_for_volume_healthy(client, volume1_name) + # Make volume 1 replica only located on evict_source_node + volume1.updateReplicaCount(replicaCount=1) + for replica in volume1.replicas: + if replica.hostId != evict_source_node.id: + volume1.replicaRemove(name=replica.name) + + # volume 2 attach to current with 3 replicas + volume2_name = "vol-2" + volume2 = client.create_volume(name=volume2_name, + size=str(1 * Gi), + numberOfReplicas=3) + volume2 = common.wait_for_volume_detached(client, volume2_name) + + pvc2_name = volume2_name + "-pvc" + create_pv_for_volume(client, core_api, volume2, volume2_name) + create_pvc_for_volume(client, core_api, volume2, pvc2_name) + deployment2_name = volume2_name + "-dep" + deployment2 = make_deployment_with_pvc(deployment2_name, pvc2_name) + deployment2["spec"]["template"]["spec"]["nodeSelector"] \ + = {"kubernetes.io/hostname": host_id} + + create_and_wait_deployment(apps_api, deployment2) + + volume2_replicas = [] + volume2 = client.by_id_volume(volume2_name) + for replica in volume2.replicas: + volume2_replicas.append(replica.name) + + # Step 4 + data_path = '/data/test' + expected_test_data_checksum1, deployment_pod1 = \ + write_deployment_and_get_md5(core_api, + data_path, + deployment1, + DATA_SIZE_IN_MB_3) + expected_test_data_checksum2, deployment_pod2 = \ + write_deployment_and_get_md5(core_api, + data_path, + deployment2, + DATA_SIZE_IN_MB_3) + + # Step 5 + executor = ThreadPoolExecutor(max_workers=5) + future = executor.submit(drain_node, core_api, evict_source_node) + + # Step 6 + volume1 = client.by_id_volume(volume1_name) + for replica in volume1.replicas: + if replica.hostId == evict_source_node.id: + replica_name = replica.name + break 
+
+    replica_info = get_replica_detail(replica_name)
+    eviction_requested = replica_info["spec"]["evictionRequested"]
+    assert eviction_requested is True
+
+    nodes = client.list_node()
+    for node in nodes:
+        if node.id == evict_source_node.id:
+            assert node.autoEvicting is True
+
+    volume2 = wait_for_volume_degraded(client, volume2_name)
+
+    for replica in volume2.replicas:
+        replica_info = get_replica_detail(replica.name)
+        eviction_requested = replica_info["spec"]["evictionRequested"]
+        assert eviction_requested is False
+
+    # Step 7
+    thread_timeout = 60
+    try:
+        future.result(timeout=thread_timeout)
+        drain_complete = True
+    except TimeoutError:
+        print("drain node thread exceeded {}s timeout".format(thread_timeout))
+        drain_complete = False
+        future.cancel()
+    finally:
+        assert drain_complete is True
+
+    # Step 8
+    set_node_cordon(core_api, evict_source_node.id, False)
+
+    # Step 9
+    volume1 = client.by_id_volume(volume1_name)
+    assert len(volume1.replicas) == 1
+    for replica in volume1.replicas:
+        assert replica.hostId != evict_source_node.id
+
+    # Step 10
+    # Verify volume2 replicas have not moved by checking the replica
+    # names recorded before the node drain
+    volume2 = wait_for_volume_healthy(client, volume2_name)
+    for replica in volume2.replicas:
+        assert replica.name in volume2_replicas
+
+    # Step 11
+    nodes = client.list_node()
+    for node in nodes:
+        if node.id == evict_source_node.id:
+            assert node.autoEvicting is False
+
+    # Step 12
+    def check_all_replica_eviction_request(client, volume_name, expect_result):  # NOQA
+        volume = client.by_id_volume(volume_name)
+        for replica in volume.replicas:
+            replica_info = get_replica_detail(replica.name)
+            eviction_requested = replica_info["spec"]["evictionRequested"]
+            assert eviction_requested is expect_result
+
+    check_all_replica_eviction_request(client,
+                                       volume1_name,
+                                       expect_result=False)
+    check_all_replica_eviction_request(client,
+                                       volume2_name,
+                                       expect_result=False)
+
+    # Step 13
+    test_data_checksum1 = get_pod_data_md5sum(core_api,
+                                              deployment_pod1,
+                                              data_path)
+    assert expected_test_data_checksum1 == test_data_checksum1
+
+    test_data_checksum2 = get_pod_data_md5sum(core_api,
+                                              deployment_pod2,
+                                              data_path)
+    assert expected_test_data_checksum2 == test_data_checksum2
+
+
 @pytest.mark.skip(reason="TODO")  # NOQA
 def test_drain_with_block_for_eviction_failure():