diff --git a/e2e/keywords/k8s.resource b/e2e/keywords/k8s.resource
index 24fd068b21..0b7c18a7ac 100644
--- a/e2e/keywords/k8s.resource
+++ b/e2e/keywords/k8s.resource
@@ -69,6 +69,11 @@ Drain volume of ${workload_kind} ${workload_id} volume node
 Uncordon the drained node
     uncordon_node    ${drained_node}
 
+Cordon node ${node_id}
+    ${node_name} =    get_node_by_index    ${node_id}
+    cordon_node    ${node_name}
+    check_node_is_not_schedulable    ${node_name}
+
 Cordon ${workload_kind} ${workload_id} volume node
     ${workload_name} =    generate_name_with_suffix    ${workload_kind}    ${workload_id}
     ${volume_name} =    get_workload_volume_name    ${workload_name}
diff --git a/e2e/libs/keywords/volume_keywords.py b/e2e/libs/keywords/volume_keywords.py
index 1d6f4ecde7..69f096b0fb 100644
--- a/e2e/libs/keywords/volume_keywords.py
+++ b/e2e/libs/keywords/volume_keywords.py
@@ -153,6 +153,8 @@ def wait_for_replica_rebuilding_to_complete_on_node(self, volume_name, replica_l
 
     def wait_for_replica_rebuilding_to_complete(self, volume_name):
         for node_name in self.node.list_node_names_by_role("worker"):
+            if self.node.is_node_schedulable(node_name) == "False":
+                continue
             logging(f"Waiting for volume {volume_name}'s replica on node {node_name} rebuilding completed")
             self.volume.wait_for_replica_rebuilding_complete(volume_name, node_name)
 
diff --git a/e2e/libs/node/node.py b/e2e/libs/node/node.py
index dc1491986c..17b9ed88a7 100644
--- a/e2e/libs/node/node.py
+++ b/e2e/libs/node/node.py
@@ -169,3 +169,7 @@ def check_node_schedulable(self, node_name, schedulable):
                 break
             time.sleep(self.retry_interval)
         assert node["conditions"]["Schedulable"]["status"] == schedulable
+
+    def is_node_schedulable(self, node_name):
+        node = get_longhorn_client().by_id_node(node_name)
+        return node["conditions"]["Schedulable"]["status"]
diff --git a/e2e/tests/regression/test_scheduling.robot b/e2e/tests/regression/test_scheduling.robot
new file mode 100644
index 0000000000..d0f1fdeb4a
--- /dev/null
+++ b/e2e/tests/regression/test_scheduling.robot
@@ -0,0 +1,53 @@
+*** Settings ***
+Documentation    Scheduling Test Cases
+
+Test Tags    regression
+
+Resource    ../keywords/common.resource
+Resource    ../keywords/volume.resource
+Resource    ../keywords/setting.resource
+Resource    ../keywords/deployment.resource
+Resource    ../keywords/persistentvolumeclaim.resource
+Resource    ../keywords/workload.resource
+Resource    ../keywords/k8s.resource
+
+Test Setup    Set test environment
+Test Teardown    Cleanup test resources
+
+*** Variables ***
+${LOOP_COUNT}    1
+${RETRY_COUNT}    300
+${RETRY_INTERVAL}    1
+${DATA_ENGINE}    v1
+
+*** Test Cases ***
+Test Soft Anti Affinity Scheduling
+    [Tags]    coretest
+    [Documentation]    Test that volumes with Soft Anti-Affinity work as expected.
+    ...
+    ...    With Soft Anti-Affinity, a new replica should still be scheduled on a node
+    ...    with an existing replica, which will result in a "Healthy" state but limited
+    ...    redundancy.
+    ...
+    ...    1. Create a volume and attach it to the current node.
+    ...    2. Generate and write `data` to the volume.
+    ...    3. Set `soft anti-affinity` to true.
+    ...    4. Disable the current node's scheduling.
+    ...    5. Remove the replica on the current node.
+    ...    6. Wait for the volume to complete rebuilding. The volume should have 3 replicas.
+    ...    7. Verify `data`.
+    Given Create volume 0 with    numberOfReplicas=3    dataEngine=${DATA_ENGINE}
+    And Attach volume 0
+    And Wait for volume 0 healthy
+    And Write data to volume 0
+
+    When Set setting replica-soft-anti-affinity to true
+    # Disabling scheduling on a node only sets the node status to "Disable", not "Unschedulable",
+    # so it does not alter the node["conditions"]["Schedulable"]["status"] field;
+    # only cordoning a node sets that field to "Unschedulable".
+    And Cordon node 1
+    And Delete volume 0 replica on node 1
+
+    Then Wait until volume 0 replicas rebuilding completed
+    And Wait for volume 0 healthy
+    And Check volume 0 data is intact