diff --git a/e2e/keywords/common.resource b/e2e/keywords/common.resource index ee0c548159..e38dfa5445 100644 --- a/e2e/keywords/common.resource +++ b/e2e/keywords/common.resource @@ -1,6 +1,7 @@ *** Settings *** Documentation Common keywords +Library Collections Library OperatingSystem Library ../libs/keywords/common_keywords.py Library ../libs/keywords/deployment_keywords.py @@ -34,11 +35,14 @@ Set test environment ${host_provider}= Get Environment Variable HOST_PROVIDER ${disk_path}= Set Variable If "${host_provider}" == "harvester" /dev/vdc /dev/xvdh FOR ${worker_node} IN @{worker_nodes} - add_disk ${worker_node} block ${disk_path} + add_disk block-disk ${worker_node} block ${disk_path} END Cleanup test resources - Run keyword And Ignore Error power_on_node_by_name ${powered_off_node} + FOR ${powered_off_node} IN @{powered_off_nodes} + Run keyword And Ignore Error power_on_node_by_name ${powered_off_node} + Remove Values From List ${powered_off_nodes} ${powered_off_node} + END uncordon_all_nodes cleanup_control_plane_network_latency reset_node_schedule diff --git a/e2e/keywords/host.resource b/e2e/keywords/host.resource index e686aec164..07483b2132 100644 --- a/e2e/keywords/host.resource +++ b/e2e/keywords/host.resource @@ -1,6 +1,7 @@ *** Settings *** Documentation Physical Node Keywords +Library Collections Library ../libs/keywords/common_keywords.py Library ../libs/keywords/host_keywords.py Library ../libs/keywords/network_keywords.py @@ -34,11 +35,13 @@ Restart cluster reboot_all_nodes setup_control_plane_network_latency -Power on off node - Run keyword And Ignore Error - ... 
power_on_node_by_name ${powered_off_node} +Power on off nodes + FOR ${powered_off_node} IN @{powered_off_nodes} + Run keyword And Ignore Error power_on_node_by_name ${powered_off_node} + Remove Values From List ${powered_off_nodes} ${powered_off_node} + END Power off node ${node_id} ${powered_off_node} = get_node_by_index ${node_id} + Append to list ${powered_off_nodes} ${powered_off_node} power_off_node_by_name ${powered_off_node} - Set Test Variable ${powered_off_node} \ No newline at end of file diff --git a/e2e/keywords/node.resource b/e2e/keywords/node.resource index 60cdcde8ae..16f68be374 100644 --- a/e2e/keywords/node.resource +++ b/e2e/keywords/node.resource @@ -3,12 +3,13 @@ Documentation Node Keywords Library ../libs/keywords/common_keywords.py Library ../libs/keywords/node_keywords.py +Library ../libs/keywords/volume_keywords.py *** Keywords *** Add ${disk_type} type disk ${disk_path} for all worker nodes ${worker_nodes}= get_worker_nodes FOR ${worker_node} IN @{worker_nodes} - add_disk ${worker_node} ${disk_type} ${disk_path} + add_disk ${disk_type}-disk ${worker_node} ${disk_type} ${disk_path} END Set node ${node_id} with @@ -31,3 +32,32 @@ Disable node ${node_id} default disk Enable node ${node_id} default disk ${node_name} = get_node_by_index ${node_id} enable_default_disk ${node_name} + +Disable disk ${disk_id} scheduling on node ${node_id} + ${node_name} = get_node_by_index ${node_id} + ${disk_name} = generate_name_with_suffix disk ${disk_id} + disable_disk ${node_name} ${disk_name} + +Enable disk ${disk_id} scheduling on node ${node_id} + ${node_name} = get_node_by_index ${node_id} + ${disk_name} = generate_name_with_suffix disk ${disk_id} + enable_disk ${node_name} ${disk_name} + +Check node ${node_id} disk ${disk_id} is in pressure + ${node_name} = get_node_by_index ${node_id} + ${disk_name} = generate_name_with_suffix disk ${disk_id} + wait_for_disk_in_pressure ${node_name} ${disk_name} + +Check node ${node_id} disk ${disk_id} is not in 
pressure + ${node_name} = get_node_by_index ${node_id} + ${disk_name} = generate_name_with_suffix disk ${disk_id} + wait_for_disk_not_in_pressure ${node_name} ${disk_name} + +Create ${disk_size} Gi disk ${disk_id} on node ${node_id} + ${node_name} = get_node_by_index ${node_id} + ${disk_name} = generate_name_with_suffix disk ${disk_id} + create_volume ${disk_name} size=${disk_size}Gi numberOfReplicas=1 + attach_volume ${disk_name} ${node_name} + wait_for_volume_healthy ${disk_name} + ${mount_path} = mount_disk ${disk_name} ${node_name} + add_disk ${disk_name} ${node_name} filesystem ${mount_path} diff --git a/e2e/keywords/replica.resource b/e2e/keywords/replica.resource index a1df20eec3..7e1b531e7b 100644 --- a/e2e/keywords/replica.resource +++ b/e2e/keywords/replica.resource @@ -3,8 +3,16 @@ Documentation Longhorn replica related keywords Library ../libs/keywords/common_keywords.py Library ../libs/keywords/replica_keywords.py +Library ../libs/keywords/node_keywords.py *** Keywords *** Volume ${volume_id} replica ${setting_name} should be ${setting_value} ${volume_name} = generate_name_with_suffix volume ${volume_id} validate_replica_setting ${volume_name} ${setting_name} ${setting_value} + +There should be replicas running on node ${node_id} disk ${disk_id} + ${node_name} = get_node_by_index ${node_id} + ${disk_name} = generate_name_with_suffix disk ${disk_id} + ${disk_uuid} = get_disk_uuid ${node_name} ${disk_name} + ${replicas} = get_replicas volume_name= node_name=${node_name} disk_uuid=${disk_uuid} + Should Be True len(${replicas}) > 0 diff --git a/e2e/keywords/sharemanager.resource b/e2e/keywords/sharemanager.resource index 3e8026de2d..c5d4d51745 100644 --- a/e2e/keywords/sharemanager.resource +++ b/e2e/keywords/sharemanager.resource @@ -26,6 +26,11 @@ Delete sharemanager pod of deployment ${deployment_id} and wait for recreation ${volume_name} = get_workload_volume_name ${deployment_name} delete_sharemanager_pod_and_wait_for_recreation ${volume_name} +Wait 
for sharemanager pod of deployment ${deployment_id} restart + ${deployment_name} = generate_name_with_suffix deployment ${deployment_id} + ${volume_name} = get_workload_volume_name ${deployment_name} + wait_for_sharemanager_pod_restart ${volume_name} + Wait for sharemanager pod of deployment ${deployment_id} running ${deployment_name} = generate_name_with_suffix deployment ${deployment_id} ${volume_name} = get_workload_volume_name ${deployment_name} diff --git a/e2e/keywords/statefulset.resource b/e2e/keywords/statefulset.resource index 6ebf77bda8..5b679c230b 100644 --- a/e2e/keywords/statefulset.resource +++ b/e2e/keywords/statefulset.resource @@ -15,6 +15,14 @@ Create statefulset ${statefulset_id} using ${volume_type} volume with ${sc_name} ${statefulset_name} = generate_name_with_suffix statefulset ${statefulset_id} create_statefulset ${statefulset_name} ${volume_type} ${sc_name} +Create statefulset ${statefulset_id} using ${volume_type} volume with ${sc_name} storageclass and size ${size} Mi + ${statefulset_name} = generate_name_with_suffix statefulset ${statefulset_id} + create_statefulset ${statefulset_name} ${volume_type} ${sc_name} ${size}Mi + +Create statefulset ${statefulset_id} using ${volume_type} volume with ${sc_name} storageclass and size ${size} Gi + ${statefulset_name} = generate_name_with_suffix statefulset ${statefulset_id} + create_statefulset ${statefulset_name} ${volume_type} ${sc_name} ${size}Gi + Scale statefulset ${statefulset_id} to ${replicaset_size} ${statefulset_name} = generate_name_with_suffix statefulset ${statefulset_id} scale_statefulset ${statefulset_name} ${replicaset_size} diff --git a/e2e/keywords/variables.resource b/e2e/keywords/variables.resource new file mode 100644 index 0000000000..c213dcabc4 --- /dev/null +++ b/e2e/keywords/variables.resource @@ -0,0 +1,13 @@ +*** Settings *** +Documentation Global Variables + +*** Variables *** +${LOOP_COUNT} 1 +${RETRY_COUNT} 300 +${RETRY_INTERVAL} 1 +${VOLUME_TYPE} RWO 
+${CONTROL_PLANE_NODE_NETWORK_LATENCY_IN_MS} 0 +${RWX_VOLUME_FAST_FAILOVER} false +${DATA_ENGINE} v1 + +@{powered_off_nodes}= diff --git a/e2e/keywords/volume.resource b/e2e/keywords/volume.resource index c810e193e2..921de837c3 100644 --- a/e2e/keywords/volume.resource +++ b/e2e/keywords/volume.resource @@ -167,14 +167,19 @@ Check all replicas of volume ${volume_id} kept in error Sleep ${RETRY_INTERVAL} END -Wait for volume ${volume_id} migration ready +Wait for volume ${volume_id} migration to be ready ${volume_name} = generate_name_with_suffix volume ${volume_id} - wait_for_volume_migration_ready ${volume_name} + wait_for_volume_migration_to_be_ready ${volume_name} -Wait for volume ${volume_id} migrated to node ${node_id} +Wait for volume ${volume_id} to migrate to node ${node_id} ${volume_name} = generate_name_with_suffix volume ${volume_id} ${node_name} = get_node_by_index ${node_id} - wait_for_volume_migration_completed ${volume_name} ${node_name} + wait_for_volume_migration_complete ${volume_name} ${node_name} + +Wait for volume ${volume_id} to stay on node ${node_id} + ${volume_name} = generate_name_with_suffix volume ${volume_id} + ${node_name} = get_node_by_index ${node_id} + wait_for_volume_migration_to_rollback ${volume_name} ${node_name} Wait for volume ${volume_id} restoration from backup ${backup_id} completed ${volume_name} = generate_name_with_suffix volume ${volume_id} @@ -247,6 +252,44 @@ Check volume ${volume_id} replica on node ${node_id} exist ${replica_name} get_replica_name_on_node ${volume_name} ${node_name} Should Not Be Equal ${replica_name} ${None} +Volume ${volume_id} should have ${expected_replica_count} replicas running + ${volume_name} = generate_name_with_suffix volume ${volume_id} + ${replica_count} = wait_for_replica_count ${volume_name} node_name= replica_count=${expected_replica_count} + +Volume ${volume_id} should have ${expected_replica_count} replicas running on node ${node_id} + ${volume_name} = generate_name_with_suffix 
volume ${volume_id} + ${node_name} = get_node_by_index ${node_id} + ${replica_count} = wait_for_replica_count ${volume_name} node_name=${node_name} replica_count=${expected_replica_count} + Set Test Variable ${volume_name} + Set Test Variable ${node_name} + Set Test Variable ${replica_count} + +Volume ${volume_id} should have replicas running on node ${node_id} + ${volume_name} = generate_name_with_suffix volume ${volume_id} + ${node_name} = get_node_by_index ${node_id} + ${replica_count} = wait_for_replica_count ${volume_name} node_name=${node_name} + Set Test Variable ${volume_name} + Set Test Variable ${node_name} + Set Test Variable ${replica_count} + +Volume ${volume_id} should have ${expected_replica_count} replicas running on node ${node_id} and no additional scheduling occurs + Volume ${volume_id} should have ${expected_replica_count} replicas running on node ${node_id} + FOR ${i} IN RANGE 3 + Log to console Ensuring there's no additional scheduling for node ${node_name} ... (${i}) + ${new_replica_count} = wait_for_replica_count ${volume_name} node_name=${node_name} replica_count=${expected_replica_count} + Should Be Equal As Integers ${replica_count} ${new_replica_count} + Sleep 5 + END + +Volume ${volume_id} should have replicas running on node ${node_id} and no additional scheduling occurs + Volume ${volume_id} should have replicas running on node ${node_id} + FOR ${i} IN RANGE 3 + Log to console Ensuring there's no additional scheduling for node ${node_name} ... 
(${i}) + ${new_replica_count} = wait_for_replica_count ${volume_name} node_name=${node_name} + Should Be Equal As Integers ${replica_count} ${new_replica_count} + Sleep 5 + END + Check volume ${volume_id} data is intact ${volume_name} = generate_name_with_suffix volume ${volume_id} check_data_checksum ${volume_name} diff --git a/e2e/keywords/workload.resource b/e2e/keywords/workload.resource index c3eb835b6c..18bae1c0e1 100644 --- a/e2e/keywords/workload.resource +++ b/e2e/keywords/workload.resource @@ -8,6 +8,7 @@ Library ../libs/keywords/volume_keywords.py Library ../libs/keywords/workload_keywords.py Library ../libs/keywords/host_keywords.py Library ../libs/keywords/k8s_keywords.py +Library ../libs/keywords/replica_keywords.py *** Keywords *** Create pod ${pod_id} using volume ${volume_id} @@ -45,9 +46,18 @@ Power off volume node of ${workload_kind} ${workload_id} ${workload_name} = generate_name_with_suffix ${workload_kind} ${workload_id} ${volume_name} = get_workload_volume_name ${workload_name} ${powered_off_node} = get_volume_node ${volume_name} + Append to list ${powered_off_nodes} ${powered_off_node} ${last_volume_node} = get_volume_node ${volume_name} power_off_volume_node ${volume_name} - Set Test Variable ${powered_off_node} + Set Test Variable ${last_volume_node} + +Power off volume node of ${workload_kind} ${workload_id} without waiting + ${workload_name} = generate_name_with_suffix ${workload_kind} ${workload_id} + ${volume_name} = get_workload_volume_name ${workload_name} + ${powered_off_node} = get_volume_node ${volume_name} + Append to list ${powered_off_nodes} ${powered_off_node} + ${last_volume_node} = get_volume_node ${volume_name} + power_off_volume_node ${volume_name} waiting=False Set Test Variable ${last_volume_node} Reboot volume node of ${workload_kind} ${workload_id} @@ -213,3 +223,12 @@ Delete Longhorn ${workload_kind} ${workload_name} pod ${pod_name} = get_workload_pod_name ${workload_name} longhorn-system Log ${pod_name} delete_pod 
${pod_name} longhorn-system + +Check volume of ${workload_kind} ${workload_id} replica on node ${node_id} disk ${disk_id} + ${workload_name} = generate_name_with_suffix ${workload_kind} ${workload_id} + ${volume_name} = get_workload_volume_name ${workload_name} + ${node_name} = get_node_by_index ${node_id} + ${disk_name} = generate_name_with_suffix disk ${disk_id} + ${disk_uuid} = get_disk_uuid ${node_name} ${disk_name} + ${replicas} = get_replicas volume_name=${volume_name} node_name=${node_name} disk_uuid=${disk_uuid} + Should Be True len(${replicas}) > 0 diff --git a/e2e/libs/engine/crd.py b/e2e/libs/engine/crd.py index a38f5f0b45..fdaa94f627 100644 --- a/e2e/libs/engine/crd.py +++ b/e2e/libs/engine/crd.py @@ -3,6 +3,7 @@ from kubernetes import client from engine.base import Base +from utility.utility import logging class CRD(Base): @@ -10,14 +11,13 @@ def __init__(self): self.obj_api = client.CustomObjectsApi() def get_engines(self, volume_name, node_name=None): - if volume_name == "" or node_name == "": - logging.info("getting all engines") + if not node_name: + logging(f"Getting all engines of {volume_name}") else: - logging.info( - f"getting the volume {volume_name} on node {node_name} engine") + logging(f"Getting engine of volume {volume_name} on node {node_name}") label_selector = [] - if volume_name != "": + if volume_name: label_selector.append(f"longhornvolume={volume_name}") if node_name: label_selector.append(f"longhornnode={node_name}") @@ -31,20 +31,19 @@ def get_engines(self, volume_name, node_name=None): ) if api_response == "" or api_response is None: - raise Exception(f"failed to get the volume {volume_name} engine") + raise Exception(f"failed to get volume {volume_name} engine") engines = api_response["items"] if len(engines) == 0: - logging.warning(f"cannot get the volume {volume_name} engines") + logging(f"Cannot get volume {volume_name} engines") return engines def delete_engine(self, volume_name, node_name): if volume_name == "" or 
node_name == "": - logging.info("deleting all engines") + logging("deleting all engines") else: - logging.info( - f"delete the volume {volume_name} on node {node_name} engine") + logging(f"delete the volume {volume_name} on node {node_name} engine") for engine in self.get_engine(volume_name, node_name): engine_name = engine['metadata']['name'] @@ -55,7 +54,7 @@ def delete_engine(self, volume_name, node_name): plural="engines", name=engine_name ) - logging.info("finished delete engines") + logging("finished delete engines") def validate_engine_setting(self, volume_name, setting_name, value): engines = self.get_engines(volume_name) diff --git a/e2e/libs/host/aws.py b/e2e/libs/host/aws.py index 2f2148b7f4..bcce99086d 100644 --- a/e2e/libs/host/aws.py +++ b/e2e/libs/host/aws.py @@ -68,14 +68,15 @@ def reboot_all_worker_nodes(self, shut_down_time_in_sec=NODE_REBOOT_DOWN_TIME_SE waiter.wait(InstanceIds=instance_ids) logging(f"Started instances") - def power_off_node(self, power_off_node_name): + def power_off_node(self, power_off_node_name, waiting=True): instance_ids = [self.mapping[power_off_node_name]] resp = self.aws_client.stop_instances(InstanceIds=instance_ids, Force=True) assert resp['ResponseMetadata']['HTTPStatusCode'] == 200, f"Failed to stop instances {instance_ids} response: {resp}" logging(f"Stopping instances {instance_ids}") - waiter = self.aws_client.get_waiter('instance_stopped') - waiter.wait(InstanceIds=instance_ids) - logging(f"Stopped instances") + if waiting: + waiter = self.aws_client.get_waiter('instance_stopped') + waiter.wait(InstanceIds=instance_ids) + logging(f"Stopped instances") def power_on_node(self, power_on_node_name): instance_ids = [self.mapping[power_on_node_name]] diff --git a/e2e/libs/host/base.py b/e2e/libs/host/base.py index c9a30bc463..323cc4867e 100644 --- a/e2e/libs/host/base.py +++ b/e2e/libs/host/base.py @@ -23,7 +23,7 @@ def reboot_all_worker_nodes(self, shut_down_time_in_sec): return NotImplemented @abstractmethod - def 
power_off_node(self, node_name): + def power_off_node(self, node_name, waiting): return NotImplemented @abstractmethod diff --git a/e2e/libs/host/harvester.py b/e2e/libs/host/harvester.py index 2a1e26a772..f856cd983d 100644 --- a/e2e/libs/host/harvester.py +++ b/e2e/libs/host/harvester.py @@ -53,7 +53,7 @@ def reboot_all_worker_nodes(self, shut_down_time_in_sec): for node_name in node_names: self.power_on_node(node_name) - def power_off_node(self, node_name): + def power_off_node(self, node_name, waiting=True): vm_id = self.mapping[node_name] url = f"{self.url}/{vm_id}" @@ -68,6 +68,9 @@ def power_off_node(self, node_name): logging(f"Stopping vm failed with error {e}") logging(f"Stopping vm {vm_id}") + if not waiting: + return + stopped = False for i in range(self.retry_count): logging(f"Waiting for vm {vm_id} stopped ... ({i})") diff --git a/e2e/libs/keywords/host_keywords.py b/e2e/libs/keywords/host_keywords.py index 99d6cc4a67..92a3aee5e7 100644 --- a/e2e/libs/keywords/host_keywords.py +++ b/e2e/libs/keywords/host_keywords.py @@ -46,10 +46,10 @@ def reboot_node_by_name(self, node_name, downtime_in_min=1): logging(f'Rebooting node {node_name} with downtime {reboot_down_time_sec} seconds') self.host.reboot_node(node_name, reboot_down_time_sec) - def power_off_volume_node(self, volume_name): + def power_off_volume_node(self, volume_name, waiting=True): node_id = self.volume_keywords.get_node_id_by_replica_locality(volume_name, "volume node") - logging(f'Power off volume {volume_name} node {node_id}') - self.host.power_off_node(node_id) + logging(f'Power off volume {volume_name} node {node_id} with waiting = {waiting}') + self.host.power_off_node(node_id, waiting) def power_on_node_by_name(self, node_name): self.host.power_on_node(node_name) diff --git a/e2e/libs/keywords/node_keywords.py b/e2e/libs/keywords/node_keywords.py index 832834f6c6..9e120de788 100644 --- a/e2e/libs/keywords/node_keywords.py +++ b/e2e/libs/keywords/node_keywords.py @@ -11,10 +11,14 @@ def 
__init__(self): def list_node_names_by_role(self, role): return self.node.list_node_names_by_role(role) - def add_disk(self, node_name, type, path): - logging(f"Adding {type} type disk {path} to node {node_name}") + def mount_disk(self, disk_name, node_name): + logging(f"Mount device /dev/longhorn/{disk_name} on node {node_name}") + return self.node.mount_disk(disk_name, node_name) + + def add_disk(self, disk_name, node_name, type, path): + logging(f"Adding {type} type disk {disk_name} {path} to node {node_name}") disk = { - f"{type}-disk": { + f"{disk_name}": { "diskType": type, "path": path, "allowScheduling": True @@ -38,6 +42,13 @@ def set_node(self, node_name, allowScheduling=True, evictionRequested=False): logging(f"Setting node {node_name}; scheduling={allowScheduling}; evictionRequested={evictionRequested}") self.node.set_node(node_name, allowScheduling, evictionRequested) + def disable_disk(self, node_name, disk_name): + self.node.set_disk_scheduling(node_name, disk_name, allowScheduling=False) + + def enable_disk(self, node_name, disk_name): + self.node.set_disk_scheduling(node_name, disk_name, allowScheduling=True) + + def disable_node_scheduling(self, node_name): self.node.set_node_scheduling(node_name, allowScheduling=False) @@ -52,3 +63,15 @@ def reset_node_schedule(self): def check_node_is_not_schedulable(self, node_name): self.node.check_node_schedulable(node_name, schedulable="False") + + def is_disk_in_pressure(self, node_name, disk_name): + return self.node.is_disk_in_pressure(node_name, disk_name) + + def wait_for_disk_in_pressure(self, node_name, disk_name): + self.node.wait_for_disk_in_pressure(node_name, disk_name) + + def wait_for_disk_not_in_pressure(self, node_name, disk_name): + self.node.wait_for_disk_not_in_pressure(node_name, disk_name) + + def get_disk_uuid(self, node_name, disk_name): + return self.node.get_disk_uuid(node_name, disk_name) diff --git a/e2e/libs/keywords/replica_keywords.py b/e2e/libs/keywords/replica_keywords.py index 
a9f0966c2f..f02b757c6b 100644 --- a/e2e/libs/keywords/replica_keywords.py +++ b/e2e/libs/keywords/replica_keywords.py @@ -8,3 +8,6 @@ def __init__(self): def validate_replica_setting(self, volume_name, setting_name, value): return self.replica.validate_replica_setting(volume_name, setting_name, value) + + def get_replicas(self, volume_name=None, node_name=None, disk_uuid=None): + return self.replica.get(volume_name, node_name, disk_uuid) diff --git a/e2e/libs/keywords/sharemanager_keywords.py b/e2e/libs/keywords/sharemanager_keywords.py index b541f5b26b..7819c60df5 100644 --- a/e2e/libs/keywords/sharemanager_keywords.py +++ b/e2e/libs/keywords/sharemanager_keywords.py @@ -67,12 +67,32 @@ def delete_sharemanager_pod_and_wait_for_recreation(self, name): assert False, f"sharemanager pod {sharemanager_pod_name} not recreated" + def wait_for_sharemanager_pod_restart(self, name): + sharemanager_pod_name = "share-manager-" + name + sharemanager_pod = get_pod(sharemanager_pod_name, "longhorn-system") + last_creation_time = sharemanager_pod.metadata.creation_timestamp + + retry_count, retry_interval = get_retry_count_and_interval() + for i in range(retry_count): + logging(f"Waiting for sharemanager for volume {name} restart ... 
({i})") + time.sleep(retry_interval) + sharemanager_pod = get_pod(sharemanager_pod_name, "longhorn-system") + if sharemanager_pod == None: + continue + creation_time = sharemanager_pod.metadata.creation_timestamp + logging(f"Getting new sharemanager which is created at {creation_time}, and old one is created at {last_creation_time}") + if creation_time > last_creation_time: + return + + assert False, f"sharemanager pod {sharemanager_pod_name} isn't restarted" + def wait_for_share_manager_pod_running(self, name): sharemanager_pod_name = "share-manager-" + name retry_count, retry_interval = get_retry_count_and_interval() for i in range(retry_count): sharemanager_pod = get_pod(sharemanager_pod_name, "longhorn-system") + logging(f"Waiting for sharemanager for volume {name} running, currently {sharemanager_pod.status.phase} ... ({i})") if sharemanager_pod.status.phase == "Running": return diff --git a/e2e/libs/keywords/statefulset_keywords.py b/e2e/libs/keywords/statefulset_keywords.py index 4baa61ab61..8b3256c7eb 100644 --- a/e2e/libs/keywords/statefulset_keywords.py +++ b/e2e/libs/keywords/statefulset_keywords.py @@ -28,9 +28,9 @@ def cleanup_statefulsets(self): for statefulset in statefulsets.items: self.delete_statefulset(statefulset.metadata.name) - def create_statefulset(self, name, volume_type="RWO", sc_name="longhorn"): - logging(f'Creating {volume_type} statefulset {name} with {sc_name} storageclass') - create_statefulset(name, volume_type, sc_name) + def create_statefulset(self, name, volume_type="RWO", sc_name="longhorn", size=None): + logging(f'Creating {volume_type} statefulset {name} with {sc_name} storageclass and size = {size}') + create_statefulset(name, volume_type, sc_name, size) def delete_statefulset(self, name): logging(f'Deleting statefulset {name}') diff --git a/e2e/libs/keywords/volume_keywords.py b/e2e/libs/keywords/volume_keywords.py index 572e29e402..369be72668 100644 --- a/e2e/libs/keywords/volume_keywords.py +++ 
b/e2e/libs/keywords/volume_keywords.py @@ -236,6 +236,9 @@ def wait_for_replica_running(self, volume_name, node_name): def get_replica_name_on_node(self, volume_name, node_name): return self.volume.get_replica_name_on_node(volume_name, node_name) + def wait_for_replica_count(self, volume_name, node_name=None, replica_count=None): + return self.volume.wait_for_replica_count(volume_name, node_name, replica_count) + def wait_for_replica_rebuilding_to_stop_on_node(self, volume_name, replica_locality): node_id = self.get_node_id_by_replica_locality(volume_name, replica_locality) retry_count, retry_interval = get_retry_count_and_interval() @@ -268,13 +271,17 @@ def wait_for_volume_faulted(self, volume_name): logging(f'Waiting for volume {volume_name} to be in faulted') self.volume.wait_for_volume_faulted(volume_name) - def wait_for_volume_migration_ready(self, volume_name): + def wait_for_volume_migration_to_be_ready(self, volume_name): logging(f'Waiting for volume {volume_name} migration to be ready') - self.volume.wait_for_volume_migration_ready(volume_name) + self.volume.wait_for_volume_migration_to_be_ready(volume_name) + + def wait_for_volume_migration_complete(self, volume_name, node_name): + logging(f'Waiting for volume {volume_name} migration to node {node_name} complete') + self.volume.wait_for_volume_migration_complete(volume_name, node_name) - def wait_for_volume_migration_completed(self, volume_name, node_name): - logging(f'Waiting for volume {volume_name} migration to node {node_name} completed') - self.volume.wait_for_volume_migration_completed(volume_name, node_name) + def wait_for_volume_migration_to_rollback(self, volume_name, node_name): + logging(f'Waiting for volume {volume_name} migration to rollback to node {node_name}') + self.volume.wait_for_volume_migration_to_rollback(volume_name, node_name) def wait_for_volume_restoration_completed(self, volume_name, backup_name): logging(f'Waiting for volume {volume_name} restoration from {backup_name} 
completed') diff --git a/e2e/libs/node/node.py b/e2e/libs/node/node.py index 636706d492..d7ed523427 100644 --- a/e2e/libs/node/node.py +++ b/e2e/libs/node/node.py @@ -1,5 +1,6 @@ import time import re +import os from kubernetes import client from robot.libraries.BuiltIn import BuiltIn @@ -9,15 +10,27 @@ from utility.utility import get_longhorn_client from utility.utility import get_retry_count_and_interval from utility.utility import logging - +from node_exec import NodeExec class Node: DEFAULT_DISK_PATH = "/var/lib/longhorn/" + DEFAULT_VOLUME_PATH = "/dev/longhorn/" def __init__(self): self.retry_count, self.retry_interval = get_retry_count_and_interval() + def mount_disk(self, disk_name, node_name): + mount_path = os.path.join(self.DEFAULT_DISK_PATH, disk_name) + device_path = os.path.join(self.DEFAULT_VOLUME_PATH, disk_name) + cmd = f"mkdir -p {mount_path}" + res = NodeExec(node_name).issue_cmd(cmd) + cmd = f"mkfs.ext4 {device_path}" + res = NodeExec(node_name).issue_cmd(cmd) + cmd = f"mount {device_path} {mount_path}" + res = NodeExec(node_name).issue_cmd(cmd) + return mount_path + def update_disks(self, node_name, disks): node = get_longhorn_client().by_id_node(node_name) for _ in range(self.retry_count): @@ -37,9 +50,9 @@ def wait_for_disk_update(self, node_name, disk_num): disks = node.disks for d in disks: if disks[d]["diskUUID"] == "" or \ - not disks[d]["conditions"] or \ - disks[d]["conditions"]["Ready"]["status"] != "True" or \ - disks[d]["conditions"]["Schedulable"]["status"] != "True": + (disks[d]["allowScheduling"] and + (not disks[d]["conditions"] or + disks[d]["conditions"]["Ready"]["status"] != "True")): all_updated = False break if all_updated: @@ -59,6 +72,10 @@ def reset_disks(self, node_name): for disk_name, disk in iter(node.disks.items()): if disk.path != self.DEFAULT_DISK_PATH: disk.allowScheduling = False + logging(f"Disabling scheduling disk {disk_name} on node {node_name}") + else: + disk.allowScheduling = True + logging(f"Enabling 
scheduling disk {disk_name} on node {node_name}") self.update_disks(node_name, node.disks) disks = {} @@ -66,8 +83,9 @@ def reset_disks(self, node_name): if disk.path == self.DEFAULT_DISK_PATH: disks[disk_name] = disk disk.allowScheduling = True + logging(f"Keeping disk {disk_name} on node {node_name}") else: - logging(f"Try to remove disk {disk_name} from node {node_name}") + logging(f"Removing disk {disk_name} from node {node_name}") self.update_disks(node_name, disks) def is_accessing_node_by_index(self, node): @@ -183,6 +201,14 @@ def set_default_disk_scheduling(self, node_name, allowScheduling): disk.allowScheduling = allowScheduling self.update_disks(node_name, node.disks) + def set_disk_scheduling(self, node_name, disk_name, allowScheduling): + node = get_longhorn_client().by_id_node(node_name) + + for name, disk in iter(node.disks.items()): + if name == disk_name: + disk.allowScheduling = allowScheduling + self.update_disks(node_name, node.disks) + def check_node_schedulable(self, node_name, schedulable): node = get_longhorn_client().by_id_node(node_name) for _ in range(self.retry_count): @@ -194,3 +220,29 @@ def check_node_schedulable(self, node_name, schedulable): def is_node_schedulable(self, node_name): node = get_longhorn_client().by_id_node(node_name) return node["conditions"]["Schedulable"]["status"] + + def is_disk_in_pressure(self, node_name, disk_name): + node = get_longhorn_client().by_id_node(node_name) + return node["disks"][disk_name]["conditions"]["Schedulable"]["reason"] == "DiskPressure" + + def wait_for_disk_in_pressure(self, node_name, disk_name): + for i in range(self.retry_count): + is_in_pressure = self.is_disk_in_pressure(node_name, disk_name) + logging(f"Waiting for disk {disk_name} on node {node_name} in pressure ... 
({i})") + if is_in_pressure: + break + time.sleep(self.retry_interval) + assert self.is_disk_in_pressure(node_name, disk_name), f"Waiting for node {node_name} disk {disk_name} in pressure failed: {get_longhorn_client().by_id_node(node_name)}" + + def wait_for_disk_not_in_pressure(self, node_name, disk_name): + for i in range(self.retry_count): + is_in_pressure = self.is_disk_in_pressure(node_name, disk_name) + logging(f"Waiting for disk {disk_name} on node {node_name} not in pressure ... ({i})") + if not is_in_pressure: + break + time.sleep(self.retry_interval) + assert not self.is_disk_in_pressure(node_name, disk_name), f"Waiting for node {node_name} disk {disk_name} not in pressure failed: {get_longhorn_client().by_id_node(node_name)}" + + def get_disk_uuid(self, node_name, disk_name): + node = get_longhorn_client().by_id_node(node_name) + return node["disks"][disk_name]["diskUUID"] diff --git a/e2e/libs/replica/base.py b/e2e/libs/replica/base.py index a3b6681af6..c9ce12c01c 100644 --- a/e2e/libs/replica/base.py +++ b/e2e/libs/replica/base.py @@ -4,7 +4,7 @@ class Base(ABC): @abstractmethod - def get(self, volume_name, node_name): + def get(self, volume_name, node_name, disk_uuid): return NotImplemented @abstractmethod diff --git a/e2e/libs/replica/crd.py b/e2e/libs/replica/crd.py index c238bfa5e9..f2e4a07124 100644 --- a/e2e/libs/replica/crd.py +++ b/e2e/libs/replica/crd.py @@ -10,12 +10,14 @@ class CRD(Base): def __init__(self): self.obj_api = client.CustomObjectsApi() - def get(self, volume_name, node_name=None): + def get(self, volume_name=None, node_name=None, disk_uuid=None): label_selector = [] - if volume_name != "": + if volume_name: label_selector.append(f"longhornvolume={volume_name}") if node_name: label_selector.append(f"longhornnode={node_name}") + if disk_uuid: + label_selector.append(f"longhorndiskuuid={disk_uuid}") replicas = self.obj_api.list_namespaced_custom_object( group="longhorn.io", diff --git a/e2e/libs/replica/replica.py 
b/e2e/libs/replica/replica.py index 55893c0cbe..31312a8e82 100644 --- a/e2e/libs/replica/replica.py +++ b/e2e/libs/replica/replica.py @@ -16,8 +16,8 @@ def __init__(self): def delete(self, volume_name="", node_name=""): return self.replica.delete(volume_name, node_name) - def get(self, volume_name, node_name): - return self.replica.get(volume_name, node_name) + def get(self, volume_name, node_name, disk_uuid=None): + return self.replica.get(volume_name, node_name, disk_uuid) def wait_for_rebuilding_start(self, volume_name, node_name): return self.replica.wait_for_rebuilding_start(volume_name,node_name) diff --git a/e2e/libs/replica/rest.py b/e2e/libs/replica/rest.py index f3347157dd..f01bb9ce34 100644 --- a/e2e/libs/replica/rest.py +++ b/e2e/libs/replica/rest.py @@ -12,7 +12,7 @@ class Rest(Base): def __init__(self): pass - def get(self, volume_name, node_name): + def get(self, volume_name, node_name, disk_uuid): return NotImplemented def delete(self, volume_name, node_name): diff --git a/e2e/libs/volume/base.py b/e2e/libs/volume/base.py index c58489e866..57eb51f462 100644 --- a/e2e/libs/volume/base.py +++ b/e2e/libs/volume/base.py @@ -81,11 +81,15 @@ def wait_for_volume_state(self, volume_name, desired_state): return NotImplemented @abstractmethod - def wait_for_volume_migration_ready(self, volume_name): + def wait_for_volume_migration_to_be_ready(self, volume_name): return NotImplemented @abstractmethod - def wait_for_volume_migration_completed(self, volume_name, node_name): + def wait_for_volume_migration_complete(self, volume_name, node_name): + return NotImplemented + + @abstractmethod + def wait_for_volume_migration_to_rollback(self, volume_name, node_name): return NotImplemented @abstractmethod diff --git a/e2e/libs/volume/crd.py b/e2e/libs/volume/crd.py index c863c2cef8..80078c081b 100644 --- a/e2e/libs/volume/crd.py +++ b/e2e/libs/volume/crd.py @@ -262,6 +262,9 @@ def is_replica_running(self, volume_name, node_name, is_running): def 
get_replica_name_on_node(self, volume_name, node_name): return Rest().get_replica_name_on_node(volume_name, node_name) + def wait_for_replica_count(self, volume_name, node_name, replica_count): + return Rest().wait_for_replica_count(volume_name, node_name, replica_count) + def wait_for_volume_keep_in_state(self, volume_name, desired_state): self.wait_for_volume_state(volume_name, desired_state) @@ -298,10 +301,10 @@ def wait_for_volume_robustness_not(self, volume_name, not_desired_state): time.sleep(self.retry_interval) assert volume["status"]["robustness"] != not_desired_state - def wait_for_volume_migration_ready(self, volume_name): + def wait_for_volume_migration_to_be_ready(self, volume_name): ready = False for i in range(self.retry_count): - logging(f"Waiting for volume {volume_name} migration ready ({i}) ...") + logging(f"Waiting for volume {volume_name} migration to be ready ({i}) ...") try: engines = self.engine.get_engines(volume_name) ready = len(engines) == 2 @@ -314,19 +317,33 @@ def wait_for_volume_migration_ready(self, volume_name): time.sleep(self.retry_interval) assert ready - def wait_for_volume_migration_completed(self, volume_name, node_name): - completed = False + def wait_for_volume_migration_complete(self, volume_name, node_name): + complete = False for i in range(self.retry_count): - logging(f"Waiting for volume {volume_name} migration to node {node_name} completed ({i}) ...") + logging(f"Waiting for volume {volume_name} migration to node {node_name} complete ({i}) ...") try: - engines = self.engine.get_engines(volume_name, node_name) - completed = len(engines) == 1 and engines[0]['status']['endpoint'] - if completed: + engines = self.engine.get_engines(volume_name) + complete = len(engines) == 1 and engines[0]['status']['endpoint'] and engines[0]['status']['ownerID'] == node_name + if complete: break except Exception as e: logging(f"Getting volume {volume_name} engines error: {e}") time.sleep(self.retry_interval) - assert completed + assert 
complete + + def wait_for_volume_migration_to_rollback(self, volume_name, node_name): + rollback = False + for i in range(self.retry_count): + logging(f"Waiting for volume {volume_name} migration to rollback to node {node_name} ({i}) ...") + try: + engines = self.engine.get_engines(volume_name) + rollback = len(engines) == 1 and engines[0]['status']['endpoint'] and engines[0]['status']['ownerID'] == node_name + if rollback: + break + except Exception as e: + logging(f"Getting volume {volume_name} engines error: {e}") + time.sleep(self.retry_interval) + assert rollback def wait_for_volume_restoration_completed(self, volume_name, backup_name): completed = False diff --git a/e2e/libs/volume/rest.py b/e2e/libs/volume/rest.py index ab170070a7..65d25851ba 100644 --- a/e2e/libs/volume/rest.py +++ b/e2e/libs/volume/rest.py @@ -71,10 +71,13 @@ def wait_for_volume_state(self, volume_name, desired_state): def wait_for_restore_required_status(self, volume_name, restore_required_state): return NotImplemented - def wait_for_volume_migration_ready(self, volume_name): + def wait_for_volume_migration_to_be_ready(self, volume_name): return NotImplemented - def wait_for_volume_migration_completed(self, volume_name, node_name): + def wait_for_volume_migration_complete(self, volume_name, node_name): + return NotImplemented + + def wait_for_volume_migration_to_rollback(self, volume_name, node_name): return NotImplemented def wait_for_volume_restoration_completed(self, volume_name): @@ -234,6 +237,23 @@ def get_replica_name_on_node(self, volume_name, node_name): if r.hostId == node_name: return r.name + def wait_for_replica_count(self, volume_name, node_name, replica_count): + for i in range(self.retry_count): + running_replica_count = 0 + volume = get_longhorn_client().by_id_volume(volume_name) + for r in volume.replicas: + if node_name and r.hostId == node_name and r.running: + running_replica_count += 1 + elif not node_name and r.running: + running_replica_count += 1 + 
logging(f"Waiting for {replica_count if replica_count else ''} replicas for volume {volume_name} running on {node_name if node_name else 'nodes'}, currently it's {running_replica_count} ... ({i})") + if replica_count and running_replica_count == int(replica_count): + break + elif not replica_count and running_replica_count: + break + time.sleep(self.retry_interval) + return running_replica_count + def wait_for_replica_rebuilding_complete(self, volume_name, node_name=None): completed = False for i in range(self.retry_count): diff --git a/e2e/libs/volume/volume.py b/e2e/libs/volume/volume.py index f9bb1ac7eb..8ba8eec4a8 100644 --- a/e2e/libs/volume/volume.py +++ b/e2e/libs/volume/volume.py @@ -69,11 +69,14 @@ def wait_for_volume_healthy(self, volume_name): self.volume.wait_for_volume_state(volume_name, "attached") self.volume.wait_for_volume_robustness(volume_name, "healthy") - def wait_for_volume_migration_ready(self, volume_name): - self.volume.wait_for_volume_migration_ready(volume_name) + def wait_for_volume_migration_to_be_ready(self, volume_name): + self.volume.wait_for_volume_migration_to_be_ready(volume_name) - def wait_for_volume_migration_completed(self, volume_name, node_name): - self.volume.wait_for_volume_migration_completed(volume_name, node_name) + def wait_for_volume_migration_complete(self, volume_name, node_name): + self.volume.wait_for_volume_migration_complete(volume_name, node_name) + + def wait_for_volume_migration_to_rollback(self, volume_name, node_name): + self.volume.wait_for_volume_migration_to_rollback(volume_name, node_name) def wait_for_volume_restoration_completed(self, volume_name, backup_name): self.volume.wait_for_volume_restoration_completed(volume_name, backup_name) @@ -128,6 +131,9 @@ def wait_for_replica_running(self, volume_name, node_name): def get_replica_name_on_node(self, volume_name, node_name): return self.volume.get_replica_name_on_node(volume_name, node_name) + def wait_for_replica_count(self, volume_name, node_name, 
replica_count): + return self.volume.wait_for_replica_count(volume_name, node_name, replica_count) + def wait_for_replica_rebuilding_complete(self, volume_name, node_name=None): return self.volume.wait_for_replica_rebuilding_complete(volume_name, node_name) diff --git a/e2e/libs/workload/statefulset.py b/e2e/libs/workload/statefulset.py index cc2571a39e..eebe8b5014 100644 --- a/e2e/libs/workload/statefulset.py +++ b/e2e/libs/workload/statefulset.py @@ -10,7 +10,7 @@ from utility.utility import logging -def create_statefulset(statefulset_name, volume_type, sc_name): +def create_statefulset(statefulset_name, volume_type, sc_name, size): filepath = "./templates/workload/statefulset.yaml" with open(filepath, 'r') as f: namespace = 'default' @@ -30,6 +30,10 @@ def create_statefulset(statefulset_name, volume_type, sc_name): if volume_type == 'RWX': manifest_dict['spec']['volumeClaimTemplates'][0]['spec']['accessModes'][0] = 'ReadWriteMany' + # correct request storage size + if size: + manifest_dict['spec']['volumeClaimTemplates'][0]['spec']['resources']['requests']['storage'] = size + api = client.AppsV1Api() statefulset = api.create_namespaced_stateful_set( body=manifest_dict, diff --git a/e2e/tests/negative/cluster_restart.robot b/e2e/tests/negative/cluster_restart.robot index a4687830f3..02626de0c9 100644 --- a/e2e/tests/negative/cluster_restart.robot +++ b/e2e/tests/negative/cluster_restart.robot @@ -3,6 +3,7 @@ Documentation Negative Test Cases Test Tags negative cluster +Resource ../keywords/variables.resource Resource ../keywords/common.resource Resource ../keywords/deployment.resource Resource ../keywords/longhorn.resource @@ -16,15 +17,6 @@ Resource ../keywords/setting.resource Test Setup Set test environment Test Teardown Cleanup test resources -*** Variables *** -${LOOP_COUNT} 1 -${RETRY_COUNT} 300 -${RETRY_INTERVAL} 1 -${CONTROL_PLANE_NODE_NETWORK_LATENCY_IN_MS} 0 -${RWX_VOLUME_FAST_FAILOVER} false -${DATA_ENGINE} v1 - - *** Test Cases *** Restart Cluster 
While Workload Heavy Writing Given Set setting rwx-volume-fast-failover to ${RWX_VOLUME_FAST_FAILOVER} diff --git a/e2e/tests/negative/component_resilience.robot b/e2e/tests/negative/component_resilience.robot index fa45633760..3d959ed4db 100644 --- a/e2e/tests/negative/component_resilience.robot +++ b/e2e/tests/negative/component_resilience.robot @@ -3,6 +3,7 @@ Documentation Negative Test Cases Test Tags negative +Resource ../keywords/variables.resource Resource ../keywords/common.resource Resource ../keywords/volume.resource Resource ../keywords/backing_image.resource @@ -18,13 +19,6 @@ Resource ../keywords/sharemanager.resource Test Setup Set test environment Test Teardown Cleanup test resources -*** Variables *** -${LOOP_COUNT} 1 -${RETRY_COUNT} 300 -${RETRY_INTERVAL} 1 -${RWX_VOLUME_FAST_FAILOVER} false -${DATA_ENGINE} v1 - *** Keywords *** Delete instance-manager of volume ${volume_id} and wait for recover When Delete instance-manager of volume ${volume_id} diff --git a/e2e/tests/negative/kubelet_restart.robot b/e2e/tests/negative/kubelet_restart.robot index 116630dbf4..2d56a0db6e 100644 --- a/e2e/tests/negative/kubelet_restart.robot +++ b/e2e/tests/negative/kubelet_restart.robot @@ -3,6 +3,7 @@ Documentation Negative Test Cases Test Tags negative +Resource ../keywords/variables.resource Resource ../keywords/common.resource Resource ../keywords/storageclass.resource Resource ../keywords/persistentvolumeclaim.resource @@ -14,13 +15,6 @@ Resource ../keywords/setting.resource Test Setup Set test environment Test Teardown Cleanup test resources -*** Variables *** -${LOOP_COUNT} 1 -${RETRY_COUNT} 300 -${RETRY_INTERVAL} 1 -${RWX_VOLUME_FAST_FAILOVER} false -${DATA_ENGINE} v1 - *** Test Cases *** Restart Volume Node Kubelet While Workload Heavy Writing Given Set setting rwx-volume-fast-failover to ${RWX_VOLUME_FAST_FAILOVER} diff --git a/e2e/tests/negative/network_disconnect.robot b/e2e/tests/negative/network_disconnect.robot index 3e0b786c54..142492c914 100644 --- 
a/e2e/tests/negative/network_disconnect.robot +++ b/e2e/tests/negative/network_disconnect.robot @@ -3,6 +3,7 @@ Documentation Negative Test Cases Test Tags negative +Resource ../keywords/variables.resource Resource ../keywords/volume.resource Resource ../keywords/storageclass.resource Resource ../keywords/statefulset.resource @@ -14,14 +15,6 @@ Resource ../keywords/setting.resource Test Setup Set test environment Test Teardown Cleanup test resources -*** Variables *** -${LOOP_COUNT} 1 -${LATENCY_IN_MS} 0 -${RETRY_COUNT} 300 -${RETRY_INTERVAL} 1 -${RWX_VOLUME_FAST_FAILOVER} false -${DATA_ENGINE} v1 - *** Test Cases *** Disconnect Volume Node Network While Workload Heavy Writing Given Set setting rwx-volume-fast-failover to ${RWX_VOLUME_FAST_FAILOVER} diff --git a/e2e/tests/negative/node_delete.robot b/e2e/tests/negative/node_delete.robot index 146a47badd..8ffadb7815 100644 --- a/e2e/tests/negative/node_delete.robot +++ b/e2e/tests/negative/node_delete.robot @@ -3,6 +3,7 @@ Documentation Negative Test Cases Test Tags negative +Resource ../keywords/variables.resource Resource ../keywords/common.resource Resource ../keywords/host.resource Resource ../keywords/storageclass.resource @@ -15,13 +16,6 @@ Resource ../keywords/setting.resource Test Setup Set test environment Test Teardown Cleanup test resources -*** Variables *** -${LOOP_COUNT} 1 -${RETRY_COUNT} 300 -${RETRY_INTERVAL} 1 -${RWX_VOLUME_FAST_FAILOVER} false -${DATA_ENGINE} v1 - *** Test Cases *** Delete Volume Node While Replica Rebuilding Given Set setting node-down-pod-deletion-policy to do-nothing diff --git a/e2e/tests/negative/node_drain.robot b/e2e/tests/negative/node_drain.robot index d8551fcab8..bdd1d5c454 100644 --- a/e2e/tests/negative/node_drain.robot +++ b/e2e/tests/negative/node_drain.robot @@ -3,6 +3,7 @@ Documentation Negative Test Cases Test Tags negative +Resource ../keywords/variables.resource Resource ../keywords/common.resource Resource ../keywords/storageclass.resource Resource 
../keywords/persistentvolumeclaim.resource @@ -18,13 +19,6 @@ Resource ../keywords/node.resource Test Setup Set test environment Test Teardown Cleanup test resources -*** Variables *** -${LOOP_COUNT} 1 -${RETRY_COUNT} 300 -${RETRY_INTERVAL} 1 -${RWX_VOLUME_FAST_FAILOVER} false -${DATA_ENGINE} v1 - *** Test Cases *** Force Drain Volume Node While Replica Rebuilding Given Set setting rwx-volume-fast-failover to ${RWX_VOLUME_FAST_FAILOVER} diff --git a/e2e/tests/negative/node_reboot.robot b/e2e/tests/negative/node_reboot.robot index 36de6fe7e5..676982a379 100644 --- a/e2e/tests/negative/node_reboot.robot +++ b/e2e/tests/negative/node_reboot.robot @@ -3,6 +3,8 @@ Documentation Negative Test Cases Test Tags negative +Resource ../keywords/variables.resource +Resource ../keywords/sharemanager.resource Resource ../keywords/common.resource Resource ../keywords/deployment.resource Resource ../keywords/longhorn.resource @@ -18,16 +20,41 @@ Resource ../keywords/setting.resource Test Setup Set test environment Test Teardown Cleanup test resources -*** Variables *** -${LOOP_COUNT} 1 -${RETRY_COUNT} 300 -${RETRY_INTERVAL} 1 -${VOLUME_TYPE} RWO -${CONTROL_PLANE_NODE_NETWORK_LATENCY_IN_MS} 0 -${RWX_VOLUME_FAST_FAILOVER} false -${DATA_ENGINE} v1 - *** Test Cases *** +Shutdown Volume Node And Test Auto Reattach To A New Node + Given Set setting node-down-pod-deletion-policy to delete-both-statefulset-and-deployment-pod + And Create storageclass longhorn-test with dataEngine=${DATA_ENGINE} + And Create persistentvolumeclaim 0 using RWO volume with longhorn-test storageclass + And Create persistentvolumeclaim 1 using RWX volume with longhorn-test storageclass + + And Create deployment 0 with persistentvolumeclaim 0 + And Create deployment 1 with persistentvolumeclaim 1 + + And Wait for volume of deployment 0 healthy + And Wait for volume of deployment 1 healthy + + And Write 100 MB data to file data.bin in deployment 0 + And Write 100 MB data to file data.bin in deployment 1 + + When 
Power off volume node of deployment 0 without waiting + And Power off volume node of deployment 1 without waiting + + Then Wait for sharemanager pod of deployment 1 restart + And Wait for sharemanager pod of deployment 1 running + + And Wait for volume of deployment 0 attached and degraded + And Wait for volume of deployment 1 attached and degraded + + And Wait for workloads pods stable + ... deployment 0 deployment 1 + + And Check deployment 0 data in file data.bin is intact + And Check deployment 1 data in file data.bin is intact + And Check deployment 0 works + And Check deployment 1 works + + And Power on off nodes + Reboot Node One By One While Workload Heavy Writing [Tags] reboot Given Set setting rwx-volume-fast-failover to ${RWX_VOLUME_FAST_FAILOVER} @@ -289,7 +316,7 @@ Single Replica Node Down Deletion Policy do-nothing With RWO Volume Replica Loca And Power off volume node of deployment 0 And Wait for deployment 0 pod stuck in Terminating on the original node - When Power on off node + When Power on off nodes And Wait for deployment 0 pods stable Then Check deployment 0 data in file data is intact @@ -307,7 +334,7 @@ Single Replica Node Down Deletion Policy do-nothing With RWO Volume Replica Loca And Power off volume node of deployment 0 And Wait for deployment 0 pod stuck in Terminating on the original node - When Power on off node + When Power on off nodes And Wait for deployment 0 pods stable Then Check deployment 0 data in file data is intact @@ -328,7 +355,7 @@ Single Replica Node Down Deletion Policy delete-deployment-pod With RWO Volume R And Wait for deployment 0 pods stable Then Check deployment 0 data in file data is intact - And Power on off node + And Power on off nodes Single Replica Node Down Deletion Policy delete-deployment-pod With RWO Volume Replica Locate On Volume Node Given Create storageclass longhorn-test with dataEngine=${DATA_ENGINE} @@ -345,7 +372,7 @@ Single Replica Node Down Deletion Policy delete-deployment-pod With RWO Volume 
R Then Wait for volume of deployment 0 faulted And Wait for deployment 0 pod stuck in ContainerCreating on another node - When Power on off node + When Power on off nodes And Wait for deployment 0 pods stable And Check deployment 0 pod is Running on the original node Then Check deployment 0 data in file data is intact @@ -366,7 +393,7 @@ Single Replica Node Down Deletion Policy delete-both-statefulset-and-deployment- And Wait for statefulset 0 pods stable Then Check statefulset 0 data in file data is intact - And Power on off node + And Power on off nodes Single Replica Node Down Deletion Policy delete-both-statefulset-and-deployment-pod With RWO Volume Replica Locate On Volume Node Given Create storageclass longhorn-test with dataEngine=${DATA_ENGINE} @@ -382,7 +409,7 @@ Single Replica Node Down Deletion Policy delete-both-statefulset-and-deployment- Then Wait for volume of statefulset 0 faulted And Wait for statefulset 0 pod stuck in ContainerCreating on another node - When Power on off node + When Power on off nodes And Wait for statefulset 0 pods stable And Check statefulset 0 pod is Running on the original node Then Check statefulset 0 data in file data is intact diff --git a/e2e/tests/negative/pull_backup_from_another_longhorn.robot b/e2e/tests/negative/pull_backup_from_another_longhorn.robot index 819350ad68..5b2de7b8b8 100644 --- a/e2e/tests/negative/pull_backup_from_another_longhorn.robot +++ b/e2e/tests/negative/pull_backup_from_another_longhorn.robot @@ -3,6 +3,7 @@ Documentation Uninstallation Checks Test Tags negative +Resource ../keywords/variables.resource Resource ../keywords/common.resource Resource ../keywords/setting.resource Resource ../keywords/volume.resource @@ -18,12 +19,6 @@ Library ../libs/keywords/setting_keywords.py Test Setup Set test environment Test Teardown Cleanup test resources -*** Variables *** -${LOOP_COUNT} 1 -${RETRY_COUNT} 300 -${RETRY_INTERVAL} 1 -${DATA_ENGINE} v1 - *** Test Cases *** Pull backup created by another Longhorn 
system [Documentation] Pull backup created by another Longhorn system diff --git a/e2e/tests/negative/replica_rebuilding.robot b/e2e/tests/negative/replica_rebuilding.robot index 167dac8691..1a6cbf9ed1 100644 --- a/e2e/tests/negative/replica_rebuilding.robot +++ b/e2e/tests/negative/replica_rebuilding.robot @@ -3,6 +3,7 @@ Documentation Negative Test Cases Test Tags negative +Resource ../keywords/variables.resource Resource ../keywords/common.resource Resource ../keywords/host.resource Resource ../keywords/volume.resource @@ -14,12 +15,6 @@ Resource ../keywords/workload.resource Test Setup Set test environment Test Teardown Cleanup test resources -*** Variables *** -${LOOP_COUNT} 1 -${RETRY_COUNT} 300 -${RETRY_INTERVAL} 1 -${DATA_ENGINE} v1 - *** Test Cases *** Delete Replica While Replica Rebuilding Given Create volume 0 with size=2Gi numberOfReplicas=3 dataEngine=${DATA_ENGINE} diff --git a/e2e/tests/negative/stress_cpu.robot b/e2e/tests/negative/stress_cpu.robot index b9d0a65836..68047dc40e 100644 --- a/e2e/tests/negative/stress_cpu.robot +++ b/e2e/tests/negative/stress_cpu.robot @@ -3,6 +3,7 @@ Documentation Negative Test Cases Test Tags negative +Resource ../keywords/variables.resource Resource ../keywords/common.resource Resource ../keywords/persistentvolumeclaim.resource Resource ../keywords/statefulset.resource @@ -13,12 +14,7 @@ Resource ../keywords/workload.resource Test Setup Set test environment Test Teardown Cleanup test resources -*** Variables *** -${LOOP_COUNT} 1 -${RETRY_COUNT} 300 -${RETRY_INTERVAL} 1 *** Test Cases *** - Stress Volume Node CPU When Replica Is Rebuilding Given Create volume 0 with size=5Gi numberOfReplicas=3 And Attach volume 0 diff --git a/e2e/tests/negative/stress_filesystem.robot b/e2e/tests/negative/stress_filesystem.robot index 85ec54de9f..094aa6bf3c 100644 --- a/e2e/tests/negative/stress_filesystem.robot +++ b/e2e/tests/negative/stress_filesystem.robot @@ -3,6 +3,7 @@ Documentation Negative Test Cases Test Tags negative 
+Resource ../keywords/variables.resource Resource ../keywords/common.resource Resource ../keywords/persistentvolumeclaim.resource Resource ../keywords/statefulset.resource @@ -13,13 +14,7 @@ Resource ../keywords/workload.resource Test Setup Set test environment Test Teardown Cleanup test resources -*** Variables *** -${LOOP_COUNT} 1 -${RETRY_COUNT} 300 -${RETRY_INTERVAL} 1 - *** Test Cases *** - Stress Volume Node Filesystem When Replica Is Rebuilding Given Create volume 0 with size=5Gi numberOfReplicas=3 And Attach volume 0 diff --git a/e2e/tests/negative/stress_memory.robot b/e2e/tests/negative/stress_memory.robot index 6f3a5c6b90..f566610d76 100644 --- a/e2e/tests/negative/stress_memory.robot +++ b/e2e/tests/negative/stress_memory.robot @@ -3,6 +3,7 @@ Documentation Negative Test Cases Test Tags negative +Resource ../keywords/variables.resource Resource ../keywords/common.resource Resource ../keywords/persistentvolumeclaim.resource Resource ../keywords/statefulset.resource @@ -13,13 +14,7 @@ Resource ../keywords/workload.resource Test Setup Set test environment Test Teardown Cleanup test resources -*** Variables *** -${LOOP_COUNT} 1 -${RETRY_COUNT} 300 -${RETRY_INTERVAL} 1 - *** Test Cases *** - Stress Volume Node Memory When Replica Is Rebuilding Given Create volume 0 with size=5Gi numberOfReplicas=3 And Attach volume 0 diff --git a/e2e/tests/negative/test_backup_listing.robot b/e2e/tests/negative/test_backup_listing.robot index 6e0c921b25..58c2661aae 100644 --- a/e2e/tests/negative/test_backup_listing.robot +++ b/e2e/tests/negative/test_backup_listing.robot @@ -2,8 +2,9 @@ Documentation Test backup listing ... 
https://longhorn.github.io/longhorn-tests/manual/pre-release/stress/backup-listing/ -Test Tags manual +Test Tags manual negative +Resource ../keywords/variables.resource Resource ../keywords/common.resource Resource ../keywords/deployment.resource Resource ../keywords/workload.resource @@ -22,9 +23,6 @@ Test Teardown Cleanup test resources *** Variables *** ${LOOP_COUNT} 1001 -${RETRY_COUNT} 300 -${RETRY_INTERVAL} 1 -${DATA_ENGINE} v1 *** Keywords *** Verify backup ${backup_id} count for ${workload_kind} ${workload_id} volume @@ -142,7 +140,7 @@ Backup listing with more than 1000 backups And Volume 1 data should same as deployment 0 volume Backup listing of volume bigger than 200 Gi - [Tags] manual longhorn-8355 + [Tags] manual longhorn-8355 large-size [Documentation] Test backup bigger than 200 Gi Given Create persistentvolumeclaim 0 using RWO volume And Create deployment 0 with persistentvolumeclaim 0 @@ -152,7 +150,8 @@ Backup listing of volume bigger than 200 Gi And Create deployment 1 with volume 1 Then Get deployment 1 volume data in file data And Volume 1 data should same as deployment 0 volume - Then Create pod 2 mount 250 GB volume 2 + Then Create volume 2 from deployment 0 volume random backup + And Create pod 2 mount 250 GB volume 2 And Write 210 GB large data to file 0 in pod 2 Then Volume 2 backup 0 should be able to create Then Delete pod 2 and volume 2 diff --git a/e2e/tests/negative/uninstallation_checks.robot b/e2e/tests/negative/uninstallation_checks.robot index 0d35658e28..c5d83cf26f 100644 --- a/e2e/tests/negative/uninstallation_checks.robot +++ b/e2e/tests/negative/uninstallation_checks.robot @@ -3,6 +3,7 @@ Documentation Uninstallation Checks Test Tags negative +Resource ../keywords/variables.resource Resource ../keywords/common.resource Resource ../keywords/setting.resource Resource ../keywords/volume.resource @@ -18,11 +19,6 @@ Library ../libs/keywords/setting_keywords.py Test Setup Set test environment Test Teardown Cleanup test resources 
-*** Variables *** -${LOOP_COUNT} 1 -${RETRY_COUNT} 300 -${RETRY_INTERVAL} 1 - *** Test Cases *** Uninstallation Checks [Documentation] Uninstallation Checks diff --git a/e2e/tests/regression/test_backing_image.robot b/e2e/tests/regression/test_backing_image.robot index 7eb1564a86..618b48dee8 100644 --- a/e2e/tests/regression/test_backing_image.robot +++ b/e2e/tests/regression/test_backing_image.robot @@ -3,6 +3,7 @@ Documentation Backing Image Test Cases Test Tags regression backing_image +Resource ../keywords/variables.resource Resource ../keywords/common.resource Resource ../keywords/volume.resource Resource ../keywords/backing_image.resource @@ -10,12 +11,6 @@ Resource ../keywords/backing_image.resource Test Setup Set test environment Test Teardown Cleanup test resources -*** Variables *** -${LOOP_COUNT} 1 -${RETRY_COUNT} 300 -${RETRY_INTERVAL} 1 -${DATA_ENGINE} v1 - *** Test Cases *** Test Backing Image Basic Operation [Tags] coretest diff --git a/e2e/tests/regression/test_backup.robot b/e2e/tests/regression/test_backup.robot index 7a793d0131..0e2257aab1 100644 --- a/e2e/tests/regression/test_backup.robot +++ b/e2e/tests/regression/test_backup.robot @@ -3,6 +3,7 @@ Documentation Backup Test Cases Test Tags regression +Resource ../keywords/variables.resource Resource ../keywords/common.resource Resource ../keywords/setting.resource Resource ../keywords/volume.resource @@ -16,12 +17,6 @@ Resource ../keywords/backupstore.resource Test Setup Set test environment Test Teardown Cleanup test resources -*** Variables *** -${LOOP_COUNT} 1 -${RETRY_COUNT} 300 -${RETRY_INTERVAL} 1 -${DATA_ENGINE} v1 - *** Keywords *** Snapshot PV PVC could not be created on DR volume 1 Create snapshot 0 of volume 1 will fail diff --git a/e2e/tests/regression/test_basic.robot b/e2e/tests/regression/test_basic.robot index b0791e30c9..c8fa7d5060 100644 --- a/e2e/tests/regression/test_basic.robot +++ b/e2e/tests/regression/test_basic.robot @@ -3,6 +3,7 @@ Documentation Basic Test Cases Test 
Tags regression +Resource ../keywords/variables.resource Resource ../keywords/common.resource Resource ../keywords/node.resource Resource ../keywords/setting.resource @@ -19,12 +20,6 @@ Resource ../keywords/node.resource Test Setup Set test environment Test Teardown Cleanup test resources -*** Variables *** -${LOOP_COUNT} 1 -${RETRY_COUNT} 300 -${RETRY_INTERVAL} 1 -${DATA_ENGINE} v1 - *** Keywords *** Create volume with invalid name should fail [Arguments] ${invalid_volume_name} diff --git a/e2e/tests/regression/test_engine_image.robot b/e2e/tests/regression/test_engine_image.robot index ed0e2dbdd7..55b01b94eb 100644 --- a/e2e/tests/regression/test_engine_image.robot +++ b/e2e/tests/regression/test_engine_image.robot @@ -3,6 +3,7 @@ Documentation Engine Image Test Cases Test Tags regression engine_image +Resource ../keywords/variables.resource Resource ../keywords/common.resource Resource ../keywords/volume.resource Resource ../keywords/engine_image.resource @@ -10,12 +11,6 @@ Resource ../keywords/engine_image.resource Test Setup Set test environment Test Teardown Cleanup test resources -*** Variables *** -${LOOP_COUNT} 1 -${RETRY_COUNT} 300 -${RETRY_INTERVAL} 1 -${DATA_ENGINE} v1 - *** Test Cases *** Test Replica Rebuilding After Engine Upgrade [Tags] coretest diff --git a/e2e/tests/regression/test_ha.robot b/e2e/tests/regression/test_ha.robot index bb86936c57..a58818e3d9 100644 --- a/e2e/tests/regression/test_ha.robot +++ b/e2e/tests/regression/test_ha.robot @@ -3,6 +3,7 @@ Documentation HA Test Cases Test Tags regression +Resource ../keywords/variables.resource Resource ../keywords/common.resource Resource ../keywords/volume.resource Resource ../keywords/setting.resource @@ -14,11 +15,6 @@ Resource ../keywords/statefulset.resource Test Setup Set test environment Test Teardown Cleanup test resources -*** Variables *** -${LOOP_COUNT} 1 -${RETRY_COUNT} 300 -${RETRY_INTERVAL} 1 - *** Test Cases *** Disrupt Data Plane Traffic For Less Than Long Engine Replica Timeout 
Given Set setting engine-replica-timeout to 15 diff --git a/e2e/tests/regression/test_migration.robot b/e2e/tests/regression/test_migration.robot index c61cc892fd..850f27082c 100644 --- a/e2e/tests/regression/test_migration.robot +++ b/e2e/tests/regression/test_migration.robot @@ -3,6 +3,7 @@ Documentation Migration Test Cases Test Tags regression +Resource ../keywords/variables.resource Resource ../keywords/common.resource Resource ../keywords/deployment.resource Resource ../keywords/persistentvolumeclaim.resource @@ -13,12 +14,6 @@ Resource ../keywords/volume.resource Test Setup Set test environment Test Teardown Cleanup test resources -*** Variables *** -${LOOP_COUNT} 1 -${RETRY_COUNT} 300 -${RETRY_INTERVAL} 1 -${DATA_ENGINE} v1 - *** Test Cases *** Test Migration Confirm [Tags] coretest migration @@ -39,8 +34,33 @@ Test Migration Confirm And Wait for volume 0 healthy And Write data to volume 0 And Attach volume 0 to node 1 - Then Wait for volume 0 migration ready + Then Wait for volume 0 migration to be ready And Detach volume 0 from node 0 - And Wait for volume 0 migrated to node 1 + And Wait for volume 0 to migrate to node 1 + And Wait for volume 0 healthy + And Check volume 0 data is intact + +Test Migration Rollback + [Tags] coretest migration + [Documentation] Test that a migratable RWX volume can be rolled back to initial node. + ... + ... 1. Create a new RWX migratable volume. + ... 2. Attach to test node to write some test data on it. + ... 3. Detach from test node. + ... 4. Get set of nodes excluding the test node + ... 5. Attach volume to node 1 (initial node) + ... 6. Attach volume to node 2 (migration target) + ... 7. Wait for migration ready (engine running on node 2) + ... 8. Detach volume from node 2 + ... 9. Observe volume stayed on node 1 (single active engine) + ... 10. 
Validate initially written test data + Given Create volume 0 with migratable=True accessMode=RWX dataEngine=${DATA_ENGINE} + When Attach volume 0 to node 0 + And Wait for volume 0 healthy + And Write data to volume 0 + And Attach volume 0 to node 1 + Then Wait for volume 0 migration to be ready + And Detach volume 0 from node 1 + And Wait for volume 0 to stay on node 0 And Wait for volume 0 healthy And Check volume 0 data is intact diff --git a/e2e/tests/regression/test_replica.robot b/e2e/tests/regression/test_replica.robot index 1555d3e5bd..633b0b6320 100644 --- a/e2e/tests/regression/test_replica.robot +++ b/e2e/tests/regression/test_replica.robot @@ -3,6 +3,7 @@ Documentation Replica Test Cases Test Tags regression +Resource ../keywords/variables.resource Resource ../keywords/common.resource Resource ../keywords/volume.resource Resource ../keywords/setting.resource @@ -13,12 +14,6 @@ Resource ../keywords/workload.resource Test Setup Set test environment Test Teardown Cleanup test resources -*** Variables *** -${LOOP_COUNT} 1 -${RETRY_COUNT} 300 -${RETRY_INTERVAL} 1 -${DATA_ENGINE} v1 - *** Test Cases *** Test Replica Rebuilding Per Volume Limit [Tags] coretest diff --git a/e2e/tests/regression/test_scheduling.robot b/e2e/tests/regression/test_scheduling.robot index d0f1fdeb4a..b26c0b5339 100644 --- a/e2e/tests/regression/test_scheduling.robot +++ b/e2e/tests/regression/test_scheduling.robot @@ -3,23 +3,20 @@ Documentation Scheduling Test Cases Test Tags regression +Resource ../keywords/variables.resource Resource ../keywords/common.resource Resource ../keywords/volume.resource +Resource ../keywords/replica.resource Resource ../keywords/setting.resource -Resource ../keywords/deployment.resource -Resource ../keywords/persistentvolumeclaim.resource +Resource ../keywords/storageclass.resource +Resource ../keywords/statefulset.resource Resource ../keywords/workload.resource Resource ../keywords/k8s.resource +Resource ../keywords/node.resource Test Setup Set test 
environment Test Teardown Cleanup test resources -*** Variables *** -${LOOP_COUNT} 1 -${RETRY_COUNT} 300 -${RETRY_INTERVAL} 1 -${DATA_ENGINE} v1 - *** Test Cases *** Test Soft Anti Affinity Scheduling [Tags] coretest @@ -51,3 +48,91 @@ Test Soft Anti Affinity Scheduling Then Wait until volume 0 replicas rebuilding completed And Wait for volume 0 healthy And Check volume 0 data is intact + +Test Replica Auto Balance Disk In Pressure + [Tags] coretest + [Documentation] Test replica auto balance disk in pressure + ... This test simulates a scenario where a disk reaches a certain + ... pressure threshold (80%), triggering the replica auto balance + ... to rebuild the replicas to another disk with enough available + ... space. Replicas should not be rebuilt at the same time. + ... + ... Issue: https://github.com/longhorn/longhorn/issues/4105 + Given Set setting replica-soft-anti-affinity to false + And Set setting replica-auto-balance-disk-pressure-percentage to 80 + + And Create 1 Gi disk 0 on node 0 + And Create 1 Gi disk 1 on node 0 + And Disable disk 1 scheduling on node 0 + And Disable node 0 default disk + And Disable node 1 scheduling + And Disable node 2 scheduling + + And Create storageclass one-replica with numberOfReplicas=1 dataEngine=${DATA_ENGINE} + # 1 Gi disk, but only 950 Mi available, 950 Mi / 3 = 316 Mi + And Create statefulset 0 using RWO volume with one-replica storageclass and size 316 Mi + And Create statefulset 1 using RWO volume with one-replica storageclass and size 316 Mi + And Create statefulset 2 using RWO volume with one-replica storageclass and size 316 Mi + And Check volume of statefulset 0 replica on node 0 disk 0 + And Check volume of statefulset 1 replica on node 0 disk 0 + And Check volume of statefulset 2 replica on node 0 disk 0 + + # Write 950 Mi * 80% / 3 = 254 Mi data to disk 0 to make it in pressure + And Write 254 MB data to file data.bin in statefulset 0 + And Write 254 MB data to file data.bin in statefulset 1 + And Write 254 
MB data to file data.bin in statefulset 2 + And Check node 0 disk 0 is in pressure + + When Enable disk 1 scheduling on node 0 + And set setting replica-auto-balance to best-effort + + # auto balance should happen + Then There should be replicas running on node 0 disk 0 + And There should be replicas running on node 0 disk 1 + And Check node 0 disk 0 is not in pressure + And Check node 0 disk 1 is not in pressure + + And Check statefulset 0 data in file data.bin is intact + And Check statefulset 1 data in file data.bin is intact + And Check statefulset 2 data in file data.bin is intact + +Test Replica Auto Balance Node Least Effort + [Tags] coretest + [Documentation] Scenario: replica auto-balance nodes with `least-effort` + Given Set setting replica-soft-anti-affinity to true + And Set setting replica-auto-balance to least-effort + + When Disable node 1 scheduling + And Disable node 2 scheduling + And Create volume 0 with numberOfReplicas=6 dataEngine=${DATA_ENGINE} + And Attach volume 0 + And Wait for volume 0 healthy + And Write data to volume 0 + Then Volume 0 should have 6 replicas running on node 0 + And Volume 0 should have 0 replicas running on node 1 + And Volume 0 should have 0 replicas running on node 2 + + When Enable node 1 scheduling + # wait for auto balance + Then Volume 0 should have replicas running on node 1 + And Volume 0 should have 6 replicas running + # loop 3 times with 5-second wait and compare the replica count to: + # ensure no additional scheduling occurs + # the replica count remains unchanged + And Volume 0 should have 5 replicas running on node 0 and no additional scheduling occurs + And Volume 0 should have 1 replicas running on node 1 and no additional scheduling occurs + And Volume 0 should have 0 replicas running on node 2 and no additional scheduling occurs + + When Enable node 2 scheduling + # wait for auto balance + Then Volume 0 should have replicas running on node 2 + And Volume 0 should have 6 replicas running + # loop 3 
times with 5-second wait and compare the replica count to: + # ensure no additional scheduling occurs + # the replica count remains unchanged + And Volume 0 should have 4 replicas running on node 0 and no additional scheduling occurs + And Volume 0 should have 1 replicas running on node 1 and no additional scheduling occurs + And Volume 0 should have 1 replicas running on node 2 and no additional scheduling occurs + + And Wait for volume 0 healthy + And Check volume 0 data is intact diff --git a/e2e/tests/regression/test_settings.robot b/e2e/tests/regression/test_settings.robot index bb28480533..1d9a7093e7 100644 --- a/e2e/tests/regression/test_settings.robot +++ b/e2e/tests/regression/test_settings.robot @@ -3,6 +3,7 @@ Documentation Settings Test Cases Test Tags regression +Resource ../keywords/variables.resource Resource ../keywords/common.resource Resource ../keywords/volume.resource Resource ../keywords/setting.resource @@ -15,12 +16,6 @@ Resource ../keywords/sharemanager.resource Test Setup Set test environment Test Teardown Cleanup test resources -*** Variables *** -${LOOP_COUNT} 1 -${RETRY_COUNT} 300 -${RETRY_INTERVAL} 1 -${DATA_ENGINE} v1 - *** Test Cases *** Test Setting Concurrent Rebuild Limit [Documentation] Test if setting Concurrent Replica Rebuild Per Node Limit works correctly. 
diff --git a/e2e/tests/regression/test_v2.robot b/e2e/tests/regression/test_v2.robot index fddba633a7..c2ef068dd1 100644 --- a/e2e/tests/regression/test_v2.robot +++ b/e2e/tests/regression/test_v2.robot @@ -3,6 +3,7 @@ Documentation v2 Data Engine Test Cases Test Tags regression v2 +Resource ../keywords/variables.resource Resource ../keywords/common.resource Resource ../keywords/storageclass.resource Resource ../keywords/persistentvolumeclaim.resource @@ -17,11 +18,6 @@ Resource ../keywords/longhorn.resource Test Setup Set test environment Test Teardown Cleanup test resources -*** Variables *** -${LOOP_COUNT} 1 -${RETRY_COUNT} 300 -${RETRY_INTERVAL} 1 - *** Test Cases *** Test V2 Volume Basic [Tags] coretest @@ -55,6 +51,7 @@ Degraded Volume Replica Rebuilding END V2 Volume Should Block Trim When Volume Is Degraded + [Tags] cluster Given Set setting auto-salvage to true And Create storageclass longhorn-test with dataEngine=v2 And Create persistentvolumeclaim 0 using RWO volume with longhorn-test storageclass diff --git a/e2e/tests/regression/test_volume.robot b/e2e/tests/regression/test_volume.robot index f6954594d9..c06df66dd1 100644 --- a/e2e/tests/regression/test_volume.robot +++ b/e2e/tests/regression/test_volume.robot @@ -3,6 +3,7 @@ Documentation Volume Test Cases Test Tags regression +Resource ../keywords/variables.resource Resource ../keywords/common.resource Resource ../keywords/deployment.resource Resource ../keywords/longhorn.resource @@ -15,11 +16,6 @@ Resource ../keywords/volume.resource Test Setup Set test environment Test Teardown Cleanup test resources -*** Variables *** -${LOOP_COUNT} 1 -${RETRY_COUNT} 300 -${RETRY_INTERVAL} 1 - *** Keywords *** Create volume with invalid name should fail [Arguments] ${invalid_volume_name}