From f012eb04dda0b7ddbf970fba4529f0343d195435 Mon Sep 17 00:00:00 2001 From: Yang Chiu Date: Fri, 19 Apr 2024 15:00:39 +0800 Subject: [PATCH 1/4] test(robot): add basic v2 volume test Signed-off-by: Yang Chiu --- e2e/keywords/common.resource | 2 ++ e2e/keywords/node.resource | 12 +++++++++ e2e/libs/keywords/node_keywords.py | 24 +++++++++++++++++ e2e/libs/keywords/volume_keywords.py | 4 +-- e2e/libs/node/node.py | 40 ++++++++++++++++++++++++++-- e2e/libs/volume/crd.py | 5 ++-- e2e/libs/volume/volume.py | 4 +-- e2e/tests/v2/volume.robot | 31 +++++++++++++++++++++ 8 files changed, 114 insertions(+), 8 deletions(-) create mode 100644 e2e/keywords/node.resource create mode 100644 e2e/libs/keywords/node_keywords.py create mode 100644 e2e/tests/v2/volume.robot diff --git a/e2e/keywords/common.resource b/e2e/keywords/common.resource index 4dbd6010a2..423d3cad0a 100644 --- a/e2e/keywords/common.resource +++ b/e2e/keywords/common.resource @@ -13,6 +13,7 @@ Library ../libs/keywords/persistentvolumeclaim_keywords.py Library ../libs/keywords/network_keywords.py Library ../libs/keywords/backupstore_keywords.py Library ../libs/keywords/storageclass_keywords.py +Library ../libs/keywords/node_keywords.py *** Keywords *** Set test environment @@ -34,3 +35,4 @@ Cleanup test resources cleanup_volumes cleanup_storageclasses cleanup_backupstore + cleanup_disks diff --git a/e2e/keywords/node.resource b/e2e/keywords/node.resource new file mode 100644 index 0000000000..378a218362 --- /dev/null +++ b/e2e/keywords/node.resource @@ -0,0 +1,12 @@ +*** Settings *** +Documentation Node Keywords + +Library ../libs/keywords/common_keywords.py +Library ../libs/keywords/node_keywords.py + +*** Keywords *** +Add ${disk_type} type disk ${disk_path} for all worker nodes + ${worker_nodes}= get_worker_nodes + FOR ${worker_node} IN @{worker_nodes} + add_disk ${worker_node} ${disk_type} ${disk_path} + END diff --git a/e2e/libs/keywords/node_keywords.py b/e2e/libs/keywords/node_keywords.py new file mode 100644 index 0000000000..98b6bf15bc --- /dev/null +++ b/e2e/libs/keywords/node_keywords.py @@ -0,0 +1,24 @@ +from node import Node +from utility.utility import logging + +class node_keywords: + + def __init__(self): + self.node = Node() + + def add_disk(self, node_name, type, path): + logging(f"Adding {type} type disk {path} to node {node_name}") + disk = { + f"{type}-disk": { + "diskType": type, + "path": path, + "allowScheduling": True + } + } + self.node.add_disk(node_name, disk) + + def cleanup_disks(self): + nodes = self.node.list_node_names_by_role("worker") + for node_name in nodes: + logging(f"Resetting node {node_name} disks to default") + self.node.reset_disks(node_name) diff --git a/e2e/libs/keywords/volume_keywords.py b/e2e/libs/keywords/volume_keywords.py index b3ba2f7e9d..419626c40f 100644 --- a/e2e/libs/keywords/volume_keywords.py +++ b/e2e/libs/keywords/volume_keywords.py @@ -24,9 +24,9 @@ def cleanup_volumes(self): for volume in volumes['items']: self.delete_volume(volume['metadata']['name']) - def create_volume(self, volume_name, size, replica_count, frontend="blockdev", migratable=False, access_mode="RWO"): + def create_volume(self, volume_name, size, replica_count, frontend="blockdev", migratable=False, access_mode="RWO", data_engine="v1"): logging(f'Creating volume {volume_name}') - self.volume.create(volume_name, size, replica_count, frontend, migratable, access_mode) + self.volume.create(volume_name, size, replica_count, frontend, migratable, access_mode, data_engine) def delete_volume(self, volume_name): 
logging(f'Deleting volume {volume_name}') diff --git a/e2e/libs/node/node.py b/e2e/libs/node/node.py index cdc6edd543..28cb407e38 100644 --- a/e2e/libs/node/node.py +++ b/e2e/libs/node/node.py @@ -4,14 +4,50 @@ from kubernetes import client from robot.libraries.BuiltIn import BuiltIn - +from utility.utility import get_longhorn_client from utility.utility import get_retry_count_and_interval +from utility.utility import logging class Node: + DEFAULT_DISK_PATH = "/var/lib/longhorn/" + def __init__(self): - pass + self.longhorn_client = get_longhorn_client() + self.retry_count, self.retry_interval = get_retry_count_and_interval() + + def update_disks(self, node_name, disks): + node = self.longhorn_client.by_id_node(node_name) + for _ in range(self.retry_count): + try: + node.diskUpdate(disks=disks) + break + except Exception as e: + logging(f"Updating node {node_name} disk error: {e}") + time.sleep(self.retry_interval) + + def add_disk(self, node_name, disk): + node = self.longhorn_client.by_id_node(node_name) + disks = node.disks + disks.update(disk) + self.update_disks(node_name, disks) + + def reset_disks(self, node_name): + node = self.longhorn_client.by_id_node(node_name) + + for disk_name, disk in iter(node.disks.items()): + if disk.path != self.DEFAULT_DISK_PATH: + disk.allowScheduling = False + self.update_disks(node_name, node.disks) + + disks = {} + for disk_name, disk in iter(node.disks.items()): + if disk.path == self.DEFAULT_DISK_PATH: + disks[disk_name] = disk + else: + logging(f"Try to remove disk {disk_name} from node {node_name}") + self.update_disks(node_name, disks) def get_all_pods_on_node(self, node_name): api = client.CoreV1Api() diff --git a/e2e/libs/volume/crd.py b/e2e/libs/volume/crd.py index 18c1c36ea9..27b62de471 100644 --- a/e2e/libs/volume/crd.py +++ b/e2e/libs/volume/crd.py @@ -21,7 +21,7 @@ def __init__(self, node_exec): self.retry_count, self.retry_interval = get_retry_count_and_interval() self.engine = Engine() - def create(self, volume_name, size, replica_count, frontend, migratable, access_mode): + def create(self, volume_name, size, replica_count, frontend, migratable, access_mode, data_engine): size = str(int(size) * GIBIBYTE) access_mode = access_mode.lower() body = { @@ -39,7 +39,8 @@ def create(self, volume_name, size, replica_count, frontend, migratable, access_ "size": size, "numberOfReplicas": int(replica_count), "migratable": migratable, - "accessMode": access_mode + "accessMode": access_mode, + "dataEngine": data_engine } } try: diff --git a/e2e/libs/volume/volume.py b/e2e/libs/volume/volume.py index 852f77da9e..ee0e9df2f8 100644 --- a/e2e/libs/volume/volume.py +++ b/e2e/libs/volume/volume.py @@ -18,8 +18,8 @@ def __init__(self): else: self.volume = Rest(node_exec) - def create(self, volume_name, size, replica_count, frontend, migratable, access_mode): - return self.volume.create(volume_name, size, replica_count, frontend, migratable, access_mode) + def create(self, volume_name, size, replica_count, frontend, migratable, access_mode, data_engine): + return self.volume.create(volume_name, size, replica_count, frontend, migratable, access_mode, data_engine) def delete(self, volume_name): return self.volume.delete(volume_name) diff --git a/e2e/tests/v2/volume.robot b/e2e/tests/v2/volume.robot new file mode 100644 index 0000000000..80b0ba1ee6 --- /dev/null +++ b/e2e/tests/v2/volume.robot @@ -0,0 +1,31 @@ +*** Settings *** +Documentation v2 Data Engine Test Cases + +Test Tags regression + +Resource ../keywords/common.resource +Resource 
../keywords/volume.resource +Resource ../keywords/setting.resource +Resource ../keywords/node.resource + +Test Setup Set test environment +Test Teardown Cleanup test resources + +*** Variables *** +${LOOP_COUNT} 1 +${RETRY_COUNT} 300 +${RETRY_INTERVAL} 1 + +*** Test Cases *** +Test V2 Volume Basic + [Tags] coretest + [Documentation] Test basic v2 volume operations + Given Set setting v2-data-engine to true + And Add block type disk /dev/xvdh for all worker nodes + When Create volume 0 with data_engine=v2 + And Attach volume 0 + And Wait for volume 0 healthy + And Write data to volume 0 + Then Check volume 0 data is intact + And Detach volume 0 + And Delete volume 0 From 4c5e902a7385f0f4c61afe617f136dcdce52878f Mon Sep 17 00:00:00 2001 From: Jack Lin Date: Thu, 18 Apr 2024 16:36:07 +0800 Subject: [PATCH 2/4] feat(system-backup): add system backup backingimage test ref: longhorn/longhorn 5085 Signed-off-by: Jack Lin --- manager/integration/tests/common.py | 1 + .../tests/test_system_backup_restore.py | 69 +++++++++++++++++++ 2 files changed, 70 insertions(+) diff --git a/manager/integration/tests/common.py b/manager/integration/tests/common.py index ba3d8b3abc..11293cabb4 100644 --- a/manager/integration/tests/common.py +++ b/manager/integration/tests/common.py @@ -283,6 +283,7 @@ BACKING_IMAGE_SOURCE_TYPE_DOWNLOAD = "download" BACKING_IMAGE_SOURCE_TYPE_FROM_VOLUME = "export-from-volume" +BACKING_IMAGE_SOURCE_TYPE_RESTORE = "restore" JOB_LABEL = "recurring-job.longhorn.io" diff --git a/manager/integration/tests/test_system_backup_restore.py b/manager/integration/tests/test_system_backup_restore.py index 3633b161ee..1a32c75153 100644 --- a/manager/integration/tests/test_system_backup_restore.py +++ b/manager/integration/tests/test_system_backup_restore.py @@ -19,6 +19,11 @@ from common import wait_for_volume_detached from common import wait_for_volume_healthy from common import wait_for_volume_restoration_completed +from common import cleanup_all_backing_images +from common import create_backing_image_with_matching_url +from common import BACKING_IMAGE_NAME +from common import BACKING_IMAGE_RAW_URL +from common import BACKING_IMAGE_SOURCE_TYPE_RESTORE from common import SETTING_BACKUPSTORE_POLL_INTERVAL @@ -112,6 +117,70 @@ def test_system_backup_and_restore_volume_with_data(client, volume_name, set_ran check_volume_data(restored_volume, data) +@pytest.mark.system_backup_restore # NOQA +def test_system_backup_and_restore_volume_with_backingimage(client, volume_name, set_random_backupstore): # NOQA + """ + Scenario: test system backup and restore volume with backingimage + + Noted that for volume data integrity check, we have + "test_system_backup_and_restore_volume_with_data" to cover it. + BackingImage uses checksum to verified the data during backup/restore. + If it is inconsistent, BackingImage will be failed and so is the test. + Thus, we don't need to do data integrity check in this test. 
+ + Issue: https://github.com/longhorn/longhorn/issues/5085 + + Given a backingimage + And a volume created with the backingimage + When system backup created + Then system backup in state Ready + + When volume deleted + And backingimage deleted + And restore system backup + Then system restore should be in state Completed + And wait for backingimage restoration to complete + And wait for volume restoration to complete + And volume should be detached + + When attach volume + Then volume should be healthy + """ + + host_id = get_self_host_id() + + create_backing_image_with_matching_url( + client, BACKING_IMAGE_NAME, BACKING_IMAGE_RAW_URL) + + volume = create_and_check_volume( + client, volume_name, backing_image=BACKING_IMAGE_NAME) + volume.attach(hostId=host_id) + volume = wait_for_volume_healthy(client, volume_name) + + system_backup_name = system_backup_random_name() + client.create_system_backup(Name=system_backup_name) + + system_backup_wait_for_state("Ready", system_backup_name, client) + + cleanup_volume(client, volume) + cleanup_all_backing_images(client) + + system_restore_name = system_restore_random_name() + client.create_system_restore(Name=system_restore_name, + SystemBackup=system_backup_name) + + system_restore_wait_for_state("Completed", system_restore_name, client) + + backing_image = client.by_id_backing_image(BACKING_IMAGE_NAME) + assert backing_image.sourceType == BACKING_IMAGE_SOURCE_TYPE_RESTORE + + restored_volume = client.by_id_volume(volume_name) + wait_for_volume_restoration_completed(client, volume_name) + wait_for_volume_detached(client, volume_name) + + restored_volume.attach(hostId=host_id) + restored_volume = wait_for_volume_healthy(client, volume_name) + @pytest.mark.system_backup_restore # NOQA def test_system_backup_with_volume_backup_policy_if_not_present(client, volume_name, set_random_backupstore): # NOQA From d1a36a40c3a2a314b860dcece0f8cb2c27b4ee6c Mon Sep 17 00:00:00 2001 From: Yang Chiu Date: Sun, 28 Apr 2024 15:56:14 +0800 Subject: [PATCH 3/4] test(robot): fix kubelet_keywords does not exist error Signed-off-by: Yang Chiu --- e2e/libs/keywords/host_keywords.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/e2e/libs/keywords/host_keywords.py b/e2e/libs/keywords/host_keywords.py index cc319c31a1..e4771b3d0f 100644 --- a/e2e/libs/keywords/host_keywords.py +++ b/e2e/libs/keywords/host_keywords.py @@ -32,13 +32,13 @@ def reboot_node_by_index(self, idx, power_off_time_in_min=1): node_name = self.node.get_node_by_index(idx) reboot_down_time_sec = int(power_off_time_in_min) * 60 - logging(f'Rebooting node {node_name} with downtime {power_off_time_in_min} minutes') + logging(f'Rebooting node {node_name} with downtime {reboot_down_time_sec} seconds') self.host.reboot_node(node_name, reboot_down_time_sec) def reboot_all_worker_nodes(self, power_off_time_in_min=1): reboot_down_time_sec = int(power_off_time_in_min) * 60 - logging(f'Rebooting all worker nodes with downtime {power_off_time_in_min} minutes') + logging(f'Rebooting all worker nodes with downtime {reboot_down_time_sec} seconds') self.host.reboot_all_worker_nodes(reboot_down_time_sec) def reboot_all_nodes(self): @@ -48,5 +48,5 @@ def reboot_all_nodes(self): def reboot_node_by_name(self, node_name, downtime_in_min=1): reboot_down_time_sec = int(downtime_in_min) * 60 - logging(f'Rebooting node {node_name} with downtime {downtime_in_min} minutes') + logging(f'Rebooting node {node_name} with downtime {reboot_down_time_sec} seconds') self.host.reboot_node(node_name, 
reboot_down_time_sec) From 482e75570d9bcf686fd4701d106a73144b0c45c5 Mon Sep 17 00:00:00 2001 From: Yang Chiu Date: Sun, 28 Apr 2024 21:10:18 +0800 Subject: [PATCH 4/4] test(robot): remove unused test pod functions Signed-off-by: Yang Chiu --- e2e/libs/keywords/volume_keywords.py | 85 +++++++++------------------- e2e/libs/node/node.py | 22 +++---- e2e/libs/node/utility.py | 2 +- e2e/tests/replica_rebuilding.robot | 11 ++-- 4 files changed, 41 insertions(+), 79 deletions(-) diff --git a/e2e/libs/keywords/volume_keywords.py b/e2e/libs/keywords/volume_keywords.py index 419626c40f..1a20098099 100644 --- a/e2e/libs/keywords/volume_keywords.py +++ b/e2e/libs/keywords/volume_keywords.py @@ -52,13 +52,6 @@ def wait_for_volume_expand_to_size(self, volume_name, size): logging(f'Waiting for volume {volume_name} expand to {size}') return self.volume.wait_for_volume_expand_to_size(volume_name, size) - def get_replica_node_ids(self, volume_name): - node_ids = [] - node_ids.extend(self.get_node_ids_by_replica_locality(volume_name, "volume node")) - node_ids.extend(self.get_node_ids_by_replica_locality(volume_name, "replica node")) - node_ids.extend(self.get_node_ids_by_replica_locality(volume_name, "test pod node")) - return node_ids - def get_replica_node(self, volume_name): return self.get_node_id_by_replica_locality(volume_name, "replica node") @@ -78,14 +71,9 @@ def get_node_ids_by_replica_locality(self, volume_name, replica_locality): worker_nodes = self.node.list_node_names_by_role("worker") volume_node = self.get_node_id_by_replica_locality(volume_name, "volume node") replica_nodes = [node for node in worker_nodes if node != volume_node] - test_pod_node = self.node.get_test_pod_running_node() - - if replica_locality == "test pod node": - if test_pod_node in replica_nodes: - return [test_pod_node] - elif replica_locality == "replica node": - return [node for node in replica_nodes if node != test_pod_node] + if replica_locality == "replica node": + return replica_nodes else: raise ValueError(f"Unknown replica locality {replica_locality}") @@ -108,64 +96,43 @@ def check_data_checksum(self, volume_name): logging(f"Checking volume {volume_name} data checksum is {checksum}") self.volume.check_data_checksum(volume_name, checksum) - def delete_replica(self, volume_name, replica_node): - if str(replica_node).isdigit(): - replica_node = self.node.get_node_by_index(replica_node) - - logging(f"Deleting volume {volume_name}'s replica on node {replica_node}") - self.volume.delete_replica(volume_name, replica_node) - def delete_replica_on_node(self, volume_name, replica_locality): - check_replica_locality(replica_locality) - - node_id = self.get_node_id_by_replica_locality(volume_name, replica_locality) + node_name = None + if index := self.node.is_accessing_node_by_index(replica_locality): + node_name = self.node.get_node_by_index(index) + else: + node_name = self.get_node_id_by_replica_locality(volume_name, replica_locality) - logging(f"Deleting volume {volume_name}'s replica on node {node_id}") - self.volume.delete_replica(volume_name, node_id) + logging(f"Deleting volume {volume_name}'s replica on node {node_name}") + self.volume.delete_replica(volume_name, node_name) def set_annotation(self, volume_name, annotation_key, annotation_value): self.volume.set_annotation(volume_name, annotation_key, annotation_value) - async def wait_for_replica_rebuilding_start(self, volume_name, replica_node): - if str(replica_node).isdigit(): - replica_node = self.node.get_node_by_index(replica_node) - - logging(f"Waiting for 
volume {volume_name}'s replica on node {replica_node} rebuilding started") - await self.volume.wait_for_replica_rebuilding_start( - volume_name, - replica_node - ) - async def wait_for_replica_rebuilding_to_start_on_node(self, volume_name, replica_locality): - check_replica_locality(replica_locality) - - node_id = self.get_node_id_by_replica_locality(volume_name, replica_locality) - - logging(f"Waiting for volume {volume_name}'s replica on node {node_id} rebuilding started") - await self.volume.wait_for_replica_rebuilding_start(volume_name, node_id) - - def wait_for_replica_rebuilding_complete(self, volume_name, replica_node): - if str(replica_node).isdigit(): - replica_node = self.node.get_node_by_index(replica_node) + node_name = None + if index := self.node.is_accessing_node_by_index(replica_locality): + node_name = self.node.get_node_by_index(index) + else: + node_name = self.get_node_id_by_replica_locality(volume_name, replica_locality) - logging(f"Waiting for volume {volume_name}'s replica on node {replica_node} rebuilding completed") - self.volume.wait_for_replica_rebuilding_complete( - volume_name, - replica_node - ) + logging(f"Waiting for volume {volume_name}'s replica on node {node_name} rebuilding started") + await self.volume.wait_for_replica_rebuilding_start(volume_name, node_name) def wait_for_replica_rebuilding_to_complete_on_node(self, volume_name, replica_locality): - check_replica_locality(replica_locality) - - node_id = self.get_node_id_by_replica_locality(volume_name, replica_locality) + node_name = None + if index := self.node.is_accessing_node_by_index(replica_locality): + node_name = self.node.get_node_by_index(index) + else: + node_name = self.get_node_id_by_replica_locality(volume_name, replica_locality) - logging(f"Waiting for volume {volume_name}'s replica on node {node_id} rebuilding completed") - self.volume.wait_for_replica_rebuilding_complete(volume_name, node_id) + logging(f"Waiting for volume {volume_name}'s replica on node {node_name} rebuilding completed") + self.volume.wait_for_replica_rebuilding_complete(volume_name, node_name) def wait_for_replica_rebuilding_to_complete(self, volume_name): - for node_id in self.get_replica_node_ids(volume_name): - logging(f"Waiting for volume {volume_name}'s replica on node {node_id} rebuilding completed") - self.volume.wait_for_replica_rebuilding_complete(volume_name, node_id) + for node_name in self.node.list_node_names_by_role("worker"): + logging(f"Waiting for volume {volume_name}'s replica on node {node_name} rebuilding completed") + self.volume.wait_for_replica_rebuilding_complete(volume_name, node_name) async def only_one_replica_rebuilding_will_start_at_a_time_on_node(self, volume_name_0, volume_name_1, replica_locality): diff --git a/e2e/libs/node/node.py b/e2e/libs/node/node.py index 28cb407e38..fa50907644 100644 --- a/e2e/libs/node/node.py +++ b/e2e/libs/node/node.py @@ -1,6 +1,6 @@ import os import time - +import re from kubernetes import client from robot.libraries.BuiltIn import BuiltIn @@ -76,6 +76,13 @@ def wait_all_pods_evicted(self, node_name): assert evicted, 'failed to evict pods' + def is_accessing_node_by_index(self, node): + p = re.compile('node (\d)') + if m := p.match(node): + return m.group(1) + else: + return None + def get_node_by_index(self, index, role="worker"): nodes = self.list_node_names_by_role(role) return nodes[int(index)] @@ -84,19 +91,6 @@ def get_node_by_name(self, node_name): core_api = client.CoreV1Api() return core_api.read_node(node_name) - def 
get_test_pod_running_node(self): - if "NODE_NAME" in os.environ: - return os.environ["NODE_NAME"] - else: - return self.get_node_by_index(0) - - def get_test_pod_not_running_node(self): - worker_nodes = self.list_node_names_by_role("worker") - test_pod_running_node = self.get_test_pod_running_node() - for worker_node in worker_nodes: - if worker_node != test_pod_running_node: - return worker_node - def get_node_cpu_cores(self, node_name): node = self.get_node_by_name(node_name) return node.status.capacity['cpu'] diff --git a/e2e/libs/node/utility.py b/e2e/libs/node/utility.py index e625e81249..f285291aea 100644 --- a/e2e/libs/node/utility.py +++ b/e2e/libs/node/utility.py @@ -1,2 +1,2 @@ def check_replica_locality(replica_locality): - assert replica_locality in ["replica node", "test pod node", "volume node"], f"Unknown replica locality: {replica_locality}: " + assert replica_locality in ["replica node", "volume node"], f"Unknown replica locality: {replica_locality}: " diff --git a/e2e/tests/replica_rebuilding.robot b/e2e/tests/replica_rebuilding.robot index b0612e2174..5421928957 100644 --- a/e2e/tests/replica_rebuilding.robot +++ b/e2e/tests/replica_rebuilding.robot @@ -18,14 +18,15 @@ ${RETRY_INTERVAL} 1 Delete Replica While Replica Rebuilding Given Create volume 0 with 2 GB and 3 replicas And Attach volume 0 + And Wait for volume 0 healthy And Write data to volume 0 FOR ${i} IN RANGE ${LOOP_COUNT} - When Delete volume 0 replica on volume node - And Wait until volume 0 replica rebuilding started on volume node - And Delete volume 0 replica on replica node - And Wait until volume 0 replica rebuilding completed on volume node - And Delete volume 0 replica on test pod node + When Delete volume 0 replica on node 0 + And Wait until volume 0 replica rebuilding started on node 0 + And Delete volume 0 replica on node 1 + And Wait until volume 0 replica rebuilding completed on node 0 + And Delete volume 0 replica on node 2 Then Check volume 0 data is intact And Wait until volume 0 replicas rebuilding completed
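
Reviewer notes on the series (illustrative sketches, not part of the patches above):

Patch 1 threads a new `data_engine` argument from the Robot keyword layer (`Create volume ... data_engine=v2`) down to the Volume custom resource (`spec.dataEngine`), and adds disk management (`add_disk` / `reset_disks`) on top of the Longhorn client so the test can register a block-type disk before creating a v2 volume. The sketch below is a minimal, stand-alone illustration of that flow, not the framework code itself. Calls taken from the diffs: `by_id_node`, `node.disks`, `diskUpdate(disks=...)`, and a Volume CR spec carrying `dataEngine`. Assumptions not in the diffs: the `longhorn` Python client package and its `Client(url=...)` constructor, the `LONGHORN_URL` endpoint, the `longhorn.io/v1beta2` group/version, the `longhorn-system` namespace, and the worker node names.

```python
# Minimal sketch of the v2 volume flow exercised by e2e/tests/v2/volume.robot.
# Assumed: `pip install longhorn kubernetes`, a reachable Longhorn API, a kubeconfig.
import longhorn
from kubernetes import client as k8s_client, config

LONGHORN_URL = "http://localhost:9500/v1"   # illustrative endpoint


def add_block_disk(lh, node_name, disk_path="/dev/xvdh"):
    # Mirrors node_keywords.add_disk: merge a block-type disk into node.disks
    # and push the whole disk map back via diskUpdate.
    node = lh.by_id_node(node_name)
    disks = node.disks
    disks.update({
        "block-disk": {"diskType": "block", "path": disk_path, "allowScheduling": True}
    })
    node.diskUpdate(disks=disks)


def create_v2_volume(volume_name, size_gib=1, replica_count=3):
    # Mirrors crd.Volume.create with the new dataEngine field set to "v2".
    config.load_kube_config()
    api = k8s_client.CustomObjectsApi()
    body = {
        "apiVersion": "longhorn.io/v1beta2",          # assumed group/version
        "kind": "Volume",
        "metadata": {"name": volume_name, "namespace": "longhorn-system"},
        "spec": {
            "frontend": "blockdev",
            "size": str(size_gib * 1024 ** 3),        # size is a string of bytes
            "numberOfReplicas": replica_count,
            "migratable": False,
            "accessMode": "rwo",
            "dataEngine": "v2",
        },
    }
    api.create_namespaced_custom_object(
        group="longhorn.io", version="v1beta2",
        namespace="longhorn-system", plural="volumes", body=body,
    )


if __name__ == "__main__":
    lh = longhorn.Client(url=LONGHORN_URL)
    for worker in ("worker-0", "worker-1", "worker-2"):   # illustrative node names
        add_block_disk(lh, worker)
    create_v2_volume("vol-0")
```

Patch 4 drops the "test pod node" locality and lets keywords address replicas by worker index ("node 0", "node 1", ...), detected by `is_accessing_node_by_index`. Below is a small sketch of that dispatch, assuming locality-based lookup is handled elsewhere; note the diff uses the non-raw pattern `'node (\d)'`, while a raw string with `\d+` is used here to avoid the escape warning and accept multi-digit indices.

```python
import re


def resolve_replica_node(target, worker_nodes):
    # "node <i>" -> i-th worker node; anything else is treated as a replica
    # locality ("volume node" / "replica node") resolved by other keywords.
    m = re.match(r"node (\d+)", target)
    if m:
        return worker_nodes[int(m.group(1))]
    return None


workers = ["worker-0", "worker-1", "worker-2"]   # illustrative names
assert resolve_replica_node("node 1", workers) == "worker-1"
assert resolve_replica_node("volume node", workers) is None
```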