Merge branch 'master' into auto-balance-disk-pressure
Signed-off-by: yangchiu <[email protected]>
yangchiu authored Dec 6, 2024
2 parents e621156 + 92bfb79 commit c8f3e20
Showing 59 changed files with 906 additions and 139 deletions.

10 changes: 7 additions & 3 deletions e2e/README.md
@@ -14,10 +14,14 @@ curl -sSfL https://raw.githubusercontent.com/longhorn/longhorn/master/scripts/en

### Run the test

1. Deploy all backupstore servers (including `NFS` server and `Minio` as s3 server) for test purposes.
1. Deploy all backupstore servers (including an `NFS` server, `Minio` as an S3 server, and `CIFS` and `Azurite` servers) for test purposes.

   For Azurite, some manual steps need to be done after the manifest is deployed (https://github.com/longhorn/longhorn-tests/wiki/Setup-Azurite-Backupstore-For-Testing).
```
kubectl create -f https://raw.githubusercontent.com/longhorn/longhorn/master/deploy/backupstores/minio-backupstore.yaml \
-f https://raw.githubusercontent.com/longhorn/longhorn/master/deploy/backupstores/nfs-backupstore.yaml
kubectl create -f https://raw.githubusercontent.com/longhorn/longhorn-tests/master/manager/integration/deploy/backupstores/minio-backupstore.yaml \
-f https://raw.githubusercontent.com/longhorn/longhorn-tests/master/manager/integration/deploy/backupstores/nfs-backupstore.yaml \
-f https://raw.githubusercontent.com/longhorn/longhorn/master/deploy/backupstores/cifs-backupstore.yaml \
-f https://raw.githubusercontent.com/longhorn/longhorn/master/deploy/backupstores/azurite-backupstore.yaml
```
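
   Before moving on, it can be worth confirming the backupstore workloads actually came up. A minimal sketch using the Kubernetes Python client, assuming the manifests above create pods in the `default` namespace whose names start with `longhorn-test-` (adjust the namespace and prefix if your setup differs):
   ```
   from kubernetes import client, config

   # Assumption: the backupstore manifests create pods named longhorn-test-*
   # in the default namespace; verify with `kubectl get pods` if unsure.
   config.load_kube_config()
   core = client.CoreV1Api()
   pods = core.list_namespaced_pod("default").items
   backupstore_pods = [p for p in pods if p.metadata.name.startswith("longhorn-test-")]
   assert backupstore_pods, "no backupstore pods found yet"
   for pod in backupstore_pods:
       assert pod.status.phase == "Running", f"{pod.metadata.name} is {pod.status.phase}"
   ```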

1. Expose Longhorn API:
5 changes: 5 additions & 0 deletions e2e/keywords/backup.resource
@@ -47,3 +47,8 @@ Check volume ${volume_id} data is backup ${backup_id}

Check backup synced from backupstore
assert_all_backups_before_uninstall_exist ${backups_before_uninstall}

Volume ${volume_id} backup ${backup_id} should be able to create
Create backup ${backup_id} for volume ${volume_id}
Verify backup list contains no error for volume ${volume_id}
Verify backup list contains backup ${backup_id} of volume ${volume_id}
8 changes: 8 additions & 0 deletions e2e/keywords/longhorn.resource
@@ -82,3 +82,11 @@ Delete instance-manager of deployment ${deployment_id} volume

Wait for Longhorn components all running
wait_for_namespace_pods_running longhorn-system

Install Longhorn stable version
install_longhorn_system is_stable_version=True

Uninstall Longhorn stable version
${backups_before_uninstall} = list_all_backups
uninstall_longhorn_system is_stable_version=True
Set Test Variable ${backups_before_uninstall}
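
The `Uninstall Longhorn stable version` keyword records every backup before tearing Longhorn down so a later step can confirm they are still present after reinstall. An illustrative sketch of that comparison (not the library's actual `assert_all_backups_before_uninstall_exist` implementation), assuming each backup entry carries its name under `metadata.name`:

```
def assert_backups_survive(backups_before, backups_after):
    # Every backup recorded before the uninstall must still be reported
    # by the backupstore after Longhorn is reinstalled.
    names_after = {b["metadata"]["name"] for b in backups_after["items"]}
    missing = [b["metadata"]["name"] for b in backups_before["items"]
               if b["metadata"]["name"] not in names_after]
    assert not missing, f"backups missing after reinstall: {missing}"
```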
8 changes: 4 additions & 4 deletions e2e/keywords/sharemanager.resource
@@ -21,12 +21,12 @@ Check sharemanager ${condition} using headless service
Wait for all sharemanager to be deleted
wait_for_sharemanagers_deleted

Delete sharemanager of deployment ${deployment_id} and wait for recreation
Delete sharemanager pod of deployment ${deployment_id} and wait for recreation
${deployment_name} = generate_name_with_suffix deployment ${deployment_id}
${volume_name} = get_workload_volume_name ${deployment_name}
delete_sharemanager_and_wait_for_recreation ${volume_name}
delete_sharemanager_pod_and_wait_for_recreation ${volume_name}

Wait for sharemanager of deployment ${deployment_id} running
Wait for sharemanager pod of deployment ${deployment_id} running
${deployment_name} = generate_name_with_suffix deployment ${deployment_id}
${volume_name} = get_workload_volume_name ${deployment_name}
wait_for_share_manager_running ${volume_name}
wait_for_share_manager_pod_running ${volume_name}
52 changes: 52 additions & 0 deletions e2e/keywords/volume.resource
@@ -91,6 +91,10 @@ Write data ${data_id} to volume ${volume_id}
${volume_name} = generate_name_with_suffix volume ${volume_id}
write_volume_random_data ${volume_name} 2048 ${data_id}

Write data ${data_id} ${size} MB to volume ${volume_id}
${volume_name} = generate_name_with_suffix volume ${volume_id}
write_volume_random_data ${volume_name} ${size} ${data_id}

Keep writing data to volume ${volume_id}
${volume_name} = generate_name_with_suffix volume ${volume_id}
keep_writing_data ${volume_name}
@@ -177,6 +181,11 @@ Wait for volume ${volume_id} restoration from backup ${backup_id} completed
${backup_name} = get_backup_name ${backup_id}
wait_for_volume_restoration_completed ${volume_name} ${backup_name}

Wait for volume ${volume_id} restoration from backup ${backup_id} start
${volume_name} = generate_name_with_suffix volume ${volume_id}
${backup_name} = get_backup_name ${backup_id}
wait_for_volume_restoration_start ${volume_name} ${backup_name}

Wait until volume ${volume_id} replica rebuilding started on ${replica_locality}
${volume_name} = generate_name_with_suffix volume ${volume_id}
wait_for_replica_rebuilding_to_start_on_node ${volume_name} ${replica_locality}
@@ -238,6 +247,44 @@ Check volume ${volume_id} replica on node ${node_id} exist
${replica_name} get_replica_name_on_node ${volume_name} ${node_name}
Should Not Be Equal ${replica_name} ${None}

Volume ${volume_id} should have ${expected_replica_count} replicas running
${volume_name} = generate_name_with_suffix volume ${volume_id}
${replica_count} = wait_for_replica_count ${volume_name} node_name= replica_count=${expected_replica_count}

Volume ${volume_id} should have ${expected_replica_count} replicas running on node ${node_id}
${volume_name} = generate_name_with_suffix volume ${volume_id}
${node_name} = get_node_by_index ${node_id}
${replica_count} = wait_for_replica_count ${volume_name} node_name=${node_name} replica_count=${expected_replica_count}
Set Test Variable ${volume_name}
Set Test Variable ${node_name}
Set Test Variable ${replica_count}

Volume ${volume_id} should have replicas running on node ${node_id}
${volume_name} = generate_name_with_suffix volume ${volume_id}
${node_name} = get_node_by_index ${node_id}
${replica_count} = wait_for_replica_count ${volume_name} node_name=${node_name}
Set Test Variable ${volume_name}
Set Test Variable ${node_name}
Set Test Variable ${replica_count}

Volume ${volume_id} should have ${expected_replica_count} replicas running on node ${node_id} and no additional scheduling occurs
Volume ${volume_id} should have ${expected_replica_count} replicas running on node ${node_id}
FOR ${i} IN RANGE 3
Log to console Ensuring there's no additional scheduling for node ${node_name} ... (${i})
${new_replica_count} = wait_for_replica_count ${volume_name} node_name=${node_name} replica_count=${expected_replica_count}
Should Be Equal As Integers ${replica_count} ${new_replica_count}
Sleep 5
END

Volume ${volume_id} should have replicas running on node ${node_id} and no additional scheduling occurs
Volume ${volume_id} should have replicas running on node ${node_id}
FOR ${i} IN RANGE 3
Log to console Ensuring there's no additional scheduling for node ${node_name} ... (${i})
${new_replica_count} = wait_for_replica_count ${volume_name} node_name=${node_name}
Should Be Equal As Integers ${replica_count} ${new_replica_count}
Sleep 5
END

Check volume ${volume_id} data is intact
${volume_name} = generate_name_with_suffix volume ${volume_id}
check_data_checksum ${volume_name}
@@ -299,6 +346,11 @@ Check volume ${volume_id} data is backup ${backup_id} created in another cluster
${backup_data} = get_backup_data_from_backup_list ${backups_before_uninstall} ${backup_id}
Should Be Equal ${current_checksum} ${backup_data}

Create volume ${volume_id} from backup ${backup_id} in another cluster
${volume_name} = generate_name_with_suffix volume ${volume_id}
${backup_url} = get_backup_url_from_backup_list ${backups_before_uninstall} ${backup_id}
create_volume ${volume_name} fromBackup=${backup_url}

Create DR volume ${volume_id} from backup ${backup_id} in another cluster
${volume_name} = generate_name_with_suffix volume ${volume_id}
${backup_url} = get_backup_url_from_backup_list ${backups_before_uninstall} ${backup_id}
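
The `... and no additional scheduling occurs` keywords above work by polling the replica count a few more times and asserting it never changes. The same pattern in plain Python, as a generic sketch (function names here are illustrative):

```
import time

def assert_count_stays_stable(get_count, expected, polls=3, interval=5):
    # Poll a few extra times; any change means a replica was scheduled
    # (or removed) after the volume had already reached the expected count.
    for i in range(polls):
        current = get_count()
        assert current == expected, f"replica count changed to {current} on poll {i}"
        time.sleep(interval)
```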
9 changes: 7 additions & 2 deletions e2e/libs/backup/rest.py
@@ -53,8 +53,13 @@ def get(self, backup_id, volume_name):

def get_from_list(self, backup_list, backup_id):
for backup in backup_list["items"]:
if backup['metadata']['annotations']['test.longhorn.io/backup-id'] == backup_id:
return backup
try:
if backup['metadata']['annotations']['test.longhorn.io/backup-id'] == backup_id:
return backup
except KeyError as e:
logging(f"Missing key in backup metadata: {str(e)} for backup {backup['metadata']['name']}")
except Exception as e:
logging(f"Unexpected error accessing backup {backup['metadata']['name']}: {str(e)}")
return None

def get_by_snapshot(self, volume_name, snapshot_name):
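
The `get_from_list` change above guards the annotation lookup because backups synced from the backupstore (for example, ones created before an uninstall or by another cluster) may not carry the `test.longhorn.io/backup-id` annotation; a plain dictionary access would raise `KeyError` and abort the whole scan. A standalone sketch of the same defensive lookup (names here are illustrative, not the library's API):

```
def find_backup_by_id(backup_list, backup_id):
    for backup in backup_list["items"]:
        annotations = backup["metadata"].get("annotations") or {}
        if annotations.get("test.longhorn.io/backup-id") == backup_id:
            return backup
    return None

backup_list = {
    "items": [
        # Created by the test framework, carries the test annotation.
        {"metadata": {"name": "backup-abc",
                      "annotations": {"test.longhorn.io/backup-id": "0"}}},
        # Synced from the backupstore without the annotation; it is skipped.
        {"metadata": {"name": "backup-xyz", "annotations": {}}},
    ]
}
assert find_backup_by_id(backup_list, "0")["metadata"]["name"] == "backup-abc"
```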
8 changes: 4 additions & 4 deletions e2e/libs/keywords/longhorn_deploy_keywords.py
@@ -6,11 +6,11 @@ class longhorn_deploy_keywords:
def __init__(self):
self.longhorn = LonghornDeploy()

def uninstall_longhorn_system(self):
self.longhorn.uninstall()
def uninstall_longhorn_system(self, is_stable_version=False):
self.longhorn.uninstall(is_stable_version)

def check_longhorn_crd_removed(self):
self.longhorn.check_longhorn_crd_removed()

def install_longhorn_system(self):
self.longhorn.install()
def install_longhorn_system(self, is_stable_version=False):
self.longhorn.install(is_stable_version)
38 changes: 28 additions & 10 deletions e2e/libs/keywords/sharemanager_keywords.py
@@ -7,7 +7,7 @@

from utility.utility import get_retry_count_and_interval
from utility.utility import logging

from utility.utility import get_pod, delete_pod

class sharemanager_keywords:

@@ -48,14 +48,32 @@ def wait_for_sharemanagers_deleted(self, name=[]):

assert AssertionError, f"Failed to wait for all sharemanagers to be deleted"

def delete_sharemanager(self, name):
return self.sharemanager.delete(name)

def delete_sharemanager_and_wait_for_recreation(self, name):
sharemanager = self.sharemanager.get(name)
last_creation_time = sharemanager["metadata"]["creationTimestamp"]
self.sharemanager.delete(name)
self.sharemanager.wait_for_restart(name, last_creation_time)
def delete_sharemanager_pod_and_wait_for_recreation(self, name):
sharemanager_pod_name = "share-manager-" + name
sharemanager_pod = get_pod(sharemanager_pod_name, "longhorn-system")
last_creation_time = sharemanager_pod.metadata.creation_timestamp
delete_pod(sharemanager_pod_name, "longhorn-system")

retry_count, retry_interval = get_retry_count_and_interval()
for i in range(retry_count):
time.sleep(retry_interval)
sharemanager_pod = get_pod(sharemanager_pod_name, "longhorn-system")
if sharemanager_pod == None:
continue
creation_time = sharemanager_pod.metadata.creation_timestamp
if creation_time > last_creation_time:
return

assert False, f"sharemanager pod {sharemanager_pod_name} not recreated"


def wait_for_share_manager_pod_running(self, name):
sharemanager_pod_name = "share-manager-" + name
retry_count, retry_interval = get_retry_count_and_interval()
for i in range(retry_count):
sharemanager_pod = get_pod(sharemanager_pod_name, "longhorn-system")
if sharemanager_pod.status.phase == "Running":
return

def wait_for_share_manager_running(self, name):
return self.sharemanager.wait_for_running(name)
assert False, f"sharemanager pod {sharemanager_pod_name} not running"
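
The rewritten keywords operate on the `share-manager-<volume>` pod directly and detect recreation by comparing `creationTimestamp` values rather than watching the ShareManager custom resource. A generic sketch of that recreation check using the Kubernetes Python client (the repository's own `get_pod`/`delete_pod` helpers are what the real keyword uses):

```
import time
from kubernetes import client, config
from kubernetes.client.rest import ApiException

def wait_for_pod_recreation(name, namespace, last_creation_time, retries=150, interval=2):
    config.load_kube_config()
    core = client.CoreV1Api()
    for _ in range(retries):
        time.sleep(interval)
        try:
            pod = core.read_namespaced_pod(name, namespace)
        except ApiException as e:
            if e.status == 404:
                continue  # old pod is gone, replacement not created yet
            raise
        # creation_timestamp is a timezone-aware datetime, so ">" is enough
        # to tell the replacement pod from the one that was deleted.
        if pod.metadata.creation_timestamp > last_creation_time:
            return pod
    raise AssertionError(f"pod {name} was not recreated")
```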
7 changes: 7 additions & 0 deletions e2e/libs/keywords/volume_keywords.py
@@ -236,6 +236,9 @@ def wait_for_replica_running(self, volume_name, node_name):
def get_replica_name_on_node(self, volume_name, node_name):
return self.volume.get_replica_name_on_node(volume_name, node_name)

def wait_for_replica_count(self, volume_name, node_name=None, replica_count=None):
return self.volume.wait_for_replica_count(volume_name, node_name, replica_count)

def wait_for_replica_rebuilding_to_stop_on_node(self, volume_name, replica_locality):
node_id = self.get_node_id_by_replica_locality(volume_name, replica_locality)
retry_count, retry_interval = get_retry_count_and_interval()
@@ -280,6 +283,10 @@ def wait_for_volume_restoration_completed(self, volume_name, backup_name):
logging(f'Waiting for volume {volume_name} restoration from {backup_name} completed')
self.volume.wait_for_volume_restoration_completed(volume_name, backup_name)

def wait_for_volume_restoration_start(self, volume_name, backup_name):
logging(f'Waiting for volume {volume_name} restoration from {backup_name} start')
self.volume.wait_for_volume_restoration_start(volume_name, backup_name)

def validate_volume_replicas_anti_affinity(self, volume_name):
self.volume.validate_volume_replicas_anti_affinity(volume_name)

22 changes: 16 additions & 6 deletions e2e/libs/longhorn_deploy/base.py
@@ -19,7 +19,7 @@ def install(self):
return NotImplemented

@abstractmethod
def uninstall(self, longhorn_branch=None):
def uninstall(self, is_stable_version=False):
return NotImplemented

def check_longhorn_crd_removed(self):
@@ -29,17 +29,27 @@ def check_longhorn_crd_removed(self):

def check_longhorn_uninstall_pod_log(self):
logs = k8s.get_pod_logs(LONGHORN_NAMESPACE, LONGHORN_UNINSTALL_JOB_LABEL)
assert "error" not in logs
assert "level=fatal" not in logs
assert "level=error" not in logs, f"find string 'level=error' in uninstall log {logs}"
assert "level=fatal" not in logs, f"find string 'level=fatal' in uninstall log {logs}"

def install_longhorn(self):
def install_longhorn(self, is_stable_version=False):
current_path=os.getcwd()
full_path = os.path.join(current_path, LONGHORN_INSTALL_SCRIPT_PATH)

if is_stable_version is True:
cmd = ['bash', '-c', f'IS_INSTALL_STABLE_VERSION=true {full_path}']
else:
cmd = ['bash', full_path]

try:
output = subprocess.check_output(['bash', full_path], timeout=LONGHORN_INSTALL_TIMEOUT)
output = subprocess.check_output(cmd, timeout=LONGHORN_INSTALL_TIMEOUT)
logging(output)
except subprocess.CalledProcessError as e:
logging(f"Error: {e.stderr}")
logging(f"Command failed with exit code {e.returncode}")
logging(f"stdout: {e.output}")
logging(f"stderr: {e.stderr}")
raise
except subprocess.TimeoutExpired as e:
logging(f"Command timed out after {e.timeout} seconds")
logging(f"stdout: {e.output}")
raise
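
`install_longhorn` switches to `bash -c` so it can prefix the script with `IS_INSTALL_STABLE_VERSION=true`, and the broadened exception handling logs the captured output before re-raising so the keyword still fails loudly. An equivalent way to pass the flag without string interpolation, shown as a sketch (the script path and timeout arguments stand in for the constants in `base.py`):

```
import os
import subprocess

def run_install_script(full_path, is_stable_version=False, timeout=3600):
    # Hand the flag to the script through the environment instead of
    # interpolating it into a shell string; avoids quoting pitfalls.
    env = dict(os.environ)
    if is_stable_version:
        env["IS_INSTALL_STABLE_VERSION"] = "true"
    return subprocess.check_output(["bash", full_path], env=env, timeout=timeout)
```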
8 changes: 4 additions & 4 deletions e2e/libs/longhorn_deploy/longhorn_deploy.py
@@ -14,11 +14,11 @@ def __init__(self):
elif self._method == "helm":
self.longhorn = LonghornHelmChart()

def uninstall(self):
return self.longhorn.uninstall()
def uninstall(self, is_stable_version=False):
return self.longhorn.uninstall(is_stable_version)

def check_longhorn_crd_removed(self):
return self.longhorn.check_longhorn_crd_removed()

def install(self):
return self.longhorn.install()
def install(self, is_stable_version=False):
return self.longhorn.install(is_stable_version)
6 changes: 3 additions & 3 deletions e2e/libs/longhorn_deploy/longhorn_helm_chart.py
@@ -8,7 +8,7 @@

class LonghornHelmChart(Base):

def uninstall(self):
def uninstall(self, is_stable_version=False):
control_plane_nodes = Node.list_node_names_by_role(self, role="control-plane")
control_plane_node = control_plane_nodes[0]

@@ -19,5 +19,5 @@ def uninstall(self):
k8s.delete_namespace(namespace=LONGHORN_NAMESPACE)
k8s.wait_namespace_terminated(namespace=LONGHORN_NAMESPACE)

def install(self):
self.install_longhorn()
def install(self, is_stable_version=False):
self.install_longhorn(is_stable_version)
11 changes: 7 additions & 4 deletions e2e/libs/longhorn_deploy/longhorn_kubectl.py
@@ -9,8 +9,11 @@

class LonghornKubectl(Base):

def uninstall(self):
longhorn_branch = os.getenv("LONGHORN_REPO_BRANCH")
def uninstall(self, is_stable_version=False):
env_var = "LONGHORN_STABLE_VERSION" if is_stable_version else "LONGHORN_REPO_BRANCH"
longhorn_branch = os.getenv(env_var)
if not longhorn_branch:
raise ValueError(f"Required environment variable {env_var} is not set")

control_plane_nodes = Node.list_node_names_by_role(self, role="control-plane")
control_plane_node = control_plane_nodes[0]
@@ -30,5 +33,5 @@ def uninstall(self):
assert res, "delete uninstallation components failed"
k8s.wait_namespace_terminated(namespace=LONGHORN_NAMESPACE)

def install(self):
self.install_longhorn()
def install(self, is_stable_version=False):
self.install_longhorn(is_stable_version)
4 changes: 4 additions & 0 deletions e2e/libs/volume/base.py
@@ -92,6 +92,10 @@ def wait_for_volume_migration_completed(self, volume_name, node_name):
def wait_for_volume_restoration_completed(self, volume_name, backup_name):
return NotImplemented

@abstractmethod
def wait_for_volume_restoration_start(self, volume_name, backup_name):
return NotImplemented

@abstractmethod
def get_endpoint(self, volume_name):
return NotImplemented
(Diffs for the remaining changed files are not shown.)
