Commit b7c9e99

Merge branch 'master' into scale-up-coredns
yangchiu authored Dec 6, 2024
2 parents 044b944 + 8acea05
Showing 54 changed files with 556 additions and 239 deletions.
8 changes: 6 additions & 2 deletions e2e/keywords/common.resource
@@ -1,6 +1,7 @@
*** Settings ***
Documentation Common keywords
Library Collections
Library OperatingSystem
Library ../libs/keywords/common_keywords.py
Library ../libs/keywords/deployment_keywords.py
@@ -34,11 +35,14 @@ Set test environment
${host_provider}= Get Environment Variable HOST_PROVIDER
${disk_path}= Set Variable If "${host_provider}" == "harvester" /dev/vdc /dev/xvdh
FOR ${worker_node} IN @{worker_nodes}
add_disk ${worker_node} block ${disk_path}
add_disk block-disk ${worker_node} block ${disk_path}
END

Cleanup test resources
Run keyword And Ignore Error power_on_node_by_name ${powered_off_node}
FOR ${powered_off_node} IN @{powered_off_nodes}
Run keyword And Ignore Error power_on_node_by_name ${powered_off_node}
Remove Values From List ${powered_off_nodes} ${powered_off_node}
END
uncordon_all_nodes
cleanup_control_plane_network_latency
reset_node_schedule
11 changes: 7 additions & 4 deletions e2e/keywords/host.resource
@@ -1,6 +1,7 @@
*** Settings ***
Documentation Physical Node Keywords
Library Collections
Library ../libs/keywords/common_keywords.py
Library ../libs/keywords/host_keywords.py
Library ../libs/keywords/network_keywords.py
@@ -34,11 +35,13 @@ Restart cluster
reboot_all_nodes
setup_control_plane_network_latency

Power on off node
Run keyword And Ignore Error
... power_on_node_by_name ${powered_off_node}
Power on off nodes
FOR ${powered_off_node} IN @{powered_off_nodes}
Run keyword And Ignore Error power_on_node_by_name ${powered_off_node}
Remove Values From List ${powered_off_nodes} ${powered_off_node}
END

Power off node ${node_id}
${powered_off_node} = get_node_by_index ${node_id}
Append to list ${powered_off_nodes} ${powered_off_node}
power_off_node_by_name ${powered_off_node}
Set Test Variable ${powered_off_node}
32 changes: 31 additions & 1 deletion e2e/keywords/node.resource
@@ -3,12 +3,13 @@ Documentation Node Keywords
Library ../libs/keywords/common_keywords.py
Library ../libs/keywords/node_keywords.py
Library ../libs/keywords/volume_keywords.py

*** Keywords ***
Add ${disk_type} type disk ${disk_path} for all worker nodes
${worker_nodes}= get_worker_nodes
FOR ${worker_node} IN @{worker_nodes}
add_disk ${worker_node} ${disk_type} ${disk_path}
add_disk ${disk_type}-disk ${worker_node} ${disk_type} ${disk_path}
END

Set node ${node_id} with
@@ -31,3 +32,32 @@ Disable node ${node_id} default disk
Enable node ${node_id} default disk
${node_name} = get_node_by_index ${node_id}
enable_default_disk ${node_name}

Disable disk ${disk_id} scheduling on node ${node_id}
${node_name} = get_node_by_index ${node_id}
${disk_name} = generate_name_with_suffix disk ${disk_id}
disable_disk ${node_name} ${disk_name}

Enable disk ${disk_id} scheduling on node ${node_id}
${node_name} = get_node_by_index ${node_id}
${disk_name} = generate_name_with_suffix disk ${disk_id}
enable_disk ${node_name} ${disk_name}

Check node ${node_id} disk ${disk_id} is in pressure
${node_name} = get_node_by_index ${node_id}
${disk_name} = generate_name_with_suffix disk ${disk_id}
wait_for_disk_in_pressure ${node_name} ${disk_name}

Check node ${node_id} disk ${disk_id} is not in pressure
${node_name} = get_node_by_index ${node_id}
${disk_name} = generate_name_with_suffix disk ${disk_id}
wait_for_disk_not_in_pressure ${node_name} ${disk_name}

Create ${disk_size} Gi disk ${disk_id} on node ${node_id}
${node_name} = get_node_by_index ${node_id}
${disk_name} = generate_name_with_suffix disk ${disk_id}
create_volume ${disk_name} size=${disk_size}Gi numberOfReplicas=1
attach_volume ${disk_name} ${node_name}
wait_for_volume_healthy ${disk_name}
${mount_path} = mount_disk ${disk_name} ${node_name}
add_disk ${disk_name} ${node_name} filesystem ${mount_path}
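
The add_disk calls in this file now pass an explicit disk name as the first argument, so later keywords can address the disk by name (for example "block-disk"). Below is a minimal, hypothetical sketch of what such a node_keywords helper could look like, assuming the Longhorn Node CR keys spec.disks by that name; it is an illustration, not the repository's actual implementation.

```python
# Hypothetical sketch, not the repository's node_keywords implementation:
# add_disk takes an explicit disk name so tests can refer to the disk later.
# Assuming the Longhorn Node CR keys spec.disks by that name, the helper
# could patch the node like this.
from kubernetes import client, config


def add_disk(disk_name, node_name, disk_type, disk_path,
             namespace="longhorn-system"):
    config.load_kube_config()
    api = client.CustomObjectsApi()
    body = {
        "spec": {
            "disks": {
                disk_name: {
                    "path": disk_path,        # block device or mount path
                    "diskType": disk_type,    # "block" or "filesystem"
                    "allowScheduling": True,
                    "evictionRequested": False,
                    "storageReserved": 0,
                    "tags": [],
                }
            }
        }
    }
    api.patch_namespaced_custom_object(
        group="longhorn.io", version="v1beta2", namespace=namespace,
        plural="nodes", name=node_name, body=body,
    )
```
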
8 changes: 8 additions & 0 deletions e2e/keywords/replica.resource
@@ -3,8 +3,16 @@ Documentation Longhorn replica related keywords
Library ../libs/keywords/common_keywords.py
Library ../libs/keywords/replica_keywords.py
Library ../libs/keywords/node_keywords.py

*** Keywords ***
Volume ${volume_id} replica ${setting_name} should be ${setting_value}
${volume_name} = generate_name_with_suffix volume ${volume_id}
validate_replica_setting ${volume_name} ${setting_name} ${setting_value}

There should be replicas running on node ${node_id} disk ${disk_id}
${node_name} = get_node_by_index ${node_id}
${disk_name} = generate_name_with_suffix disk ${disk_id}
${disk_uuid} = get_disk_uuid ${node_name} ${disk_name}
${replicas} = get_replicas volume_name= node_name=${node_name} disk_uuid=${disk_uuid}
Should Be True len(${replicas}) > 0
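
The new keyword above relies on get_replicas accepting node and disk filters. The following is a hedged sketch of how such a helper might query replica CRs, assuming Longhorn labels them with longhornvolume/longhornnode (as crd.py below does for engines) and records the backing disk UUID in spec["diskID"]; the real replica_keywords code is not part of this diff.

```python
# Hedged sketch of a get_replicas helper matching the keyword above; the
# real replica_keywords code is not shown in this diff. It assumes replica
# CRs carry the longhornvolume/longhornnode labels and record the backing
# disk UUID in spec["diskID"].
from kubernetes import client, config


def get_replicas(volume_name="", node_name="", disk_uuid="",
                 namespace="longhorn-system"):
    config.load_kube_config()
    api = client.CustomObjectsApi()
    selectors = []
    if volume_name:
        selectors.append(f"longhornvolume={volume_name}")
    if node_name:
        selectors.append(f"longhornnode={node_name}")
    resp = api.list_namespaced_custom_object(
        group="longhorn.io", version="v1beta2", namespace=namespace,
        plural="replicas", label_selector=",".join(selectors),
    )
    replicas = resp["items"]
    if disk_uuid:
        replicas = [r for r in replicas if r["spec"].get("diskID") == disk_uuid]
    return replicas
```
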
5 changes: 5 additions & 0 deletions e2e/keywords/sharemanager.resource
@@ -26,6 +26,11 @@ Delete sharemanager pod of deployment ${deployment_id} and wait for recreation
${volume_name} = get_workload_volume_name ${deployment_name}
delete_sharemanager_pod_and_wait_for_recreation ${volume_name}

Wait for sharemanager pod of deployment ${deployment_id} restart
${deployment_name} = generate_name_with_suffix deployment ${deployment_id}
${volume_name} = get_workload_volume_name ${deployment_name}
wait_for_sharemanager_pod_restart ${volume_name}

Wait for sharemanager pod of deployment ${deployment_id} running
${deployment_name} = generate_name_with_suffix deployment ${deployment_id}
${volume_name} = get_workload_volume_name ${deployment_name}
8 changes: 8 additions & 0 deletions e2e/keywords/statefulset.resource
@@ -15,6 +15,14 @@ Create statefulset ${statefulset_id} using ${volume_type} volume with ${sc_name} storageclass
${statefulset_name} = generate_name_with_suffix statefulset ${statefulset_id}
create_statefulset ${statefulset_name} ${volume_type} ${sc_name}

Create statefulset ${statefulset_id} using ${volume_type} volume with ${sc_name} storageclass and size ${size} Mi
${statefulset_name} = generate_name_with_suffix statefulset ${statefulset_id}
create_statefulset ${statefulset_name} ${volume_type} ${sc_name} ${size}Mi

Create statefulset ${statefulset_id} using ${volume_type} volume with ${sc_name} storageclass and size ${size} Gi
${statefulset_name} = generate_name_with_suffix statefulset ${statefulset_id}
create_statefulset ${statefulset_name} ${volume_type} ${sc_name} ${size}Gi

Scale statefulset ${statefulset_id} to ${replicaset_size}
${statefulset_name} = generate_name_with_suffix statefulset ${statefulset_id}
scale_statefulset ${statefulset_name} ${replicaset_size}
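
The two new keywords above differ only in the unit suffix (Mi or Gi) they append before calling create_statefulset. If the helper renders the statefulset from a manifest template, threading the size through could look roughly like the hypothetical sketch below; the actual create_statefulset helper is outside this diff.

```python
# Hypothetical: the extra size argument only needs to overwrite the PVC
# template's storage request, e.g. size="3Gi" or size="500Mi".
def set_statefulset_volume_size(manifest, size):
    for pvc in manifest["spec"].get("volumeClaimTemplates", []):
        pvc["spec"]["resources"]["requests"]["storage"] = size
    return manifest
```
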
13 changes: 13 additions & 0 deletions e2e/keywords/variables.resource
@@ -0,0 +1,13 @@
*** Settings ***
Documentation Global Variables
*** Variables ***
${LOOP_COUNT} 1
${RETRY_COUNT} 300
${RETRY_INTERVAL} 1
${VOLUME_TYPE} RWO
${CONTROL_PLANE_NODE_NETWORK_LATENCY_IN_MS} 0
${RWX_VOLUME_FAST_FAILOVER} false
${DATA_ENGINE} v1

@{powered_off_nodes}=
51 changes: 47 additions & 4 deletions e2e/keywords/volume.resource
@@ -167,14 +167,19 @@ Check all replicas of volume ${volume_id} kept in error
Sleep ${RETRY_INTERVAL}
END

Wait for volume ${volume_id} migration ready
Wait for volume ${volume_id} migration to be ready
${volume_name} = generate_name_with_suffix volume ${volume_id}
wait_for_volume_migration_ready ${volume_name}
wait_for_volume_migration_to_be_ready ${volume_name}

Wait for volume ${volume_id} migrated to node ${node_id}
Wait for volume ${volume_id} to migrate to node ${node_id}
${volume_name} = generate_name_with_suffix volume ${volume_id}
${node_name} = get_node_by_index ${node_id}
wait_for_volume_migration_completed ${volume_name} ${node_name}
wait_for_volume_migration_complete ${volume_name} ${node_name}

Wait for volume ${volume_id} to stay on node ${node_id}
${volume_name} = generate_name_with_suffix volume ${volume_id}
${node_name} = get_node_by_index ${node_id}
wait_for_volume_migration_to_rollback ${volume_name} ${node_name}

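The new rollback keyword above waits for a volume that started migrating to settle back on its original node. Below is a sketch of what wait_for_volume_migration_to_rollback might poll for, assuming the Longhorn volume status fields currentNodeID and currentMigrationNodeID; it is an illustration, not the repository's code.

```python
# Illustrative sketch only: poll the Longhorn volume CR until no migration is
# in flight and the volume still reports the original node as its owner. The
# status field names are assumptions, not taken from this diff.
import time

from kubernetes import client, config

RETRY_COUNT = 300
RETRY_INTERVAL = 1


def wait_for_volume_migration_to_rollback(volume_name, node_name,
                                          namespace="longhorn-system"):
    config.load_kube_config()
    api = client.CustomObjectsApi()
    for _ in range(RETRY_COUNT):
        volume = api.get_namespaced_custom_object(
            group="longhorn.io", version="v1beta2", namespace=namespace,
            plural="volumes", name=volume_name,
        )
        status = volume.get("status", {})
        if (not status.get("currentMigrationNodeID")
                and status.get("currentNodeID") == node_name):
            return
        time.sleep(RETRY_INTERVAL)
    raise AssertionError(
        f"volume {volume_name} did not stay on node {node_name}")
```
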
Wait for volume ${volume_id} restoration from backup ${backup_id} completed
${volume_name} = generate_name_with_suffix volume ${volume_id}
@@ -247,6 +252,44 @@ Check volume ${volume_id} replica on node ${node_id} exist
${replica_name} get_replica_name_on_node ${volume_name} ${node_name}
Should Not Be Equal ${replica_name} ${None}

Volume ${volume_id} should have ${expected_replica_count} replicas running
${volume_name} = generate_name_with_suffix volume ${volume_id}
${replica_count} = wait_for_replica_count ${volume_name} node_name= replica_count=${expected_replica_count}

Volume ${volume_id} should have ${expected_replica_count} replicas running on node ${node_id}
${volume_name} = generate_name_with_suffix volume ${volume_id}
${node_name} = get_node_by_index ${node_id}
${replica_count} = wait_for_replica_count ${volume_name} node_name=${node_name} replica_count=${expected_replica_count}
Set Test Variable ${volume_name}
Set Test Variable ${node_name}
Set Test Variable ${replica_count}

Volume ${volume_id} should have replicas running on node ${node_id}
${volume_name} = generate_name_with_suffix volume ${volume_id}
${node_name} = get_node_by_index ${node_id}
${replica_count} = wait_for_replica_count ${volume_name} node_name=${node_name}
Set Test Variable ${volume_name}
Set Test Variable ${node_name}
Set Test Variable ${replica_count}

Volume ${volume_id} should have ${expected_replica_count} replicas running on node ${node_id} and no additional scheduling occurs
Volume ${volume_id} should have ${expected_replica_count} replicas running on node ${node_id}
FOR ${i} IN RANGE 3
Log to console Ensuring there's no additional scheduling for node ${node_name} ... (${i})
${new_replica_count} = wait_for_replica_count ${volume_name} node_name=${node_name} replica_count=${expected_replica_count}
Should Be Equal As Integers ${replica_count} ${new_replica_count}
Sleep 5
END

Volume ${volume_id} should have replicas running on node ${node_id} and no additional scheduling occurs
Volume ${volume_id} should have replicas running on node ${node_id}
FOR ${i} IN RANGE 3
Log to console Ensuring there's no additional scheduling for node ${node_name} ... (${i})
${new_replica_count} = wait_for_replica_count ${volume_name} node_name=${node_name}
Should Be Equal As Integers ${replica_count} ${new_replica_count}
Sleep 5
END

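The "no additional scheduling occurs" keywords above call wait_for_replica_count and then re-check that the count stays put. The sketch below shows the helper's assumed behavior, reusing the get_replicas sketch from the replica.resource section earlier; it is not the repository's actual code.

```python
# Assumed behavior of wait_for_replica_count: poll until the observed count
# matches the expected one, or is non-zero when no expectation is given,
# then return it so the keyword can compare against later readings.
# Reuses the get_replicas sketch shown earlier.
import time

RETRY_COUNT = 300
RETRY_INTERVAL = 1


def wait_for_replica_count(volume_name, node_name="", replica_count=None):
    observed = 0
    for _ in range(RETRY_COUNT):
        observed = len(get_replicas(volume_name, node_name))
        if replica_count is not None and observed == int(replica_count):
            return observed
        if replica_count is None and observed > 0:
            return observed
        time.sleep(RETRY_INTERVAL)
    raise AssertionError(
        f"expected {replica_count or 'some'} replicas for {volume_name}, "
        f"got {observed}")
```
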
Check volume ${volume_id} data is intact
${volume_name} = generate_name_with_suffix volume ${volume_id}
check_data_checksum ${volume_name}
21 changes: 20 additions & 1 deletion e2e/keywords/workload.resource
@@ -8,6 +8,7 @@ Library ../libs/keywords/volume_keywords.py
Library ../libs/keywords/workload_keywords.py
Library ../libs/keywords/host_keywords.py
Library ../libs/keywords/k8s_keywords.py
Library ../libs/keywords/replica_keywords.py

*** Keywords ***
Create pod ${pod_id} using volume ${volume_id}
@@ -45,9 +46,18 @@ Power off volume node of ${workload_kind} ${workload_id}
${workload_name} = generate_name_with_suffix ${workload_kind} ${workload_id}
${volume_name} = get_workload_volume_name ${workload_name}
${powered_off_node} = get_volume_node ${volume_name}
Append to list ${powered_off_nodes} ${powered_off_node}
${last_volume_node} = get_volume_node ${volume_name}
power_off_volume_node ${volume_name}
Set Test Variable ${powered_off_node}
Set Test Variable ${last_volume_node}

Power off volume node of ${workload_kind} ${workload_id} without waiting
${workload_name} = generate_name_with_suffix ${workload_kind} ${workload_id}
${volume_name} = get_workload_volume_name ${workload_name}
${powered_off_node} = get_volume_node ${volume_name}
Append to list ${powered_off_nodes} ${powered_off_node}
${last_volume_node} = get_volume_node ${volume_name}
power_off_volume_node ${volume_name} waiting=False
Set Test Variable ${last_volume_node}

Reboot volume node of ${workload_kind} ${workload_id}
@@ -213,3 +223,12 @@ Delete Longhorn ${workload_kind} ${workload_name} pod
${pod_name} = get_workload_pod_name ${workload_name} longhorn-system
Log ${pod_name}
delete_pod ${pod_name} longhorn-system

Check volume of ${workload_kind} ${workload_id} replica on node ${node_id} disk ${disk_id}
${workload_name} = generate_name_with_suffix ${workload_kind} ${workload_id}
${volume_name} = get_workload_volume_name ${workload_name}
${node_name} = get_node_by_index ${node_id}
${disk_name} = generate_name_with_suffix disk ${disk_id}
${disk_uuid} = get_disk_uuid ${node_name} ${disk_name}
${replicas} = get_replicas volume_name=${volume_name} node_name=${node_name} disk_uuid=${disk_uuid}
Should Be True len(${replicas}) > 0
21 changes: 10 additions & 11 deletions e2e/libs/engine/crd.py
@@ -3,21 +3,21 @@
from kubernetes import client

from engine.base import Base
from utility.utility import logging


class CRD(Base):
def __init__(self):
self.obj_api = client.CustomObjectsApi()

def get_engines(self, volume_name, node_name=None):
if volume_name == "" or node_name == "":
logging.info("getting all engines")
if not node_name:
logging(f"Getting all engines of {volume_name}")
else:
logging.info(
f"getting the volume {volume_name} on node {node_name} engine")
logging(f"Getting engine of volume {volume_name} on node {node_name}")

label_selector = []
if volume_name != "":
if volume_name:
label_selector.append(f"longhornvolume={volume_name}")
if node_name:
label_selector.append(f"longhornnode={node_name}")
@@ -31,20 +31,19 @@ def get_engines(self, volume_name, node_name=None):
)

if api_response == "" or api_response is None:
raise Exception(f"failed to get the volume {volume_name} engine")
raise Exception(f"failed to get volume {volume_name} engine")

engines = api_response["items"]
if len(engines) == 0:
logging.warning(f"cannot get the volume {volume_name} engines")
logging(f"Cannot get volume {volume_name} engines")

return engines

def delete_engine(self, volume_name, node_name):
if volume_name == "" or node_name == "":
logging.info("deleting all engines")
logging("deleting all engines")
else:
logging.info(
f"delete the volume {volume_name} on node {node_name} engine")
logging(f"delete the volume {volume_name} on node {node_name} engine")

for engine in self.get_engine(volume_name, node_name):
engine_name = engine['metadata']['name']
@@ -55,7 +54,7 @@ def delete_engine(self, volume_name, node_name):
plural="engines",
name=engine_name
)
logging.info("finished delete engines")
logging("finished delete engines")

def validate_engine_setting(self, volume_name, setting_name, value):
engines = self.get_engines(volume_name)
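
The crd.py changes above swap Python's logging module for the project's own logging callable imported from utility.utility, which is why the call sites become logging(f"..."). As a rough illustration only, such a helper might simply forward to Robot Framework's logger; this is an assumption, not the actual utility code.

```python
from robot.api import logger


def logging(msg):
    # Assumed behavior: echo to the live console and the Robot Framework log.
    logger.console(msg)
    logger.info(msg)
```
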
9 changes: 5 additions & 4 deletions e2e/libs/host/aws.py
@@ -68,14 +68,15 @@ def reboot_all_worker_nodes(self, shut_down_time_in_sec=NODE_REBOOT_DOWN_TIME_SE
waiter.wait(InstanceIds=instance_ids)
logging(f"Started instances")

def power_off_node(self, power_off_node_name):
def power_off_node(self, power_off_node_name, waiting=True):
instance_ids = [self.mapping[power_off_node_name]]
resp = self.aws_client.stop_instances(InstanceIds=instance_ids, Force=True)
assert resp['ResponseMetadata']['HTTPStatusCode'] == 200, f"Failed to stop instances {instance_ids} response: {resp}"
logging(f"Stopping instances {instance_ids}")
waiter = self.aws_client.get_waiter('instance_stopped')
waiter.wait(InstanceIds=instance_ids)
logging(f"Stopped instances")
if waiting:
waiter = self.aws_client.get_waiter('instance_stopped')
waiter.wait(InstanceIds=instance_ids)
logging(f"Stopped instances")

def power_on_node(self, power_on_node_name):
instance_ids = [self.mapping[power_on_node_name]]
2 changes: 1 addition & 1 deletion e2e/libs/host/base.py
@@ -23,7 +23,7 @@ def reboot_all_worker_nodes(self, shut_down_time_in_sec):
return NotImplemented

@abstractmethod
def power_off_node(self, node_name):
def power_off_node(self, node_name, waiting):
return NotImplemented

@abstractmethod
5 changes: 4 additions & 1 deletion e2e/libs/host/harvester.py
@@ -53,7 +53,7 @@ def reboot_all_worker_nodes(self, shut_down_time_in_sec):
for node_name in node_names:
self.power_on_node(node_name)

def power_off_node(self, node_name):
def power_off_node(self, node_name, waiting=True):
vm_id = self.mapping[node_name]

url = f"{self.url}/{vm_id}"
@@ -68,6 +68,9 @@ def power_off_node(self, node_name):
logging(f"Stopping vm failed with error {e}")
logging(f"Stopping vm {vm_id}")

if not waiting:
return

stopped = False
for i in range(self.retry_count):
logging(f"Waiting for vm {vm_id} stopped ... ({i})")
6 changes: 3 additions & 3 deletions e2e/libs/keywords/host_keywords.py
@@ -46,10 +46,10 @@ def reboot_node_by_name(self, node_name, downtime_in_min=1):
logging(f'Rebooting node {node_name} with downtime {reboot_down_time_sec} seconds')
self.host.reboot_node(node_name, reboot_down_time_sec)

def power_off_volume_node(self, volume_name):
def power_off_volume_node(self, volume_name, waiting=True):
node_id = self.volume_keywords.get_node_id_by_replica_locality(volume_name, "volume node")
logging(f'Power off volume {volume_name} node {node_id}')
self.host.power_off_node(node_id)
logging(f'Power off volume {volume_name} node {node_id} with waiting = {waiting}')
self.host.power_off_node(node_id, waiting)

def power_on_node_by_name(self, node_name):
self.host.power_on_node(node_name)