Skip to content

Commit

Permalink
test: add cluster restart test case
Browse files Browse the repository at this point in the history
Signed-off-by: Yang Chiu <[email protected]>
  • Loading branch information
yangchiu authored and David Ko committed Sep 18, 2023
1 parent 1e6f404 commit f74ce69
Show file tree
Hide file tree
Showing 21 changed files with 646 additions and 9 deletions.
7 changes: 7 additions & 0 deletions e2e/keywords/common.resource
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ Documentation Common keywords
Library ../libs/keywords/common_keywords.py
Library ../libs/keywords/volume_keywords.py
Library ../libs/keywords/recurring_job_keywords.py
Library ../libs/keywords/workload_keywords.py


*** Variables ***
Expand All @@ -15,8 +16,14 @@ Set test environment
init_node_exec ${TEST NAME}
@{volume_list} = Create List
Set Test Variable ${volume_list}
@{deployment_list} = Create List
Set Test Variable ${deployment_list}
@{statefulset_list} = Create List
Set Test Variable ${statefulset_list}

Cleanup test resources
cleanup_node_exec
cleanup_recurring_jobs ${volume_list}
cleanup_volumes ${volume_list}
cleanup_deployments ${deployment_list}
cleanup_statefulsets ${statefulset_list}
10 changes: 10 additions & 0 deletions e2e/keywords/node.resource
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ Documentation Physical Node Keywords
Library ../libs/keywords/volume_keywords.py
Library ../libs/keywords/node_keywords.py
Library ../libs/keywords/workload_keywords.py

*** Keywords ***
During replica rebuilding, reboot volume node
Expand All @@ -22,3 +23,12 @@ Reboot volume ${idx} replica node
FOR ${item} IN @{volume_list}
wait for volume_attached ${item}
END

# Restart all nodes via restart_all_nodes, then wait until the pods of every
# workload recorded in ${deployment_list} and ${statefulset_list} pass
# wait_for_workload_pod_stable.
Restart cluster
restart_all_nodes
FOR ${deployment} IN @{deployment_list}
wait_for_workload_pod_stable ${deployment}
END
FOR ${statefulset} IN @{statefulset_list}
wait_for_workload_pod_stable ${statefulset}
END
2 changes: 1 addition & 1 deletion e2e/keywords/volume.resource
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Create a volume with ${size} GB and ${replica_count} replicas
Set Test Variable ${volume_name}
Append To List ${volume_list} ${volume_name}

Create volume ${idx} with size ${size} GB and ${replica_count} replicas
Create volume ${idx} with ${size} GB and ${replica_count} replicas
${volume_name} = create_volume ${size} ${replica_count}
attach_volume ${volume_name}
Insert Into List ${volume_list} ${idx} ${volume_name}
Expand Down
33 changes: 33 additions & 0 deletions e2e/keywords/workload.resource
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
*** Settings ***
Documentation Workload Keywords
Library Collections
Library ../libs/keywords/workload_keywords.py

*** Keywords ***
# Create a deployment for the given volume type and record its name in
# ${deployment_list} at position ${idx} so later keywords can look it up.
Create deployment ${idx} with ${volume_type} volume
${deployment_name} = create_deployment ${volume_type}
Insert Into List ${deployment_list} ${idx} ${deployment_name}

# Create a statefulset for the given volume type and record its name in
# ${statefulset_list} at position ${idx} so later keywords can look it up.
Create statefulset ${idx} with ${volume_type} volume
${statefulset_name} = create_statefulset ${volume_type}
Insert Into List ${statefulset_list} ${idx} ${statefulset_name}

# Look up the pod of the deployment stored at ${deployment_list}[${idx}] and
# hand it to keep_writing_pod_data.
Keep writing data to deployment ${idx}
${pod_name} = get_workload_pod_name ${deployment_list}[${idx}]
keep_writing_pod_data ${pod_name}

# Look up the pod of the statefulset stored at ${statefulset_list}[${idx}] and
# hand it to keep_writing_pod_data.
Keep writing data to statefulset ${idx}
${pod_name} = get_workload_pod_name ${statefulset_list}[${idx}]
keep_writing_pod_data ${pod_name}

# Write random data (size argument 1024, passed as size_in_mb) to the
# deployment's pod, then verify the pod data against the returned checksum.
Check deployment ${idx} works
${pod_name} = get_workload_pod_name ${deployment_list}[${idx}]
${pod_data_checksum} = write_pod_random_data ${pod_name} 1024
check_pod_data ${pod_name} ${pod_data_checksum}

# Write random data (size argument 1024, passed as size_in_mb) to the
# statefulset's pod, then verify the pod data against the returned checksum.
Check statefulset ${idx} works
${pod_name} = get_workload_pod_name ${statefulset_list}[${idx}]
${pod_data_checksum} = write_pod_random_data ${pod_name} 1024
check_pod_data ${pod_name} ${pod_data_checksum}

3 changes: 3 additions & 0 deletions e2e/libs/keywords/node_keywords.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,6 @@ def reboot_replica_node(self, volume_name):
volume_keywords = BuiltIn().get_library_instance('volume_keywords')
replica_node = volume_keywords.get_replica_node(volume_name)
self.node.reboot_node(test_pod_running_node, replica_node)

def restart_all_nodes(self):
    """Keyword entry point: delegate a restart of all nodes to ``self.node``."""
    node_handler = self.node
    node_handler.restart_all_nodes()
3 changes: 3 additions & 0 deletions e2e/libs/keywords/volume_keywords.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ def wait_for_replica_rebuilding_complete(self, volume_name, replica_node):
def wait_for_volume_attached(self, volume_name):
    """Keyword entry point: delegate the attached-wait for *volume_name* to ``self.volume``."""
    volume_handler = self.volume
    volume_handler.wait_for_volume_attached(volume_name)

def wait_for_volume_healthy(self, volume_name):
    """Keyword entry point: delegate the healthy-wait for *volume_name* to ``self.volume``."""
    volume_handler = self.volume
    volume_handler.wait_for_volume_healthy(volume_name)

def cleanup_volumes(self, volume_names):
logging.warn(f"cleanup volumes {volume_names}")
self.volume.cleanup(volume_names)
50 changes: 50 additions & 0 deletions e2e/libs/keywords/workload_keywords.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from workload.workload import *
import logging

class workload_keywords:
    """Robot Framework keyword library for deployment/statefulset workloads.

    Each method delegates to the same-named module-level helper brought in by
    ``from workload.workload import *``. Inside a method body the bare name
    resolves to that module-level function, not to the method itself.
    """

    def __init__(self):
        # logging.warn is a deprecated alias that was removed in Python 3.13;
        # logging.warning emits the same record.
        logging.warning("initialize workload_keywords class")

    def create_deployment(self, volume_type="rwo"):
        """Create a PVC and a deployment from the templates for *volume_type*.

        Args:
            volume_type: selects the template files; defaults to ``"rwo"``.

        Returns:
            The deployment name returned by the ``create_deployment`` helper.
        """
        pvc_filepath = f"./templates/workload/{volume_type}_pvc.yaml"
        deployment_filepath = f"./templates/workload/deployment_with_{volume_type}_volume.yaml"
        # The PVC name is not kept: cleanup_deployments looks it up again
        # through get_workload_pvc_name.
        create_pvc(pvc_filepath)
        return create_deployment(deployment_filepath)

    def create_statefulset(self, volume_type="rwo"):
        """Create a statefulset from the template for *volume_type*.

        Returns:
            The statefulset name returned by the ``create_statefulset`` helper.
        """
        statefulset_filepath = f"./templates/workload/statefulset_with_{volume_type}_volume.yaml"
        return create_statefulset(statefulset_filepath)

    def get_workload_pod_name(self, workload_name):
        """Return the first pod name listed for *workload_name*.

        Raises:
            IndexError: if the helper returns no pod names.
        """
        return get_workload_pod_names(workload_name)[0]

    def get_workload_volume_name(self, workload_name):
        """Return the volume name the helper reports for *workload_name*."""
        return get_workload_volume_name(workload_name)

    def keep_writing_pod_data(self, pod_name):
        """Delegate to the ``keep_writing_pod_data`` helper for *pod_name*."""
        return keep_writing_pod_data(pod_name)

    def write_pod_random_data(self, pod, size_in_mb):
        """Write *size_in_mb* of random data via the helper and return its result.

        The Robot keywords in workload.resource pass that result to
        ``check_pod_data`` as the checksum.
        """
        return write_pod_random_data(pod, size_in_mb)

    def check_pod_data(self, pod_name, checksum):
        """Verify the data in *pod_name* against *checksum* via the helper."""
        print(f"check pod {pod_name} data with checksum {checksum}")
        check_pod_data(pod_name, checksum)

    def cleanup_deployments(self, deployment_names):
        """Delete each named deployment and then its PVC."""
        for name in deployment_names:
            # Resolve the PVC name before the workload is deleted.
            pvc_name = get_workload_pvc_name(name)
            delete_deployment(name)
            delete_pvc(pvc_name)

    def cleanup_statefulsets(self, statefulset_names):
        """Delete each named statefulset and then its PVC."""
        for name in statefulset_names:
            # Resolve the PVC name before the workload is deleted.
            pvc_name = get_workload_pvc_name(name)
            delete_statefulset(name)
            delete_pvc(pvc_name)

    def wait_for_workload_pod_stable(self, workload_name):
        """Delegate to the ``wait_for_workload_pod_stable`` helper."""
        return wait_for_workload_pod_stable(workload_name)
28 changes: 26 additions & 2 deletions e2e/libs/node/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,42 @@
import time
import logging
from utility.utility import apply_cr_from_yaml, get_cr
from utility.utility import wait_for_cluster_ready
import boto3

RETRY_COUNT = 180
RETRY_INTERVAL = 1

class Node:

def __init__(self):
    """Load the instance mapping and create the EC2 client.

    The mapping is parsed from the YAML file ``/tmp/instance_mapping``; its
    values are used as EC2 instance ids by ``restart_all_nodes``.
    """
    with open('/tmp/instance_mapping', 'r') as mapping_file:
        instance_mapping = yaml.safe_load(mapping_file)
    self.mapping = instance_mapping
    self.aws_client = boto3.client('ec2')

def restart_all_nodes(self, shut_down_time_in_sec=60):
    """Stop every mapped EC2 instance, keep them down, then start them again.

    Blocks until the instances are running again and
    ``wait_for_cluster_ready`` has returned.

    Args:
        shut_down_time_in_sec: seconds to wait between all instances reaching
            the stopped state and starting them again. Defaults to 60, the
            value that used to be hard-coded.
    """
    instance_ids = list(self.mapping.values())
    print(instance_ids)

    resp = self.aws_client.stop_instances(InstanceIds=instance_ids)
    print(resp)
    stopped_waiter = self.aws_client.get_waiter('instance_stopped')
    stopped_waiter.wait(InstanceIds=instance_ids)
    print("all instances stopped")

    # Hold the whole cluster down for a while before bringing it back.
    time.sleep(shut_down_time_in_sec)

    resp = self.aws_client.start_instances(InstanceIds=instance_ids)
    print(resp)
    running_waiter = self.aws_client.get_waiter('instance_running')
    running_waiter.wait(InstanceIds=instance_ids)
    # EC2 "running" only covers the instances; also wait for the cluster check.
    wait_for_cluster_ready()
    print("all instances running")

def reboot_node(self, running_on_node_name, reboot_node_name, shut_down_time_in_sec=10):
with open('/tmp/instance_mapping', 'r') as f:
mapping = yaml.safe_load(f)
reboot_node_id = mapping[reboot_node_name]

filepath = './litmus/reboot-engine.yaml'
filepath = './litmus/reboot-node.yaml'
with open(filepath, 'r') as f:
data = yaml.safe_load(f)
data['spec']['components']['runner']['nodeSelector']['kubernetes.io/hostname'] = running_on_node_name
Expand All @@ -33,7 +57,7 @@ def reboot_node(self, running_on_node_name, reboot_node_name, shut_down_time_in_
'v1alpha1',
'default',
'chaosresults',
'reboot-engine-ec2-terminate-by-id')
'reboot-node-ec2-terminate-by-id')
if results['status']['experimentStatus']['verdict'] == 'Pass':
break
time.sleep(RETRY_INTERVAL)
Expand Down
28 changes: 26 additions & 2 deletions e2e/libs/utility/utility.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from kubernetes import config, client, dynamic
from kubernetes.client.rest import ApiException
from kubernetes.stream import stream
from longhorn import from_env
import string
import random
Expand Down Expand Up @@ -37,6 +38,24 @@ def list_nodes():
nodes.append(item.metadata.name)
return sorted(nodes)

def wait_for_cluster_ready():
    """Poll the node list until every node's ``Ready`` condition is ``True``.

    Polls up to ``RETRY_COUNTS`` times, sleeping ``RETRY_INTERVAL`` seconds
    after each unsuccessful attempt. Errors while listing or inspecting nodes
    are logged and count as "not ready". A node with no ``Ready`` condition
    in its list does not block readiness.

    Raises:
        AssertionError: if the cluster is still not ready after the last poll.
    """
    core_api = client.CoreV1Api()
    # Bind both names up front so the final assert cannot hit an unbound
    # local when every list_node() call fails or the loop runs zero times.
    ready = False
    resp = None
    for _ in range(RETRY_COUNTS):
        try:
            resp = core_api.list_node()
            ready = all(
                condition.status == 'True'
                for item in resp.items
                for condition in item.status.conditions
                if condition.type == 'Ready'
            )
        except Exception as e:
            # A failed poll must never leave a stale "ready" verdict behind.
            ready = False
            logging.warning(f"list node error: {e}")
        if ready:
            break
        time.sleep(RETRY_INTERVAL)
    assert ready, f"expect cluster's ready but it isn't {resp}"

def get_node(index):
nodes = list_nodes()
return nodes[int(index)]
Expand Down Expand Up @@ -123,8 +142,13 @@ def get_longhorn_client():
# manually expose longhorn client
# to access longhorn manager in local environment
longhorn_client_url = os.getenv('LONGHORN_CLIENT_URL')
longhorn_client = from_env(url=f"{longhorn_client_url}/v1/schemas")
return longhorn_client
for i in range(RETRY_COUNTS):
try:
longhorn_client = from_env(url=f"{longhorn_client_url}/v1/schemas")
return longhorn_client
except Exception as e:
logging.info(f"get longhorn client error: {e}")
time.sleep(RETRY_INTERVAL)
else:
logging.info(f"initialize longhorn api client from longhorn manager")
# for ci, run test in in-cluster environment
Expand Down
4 changes: 4 additions & 0 deletions e2e/libs/volume/volume.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ def wait_for_volume_attached(self, volume_name):
self.volume.wait_for_volume_state(volume_name, "attached")
self.volume.wait_for_volume_robustness_not(volume_name, "unknown")

def wait_for_volume_healthy(self, volume_name):
    """Wait for *volume_name* to reach state "attached", then robustness "healthy"."""
    backend = self.volume
    backend.wait_for_volume_state(volume_name, "attached")
    backend.wait_for_volume_robustness(volume_name, "healthy")

def get_endpoint(self, volume_name):
    """Return whatever ``self.volume.get_endpoint`` reports for *volume_name*."""
    endpoint = self.volume.get_endpoint(volume_name)
    return endpoint

Expand Down
1 change: 1 addition & 0 deletions e2e/libs/workload/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from workload import workload
Loading

0 comments on commit f74ce69

Please sign in to comment.