diff --git a/e2e/libs/keywords/volume_keywords.py b/e2e/libs/keywords/volume_keywords.py
index 10129769ad..d3e147b2f7 100644
--- a/e2e/libs/keywords/volume_keywords.py
+++ b/e2e/libs/keywords/volume_keywords.py
@@ -1,8 +1,9 @@
+from node.utility import list_node_names_by_role
+
 from utility.utility import generate_volume_name
 from utility.utility import get_node
 from utility.utility import get_test_pod_not_running_node
 from utility.utility import get_test_pod_running_node
-from utility.utility import list_nodes
 from utility.utility import logging
 
 from volume import Volume
@@ -33,12 +34,12 @@ def get_volume_node(self, volume_name):
 
     def get_replica_node(self, volume_name):
-        nodes = list_nodes()
+        worker_nodes = list_node_names_by_role("worker")
         volume_node = self.get_volume_node(volume_name)
         test_pod_running_node = get_test_pod_running_node()
-        for node in nodes:
-            if node != volume_node and node != test_pod_running_node:
-                return node
+        for worker_node in worker_nodes:
+            if worker_node != volume_node and worker_node != test_pod_running_node:
+                return worker_node
 
     def write_volume_random_data(self, volume_name, size_in_mb):
diff --git a/e2e/libs/network/network.py b/e2e/libs/network/network.py
index fdcfd534de..1c720d8d52 100644
--- a/e2e/libs/network/network.py
+++ b/e2e/libs/network/network.py
@@ -1,10 +1,11 @@
 from robot.libraries.BuiltIn import BuiltIn
 
-from utility.utility import get_control_plane_nodes
+from node.utility import list_node_names_by_role
 
 from node_exec import NodeExec
 
+
 def get_control_plane_node_network_latency_in_ms():
     latency_in_ms = int(BuiltIn().get_variable_value("${CONTROL_PLANE_NODE_NETWORK_LATENCY_IN_MS}", default="0"))
     return latency_in_ms
@@ -12,21 +13,21 @@ def get_control_plane_node_network_latency_in_ms():
 def setup_control_plane_network_latency():
     latency_in_ms = get_control_plane_node_network_latency_in_ms()
     if latency_in_ms != 0:
-        nodes = get_control_plane_nodes()
-        for node in nodes:
+        control_plane_nodes = list_node_names_by_role("control-plane")
+        for control_plane_node in control_plane_nodes:
             cmd = f"tc qdisc replace dev eth0 root netem delay {latency_in_ms}ms"
-            res = NodeExec.get_instance().issue_cmd(node, cmd)
+            res = NodeExec.get_instance().issue_cmd(control_plane_node, cmd)
             cmd = f"tc qdisc show dev eth0 | grep delay"
-            res = NodeExec.get_instance().issue_cmd(node, cmd)
+            res = NodeExec.get_instance().issue_cmd(control_plane_node, cmd)
             assert res, "setup control plane network latency failed"
 
 def cleanup_control_plane_network_latency():
     latency_in_ms = get_control_plane_node_network_latency_in_ms()
     if latency_in_ms != 0:
-        nodes = get_control_plane_nodes()
-        for node in nodes:
+        control_plane_nodes = list_node_names_by_role("control-plane")
+        for control_plane_node in control_plane_nodes:
             cmd = "tc qdisc del dev eth0 root"
-            res = NodeExec.get_instance().issue_cmd(node, cmd)
+            res = NodeExec.get_instance().issue_cmd(control_plane_node, cmd)
             cmd = f"tc qdisc show dev eth0 | grep -v delay"
-            res = NodeExec.get_instance().issue_cmd(node, cmd)
-            assert res, "cleanup control plane network failed"
\ No newline at end of file
+            res = NodeExec.get_instance().issue_cmd(control_plane_node, cmd)
+            assert res, "cleanup control plane network failed"
diff --git a/e2e/libs/node/node.py b/e2e/libs/node/node.py
index 98935bffcd..663a6e58b8 100644
--- a/e2e/libs/node/node.py
+++ b/e2e/libs/node/node.py
@@ -4,7 +4,8 @@
 
 from kubernetes import client
 
-from utility.utility import list_nodes
+from node.utility import list_node_names_by_role
+
 from utility.utility import logging
 from utility.utility import wait_for_cluster_ready
@@ -52,7 +53,7 @@ def reboot_node(self, reboot_node_name, shut_down_time_in_sec=60):
         logging(f"Started instances")
 
     def reboot_all_worker_nodes(self, shut_down_time_in_sec=60):
-        instance_ids = [self.mapping[value] for value in list_nodes()]
+        instance_ids = [self.mapping[value] for value in list_node_names_by_role("worker")]
         resp = self.aws_client.stop_instances(InstanceIds=instance_ids)
         logging(f"Stopping instances {instance_ids} response: {resp}")
diff --git a/e2e/libs/node/utility.py b/e2e/libs/node/utility.py
index 571b983b6f..3ac20ba74d 100644
--- a/e2e/libs/node/utility.py
+++ b/e2e/libs/node/utility.py
@@ -7,3 +7,26 @@ def get_node_by_name(node_name):
 def get_node_cpu_cores(node_name):
     node = get_node_by_name(node_name)
     return node.status.capacity['cpu']
+
+def list_node_names_by_role(role="all"):
+    if role not in ["all", "control-plane", "worker"]:
+        raise ValueError("Role must be one of 'all', 'control-plane' or 'worker'")
+
+    def filter_nodes(nodes, condition):
+        return [node.metadata.name for node in nodes if condition(node)]
+
+    core_api = client.CoreV1Api()
+    nodes = core_api.list_node().items
+
+    control_plane_labels = ['node-role.kubernetes.io/master', 'node-role.kubernetes.io/control-plane']
+
+    if role == "all":
+        return sorted(filter_nodes(nodes, lambda node: True))
+
+    if role == "control-plane":
+        condition = lambda node: all(label in node.metadata.labels for label in control_plane_labels)
+        return sorted(filter_nodes(nodes, condition))
+
+    if role == "worker":
+        condition = lambda node: not any(label in node.metadata.labels for label in control_plane_labels)
+        return sorted(filter_nodes(nodes, condition))
diff --git a/e2e/libs/utility/utility.py b/e2e/libs/utility/utility.py
index da7f2fca18..1274b29cf4 100644
--- a/e2e/libs/utility/utility.py
+++ b/e2e/libs/utility/utility.py
@@ -15,6 +15,8 @@
 from robot.api import logger
 from robot.libraries.BuiltIn import BuiltIn
 
+from node.utility import list_node_names_by_role
+
 
 def logging(msg, also_report=False):
     if also_report:
@@ -35,7 +37,6 @@ def generate_name(name_prefix="test-"):
 def generate_volume_name():
     return generate_name("vol-")
 
-
 def init_k8s_api_client():
     if os.getenv('LONGHORN_CLIENT_URL'):
         # for develop or debug, run test in local environment
@@ -46,26 +47,6 @@ def init_k8s_api_client():
         config.load_incluster_config()
         logging("Initialized in-cluster k8s api client")
 
-def list_nodes():
-    core_api = client.CoreV1Api()
-    obj = core_api.list_node()
-    nodes = []
-    for item in obj.items:
-        if 'node-role.kubernetes.io/control-plane' not in item.metadata.labels and \
-                'node-role.kubernetes.io/master' not in item.metadata.labels:
-            nodes.append(item.metadata.name)
-    return sorted(nodes)
-
-def get_control_plane_nodes():
-    core_api = client.CoreV1Api()
-    obj = core_api.list_node()
-    nodes = []
-    for item in obj.items:
-        if 'node-role.kubernetes.io/control-plane' in item.metadata.labels or \
-                'node-role.kubernetes.io/master' in item.metadata.labels:
-            nodes.append(item.metadata.name)
-    return sorted(nodes)
-
 def wait_for_cluster_ready():
     core_api = client.CoreV1Api()
     retry_count, retry_interval = get_retry_count_and_interval()
@@ -87,9 +68,8 @@ def wait_for_cluster_ready():
     assert ready, f"expect cluster's ready but it isn't {resp}"
 
 def wait_for_all_instance_manager_running():
-    core_api = client.CoreV1Api()
     longhorn_client = get_longhorn_client()
-    nodes = list_nodes()
+    worker_nodes = list_node_names_by_role("worker")
     retry_count, retry_interval = get_retry_count_and_interval()
     for _ in range(retry_count):
@@ -100,16 +80,16 @@ def wait_for_all_instance_manager_running():
             for im in instance_managers:
                 if im.currentState == "running":
                     instance_manager_map[im.nodeID] = im
-            if len(instance_manager_map) == len(nodes):
+            if len(instance_manager_map) == len(worker_nodes):
                 break
             time.sleep(retry_interval)
         except Exception as e:
             logging(f"Getting instance manager state error: {e}")
-    assert len(instance_manager_map) == len(nodes), f"expect all instance managers running, instance_managers = {instance_managers}, instance_manager_map = {instance_manager_map}"
+    assert len(instance_manager_map) == len(worker_nodes), f"expect all instance managers running, instance_managers = {instance_managers}, instance_manager_map = {instance_manager_map}"
 
 def get_node(index):
-    nodes = list_nodes()
-    return nodes[int(index)]
+    worker_nodes = list_node_names_by_role("worker")
+    return worker_nodes[int(index)]
 
 def apply_cr(manifest_dict):
     dynamic_client = dynamic.DynamicClient(client.api_client.ApiClient())
@@ -214,11 +194,11 @@ def get_test_pod_running_node():
     return get_node(0)
 
 def get_test_pod_not_running_node():
-    nodes = list_nodes()
+    worker_nodes = list_node_names_by_role("worker")
     test_pod_running_node = get_test_pod_running_node()
-    for node in nodes:
-        if node != test_pod_running_node:
-            return node
+    for worker_node in worker_nodes:
+        if worker_node != test_pod_running_node:
+            return worker_node
 
 def get_test_case_namespace(test_name):
     return test_name.lower().replace(' ', '-')
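
For reference, a minimal sketch of how the consolidated helper is expected to be called from test code. The call to init_k8s_api_client() before any lookup is an assumption based on the existing utility module, not something this patch changes.

    from node.utility import list_node_names_by_role
    from utility.utility import init_k8s_api_client

    # Assumed setup: the kubernetes client config must be loaded before listing nodes.
    init_k8s_api_client()

    # "worker" excludes nodes carrying the control-plane/master labels,
    # "control-plane" selects them, and "all" returns every node in the cluster.
    worker_nodes = list_node_names_by_role("worker")
    control_plane_nodes = list_node_names_by_role("control-plane")
    all_nodes = list_node_names_by_role("all")

    # Names come back sorted, so index-based helpers such as get_node(index)
    # keep returning the same worker node across calls.
    first_worker = worker_nodes[0]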