diff --git a/manager/integration/tests/test_metric.py b/manager/integration/tests/test_metric.py index 4218d17d0e..3210cf1f00 100644 --- a/manager/integration/tests/test_metric.py +++ b/manager/integration/tests/test_metric.py @@ -22,11 +22,13 @@ from common import wait_for_volume_healthy from common import write_volume_data from common import write_volume_random_data - +from common import set_node_scheduling +from common import set_node_cordon from common import Mi from common import LONGHORN_NAMESPACE from common import RETRY_COUNTS from common import RETRY_INTERVAL +from common import DEFAULT_DISK_PATH # The dictionaries use float type of value because the value obtained from # prometheus_client is in float type. @@ -78,6 +80,33 @@ def find_metrics(metric_data, metric_name): return metrics +def check_metric_with_condition(core_api, metric_name, metric_labels, expected_value=None, metric_node_id=get_self_host_id()): # NOQA) + """ + Some metric have multiple conditions, for exameple metric + longhorn_node_status have condition + - allowScheduling + - mountpropagation + - ready + - schedulable + metric longhorn_disk_status have conditions + - ready + - schedulable + Use this function to get specific condition of a mertic + """ + metric_data = get_metrics(core_api, metric_node_id) + + found_metric = next( + (sample for family in metric_data for sample in family.samples + if sample.name == metric_name and + sample.labels.get("condition") == metric_labels.get("condition")), + None + ) + + assert found_metric is not None + + examine_metric_value(found_metric, metric_labels, expected_value) + + def check_metric(core_api, metric_name, metric_labels, expected_value=None, metric_node_id=get_self_host_id()): # NOQA metric_data = get_metrics(core_api, metric_node_id) @@ -89,6 +118,10 @@ def check_metric(core_api, metric_name, metric_labels, expected_value=None, metr assert found_metric is not None + examine_metric_value(found_metric, metric_labels, expected_value) + + +def examine_metric_value(found_metric, metric_labels, expected_value=None): for key, value in metric_labels.items(): assert found_metric.labels[key] == value @@ -409,3 +442,82 @@ def test_metric_longhorn_snapshot_actual_size_bytes(client, core_api, volume_nam wait_for_metric_count_all_nodes(client, core_api, "longhorn_snapshot_actual_size_bytes", system_snapshot_metric_labels, 1) + + +def test_node_metrics(client, core_api): # NOQA + lht_hostId = get_self_host_id() + node = client.by_id_node(lht_hostId) + disks = node.disks + for _, disk in iter(disks.items()): + if disk.path == DEFAULT_DISK_PATH: + default_disk = disk + break + assert default_disk is not None + + metric_labels = {} + check_metric(core_api, "longhorn_node_count_total", + metric_labels, expected_value=3.0) + + metric_labels = { + "node": lht_hostId, + } + check_metric(core_api, "longhorn_node_cpu_capacity_millicpu", + metric_labels) + check_metric(core_api, "longhorn_node_cpu_usage_millicpu", + metric_labels) + check_metric(core_api, "longhorn_node_memory_capacity_bytes", + metric_labels) + check_metric(core_api, "longhorn_node_memory_usage_bytes", + metric_labels) + check_metric(core_api, "longhorn_node_storage_capacity_bytes", + metric_labels, default_disk.storageMaximum) + check_metric(core_api, "longhorn_node_storage_usage_bytes", + metric_labels) + check_metric(core_api, "longhorn_node_storage_reservation_bytes", + metric_labels, default_disk.storageReserved) + + # check longhorn_node_status by 4 different conditions + metric_labels = { + "condition": "mountpropagation", + "condition_reason": "", + "node": lht_hostId + } + check_metric_with_condition(core_api, "longhorn_node_status", + metric_labels, 1.0) + + metric_labels = { + "condition": "ready", + "condition_reason": "", + "node": lht_hostId + } + check_metric_with_condition(core_api, "longhorn_node_status", + metric_labels, 1.0) + + metric_labels = { + "condition": "allowScheduling", + "condition_reason": "", + "node": lht_hostId, + } + check_metric_with_condition(core_api, "longhorn_node_status", + metric_labels, 1.0) + node = client.by_id_node(lht_hostId) + set_node_scheduling(client, node, allowScheduling=False, retry=True) + check_metric_with_condition(core_api, "longhorn_node_status", + metric_labels, 0.0) + + metric_labels = { + "condition": "schedulable", + "condition_reason": "", + "node": lht_hostId + } + check_metric_with_condition(core_api, "longhorn_node_status", + metric_labels, 1.0) + + metric_labels = { + "condition": "schedulable", + "condition_reason": "KubernetesNodeCordoned", + "node": lht_hostId + } + set_node_cordon(core_api, lht_hostId, True) + check_metric_with_condition(core_api, "longhorn_node_status", + metric_labels, 0.0)