Commit 4eb6a4b

test

chriscchien committed Feb 21, 2024
1 parent 57546b9 commit 4eb6a4b

Showing 3 changed files with 133 additions and 129 deletions.
2 changes: 1 addition & 1 deletion manager/integration/Dockerfile
@@ -1,6 +1,6 @@
FROM registry.suse.com/bci/python:3.9

ARG KUBECTL_VERSION=v1.17.0
ARG KUBECTL_VERSION=v1.28.4
ARG YQ_VERSION=v4.24.2
ARG TERRAFORM_VERSION=1.3.5
ARG ARCH=amd64
10 changes: 8 additions & 2 deletions manager/integration/tests/common.py
@@ -6125,8 +6125,14 @@ def wait_for_instance_manager_count(client, number, retry_counts=120):
return len(ims)


def create_deployment_and_write_data(client, core_api, make_deployment_with_pvc, volume_name, size, replica_count, data_size, attach_node_id=None): # NOQA
print(volume_name)
def create_deployment_and_write_data(client, # NOQA
core_api, # NOQA
make_deployment_with_pvc, # NOQA
volume_name, # NOQA
size, # NOQA
replica_count, # NOQA
data_size, # NOQA
attach_node_id=None): # NOQA
apps_api = get_apps_api_client()
volume = client.create_volume(name=volume_name,
size=size,
250 changes: 124 additions & 126 deletions manager/integration/tests/test_node.py
@@ -50,9 +50,6 @@
from common import update_setting
from common import SETTING_NODE_DRAIN_POLICY, DATA_SIZE_IN_MB_3
from common import make_deployment_with_pvc # NOQA
from common import create_pv_for_volume
from common import create_pvc_for_volume, create_and_wait_deployment
from common import get_apps_api_client, write_pod_volume_random_data
from common import prepare_host_disk, wait_for_volume_degraded
from common import create_deployment_and_write_data

@@ -2693,7 +2690,15 @@ def finalizer():
def drain_node(core_api, node): # NOQA
set_node_cordon(core_api, node.id, True)

command = ["kubectl", "drain", node.id, "--ignore-daemonsets"]
command = [
"kubectl",
"drain",
node.id,
"--ignore-daemonsets",
"--delete-emptydir-data",
"--grace-period=-1"
]

subprocess.run(command, check=True)
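
A note on what this helper executes: with check=True, subprocess.run raises CalledProcessError when kubectl exits non-zero, so a failed drain fails the test immediately. A minimal sketch of the equivalent call, using a hypothetical node name:

# What drain_node(core_api, node) effectively runs for a node whose id is
# "node-1" (the node name here is illustrative only).
import subprocess

subprocess.run(
    ["kubectl", "drain", "node-1",
     "--ignore-daemonsets", "--delete-emptydir-data", "--grace-period=-1"],
    check=True,  # raise CalledProcessError if the drain fails
)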


@@ -2713,8 +2718,84 @@ def get_replica_detail(replica_name):
return replica_info


def check_node_auto_evict_state(client, target_node, expect_state): # NOQA
def get_specific_node(client, target_node):
nodes = client.list_node()
for node in nodes:
if node.id == target_node.id:
return node

for i in range(RETRY_COUNTS):
node = get_specific_node(client, target_node)
if node.autoEvicting is expect_state:
break
time.sleep(RETRY_INTERVAL)
assert node.autoEvicting is expect_state


def check_replica_evict_state(client, volume_name, node, expect_state): # NOQA
volume = client.by_id_volume(volume_name)
for replica in volume.replicas:
if replica.hostId == node.id:
replica_name = replica.name
break

replica_info = get_replica_detail(replica_name)
eviction_requested = replica_info["spec"]["evictionRequested"]
assert eviction_requested is expect_state


def wait_drain_complete(future, timeout):
"""
Wait for a concurrent.futures object to complete within the given duration
"""
thread_timeout = timeout
try:
future.result(timeout=thread_timeout)
drain_complete = True
except TimeoutError:
print("drain node thread exceed timeout ({})s".format(thread_timeout))
drain_complete = False
future.cancel()
finally:
assert drain_complete is True
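
A minimal usage sketch, mirroring how the tests below pair this helper with drain_node (the executor setup is an assumption; the tests construct their own executor):

# Run the blocking kubectl drain in a worker thread, then require it to
# finish within 60 seconds.
from concurrent.futures import ThreadPoolExecutor

executor = ThreadPoolExecutor(max_workers=1)  # worker count is illustrative
future = executor.submit(drain_node, core_api, evict_source_node)
wait_drain_complete(future, 60)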


def make_replica_on_specific_node(client, volume_name, node): # NOQA
volume = client.by_id_volume(volume_name)
volume.updateReplicaCount(replicaCount=1)
for replica in volume.replicas:
if replica.hostId != node.id:
volume.replicaRemove(name=replica.name)
wait_for_volume_replica_count(client, volume_name, 1)


def get_all_replica_name(client, volume_name): # NOQA
volume_replicas = []
volume = client.by_id_volume(volume_name)
for replica in volume.replicas:
volume_replicas.append(replica.name)

return volume_replicas


def check_all_replicas_evict_state(client, volume_name, expect_state): # NOQA
volume = client.by_id_volume(volume_name)
for replica in volume.replicas:
replica_info = get_replica_detail(replica.name)
eviction_requested = replica_info["spec"]["evictionRequested"]
assert eviction_requested is expect_state
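
The check_* helpers above consolidate the eviction-state assertions that were previously inlined in each test. During a drain (step 6 of the tests below) they are invoked roughly as in this sketch; the volume and node names are reused from the tests purely for illustration:

# The replica on the drained node should be marked for eviction, the node
# should report autoEvicting, and the replicas of a volume that still has
# redundancy elsewhere (volume2 in the second test) should not be marked.
check_replica_evict_state(client, volume_name, evict_source_node, True)
check_node_auto_evict_state(client, evict_source_node, True)
check_all_replicas_evict_state(client, volume2_name, False)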


@pytest.mark.skip(reason="Can not run when in-cluster backup store pod exist") # NOQA
def test_drain_with_block_for_eviction_success(client, core_api, volume_name, make_deployment_with_pvc): # NOQA
"""
Test case has the potential to drain a node where backup store pods are
located.
In that case, the test case will fail because backup store pods can only be
forcibly drained.
---
Test drain completes after evicting replica with node-drain-policy
block-for-eviction
@@ -2746,33 +2827,13 @@ def test_drain_with_block_for_eviction_success(client, core_api, volume_name, ma
client.update(setting, value="block-for-eviction")

# Step 2, 3, 4
volume = client.create_volume(name=volume_name,
size=str(1 * Gi),
numberOfReplicas=3)
volume = common.wait_for_volume_detached(client, volume_name)

pvc_name = volume_name + "-pvc"
create_pv_for_volume(client, core_api, volume, volume_name)
create_pvc_for_volume(client, core_api, volume, pvc_name)
deployment_name = volume_name + "-dep"
deployment = make_deployment_with_pvc(deployment_name, pvc_name)
deployment["spec"]["template"]["spec"]["nodeSelector"] \
= {"kubernetes.io/hostname": host_id}

apps_api = get_apps_api_client()
create_and_wait_deployment(apps_api, deployment)

pod_names = common.get_deployment_pod_names(core_api, deployment)
data_path = '/data/test'
write_pod_volume_random_data(core_api,
pod_names[0],
data_path,
DATA_SIZE_IN_MB_3)
expected_test_data_checksum = get_pod_data_md5sum(core_api,
pod_names[0],
data_path)

volume = wait_for_volume_healthy(client, volume_name)
volume, pod, checksum = create_deployment_and_write_data(client,
core_api,
make_deployment_with_pvc, # NOQA
volume_name,
str(1 * Gi),
3,
DATA_SIZE_IN_MB_3, host_id) # NOQA

# Make replica not locate on eviction target node
volume.updateReplicaCount(replicaCount=2)
@@ -2789,33 +2850,11 @@ def test_drain_with_block_for_eviction_success(client, core_api, volume_name, ma
future = executor.submit(drain_node, core_api, evict_source_node)

# Step 6
volume = client.by_id_volume(volume_name)
for replica in volume.replicas:
if replica.hostId == evict_source_node.id:
replica_name = replica.name
break

replica_info = get_replica_detail(replica_name)
eviction_requested = replica_info["spec"]["evictionRequested"]
assert eviction_requested is True

nodes = client.list_node()
for node in nodes:
if node.id == evict_source_node.id:
assert node.autoEvicting is True
check_replica_evict_state(client, volume_name, evict_source_node, True)
check_node_auto_evict_state(client, evict_source_node, True)

# Step 7
thread_timeout = 60
try:
future.result(timeout=thread_timeout)
drain_complete = True
except TimeoutError:
print("drain node thread exceed timeout ({})s".format(thread_timeout))
drain_complete = False
future.cancel()
finally:
assert drain_complete is True

wait_drain_complete(future, 60)
wait_for_volume_replica_count(client, volume_name, 2)

# Step 8
@@ -2828,33 +2867,29 @@ def test_drain_with_block_for_eviction_success(client, core_api, volume_name, ma
assert replica.hostId != evict_source_node.id

# Step 10
nodes = client.list_node()
for node in nodes:
assert node.autoEvicting is False
check_node_auto_evict_state(client, evict_source_node, False)

# Step 11
volume = client.by_id_volume(volume_name)
for replica in volume.replicas:
if replica.hostId == evict_target_node.id:
replica_name = replica.name
break

replica_info = get_replica_detail(replica_name)
eviction_requested = replica_info["spec"]["evictionRequested"]
assert eviction_requested is False
check_replica_evict_state(client, volume_name, evict_target_node, False)

# Step 12
data_path = '/data/test'
test_data_checksum = get_pod_data_md5sum(core_api,
pod_names[0],
pod,
data_path)

assert expected_test_data_checksum == test_data_checksum
assert checksum == test_data_checksum


@pytest.mark.skip(reason="Can not run when in-cluster backup store pod exist") # NOQA
def test_drain_with_block_for_eviction_if_contains_last_replica_success(client, # NOQA
core_api, # NOQA
make_deployment_with_pvc): # NOQA
"""
Test case has the potential to drain a node where backup store pods are
located.
In that case, the test case will fail because backup store pods can only be
forcibly drained.
---
Test drain completes after evicting replicas with node-drain-policy
block-for-eviction-if-contains-last-replica
@@ -2882,52 +2917,10 @@ def test_drain_with_block_for_eviction_if_contains_last_replica_success(client,
12. Verify that `replica.spec.evictionRequested == false` on all replicas.
13. Verify the data in both volumes.
"""
def make_replica_on_specific_node(client, volume_name, node): # NOQA
volume = client.by_id_volume(volume_name)
volume.updateReplicaCount(replicaCount=1)
for replica in volume.replicas:
if replica.hostId != node.id:
volume.replicaRemove(name=replica.name)

def get_all_replica_name(client, volume_name): # NOQA
volume_replicas = []
volume = client.by_id_volume(volume_name)
for replica in volume.replicas:
volume_replicas.append(replica.name)

return volume_replicas

def check_replica_evict_state(client, volume_name, node, expect_state): # NOQA
volume = client.by_id_volume(volume_name)
for replica in volume.replicas:
if replica.hostId == node.id:
replica_name = replica.name
break

replica_info = get_replica_detail(replica_name)
eviction_requested = replica_info["spec"]["evictionRequested"]
assert eviction_requested is expect_state

def check_all_replicas_evict_state(client, volume_name, expect_state): # NOQA
volume = client.by_id_volume(volume_name)
for replica in volume.replicas:
replica_info = get_replica_detail(replica.name)
eviction_requested = replica_info["spec"]["evictionRequested"]
assert eviction_requested is expect_state

def check_node_auto_evict_state(client, target_node, expect_state):
nodes = client.list_node()
for node in nodes:
if node.id == target_node.id:
assert node.autoEvicting is expect_state

host_id = get_self_host_id()
nodes = client.list_node()
evict_nodes = [node for node in nodes if node.id != host_id][:2]
evict_source_node = evict_nodes[0]
print()
print(host_id)
print(evict_source_node.id)

# Create extra disk on current node
node = client.by_id_node(host_id)
@@ -2961,8 +2954,22 @@ def check_node_auto_evict_state(client, target_node, expect_state):
# Step 2, 3
volume1_name = "vol-1"
volume2_name = "vol-2"
volume1, pod1, checksum1 = create_deployment_and_write_data(client, core_api, make_deployment_with_pvc, volume1_name, str(1 * Gi), 3, DATA_SIZE_IN_MB_3, host_id) # NOQA
volume2, pod2, checksum2 = create_deployment_and_write_data(client, core_api, make_deployment_with_pvc, volume2_name, str(1 * Gi), 3, DATA_SIZE_IN_MB_3, host_id) # NOQA
volume1, pod1, checksum1 = create_deployment_and_write_data(client,
core_api,
make_deployment_with_pvc, # NOQA
volume1_name,
str(1 * Gi),
3,
DATA_SIZE_IN_MB_3, # NOQA
host_id) # NOQA
volume2, pod2, checksum2 = create_deployment_and_write_data(client,
core_api,
make_deployment_with_pvc, # NOQA
volume2_name,
str(1 * Gi),
3,
DATA_SIZE_IN_MB_3, # NOQA
host_id) # NOQA
# Make volume 1 replica only located on evict_source_node
make_replica_on_specific_node(client, volume1_name, evict_source_node)
volume2_replicas = get_all_replica_name(client, volume2_name)
@@ -2979,16 +2986,7 @@ def check_node_auto_evict_state(client, target_node, expect_state):
check_all_replicas_evict_state(client, volume2_name, False)

# Step 7
thread_timeout = 60
try:
future.result(timeout=thread_timeout)
drain_complete = True
except TimeoutError:
print("drain node thread exceed timeout ({})s".format(thread_timeout))
drain_complete = False
future.cancel()
finally:
assert drain_complete is True
wait_drain_complete(future, 60)

# Step 8
set_node_cordon(core_api, evict_source_node.id, False)
@@ -3001,7 +2999,7 @@ def check_node_auto_evict_state(client, target_node, expect_state):

# Step 10
# Verify volume2 replicas not moved by check replica name
# stored before node drain
# stored before the node drain
volume2 = wait_for_volume_healthy(client, volume2_name)
for replica in volume2.replicas:
assert replica.name in volume2_replicas
