Add test case test_drain_with_block_for_eviction_failure
ref: 7521

Signed-off-by: Chris <[email protected]>
chriscchien committed Feb 23, 2024
1 parent b489201 commit 2f3ad3e
Showing 1 changed file with 61 additions and 21 deletions.
82 changes: 61 additions & 21 deletions manager/integration/tests/test_node.py
@@ -2745,20 +2745,29 @@ def check_replica_evict_state(client, volume_name, node, expect_state): # NOQA
assert eviction_requested is expect_state


def wait_drain_complete(future, timeout):
def wait_drain_complete(future, timeout, completed=True):
"""
Wait for a concurrent.futures object to complete within the given duration
"""
def stop_drain_process():
"""
Neither future.cancel() nor executor.shutdown(wait=False) can actually
stop the drain process.
Use this function to kill the drain process instead.
"""
command = ["pkill", "-f", "kubectl drain"]
subprocess.check_output(command, text=True)

thread_timeout = timeout
try:
future.result(timeout=thread_timeout)
drain_complete = True
except TimeoutError:
print("drain node thread exceed timeout ({})s".format(thread_timeout))
drain_complete = False
future.cancel()
stop_drain_process()
finally:
assert drain_complete is True
assert drain_complete is completed
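The new completed parameter lets a caller assert that a drain is still blocked after the timeout instead of asserting success. A brief usage sketch (not part of this commit; drain_node, core_api, and evict_source_node are assumed from the tests below):

from concurrent.futures import ThreadPoolExecutor

# Usage sketch only; names other than wait_drain_complete come from the tests below.
executor = ThreadPoolExecutor(max_workers=5)
future = executor.submit(drain_node, core_api, evict_source_node)
# Assert the drain is still blocked after 90 seconds; on timeout the helper
# kills the kubectl drain process before returning.
wait_drain_complete(future, 90, completed=False)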


def make_replica_on_specific_node(client, volume_name, node): # NOQA
@@ -2787,15 +2796,11 @@ def check_all_replicas_evict_state(client, volume_name, expect_state): # NOQA
assert eviction_requested is expect_state


@pytest.mark.skip(reason="Can not run when in-cluster backup store pod exist") # NOQA
def test_drain_with_block_for_eviction_success(client, core_api, volume_name, make_deployment_with_pvc): # NOQA
def test_drain_with_block_for_eviction_success(client, # NOQA
core_api, # NOQA
volume_name, # NOQA
make_deployment_with_pvc): # NOQA
"""
Test case has the potential to drain node where backup store pods are
located.
In that case, test case will fail because backup store pods can only be
forcibly drained.
---
Test drain completes after evicting replica with node-drain-policy
block-for-eviction
@@ -2880,16 +2885,10 @@ def test_drain_with_block_for_eviction_success(client, core_api, volume_name, ma
assert checksum == test_data_checksum


@pytest.mark.skip(reason="Can not run when in-cluster backup store pod exist") # NOQA
def test_drain_with_block_for_eviction_if_contains_last_replica_success(client, # NOQA
core_api, # NOQA
make_deployment_with_pvc): # NOQA
"""
Test case has the potential to drain node where backup store pods are
located.
In that case, test case will fail because backup store pods can only be
forcibly drained.
---
Test drain completes after evicting replicas with node-drain-policy
block-for-eviction-if-contains-last-replica
@@ -2921,7 +2920,6 @@ def test_drain_with_block_for_eviction_if_contains_last_replica_success(client,
nodes = client.list_node()
evict_nodes = [node for node in nodes if node.id != host_id][:2]
evict_source_node = evict_nodes[0]

# Create extra disk on current node
node = client.by_id_node(host_id)
disks = node.disks
@@ -2993,7 +2991,7 @@ def test_drain_with_block_for_eviction_if_contains_last_replica_success(client,

# Step 9
volume1 = client.by_id_volume(volume1_name)
assert len(volume1.replicas) == 1
wait_for_volume_replica_count(client, volume1_name, 1)
for replica in volume1.replicas:
assert replica.hostId != evict_source_node.id

@@ -3024,8 +3022,10 @@ def test_drain_with_block_for_eviction_if_contains_last_replica_success(client,
assert checksum2 == test_data_checksum2


@pytest.mark.skip(reason="TODO") # NOQA
def test_drain_with_block_for_eviction_failure():
def test_drain_with_block_for_eviction_failure(client, # NOQA
core_api, # NOQA
volume_name, # NOQA
make_deployment_with_pvc): # NOQA
"""
Test drain never completes with node-drain-policy block-for-eviction
@@ -3040,7 +3040,47 @@ def test_drain_with_block_for_eviction_failure():
- Verify that `node.status.autoEvicting == true`.
- Verify that `replica.spec.evictionRequested == true`.
7. Verify the drain never completes.
8. Stop the drain, then verify the volume is healthy and the data is correct
"""
host_id = get_self_host_id()
nodes = client.list_node()
evict_nodes = [node for node in nodes if node.id != host_id][:2]
evict_source_node = evict_nodes[0]

# Step 1
setting = client.by_id_setting(
SETTING_NODE_DRAIN_POLICY)
client.update(setting, value="block-for-eviction")

# Step 2, 3, 4
volume, pod, checksum = create_deployment_and_write_data(client,
core_api,
make_deployment_with_pvc, # NOQA
volume_name,
str(1 * Gi),
3,
DATA_SIZE_IN_MB_3, host_id) # NOQA

# Step 5
executor = ThreadPoolExecutor(max_workers=5)
future = executor.submit(drain_node, core_api, evict_source_node)

# Step 6
check_replica_evict_state(client, volume_name, evict_source_node, True)
check_node_auto_evict_state(client, evict_source_node, True)

# Step 7
wait_drain_complete(future, 90, False)

# Step 8
set_node_cordon(core_api, evict_source_node.id, False)
wait_for_volume_healthy(client, volume_name)
data_path = '/data/test'
test_data_checksum = get_pod_data_md5sum(core_api,
pod,
data_path)
assert checksum == test_data_checksum


@pytest.mark.node # NOQA
def test_auto_detach_volume_when_node_is_cordoned(client, core_api, volume_name): # NOQA
