
Add test case test_drain_with_block_for_eviction_if_contains_last_replica_success

ref: 7521

Signed-off-by: Chris <[email protected]>
chriscchien committed Feb 20, 2024
1 parent 6a084af commit 2a64a59
Showing 1 changed file with 191 additions and 3 deletions.
194 changes: 191 additions & 3 deletions manager/integration/tests/test_node.py
@@ -53,6 +53,7 @@
from common import create_pv_for_volume
from common import create_pvc_for_volume, create_and_wait_deployment
from common import get_apps_api_client, write_pod_volume_random_data
from common import prepare_host_disk, wait_for_volume_degraded

from backupstore import set_random_backupstore # NOQA
from concurrent.futures import ThreadPoolExecutor, TimeoutError
@@ -2849,8 +2850,9 @@ def test_drain_with_block_for_eviction_success(client, core_api, volume_name, ma
    assert expected_test_data_checksum == test_data_checksum


@pytest.mark.skip(reason="TODO") # NOQA
def test_drain_with_block_for_eviction_if_contains_last_replica_success():
def test_drain_with_block_for_eviction_if_contains_last_replica_success(client, # NOQA
                                                                         core_api, # NOQA
                                                                         make_deployment_with_pvc): # NOQA
    """
    Test drain completes after evicting replicas with node-drain-policy
    block-for-eviction-if-contains-last-replica
@@ -2864,7 +2866,6 @@ def test_drain_with_block_for_eviction_if_contains_last_replica_success():
    4. Write data to the volumes.
    5. Drain a node both volumes have a replica scheduled to.
    6. While the drain is ongoing:
       - Verify that the volume with one replica never becomes degraded.
       - Verify that the volume with three replicas becomes degraded.
       - Verify that `node.status.autoEvicting == true`.
       - Optionally verify that `replica.spec.evictionRequested == true` on the
@@ -2880,6 +2881,193 @@ def test_drain_with_block_for_eviction_if_contains_last_replica_success():
    12. Verify that `replica.spec.evictionRequested == false` on all replicas.
    13. Verify the data in both volumes.
    """
    apps_api = get_apps_api_client()
    host_id = get_self_host_id()
    nodes = client.list_node()
    evict_nodes = [node for node in nodes if node.id != host_id][:2]
    evict_source_node = evict_nodes[0]

    # Create extra disk on current node
    node = client.by_id_node(host_id)
    disks = node.disks

    disk_volume_name = 'vol-disk'
    disk_volume = client.create_volume(name=disk_volume_name,
                                       size=str(2 * Gi),
                                       numberOfReplicas=1,
                                       dataLocality="strict-local")
    disk_volume = wait_for_volume_detached(client, disk_volume_name)

    disk_volume.attach(hostId=host_id)
    disk_volume = wait_for_volume_healthy(client, disk_volume_name)
    disk_path = prepare_host_disk(get_volume_endpoint(disk_volume),
                                  disk_volume_name)
    disk = {"path": disk_path, "allowScheduling": True}

    update_disk = get_update_disks(disks)
    update_disk["disk1"] = disk

    node = update_node_disks(client, node.name, disks=update_disk, retry=True)
    node = wait_for_disk_update(client, host_id, len(update_disk))
    assert len(node.disks) == len(update_disk)

    # Step 1
    setting = client.by_id_setting(
        SETTING_NODE_DRAIN_POLICY)
    client.update(setting,
                  value="block-for-eviction-if-contains-last-replica")

    # Step 2, 3
    volume1_name = "vol-1"
    volume1 = client.create_volume(name=volume1_name,
                                   size=str(1 * Gi),
                                   numberOfReplicas=3)
    volume1 = common.wait_for_volume_detached(client, volume1_name)

    pvc1_name = volume1_name + "-pvc"
    create_pv_for_volume(client, core_api, volume1, volume1_name)
    create_pvc_for_volume(client, core_api, volume1, pvc1_name)
    deployment1_name = volume1_name + "-dep"
    deployment1 = make_deployment_with_pvc(deployment1_name, pvc1_name)
    deployment1["spec"]["template"]["spec"]["nodeSelector"] \
        = {"kubernetes.io/hostname": host_id}

    create_and_wait_deployment(apps_api, deployment1)

    volume1 = wait_for_volume_healthy(client, volume1_name)
    # Leave volume 1 with a single replica, located on evict_source_node
    volume1.updateReplicaCount(replicaCount=1)
    for replica in volume1.replicas:
        if replica.hostId != evict_source_node.id:
            volume1.replicaRemove(name=replica.name)

    # Volume 2 is attached to the current node with 3 replicas
    volume2_name = "vol-2"
    volume2 = client.create_volume(name=volume2_name,
                                   size=str(1 * Gi),
                                   numberOfReplicas=3)
    volume2 = common.wait_for_volume_detached(client, volume2_name)

    pvc2_name = volume2_name + "-pvc"
    create_pv_for_volume(client, core_api, volume2, volume2_name)
    create_pvc_for_volume(client, core_api, volume2, pvc2_name)
    deployment2_name = volume2_name + "-dep"
    deployment2 = make_deployment_with_pvc(deployment2_name, pvc2_name)
    deployment2["spec"]["template"]["spec"]["nodeSelector"] \
        = {"kubernetes.io/hostname": host_id}

    create_and_wait_deployment(apps_api, deployment2)

    volume2_replicas = []
    volume2 = client.by_id_volume(volume2_name)
    for replica in volume2.replicas:
        volume2_replicas.append(replica.name)

    # Step 4
    data_path = '/data/test'

    deployment1_pod_names = common.get_deployment_pod_names(core_api,
                                                            deployment1)
    write_pod_volume_random_data(core_api,
                                 deployment1_pod_names[0],
                                 data_path,
                                 DATA_SIZE_IN_MB_3)
    expected_test_data_checksum1 = get_pod_data_md5sum(core_api,
                                                       deployment1_pod_names[0], # NOQA
                                                       data_path)

    deployment2_pod_names = common.get_deployment_pod_names(core_api,
                                                            deployment2)
    write_pod_volume_random_data(core_api,
                                 deployment2_pod_names[0],
                                 data_path,
                                 DATA_SIZE_IN_MB_3)
    expected_test_data_checksum2 = get_pod_data_md5sum(core_api,
                                                       deployment2_pod_names[0], # NOQA
                                                       data_path)

    # Step 5
    executor = ThreadPoolExecutor(max_workers=5)
    future = executor.submit(drain_node, core_api, evict_source_node)

    # Step 6
    volume1 = client.by_id_volume(volume1_name)
    for replica in volume1.replicas:
        if replica.hostId == evict_source_node.id:
            replica_name = replica.name
            break

    replica_info = get_replica_detail(replica_name)
    eviction_requested = replica_info["spec"]["evictionRequested"]
    assert eviction_requested is True

    nodes = client.list_node()
    for node in nodes:
        if node.id == evict_source_node.id:
            assert node.autoEvicting is True

    volume2 = wait_for_volume_degraded(client, volume2_name)

    for replica in volume2.replicas:
        replica_info = get_replica_detail(replica.name)
        eviction_requested = replica_info["spec"]["evictionRequested"]
        assert eviction_requested is False

    # Step 7
    thread_timeout = 60
    try:
        future.result(timeout=thread_timeout)
        drain_complete = True
    except TimeoutError:
        print("drain node thread exceeded {}s timeout".format(thread_timeout))
        drain_complete = False
        future.cancel()
    finally:
        assert drain_complete is True

    # Step 8
    set_node_cordon(core_api, evict_source_node.id, False)

    # Step 9
    volume1 = client.by_id_volume(volume1_name)
    assert len(volume1.replicas) == 1
    for replica in volume1.replicas:
        assert replica.hostId != evict_source_node.id

    # Step 10
    # Verify volume2 replicas were not moved, by checking the replica names
    # recorded before the node drain
    volume2 = wait_for_volume_healthy(client, volume2_name)
    for replica in volume2.replicas:
        assert replica.name in volume2_replicas

    # Step 11
    nodes = client.list_node()
    for node in nodes:
        if node.id == evict_source_node.id:
            assert node.autoEvicting is False

    # Step 12
    def check_all_replica_eviction_request(client, volume_name, expect_result): # NOQA
        volume = client.by_id_volume(volume_name)
        for replica in volume.replicas:
            replica_info = get_replica_detail(replica.name)
            eviction_requested = replica_info["spec"]["evictionRequested"]
            assert eviction_requested is expect_result

    check_all_replica_eviction_request(client,
                                       volume1_name,
                                       expect_result=False)
    check_all_replica_eviction_request(client,
                                       volume2_name,
                                       expect_result=False)

    # Step 13
    test_data_checksum1 = get_pod_data_md5sum(core_api,
                                              deployment1_pod_names[0],
                                              data_path)
    assert expected_test_data_checksum1 == test_data_checksum1

    test_data_checksum2 = get_pod_data_md5sum(core_api,
                                              deployment2_pod_names[0],
                                              data_path)
    assert expected_test_data_checksum2 == test_data_checksum2
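

# The checks in steps 6 and 11 above read `node.autoEvicting` right after the
# drain future is submitted. Below is a minimal polling sketch for that flag;
# the helper name and the 150 x 2s retry budget are assumptions rather than
# values taken from this suite, and it only reuses calls that already appear
# in this test (`client.by_id_node`) plus the standard library.
def wait_for_node_auto_evicting(client, node_id, expected):
    import time

    for _ in range(150):
        node = client.by_id_node(node_id)
        if node.autoEvicting is expected:
            break
        time.sleep(2)
    assert node.autoEvicting is expected
    return node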


@pytest.mark.skip(reason="TODO") # NOQA
def test_drain_with_block_for_eviction_failure():
