Add test case test_drain_with_block_for_eviction_if_contains_last_replica_success

ref: 7521

Signed-off-by: Chris <[email protected]>
chriscchien committed Feb 20, 2024
1 parent 6a084af commit d25e193
Showing 2 changed files with 195 additions and 3 deletions.
26 changes: 26 additions & 0 deletions manager/integration/tests/common.py
@@ -6123,3 +6123,29 @@ def wait_for_instance_manager_count(client, number, retry_counts=120):
time.sleep(RETRY_INTERVAL_LONG)

return len(ims)


def create_disk_on_current_node(client, host_id, data_locality):
# Create extra disk on current node
node = client.by_id_node(host_id)
disks = node.disks

disk_volume_name = 'vol-disk'
disk_volume = client.create_volume(name=disk_volume_name,
size=str(2 * Gi),
numberOfReplicas=1,
dataLocality=data_locality)
disk_volume = wait_for_volume_detached(client, disk_volume_name)

disk_volume.attach(hostId=host_id)
disk_volume = wait_for_volume_healthy(client, disk_volume_name)
disk_path = prepare_host_disk(get_volume_endpoint(disk_volume),
disk_volume_name)
disk = {"path": disk_path, "allowScheduling": True}

update_disk = get_update_disks(disks)
update_disk["disk1"] = disk

node = update_node_disks(client, node.name, disks=update_disk, retry=True)
node = wait_for_disk_update(client, host_id, len(update_disk))
assert len(node.disks) == len(update_disk)
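
A minimal usage sketch for the new helper, mirroring the call made in test_node.py below; the get_longhorn_api_client import is an assumption about the existing common.py helpers, while get_self_host_id and the "strict-local" data locality value come from the diff itself.

# Sketch (assumed imports): attach an extra disk, backed by a dedicated
# "strict-local" volume, to the node running the test suite before
# exercising replica eviction.
from common import get_self_host_id, get_longhorn_api_client  # assumed helpers
from common import create_disk_on_current_node

client = get_longhorn_api_client()  # assumed to return a Longhorn API client
host_id = get_self_host_id()
create_disk_on_current_node(client, host_id, "strict-local")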
172 changes: 169 additions & 3 deletions manager/integration/tests/test_node.py
@@ -53,6 +53,8 @@
from common import create_pv_for_volume
from common import create_pvc_for_volume, create_and_wait_deployment
from common import get_apps_api_client, write_pod_volume_random_data
from common import wait_for_volume_degraded
from common import create_disk_on_current_node

from backupstore import set_random_backupstore # NOQA
from concurrent.futures import ThreadPoolExecutor, TimeoutError
@@ -2849,8 +2851,22 @@ def test_drain_with_block_for_eviction_success(client, core_api, volume_name, ma
assert expected_test_data_checksum == test_data_checksum


@pytest.mark.skip(reason="TODO") # NOQA
def test_drain_with_block_for_eviction_if_contains_last_replica_success():
def write_deployment_and_get_md5(core_api, data_path, deployment, size): # NOQA
deployment_pod_names = common.get_deployment_pod_names(core_api,
deployment)
write_pod_volume_random_data(core_api,
deployment_pod_names[0],
data_path,
size)
expected_test_data_checksum = get_pod_data_md5sum(core_api,
deployment_pod_names[0],
data_path)
return expected_test_data_checksum, deployment_pod_names[0]


def test_drain_with_block_for_eviction_if_contains_last_replica_success(client, # NOQA
core_api, # NOQA
make_deployment_with_pvc): # NOQA
"""
Test drain completes after evicting replicas with node-drain-policy
block-for-eviction-if-contains-last-replica
@@ -2864,7 +2880,6 @@ def test_drain_with_block_for_eviction_if_contains_last_replica_success():
4. Write data to the volumes.
5. Drain a node both volumes have a replica scheduled to.
6. While the drain is ongoing:
- Verify that the volume with one replica never becomes degraded.
- Verify that the volume with three replicas becomes degraded.
- Verify that `node.status.autoEvicting == true`.
- Optionally verify that `replica.spec.evictionRequested == true` on the
@@ -2880,6 +2895,157 @@ def test_drain_with_block_for_eviction_if_contains_last_replica_success():
12. Verify that `replica.spec.evictionRequested == false` on all replicas.
13. Verify the data in both volumes.
"""
apps_api = get_apps_api_client()
host_id = get_self_host_id()
nodes = client.list_node()
evict_nodes = [node for node in nodes if node.id != host_id][:2]
evict_source_node = evict_nodes[0]

create_disk_on_current_node(client, host_id, "strict-local")
# Step 1
setting = client.by_id_setting(
SETTING_NODE_DRAIN_POLICY)
client.update(setting, value="block-for-eviction-if-contains-last-replica")

# Step 2, 3
def create_volume_with_deployment(client, volume_name, on_current_node=False): # NOQA
volume = client.create_volume(name=volume_name,
size=str(1 * Gi),
numberOfReplicas=3)
volume = common.wait_for_volume_detached(client, volume_name)

pvc_name = volume_name + "-pvc"
create_pv_for_volume(client, core_api, volume, volume_name)
create_pvc_for_volume(client, core_api, volume, pvc_name)
deployment_name = volume_name + "-dep"
deployment = make_deployment_with_pvc(deployment_name, pvc_name)
if on_current_node is True:
deployment["spec"]["template"]["spec"]["nodeSelector"] \
= {"kubernetes.io/hostname": get_self_host_id()}

create_and_wait_deployment(apps_api, deployment)

volume = wait_for_volume_healthy(client, volume_name)
return volume, deployment

volume1_name = "vol-1"
volume1, deployment1 = create_volume_with_deployment(client,
volume1_name,
True)

volume1.updateReplicaCount(replicaCount=1)
for replica in volume1.replicas:
if replica.hostId != evict_source_node.id:
volume1.replicaRemove(name=replica.name)

volume2_name = "vol-2"
volume2, deployment2 = create_volume_with_deployment(client,
volume2_name,
True)

volume2_replicas = []
volume2 = client.by_id_volume(volume2_name)
for replica in volume2.replicas:
volume2_replicas.append(replica.name)

# Step 4
data_path = '/data/test'
expected_test_data_checksum1, deployment_pod1 = \
write_deployment_and_get_md5(core_api,
data_path,
deployment1,
DATA_SIZE_IN_MB_3)
expected_test_data_checksum2, deployment_pod2 = \
write_deployment_and_get_md5(core_api,
data_path,
deployment2,
DATA_SIZE_IN_MB_3)

# Step 5
executor = ThreadPoolExecutor(max_workers=5)
future = executor.submit(drain_node, core_api, evict_source_node)

# Step 6
volume1 = client.by_id_volume(volume1_name)
for replica in volume1.replicas:
if replica.hostId == evict_source_node.id:
replica_name = replica.name
break

replica_info = get_replica_detail(replica_name)
eviction_requested = replica_info["spec"]["evictionRequested"]
assert eviction_requested is True

nodes = client.list_node()
for node in nodes:
if node.id == evict_source_node.id:
assert node.autoEvicting is True

volume2 = wait_for_volume_degraded(client, volume2_name)

for replica in volume2.replicas:
replica_info = get_replica_detail(replica.name)
eviction_requested = replica_info["spec"]["evictionRequested"]
assert eviction_requested is False

# Step 7
thread_timeout = 60
try:
future.result(timeout=thread_timeout)
drain_complete = True
except TimeoutError:
print("drain node thread exceed timeout ({})s".format(thread_timeout))
drain_complete = False
future.cancel()
finally:
assert drain_complete is True

# Step 8
set_node_cordon(core_api, evict_source_node.id, False)

# Step 9
volume1 = client.by_id_volume(volume1_name)
assert len(volume1.replicas) == 1
for replica in volume1.replicas:
assert replica.hostId != evict_source_node.id

# Step 10
volume2 = wait_for_volume_healthy(client, volume2_name)
for replica in volume2.replicas:
assert replica.name in volume2_replicas

# Step 11
nodes = client.list_node()
for node in nodes:
if node.id == evict_source_node.id:
assert node.autoEvicting is False

# Step 12
def check_all_replica_eviction_request(client, volume_name, expect_result): # NOQA
volume = client.by_id_volume(volume_name)
for replica in volume.replicas:
replica_info = get_replica_detail(replica.name)
eviction_requested = replica_info["spec"]["evictionRequested"]
assert eviction_requested is expect_result

check_all_replica_eviction_request(client,
volume1_name,
expect_result=False)
check_all_replica_eviction_request(client,
volume2_name,
expect_result=False)

# Step 13
test_data_checksum1 = get_pod_data_md5sum(core_api,
deployment_pod1,
data_path)
assert expected_test_data_checksum1 == test_data_checksum1

test_data_checksum2 = get_pod_data_md5sum(core_api,
deployment_pod2,
data_path)
assert expected_test_data_checksum2 == test_data_checksum2


@pytest.mark.skip(reason="TODO") # NOQA
def test_drain_with_block_for_eviction_failure():
