Skip to content

Commit

Permalink
test(negative): add try/catch for api calls to increase robustness
Browse files (browse the repository at this point in the history)
Signed-off-by: Yang Chiu <[email protected]>
  • Loading branch information
yangchiu committed Feb 2, 2024
1 parent d5b25d9 commit 1a1d40a
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 71 deletions.
62 changes: 36 additions & 26 deletions e2e/libs/replica/rest.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,6 +3,7 @@
from replica.base import Base

from utils import common_utils
from utility.utility import logging

from replica.constant import RETRY_COUNTS
from replica.constant import RETRY_INTERVAL
Expand All @@ -22,45 +23,54 @@ def delete_replica(self, volume_name, node_name):
def wait_for_replica_rebuilding_start(self, volume_name, node_name):
rebuilding_replica_name = None
for i in range(RETRY_COUNTS):
v = self.longhorn_client.by_id_volume(volume_name)
for replica in v.replicas:
if replica.hostId == node_name:
rebuilding_replica_name = replica.name
try:
v = self.longhorn_client.by_id_volume(volume_name)
for replica in v.replicas:
if replica.hostId == node_name:
rebuilding_replica_name = replica.name
break
if rebuilding_replica_name:
break
if rebuilding_replica_name:
break
except Exception as e:
logging(f"Failed to get volume {e}")
time.sleep(RETRY_INTERVAL)
assert rebuilding_replica_name != None, f'failed to get rebuilding replica name'

started = False
for i in range(RETRY_COUNTS):
v = self.longhorn_client.by_id_volume(volume_name)
for status in v.rebuildStatus:
if status.replica == rebuilding_replica_name and\
status.state == "in_progress":
started = True
try:
v = self.longhorn_client.by_id_volume(volume_name)
for status in v.rebuildStatus:
if status.replica == rebuilding_replica_name and\
status.state == "in_progress":
started = True
break
if started:
break
if started:
break
except Exception as e:
logging(f"Failed to get volume {e}")
time.sleep(RETRY_INTERVAL)
assert started, f'replica {rebuilding_replica_name} rebuilding starting failed'

def wait_for_replica_rebuilding_complete(self, volume_name, node_name):
completed = False
for i in range(RETRY_COUNTS):
v = self.longhorn_client.by_id_volume(volume_name)
for replica in v.replicas:
# use replica.mode is RW or RO to check if this replica
# has been rebuilt or not
# because rebuildStatus is not reliable
# when the rebuild progress reaches 100%
# it will be removed from rebuildStatus immediately
# and you will just get an empty rebuildStatus []
# so it's no way to distinguish "rebuilding not started yet"
# or "rebuilding already completed" using rebuildStatus
if replica.hostId == node_name and replica.mode == "RW":
completed = True
break
try:
v = self.longhorn_client.by_id_volume(volume_name)
for replica in v.replicas:
# use replica.mode is RW or RO to check if this replica
# has been rebuilt or not
# because rebuildStatus is not reliable
# when the rebuild progress reaches 100%
# it will be removed from rebuildStatus immediately
# and you will just get an empty rebuildStatus []
# so it's no way to distinguish "rebuilding not started yet"
# or "rebuilding already completed" using rebuildStatus
if replica.hostId == node_name and replica.mode == "RW":
completed = True
break
except Exception as e:
logging(f"Failed to get volume {e}")
if completed:
break
time.sleep(RETRY_INTERVAL)
Expand Down
4 changes: 2 additions & 2 deletions e2e/libs/utility/utility.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -81,9 +81,9 @@ def wait_for_all_instance_manager_running():
retry_count, retry_interval = get_retry_count_and_interval()
for _ in range(retry_count):
logging(f"Waiting for all instance manager running ({_}) ...")
instance_managers = longhorn_client.list_instance_manager()
instance_manager_map = {}
try:
instance_managers = longhorn_client.list_instance_manager()
instance_manager_map = {}
for im in instance_managers:
if im.currentState == "running":
instance_manager_map[im.nodeID] = im
Expand Down
103 changes: 60 additions & 43 deletions e2e/libs/volume/rest.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -20,7 +20,12 @@ def __init__(self, node_exec):
self.node_exec = node_exec

def get(self, volume_name):
return self.longhorn_client.by_id_volume(volume_name)
for i in range(RETRY_COUNTS):
try:
return self.longhorn_client.by_id_volume(volume_name)
except Exception as e:
logging(f"Failed to get volume {e}")
time.sleep(RETRY_INTERVAL)

def create(self, volume_name, size, replica_count):
return NotImplemented
Expand All @@ -36,20 +41,23 @@ def wait_for_volume_state(self, volume_name, desired_state):

def get_endpoint(self, volume_name):
endpoint = ""
v = self.longhorn_client.by_id_volume(volume_name)
v = self.get(volume_name)
if v.disableFrontend:
assert endpoint == ""
return endpoint
else:
assert v.frontend == VOLUME_FRONTEND_BLOCKDEV or\
v.frontend == VOLUME_FRONTEND_ISCSI
for i in range(RETRY_COUNTS):
v = self.longhorn_client.by_id_volume(volume_name)
engines = v.controllers
assert len(engines) != 0
endpoint = engines[0].endpoint
if endpoint != "":
break
try:
v = self.longhorn_client.by_id_volume(volume_name)
engines = v.controllers
assert len(engines) != 0
endpoint = engines[0].endpoint
if endpoint != "":
break
except Exception as e:
logging(f"Failed to get volume {e}")
time.sleep(RETRY_INTERVAL)

logging(f"Got volume {volume_name} endpoint = {endpoint}")
Expand All @@ -72,55 +80,64 @@ def delete_replica(self, volume_name, node_name):
def wait_for_replica_rebuilding_start(self, volume_name, node_name):
rebuilding_replica_name = None
for i in range(RETRY_COUNTS):
v = self.longhorn_client.by_id_volume(volume_name)
logging(f"Got volume {volume_name} replicas = {v.replicas}")
for replica in v.replicas:
if replica.hostId == node_name:
rebuilding_replica_name = replica.name
try:
v = self.longhorn_client.by_id_volume(volume_name)
logging(f"Got volume {volume_name} replicas = {v.replicas}")
for replica in v.replicas:
if replica.hostId == node_name:
rebuilding_replica_name = replica.name
break
if rebuilding_replica_name:
break
if rebuilding_replica_name:
break
except Exception as e:
logging(f"Failed to get volume {e}")
time.sleep(RETRY_INTERVAL)
assert rebuilding_replica_name != None
logging(f"Got rebuilding replica = {rebuilding_replica_name}")

started = False
for i in range(RETRY_COUNTS):
v = self.longhorn_client.by_id_volume(volume_name)
logging(f"Got volume rebuild status = {v.rebuildStatus}")
for status in v.rebuildStatus:
for replica in v.replicas:
if status.replica == replica.name and \
replica.hostId == node_name and \
status.state == "in_progress":
logging(f"Started {node_name}'s replica {replica.name} rebuilding")
started = True
break
if started:
break
try:
v = self.longhorn_client.by_id_volume(volume_name)
logging(f"Got volume rebuild status = {v.rebuildStatus}")
for status in v.rebuildStatus:
for replica in v.replicas:
if status.replica == replica.name and \
replica.hostId == node_name and \
status.state == "in_progress":
logging(f"Started {node_name}'s replica {replica.name} rebuilding")
started = True
break
if started:
break
except Exception as e:
logging(f"Failed to get volume {e}")
time.sleep(RETRY_INTERVAL)
assert started, f"wait for replica on node {node_name} rebuilding timeout: {v}"

def wait_for_replica_rebuilding_complete(self, volume_name, node_name):
completed = False
for i in range(RETRY_COUNTS):
v = self.longhorn_client.by_id_volume(volume_name)
logging(f"Got volume {volume_name} replicas = {v.replicas}")
for replica in v.replicas:
# use replica.mode is RW or RO to check if this replica
# has been rebuilt or not
# because rebuildStatus is not reliable
# when the rebuild progress reaches 100%
# it will be removed from rebuildStatus immediately
# and you will just get an empty rebuildStatus []
# so it's no way to distinguish "rebuilding not started yet"
# or "rebuilding already completed" using rebuildStatus
if replica.hostId == node_name and replica.mode == "RW":
logging(f"Completed {node_name}'s replica {replica.name} rebuilding")
completed = True
try:
v = self.longhorn_client.by_id_volume(volume_name)
logging(f"Got volume {volume_name} replicas = {v.replicas}")
for replica in v.replicas:
# use replica.mode is RW or RO to check if this replica
# has been rebuilt or not
# because rebuildStatus is not reliable
# when the rebuild progress reaches 100%
# it will be removed from rebuildStatus immediately
# and you will just get an empty rebuildStatus []
# so it's no way to distinguish "rebuilding not started yet"
# or "rebuilding already completed" using rebuildStatus
if replica.hostId == node_name and replica.mode == "RW":
logging(f"Completed {node_name}'s replica {replica.name} rebuilding")
completed = True
break
if completed:
break
if completed:
break
except Exception as e:
logging(f"Failed to get volume {e}")
time.sleep(RETRY_INTERVAL)
assert completed

Expand Down

0 comments on commit 1a1d40a

Please sign in to comment.