From 820ab5e3bfbc616a9d10d9026fcf5bfe6a8293c8 Mon Sep 17 00:00:00 2001 From: Noha Ihab <49988746+NohaIhab@users.noreply.github.com> Date: Tue, 17 Sep 2024 14:19:12 +0300 Subject: [PATCH] fix: refactor the `create_profile` fixture to ensure deletion (#117) * fix: delete the Profile in the foreground * assert profile deleted --- driver/test_kubeflow_workloads.py | 12 ++++------ driver/utils.py | 37 ++++++++++++++++++++++++++----- 2 files changed, 36 insertions(+), 13 deletions(-) diff --git a/driver/test_kubeflow_workloads.py b/driver/test_kubeflow_workloads.py index 5d9dd22..a2c0637 100644 --- a/driver/test_kubeflow_workloads.py +++ b/driver/test_kubeflow_workloads.py @@ -14,7 +14,8 @@ create_namespaced_resource, load_in_cluster_generic_resources, ) -from utils import assert_namespace_active, delete_job, fetch_job_logs, wait_for_job +from lightkube.types import CascadeType +from utils import assert_namespace_active, assert_profile_deleted, fetch_job_logs, wait_for_job log = logging.getLogger(__name__) @@ -93,7 +94,8 @@ def create_profile(lightkube_client): # delete the Profile at the end of the module tests log.info(f"Deleting Profile {NAMESPACE}...") - lightkube_client.delete(PROFILE_RESOURCE, name=NAMESPACE) + lightkube_client.delete(PROFILE_RESOURCE, name=NAMESPACE, cascade=CascadeType.FOREGROUND) + assert_profile_deleted(lightkube_client, NAMESPACE, log) @pytest.fixture(scope="function") @@ -180,12 +182,6 @@ def test_kubeflow_workloads( fetch_job_logs(JOB_NAME, NAMESPACE, TESTS_LOCAL_RUN) -def teardown_module(): - """Cleanup resources.""" - log.info(f"Deleting Job {NAMESPACE}/{JOB_NAME}...") - delete_job(JOB_NAME, NAMESPACE) - - def proxy_context(request) -> Dict[str, str]: """Return a dictionary with proxy environment variables from user input.""" proxy_context = {} diff --git a/driver/utils.py b/driver/utils.py index 19aa0f0..bb3d824 100644 --- a/driver/utils.py +++ b/driver/utils.py @@ -5,10 +5,18 @@ import subprocess import tenacity -from lightkube import Client +from lightkube import ApiError, Client +from lightkube.generic_resource import create_global_resource from lightkube.resources.batch_v1 import Job from lightkube.resources.core_v1 import Namespace +PROFILE_RESOURCE = create_global_resource( + group="kubeflow.org", + version="v1", + kind="profile", + plural="profiles", +) + log = logging.getLogger(__name__) @@ -91,7 +99,26 @@ def fetch_job_logs(job_name, namespace, tests_local_run): subprocess.check_call(command) -def delete_job(job_name, namespace, lightkube_client=None): - """Delete a Kubernetes Job.""" - client = lightkube_client or Client(trust_env=False) - client.delete(Job, name=job_name, namespace=namespace) +@tenacity.retry( + wait=tenacity.wait_exponential(multiplier=2, min=1, max=10), + stop=tenacity.stop_after_attempt(10), + reraise=True, +) +def assert_profile_deleted(client, profile_name, logger: logging.Logger): + """Assert that the Profile is deleted. + + Retries multiple times to allow for the Profile to be deleted. + """ + deleted = False + try: + client.get(PROFILE_RESOURCE, profile_name) + except ApiError as error: + if error.status.code != 404: + logger.info(f"Unable to get Profile {profile_name} (status: {error.status.code})") + raise + else: + deleted = True + + logger.info(f"Waiting for Profile {profile_name} to be deleted..") + + assert deleted, f"Waited too long for Profile {profile_name} to be deleted!"