From d67a1b8a0cb1f80b1a2b45f0d0149e8003cea822 Mon Sep 17 00:00:00 2001 From: helenxie-bit Date: Thu, 5 Sep 2024 15:51:12 +0800 Subject: [PATCH] add step of checking pod Signed-off-by: helenxie-bit --- .github/workflows/e2e-test-tune-api.yaml | 12 ++++++++++++ .../v1beta1/scripts/gh-actions/run-e2e-tune-api.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/.github/workflows/e2e-test-tune-api.yaml b/.github/workflows/e2e-test-tune-api.yaml index e72e6f6ef9b..31d3585cff2 100644 --- a/.github/workflows/e2e-test-tune-api.yaml +++ b/.github/workflows/e2e-test-tune-api.yaml @@ -33,6 +33,18 @@ jobs: with: tune-api: true training-operator: true + + - name: Check the status of Experiment and Trials + shell: bash + run: | + kubectl get pods -n default + + # describe pod + pod_name=$(kubectl get pods -n default -o jsonpath='{.items[?(@.metadata.labels.trial-name)].metadata.name}') + kubectl describe pod $pod_name -n default + + # check the logs of pod + kubectl logs $pod_name -n default strategy: fail-fast: false diff --git a/test/e2e/v1beta1/scripts/gh-actions/run-e2e-tune-api.py b/test/e2e/v1beta1/scripts/gh-actions/run-e2e-tune-api.py index 640cb2a595b..135f40c6ef8 100644 --- a/test/e2e/v1beta1/scripts/gh-actions/run-e2e-tune-api.py +++ b/test/e2e/v1beta1/scripts/gh-actions/run-e2e-tune-api.py @@ -14,7 +14,7 @@ from verify import verify_experiment_results # Experiment timeout is 40 min. -EXPERIMENT_TIMEOUT = 60 * 40 +EXPERIMENT_TIMEOUT = 60 * 10 # The default logging config. logging.basicConfig(level=logging.INFO)