[SDK] test: Add e2e test for tune function. (#2399)
* fix(sdk): fix incorrect field metrics_collector in tune function.

Signed-off-by: Electronic-Waste <[email protected]>

* test(sdk): Add e2e tests for tune function.

Signed-off-by: Electronic-Waste <[email protected]>

* test(sdk): add missing field parameters.

Signed-off-by: Electronic-Waste <[email protected]>

* refactor(test/sdk): add run-e2e-tune-api.py.

Signed-off-by: Electronic-Waste <[email protected]>

* test(sdk): delete tune testing code in run-e2e-experiment.

Signed-off-by: Electronic-Waste <[email protected]>

* test(sdk): add blank lines.

Signed-off-by: Electronic-Waste <[email protected]>

* test(sdk): add verbose and temporarily delete e2e-experiment test.

Signed-off-by: Electronic-Waste <[email protected]>

* test(sdk): add namespace_labels.

Signed-off-by: Electronic-Waste <[email protected]>

* test(sdk): add time.sleep(5).

Signed-off-by: Electronic-Waste <[email protected]>

* test(sdk): add error output.

Signed-off-by: Electronic-Waste <[email protected]>

* test(sdk): build random image for tune.

Signed-off-by: Electronic-Waste <[email protected]>

* test(sdk): delete extra debug log.

Signed-off-by: Electronic-Waste <[email protected]>

* refactor(test/sdk): create separate workflow for tune.

Signed-off-by: Electronic-Waste <[email protected]>

* test(sdk): change api to API.

Signed-off-by: Electronic-Waste <[email protected]>

* test(sdk): change the permission of scripts.

Signed-off-by: Electronic-Waste <[email protected]>

* test(sdk): delete exit code & comment image pulling.

Signed-off-by: Electronic-Waste <[email protected]>

* test(sdk): delete image pulling phase.

Signed-off-by: Electronic-Waste <[email protected]>

* test(sdk): refactor workflow file to use template.

Signed-off-by: Electronic-Waste <[email protected]>

* test(sdk): mark experiments and trial-images as not required.

Signed-off-by: Electronic-Waste <[email protected]>

* test(sdk): pass tune-api param to setup-minikube.sh.

Signed-off-by: Electronic-Waste <[email protected]>

* test(sdk): fix err in template-e2e-test.

Signed-off-by: Electronic-Waste <[email protected]>

* test(sdk): add debug logs.

Signed-off-by: Electronic-Waste <[email protected]>

* test(sdk): reorder params and delete logs.

Signed-off-by: Electronic-Waste <[email protected]>

---------

Signed-off-by: Electronic-Waste <[email protected]>
Electronic-Waste authored Aug 6, 2024
1 parent 51b246f commit b6f7cfd
Showing 9 changed files with 341 additions and 149 deletions.
34 changes: 34 additions & 0 deletions .github/workflows/e2e-test-tune-api.yaml
@@ -0,0 +1,34 @@
name: E2E Test with tune API

on:
pull_request:
paths-ignore:
- "pkg/ui/v1beta1/frontend/**"

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
e2e:
runs-on: ubuntu-22.04
timeout-minutes: 120
steps:
- name: Checkout
uses: actions/checkout@v4

- name: Setup Test Env
uses: ./.github/workflows/template-setup-e2e-test
with:
kubernetes-version: ${{ matrix.kubernetes-version }}

- name: Run e2e test with tune API
uses: ./.github/workflows/template-e2e-test
with:
tune-api: true

strategy:
fail-fast: false
matrix:
# Detail: https://hub.docker.com/r/kindest/node
kubernetes-version: ["v1.27.11", "v1.28.7", "v1.29.2"]
19 changes: 15 additions & 4 deletions .github/workflows/template-e2e-test/action.yaml
@@ -4,15 +4,17 @@ description: Run e2e test using the minikube cluster

inputs:
experiments:
required: true
required: false
description: comma delimited experiment name
default: ""
training-operator:
required: false
description: whether to deploy training-operator or not
default: false
trial-images:
required: true
required: false
description: comma delimited trial image name
default: ""
katib-ui:
required: true
description: whether to deploy katib-ui or not
@@ -21,18 +23,27 @@ inputs:
required: false
description: mysql or postgres
default: mysql
tune-api:
required: true
description: whether to execute tune-api test or not
default: false

runs:
using: composite
steps:
- name: Setup Minikube Cluster
shell: bash
run: ./test/e2e/v1beta1/scripts/gh-actions/setup-minikube.sh ${{ inputs.katib-ui }} ${{ inputs.trial-images }} ${{ inputs.experiments }}
run: ./test/e2e/v1beta1/scripts/gh-actions/setup-minikube.sh ${{ inputs.katib-ui }} ${{ inputs.tune-api }} ${{ inputs.trial-images }} ${{ inputs.experiments }}

- name: Setup Katib
shell: bash
run: ./test/e2e/v1beta1/scripts/gh-actions/setup-katib.sh ${{ inputs.katib-ui }} ${{ inputs.training-operator }} ${{ inputs.database-type }}

- name: Run E2E Experiment
shell: bash
run: ./test/e2e/v1beta1/scripts/gh-actions/run-e2e-experiment.sh ${{ inputs.experiments }}
run: |
if "${{ inputs.tune-api }}"; then
./test/e2e/v1beta1/scripts/gh-actions/run-e2e-tune-api.sh
else
./test/e2e/v1beta1/scripts/gh-actions/run-e2e-experiment.sh ${{ inputs.experiments }}
fi
2 changes: 1 addition & 1 deletion sdk/python/v1beta1/kubeflow/katib/api/katib_client.py
@@ -386,7 +386,7 @@ def tune(

# Add metrics collector to the Katib Experiment.
# For now, we only support the parameter `kind`, whose default value is `StdOut`, to specify the kind of metrics collector.
experiment.spec.metrics_collector = models.V1beta1MetricsCollectorSpec(
experiment.spec.metrics_collector_spec = models.V1beta1MetricsCollectorSpec(
collector=models.V1beta1CollectorSpec(kind=metrics_collector_config["kind"])
)

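For context on the one-line fix above: the Experiment spec exposes the metrics collector under `metrics_collector_spec`, so the old assignment to `metrics_collector` set an attribute the generated model does not declare, which is likely why the collector `kind` was silently dropped from the submitted Experiment. Below is a minimal sketch of how a caller exercises this path through the public `tune()` API; the Experiment name, objective, and search space are illustrative, and `metrics_collector_config` is assumed to be the corresponding `tune()` argument, as the surrounding code suggests.

from kubeflow.katib import KatibClient, search

def objective(parameters):
    # Toy objective printing the metric that the StdOut collector scrapes.
    result = 4 * int(parameters["a"]) - float(parameters["b"]) ** 2
    print(f"result={result}")

KatibClient().tune(
    name="tune-metrics-collector-example",  # hypothetical Experiment name
    objective=objective,
    parameters={"a": search.int(min=10, max=20), "b": search.double(min=0.1, max=0.2)},
    objective_metric_name="result",
    max_trial_count=4,
    # Only the `kind` key is supported; after this fix it lands in experiment.spec.metrics_collector_spec.
    metrics_collector_config={"kind": "StdOut"},
)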
13 changes: 10 additions & 3 deletions test/e2e/v1beta1/scripts/gh-actions/build-load.sh
@@ -25,9 +25,10 @@ pushd .
cd "$(dirname "$0")/../../../../.."
trap popd EXIT

TRIAL_IMAGES=${1:-""}
EXPERIMENTS=${2:-""}
DEPLOY_KATIB_UI=${3:-false}
DEPLOY_KATIB_UI=${1:-false}
TUNE_API=${2:-false}
TRIAL_IMAGES=${3:-""}
EXPERIMENTS=${4:-""}

REGISTRY="docker.io/kubeflowkatib"
TAG="e2e-test"
@@ -162,6 +163,12 @@ for name in "${TRIAL_IMAGE_ARRAY[@]}"; do
run "$name" "examples/$VERSION/trial-images/$name/Dockerfile"
done

# Testing image for tune function
if "$TUNE_API"; then
echo -e "\nPulling and building testing image for tune function..."
_build_containers "suggestion-hyperopt" "$CMD_PREFIX/suggestion/hyperopt/$VERSION/Dockerfile"
fi

echo -e "\nCleanup Build Cache...\n"
docker buildx prune -f

139 changes: 1 addition & 138 deletions test/e2e/v1beta1/scripts/gh-actions/run-e2e-experiment.py
@@ -1,13 +1,13 @@
import argparse
import logging
import time

from kubeflow.katib import ApiClient
from kubeflow.katib import KatibClient
from kubeflow.katib import models
from kubeflow.katib.constants import constants
from kubeflow.katib.utils.utils import FakeResponse
from kubernetes import client
from verify import verify_experiment_results
import yaml

# Experiment timeout is 40 min.
@@ -17,143 +17,6 @@
logging.basicConfig(level=logging.INFO)


def verify_experiment_results(
katib_client: KatibClient,
experiment: models.V1beta1Experiment,
exp_name: str,
exp_namespace: str,
):

# Get the best objective metric.
best_objective_metric = None
for metric in experiment.status.current_optimal_trial.observation.metrics:
if metric.name == experiment.spec.objective.objective_metric_name:
best_objective_metric = metric
break

if best_objective_metric is None:
raise Exception(
"Unable to get the best metrics for objective: {}. Current Optimal Trial: {}".format(
experiment.spec.objective.objective_metric_name,
experiment.status.current_optimal_trial,
)
)

# Get Experiment Succeeded reason.
for c in experiment.status.conditions:
if (
c.type == constants.EXPERIMENT_CONDITION_SUCCEEDED
and c.status == constants.CONDITION_STATUS_TRUE
):
succeeded_reason = c.reason
break

trials_completed = experiment.status.trials_succeeded or 0
trials_completed += experiment.status.trials_early_stopped or 0
max_trial_count = experiment.spec.max_trial_count

# If Experiment is Succeeded because of Max Trial Reached, all Trials must be completed.
if (
succeeded_reason == "ExperimentMaxTrialsReached"
and trials_completed != max_trial_count
):
raise Exception(
"All Trials must be Completed. Max Trial count: {}, Experiment status: {}".format(
max_trial_count, experiment.status
)
)

# If Experiment is Succeeded because of Goal reached, the metrics must be correct.
if succeeded_reason == "ExperimentGoalReached" and (
(
experiment.spec.objective.type == "minimize"
and float(best_objective_metric.min) > float(experiment.spec.objective.goal)
)
or (
experiment.spec.objective.type == "maximize"
and float(best_objective_metric.max) < float(experiment.spec.objective.goal)
)
):
raise Exception(
"Experiment goal is reached, but metrics are incorrect. "
f"Experiment objective: {experiment.spec.objective}. "
f"Experiment best objective metric: {best_objective_metric}"
)

# Verify Suggestion's resources. Suggestion name = Experiment name.
suggestion = katib_client.get_suggestion(exp_name, exp_namespace)

# For the Never or FromVolume resume policies Suggestion must be Succeeded.
# For the LongRunning resume policy Suggestion must be always Running.
for c in suggestion.status.conditions:
if (
c.type == constants.EXPERIMENT_CONDITION_SUCCEEDED
and c.status == constants.CONDITION_STATUS_TRUE
and experiment.spec.resume_policy == "LongRunning"
):
raise Exception(
f"Suggestion is Succeeded while Resume Policy is {experiment.spec.resume_policy}."
f"Suggestion conditions: {suggestion.status.conditions}"
)
elif (
c.type == constants.EXPERIMENT_CONDITION_RUNNING
and c.status == constants.CONDITION_STATUS_TRUE
and experiment.spec.resume_policy != "LongRunning"
):
raise Exception(
f"Suggestion is Running while Resume Policy is {experiment.spec.resume_policy}."
f"Suggestion conditions: {suggestion.status.conditions}"
)

# For Never and FromVolume resume policies verify Suggestion's resources.
if (
experiment.spec.resume_policy == "Never"
or experiment.spec.resume_policy == "FromVolume"
):
resource_name = exp_name + "-" + experiment.spec.algorithm.algorithm_name

# Suggestion's Service and Deployment should be deleted.
for i in range(10):
try:
client.AppsV1Api().read_namespaced_deployment(
resource_name, exp_namespace
)
except client.ApiException as e:
if e.status == 404:
break
else:
raise e
# Deployment deletion might take some time.
time.sleep(1)
if i == 10:
raise Exception(
"Suggestion Deployment is still alive for Resume Policy: {}".format(
experiment.spec.resume_policy
)
)

try:
client.CoreV1Api().read_namespaced_service(resource_name, exp_namespace)
except client.ApiException as e:
if e.status != 404:
raise e
else:
raise Exception(
"Suggestion Service is still alive for Resume Policy: {}".format(
experiment.spec.resume_policy
)
)

# For FromVolume resume policy PVC should not be deleted.
if experiment.spec.resume_policy == "FromVolume":
try:
client.CoreV1Api().read_namespaced_persistent_volume_claim(
resource_name, exp_namespace
)
except client.ApiException:
raise Exception("PVC is deleted for FromVolume Resume Policy")


def run_e2e_experiment(
katib_client: KatibClient,
experiment: models.V1beta1Experiment,
97 changes: 97 additions & 0 deletions test/e2e/v1beta1/scripts/gh-actions/run-e2e-tune-api.py
@@ -0,0 +1,97 @@
import argparse
import logging

from kubeflow.katib import KatibClient
from kubeflow.katib import search
from kubernetes import client
from verify import verify_experiment_results

# Experiment timeout is 40 min.
EXPERIMENT_TIMEOUT = 60 * 40

# The default logging config.
logging.basicConfig(level=logging.INFO)


def run_e2e_experiment_create_by_tune(
katib_client: KatibClient,
exp_name: str,
exp_namespace: str,
):
# Create Katib Experiment and wait until it is finished.
logging.debug("Creating Experiment: {}/{}".format(exp_namespace, exp_name))

# Use the test case from get-started tutorial.
# https://www.kubeflow.org/docs/components/katib/getting-started/#getting-started-with-katib-python-sdk
# [1] Create an objective function.
def objective(parameters):
import time
time.sleep(5)
result = 4 * int(parameters["a"]) - float(parameters["b"]) ** 2
print(f"result={result}")

# [2] Create hyperparameter search space.
parameters = {
"a": search.int(min=10, max=20),
"b": search.double(min=0.1, max=0.2)
}

# [3] Create Katib Experiment with 4 Trials and 2 CPUs per Trial.
# And Wait until Experiment reaches Succeeded condition.
katib_client.tune(
name=exp_name,
namespace=exp_namespace,
objective=objective,
parameters=parameters,
objective_metric_name="result",
max_trial_count=4,
resources_per_trial={"cpu": "2"},
)
experiment = katib_client.wait_for_experiment_condition(
exp_name, exp_namespace, timeout=EXPERIMENT_TIMEOUT
)

# Verify the Experiment results.
verify_experiment_results(katib_client, experiment, exp_name, exp_namespace)

# Print the Experiment and Suggestion.
logging.debug(katib_client.get_experiment(exp_name, exp_namespace))
logging.debug(katib_client.get_suggestion(exp_name, exp_namespace))


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--namespace", type=str, required=True, help="Namespace for the Katib E2E test",
)
parser.add_argument(
"--verbose", action="store_true", help="Verbose output for the Katib E2E test",
)
args = parser.parse_args()

if args.verbose:
logging.getLogger().setLevel(logging.DEBUG)

katib_client = KatibClient()

namespace_labels = client.CoreV1Api().read_namespace(args.namespace).metadata.labels
if 'katib.kubeflow.org/metrics-collector-injection' not in namespace_labels:
namespace_labels['katib.kubeflow.org/metrics-collector-injection'] = 'enabled'
client.CoreV1Api().patch_namespace(args.namespace, {'metadata': {'labels': namespace_labels}})

# Test with run_e2e_experiment_create_by_tune
exp_name = "tune-example"
exp_namespace = args.namespace
try:
run_e2e_experiment_create_by_tune(katib_client, exp_name, exp_namespace)
logging.info("---------------------------------------------------------------")
logging.info(f"E2E is succeeded for Experiment created by tune: {exp_namespace}/{exp_name}")
except Exception as e:
logging.info("---------------------------------------------------------------")
logging.info(f"E2E is failed for Experiment created by tune: {exp_namespace}/{exp_name}")
raise e
finally:
# Delete the Experiment.
logging.info("---------------------------------------------------------------")
logging.info("---------------------------------------------------------------")
katib_client.delete_experiment(exp_name, exp_namespace)
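The composite action above runs this script through test/e2e/v1beta1/scripts/gh-actions/run-e2e-tune-api.sh, which is referenced in the workflow but not shown in this view. A plausible invocation, assuming the wrapper simply forwards a target namespace and verbosity to the flags defined in the argparse block, would be:

# Assumed command line; the actual one lives in run-e2e-tune-api.sh.
python test/e2e/v1beta1/scripts/gh-actions/run-e2e-tune-api.py --namespace default --verbose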