diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 82b26dc8..3e1ca71d 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -38,6 +38,9 @@ jobs:
       - name: Build image
         run: sudo make build
 
+      - name: Build image (Jupyter)
+        run: sudo make build FLAVOUR=jupyter
+
       - name: Get Artifact Name
         id: artifact
         run: |
diff --git a/.github/workflows/integration.yaml b/.github/workflows/integration.yaml
index 07e0fc04..c15f0c23 100644
--- a/.github/workflows/integration.yaml
+++ b/.github/workflows/integration.yaml
@@ -52,9 +52,16 @@ jobs:
           # Import artifact into microk8s to be used in integration tests
           sudo make import TARGET=microk8s PREFIX=test- REPOSITORY=ghcr.io/canonical/ \
             -o $(find .make_cache -name "*.tag")
+
+          sg microk8s -c "make tests"
+
+      - name: Run tests (Jupyter)
+        run: |
           # Import artifact into docker with new tag
-          sudo make jupyter TARGET=docker REPOSITORY=ghcr.io/canonical/ PREFIX=test- \
+          sudo make import \
+            FLAVOUR=jupyter TARGET=microk8s \
+            TAG=$(yq .version rockcraft.yaml) \
+            REPOSITORY=ghcr.io/canonical/ PREFIX=test- \
            -o $(find .make_cache -name "*.tag")
-
-          sg microk8s -c "make tests"
+
+          sg microk8s -c "make tests FLAVOUR=jupyter"
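
Reviewer note: to exercise the new Jupyter flavour locally the way these two workflows do, the steps compose as below (a sketch, assuming a host with microk8s and `yq` installed and the invoking user in the `microk8s` group):

```shell
# Build the Jupyter-flavoured OCI archive
sudo make build FLAVOUR=jupyter

# Import it into the microk8s registry under the test name; TAG is pinned to
# the Spark version, mirroring the workflow step above
sudo make import \
  FLAVOUR=jupyter TARGET=microk8s \
  TAG=$(yq .version rockcraft.yaml) \
  REPOSITORY=ghcr.io/canonical/ PREFIX=test- \
  -o $(find .make_cache -name "*.tag")

# Run the Jupyter integration suite
sg microk8s -c "make tests FLAVOUR=jupyter"
```
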
-z "$RISK" ] && [ "${RISK}" != "no-risk" ]; then TAG=${TRACK}_${RISK}; else TAG=${TRACK}; fi - + # Import artifact into docker with new tag - sudo make jupyter REPOSITORY=${REPOSITORY} TAG=${TAG}\ + sudo make import TARGET=docker FLAVOUR=jupyter \ + REPOSITORY=${REPOSITORY} TAG=${TAG}\ -o $(find .make_cache -name "*.tag") - - IMAGE_NAME=$(make REPOSITORY=${REPOSITORY} TAG=${TAG} help | grep "Jupyter\:" | cut -d ":" -f2 | xargs) - - echo "Publishing ${IMAGE_NAME}:${TAG}" + + IMAGE_NAME=$(make help FLAVOUR=jupyter REPOSITORY=${REPOSITORY} TAG=${TAG} help | grep "Image\:" | cut -d ":" -f2 | xargs) + + echo "Publishing ${IMAGE_NAME}:${TAG}" docker push ${IMAGE_NAME}:${TAG} - - if [[ "$RISK" == "edge" ]]; then - VERSION_TAG="${{ needs.release_checks.outputs.version }}-${{ needs.release_checks.outputs.base }}_edge" - - docker tag ${IMAGE_NAME}:${TAG} ${IMAGE_NAME}:${VERSION_TAG} - - echo "Publishing ${IMAGE_NAME}:${VERSION_TAG}" + + if [[ "$RISK" == "edge" ]]; then + VERSION_LONG=$(make help FLAVOUR=jupyter | grep "Tag\:" | cut -d ":" -f2 | xargs) + VERSION_TAG="${VERSION_LONG}-${{ needs.release_checks.outputs.base }}_edge" + + docker tag ${IMAGE_NAME}:${TAG} ${IMAGE_NAME}:${VERSION_TAG} + + echo "Publishing ${IMAGE_NAME}:${VERSION_TAG}" docker push ${IMAGE_NAME}:${VERSION_TAG} fi - - - diff --git a/Makefile b/Makefile index a1348b27..31dc5e99 100644 --- a/Makefile +++ b/Makefile @@ -15,6 +15,7 @@ REPOSITORY := PREFIX := TARGET := docker PLATFORM := amd64 +FLAVOUR := "spark" # ====================== # INTERNAL VARIABLES @@ -26,22 +27,33 @@ $(shell mkdir -p $(_MAKE_DIR)) K8S_TAG := $(_MAKE_DIR)/.k8s_tag IMAGE_NAME := $(shell yq .name rockcraft.yaml) -VERSION := $(shell yq .version rockcraft.yaml) -TAG := $(VERSION) +VERSION := $(shell yq .version rockcraft.yaml) -BASE_NAME=$(IMAGE_NAME)_$(VERSION)_$(PLATFORM).tar +VERSION_FLAVOUR=$(shell grep "version:$(FLAVOUR)" rockcraft.yaml | sed "s/^#//" | cut -d ":" -f3) _ROCK_OCI=$(IMAGE_NAME)_$(VERSION)_$(PLATFORM).rock -_TMP_OCI_NAME := stage-$(IMAGE_NAME) -_TMP_OCI_TAG := $(_MAKE_DIR)/$(_TMP_OCI_NAME)/$(TAG).tag - CHARMED_OCI_FULL_NAME=$(REPOSITORY)$(PREFIX)$(IMAGE_NAME) -CHARMED_OCI_TAG := $(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)/$(TAG).tag +CHARMED_OCI_JUPYTER=$(CHARMED_OCI_FULL_NAME)-jupyterlab + +ifeq ($(FLAVOUR), jupyter) +NAME=$(CHARMED_OCI_JUPYTER) +TAG=$(VERSION)-$(VERSION_FLAVOUR) +BASE_NAME=$(IMAGE_NAME)-jupyterlab_$(VERSION)_$(PLATFORM).tar +else +NAME=$(CHARMED_OCI_FULL_NAME) +TAG=$(VERSION) +BASE_NAME=$(IMAGE_NAME)_$(VERSION)_$(PLATFORM).tar +endif + +FTAG=$(_MAKE_DIR)/$(NAME)/$(TAG) + +CHARMED_OCI_TAG := $(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)/$(TAG) +CHARMED_OCI_JUPYTER_TAG := $(_MAKE_DIR)/$(CHARMED_OCI_JUPYTER)/$(TAG) -CHARMED_OCI_JUPYTER=$(CHARMED_OCI_FULL_NAME)-jupyterlab4 -CHARMED_OCI_JUPYTER_TAG := $(_MAKE_DIR)/$(CHARMED_OCI_JUPYTER)/$(TAG).tag +_TMP_OCI_NAME := stage-$(IMAGE_NAME) +_TMP_OCI_TAG := $(_MAKE_DIR)/$(_TMP_OCI_NAME)/$(TAG) help: @echo "---------------HELP-----------------" @@ -49,10 +61,11 @@ help: @echo "Version: $(VERSION)" @echo "Platform: $(PLATFORM)" @echo " " - @echo "Artifact: $(BASE_NAME)" + @echo "Flavour: $(FLAVOUR)" @echo " " - @echo "Image: $(CHARMED_OCI_FULL_NAME)" - @echo "Jupyter: $(CHARMED_OCI_JUPYTER)" + @echo "Image: $(NAME)" + @echo "Tag: $(TAG)" + @echo "Artifact: $(BASE_NAME)" @echo " " @echo "Type 'make' followed by one of these keywords:" @echo " " @@ -67,18 +80,13 @@ $(_ROCK_OCI): rockcraft.yaml @echo "=== Building Charmed Image ===" rockcraft pack -$(_TMP_OCI_TAG): $(_ROCK_OCI) +$(_TMP_OCI_TAG).tag: 
diff --git a/Makefile b/Makefile
index a1348b27..31dc5e99 100644
--- a/Makefile
+++ b/Makefile
@@ -15,6 +15,7 @@ REPOSITORY :=
 PREFIX :=
 TARGET := docker
 PLATFORM := amd64
+FLAVOUR := spark
 
 # ======================
 # INTERNAL VARIABLES
@@ -26,22 +27,33 @@ $(shell mkdir -p $(_MAKE_DIR))
 K8S_TAG := $(_MAKE_DIR)/.k8s_tag
 
 IMAGE_NAME := $(shell yq .name rockcraft.yaml)
-VERSION := $(shell yq .version rockcraft.yaml)
-TAG := $(VERSION)
+VERSION := $(shell yq .version rockcraft.yaml)
 
-BASE_NAME=$(IMAGE_NAME)_$(VERSION)_$(PLATFORM).tar
+VERSION_FLAVOUR=$(shell grep "version:$(FLAVOUR)" rockcraft.yaml | sed "s/^#//" | cut -d ":" -f3)
 
 _ROCK_OCI=$(IMAGE_NAME)_$(VERSION)_$(PLATFORM).rock
 
-_TMP_OCI_NAME := stage-$(IMAGE_NAME)
-_TMP_OCI_TAG := $(_MAKE_DIR)/$(_TMP_OCI_NAME)/$(TAG).tag
-
 CHARMED_OCI_FULL_NAME=$(REPOSITORY)$(PREFIX)$(IMAGE_NAME)
-CHARMED_OCI_TAG := $(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)/$(TAG).tag
+CHARMED_OCI_JUPYTER=$(CHARMED_OCI_FULL_NAME)-jupyterlab
+
+ifeq ($(FLAVOUR), jupyter)
+NAME=$(CHARMED_OCI_JUPYTER)
+TAG=$(VERSION)-$(VERSION_FLAVOUR)
+BASE_NAME=$(IMAGE_NAME)-jupyterlab_$(VERSION)_$(PLATFORM).tar
+else
+NAME=$(CHARMED_OCI_FULL_NAME)
+TAG=$(VERSION)
+BASE_NAME=$(IMAGE_NAME)_$(VERSION)_$(PLATFORM).tar
+endif
+
+FTAG=$(_MAKE_DIR)/$(NAME)/$(TAG)
+
+CHARMED_OCI_TAG := $(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)/$(TAG)
+CHARMED_OCI_JUPYTER_TAG := $(_MAKE_DIR)/$(CHARMED_OCI_JUPYTER)/$(TAG)
 
-CHARMED_OCI_JUPYTER=$(CHARMED_OCI_FULL_NAME)-jupyterlab4
-CHARMED_OCI_JUPYTER_TAG := $(_MAKE_DIR)/$(CHARMED_OCI_JUPYTER)/$(TAG).tag
+_TMP_OCI_NAME := stage-$(IMAGE_NAME)
+_TMP_OCI_TAG := $(_MAKE_DIR)/$(_TMP_OCI_NAME)/$(TAG)
 
 help:
 	@echo "---------------HELP-----------------"
@@ -49,10 +61,11 @@ help:
 	@echo "Version: $(VERSION)"
 	@echo "Platform: $(PLATFORM)"
 	@echo " "
-	@echo "Artifact: $(BASE_NAME)"
+	@echo "Flavour: $(FLAVOUR)"
 	@echo " "
-	@echo "Image: $(CHARMED_OCI_FULL_NAME)"
-	@echo "Jupyter: $(CHARMED_OCI_JUPYTER)"
+	@echo "Image: $(NAME)"
+	@echo "Tag: $(TAG)"
+	@echo "Artifact: $(BASE_NAME)"
 	@echo " "
 	@echo "Type 'make' followed by one of these keywords:"
 	@echo " "
@@ -67,18 +80,13 @@ $(_ROCK_OCI): rockcraft.yaml
 	@echo "=== Building Charmed Image ==="
 	rockcraft pack
 
-$(_TMP_OCI_TAG): $(_ROCK_OCI)
+$(_TMP_OCI_TAG).tag: $(_ROCK_OCI)
 	skopeo --insecure-policy \
 		copy \
 		oci-archive:"$(_ROCK_OCI)" \
 		docker-daemon:"$(_TMP_OCI_NAME):$(TAG)"
 	if [ ! -d "$(_MAKE_DIR)/$(_TMP_OCI_NAME)" ]; then mkdir -p "$(_MAKE_DIR)/$(_TMP_OCI_NAME)"; fi
-	touch $(_TMP_OCI_TAG)
-
-$(CHARMED_OCI_TAG): $(_TMP_OCI_TAG) build/Dockerfile
-	docker build -t "$(CHARMED_OCI_FULL_NAME):$(TAG)" --build-arg BASE_IMAGE="$(_TMP_OCI_NAME):$(TAG)" -f build/Dockerfile .
-	if [ ! -d "$(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)" ]; then mkdir -p "$(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)"; fi
-	touch $(CHARMED_OCI_TAG)
+	touch $(_TMP_OCI_TAG).tag
 
 $(K8S_TAG):
 	@echo "=== Setting up and configure local Microk8s cluster ==="
@@ -88,42 +96,52 @@ $(K8S_TAG):
 
 microk8s: $(K8S_TAG)
 
-$(_MAKE_DIR)/%/$(TAG).tar: $(_MAKE_DIR)/%/$(TAG).tag
-	docker save $*:$(TAG) > $(_MAKE_DIR)/$*/$(TAG).tar
+$(CHARMED_OCI_TAG).tag: $(_TMP_OCI_TAG).tag build/Dockerfile
+	docker build -t "$(CHARMED_OCI_FULL_NAME):$(TAG)" \
+		--build-arg BASE_IMAGE="$(_TMP_OCI_NAME):$(TAG)" \
+		-f build/Dockerfile .
+	if [ ! -d "$(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)" ]; then mkdir -p "$(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)"; fi
+	touch $(CHARMED_OCI_TAG).tag
 
-$(CHARMED_OCI_JUPYTER_TAG): $(CHARMED_OCI_TAG) build/Dockerfile.jupyter
-	docker build -t "$(CHARMED_OCI_JUPYTER):$(TAG)" --build-arg BASE_IMAGE="$(CHARMED_OCI_FULL_NAME):$(TAG)" -f build/Dockerfile.jupyter .
+$(CHARMED_OCI_JUPYTER_TAG).tag: $(CHARMED_OCI_TAG).tag build/Dockerfile.jupyter files/jupyter
+	docker build -t "$(CHARMED_OCI_JUPYTER):$(TAG)" \
+		--build-arg BASE_IMAGE="$(CHARMED_OCI_FULL_NAME):$(TAG)" \
+		--build-arg JUPYTERLAB_VERSION="$(VERSION_FLAVOUR)" \
+		-f build/Dockerfile.jupyter .
 	if [ ! -d "$(_MAKE_DIR)/$(CHARMED_OCI_JUPYTER)" ]; then mkdir -p "$(_MAKE_DIR)/$(CHARMED_OCI_JUPYTER)"; fi
-	touch $(CHARMED_OCI_JUPYTER_TAG)
+	touch $(CHARMED_OCI_JUPYTER_TAG).tag
 
-$(BASE_NAME): $(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)/$(TAG).tar
-	@echo "=== Creating $(BASE_NAME) OCI archive ==="
-	cp $(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)/$(TAG).tar $(BASE_NAME)
+$(_MAKE_DIR)/%/$(TAG).tar: $(_MAKE_DIR)/%/$(TAG).tag
+	docker save $*:$(TAG) > $(_MAKE_DIR)/$*/$(TAG).tar
 
-build: $(BASE_NAME)
+$(BASE_NAME): $(FTAG).tar
+	@echo "=== Creating $(BASE_NAME) OCI archive (flavour: $(FLAVOUR)) ==="
+	cp $(FTAG).tar $(BASE_NAME)
 
-jupyter: $(_MAKE_DIR)/$(CHARMED_OCI_JUPYTER)/$(TAG).tar
-	@echo "=== Creating $(BASE_NAME) OCI jupyter archive ==="
-	cp $(_MAKE_DIR)/$(CHARMED_OCI_JUPYTER)/$(TAG).tar $(IMAGE_NAME)-jupyter_$(VERSION)_$(PLATFORM).tar
+build: $(BASE_NAME)
 
 ifeq ($(TARGET), docker)
 import: build
-	@echo "=== Importing image $(CHARMED_OCI_FULL_NAME):$(TAG) into docker ==="
+	@echo "=== Importing image $(NAME):$(TAG) into docker ==="
 	$(eval IMAGE := $(shell docker load -i $(BASE_NAME)))
-	docker tag $(lastword $(IMAGE)) $(CHARMED_OCI_FULL_NAME):$(TAG)
-	if [ ! -d "$(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)" ]; then mkdir -p "$(_MAKE_DIR)/$(CHARMED_OCI_FULL_NAME)"; fi
-	touch $(CHARMED_OCI_TAG)
+	docker tag $(lastword $(IMAGE)) $(NAME):$(TAG)
+	if [ ! -d "$(_MAKE_DIR)/$(NAME)" ]; then mkdir -p "$(_MAKE_DIR)/$(NAME)"; fi
+	touch $(FTAG).tag
 endif
 
 ifeq ($(TARGET), microk8s)
 import: $(K8S_TAG) build
-	@echo "=== Importing image $(CHARMED_OCI_FULL_NAME):$(TAG) into Microk8s container registry ==="
-	microk8s ctr images import --base-name $(CHARMED_OCI_FULL_NAME):$(TAG) $(BASE_NAME)
+	@echo "=== Importing image $(NAME):$(TAG) into Microk8s container registry ==="
+	microk8s ctr images import --base-name $(NAME):$(TAG) $(BASE_NAME)
 endif
 
 tests:
 	@echo "=== Running Integration Tests ==="
+ifeq ($(FLAVOUR), jupyter)
+	/bin/bash ./tests/integration/integration-tests-jupyter.sh
+else
 	/bin/bash ./tests/integration/integration-tests.sh
+endif
 
 clean:
 	@echo "=== Cleaning environment ==="
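
Reviewer note: the default was previously declared as `FLAVOUR := "spark"`; in Make the quotes become part of the value, so the `grep "version:$(FLAVOUR)"` lookup could never match the `# version:spark:...` marker — hence the unquoted default above. For reference, here is what the flavour switch resolves to for the two supported values (a sketch, not captured output, assuming `yq .name rockcraft.yaml` yields `charmed-spark` and the version markers added to rockcraft.yaml below):

```shell
make help
#   Flavour:  spark
#   Image:    charmed-spark                       (no REPOSITORY/PREFIX set)
#   Tag:      3.4.2                               (TAG = VERSION)
#   Artifact: charmed-spark_3.4.2_amd64.tar

make help FLAVOUR=jupyter
#   Flavour:  jupyter
#   Image:    charmed-spark-jupyterlab
#   Tag:      3.4.2-4.0.11                        (TAG = VERSION-VERSION_FLAVOUR)
#   Artifact: charmed-spark-jupyterlab_3.4.2_amd64.tar
```
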
-d "$(_MAKE_DIR)/$(NAME)" ]; then mkdir -p "$(_MAKE_DIR)/$(NAME)"; fi + touch $(FTAG).tag endif ifeq ($(TARGET), microk8s) import: $(K8S_TAG) build - @echo "=== Importing image $(CHARMED_OCI_FULL_NAME):$(TAG) into Microk8s container registry ===" - microk8s ctr images import --base-name $(CHARMED_OCI_FULL_NAME):$(TAG) $(BASE_NAME) + @echo "=== Importing image $(NAME):$(TAG) into Microk8s container registry ===" + microk8s ctr images import --base-name $(NAME):$(TAG) $(BASE_NAME) endif tests: @echo "=== Running Integration Tests ===" +ifeq ($(FLAVOUR), jupyter) + /bin/bash ./tests/integration/integration-tests-jupyter.sh +else /bin/bash ./tests/integration/integration-tests.sh +endif clean: @echo "=== Cleaning environment ===" diff --git a/README.md b/README.md index 922426f1..960bb501 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ https://github.com/canonical/charmed-spark-rock/pkgs/container/charmed-spark The image can be used straight away when running Spark on Kubernetes by setting the appropriate configuration property: ```shell -spark.kubernetes.container.image=ghcr.io/canonical/charmed-spark:3.4.2-22.04_edge +spark.kubernetes.container.image=ghcr.io/canonical/charmed-spark:3.4-22.04_edge ``` ### Using `spark8t` CLI @@ -49,7 +49,7 @@ spark.kubernetes.container.image=ghcr.io/canonical/charmed-spark:3.4.2-22.04_edg The `spark8t` CLI tooling interacts with the K8s API to create, manage and delete K8s resources representing the Spark service account. Make sure that the kube config file is correctly loaded into the container, e.g. ```shell -docker run --name chamed-spark -v /path/to/kube/config:/var/lib/spark/.kube/config ghcr.io/canonical/charmed-spark:3.4.2-22.04_edge +docker run --name chamed-spark -v /path/to/kube/config:/var/lib/spark/.kube/config ghcr.io/canonical/charmed-spark:3.4-22.04_edge ``` Note that this will start the image and a long-living service, allowing you to exec commands: @@ -59,7 +59,7 @@ docker exec charmed-spark spark-client.service-account-registry list If you prefer to run one-shot commands, without having the Charmed Spark image running, use `\; exec` prefix, e.g. ```shell -docker run -v ... ghcr.io/canonical/charmed-spark:3.4.2-22.04_edge \; exec spark-client.service-account-registry list +docker run -v ... ghcr.io/canonical/charmed-spark:3.4-22.04_edge \; exec spark-client.service-account-registry list ``` For more information about spark-client API and `spark8t` tooling, please refer to [here](https://discourse.charmhub.io/t/spark-client-snap-how-to-manage-spark-accounts/8959). 
@@ -71,7 +71,7 @@ Charmed Spark Rock Image is delivered with Pebble already included in order to m
 #### Starting History Server
 
 ```shell
-docker run ghcr.io/canonical/charmed-spark:3.4.2-22.04_edge \; start history-server
+docker run ghcr.io/canonical/charmed-spark:3.4-22.04_edge \; start history-server
 ```
 
 ### Running Jupyter Lab
@@ -86,7 +86,7 @@ To start a JupyterLab server using the `charmed-spark-jupyter` image, use
 docker run \
   -v /path/to/kube/config:/var/lib/spark/.kube/config \
   -p <port>:8888
-  ghcr.io/canonical/charmed-spark-jupyter:3.4.1-22.04_edge \
+  ghcr.io/canonical/charmed-spark-jupyter:3.4-22.04_edge \
   --username <username> --namespace <namespace>
 ```
diff --git a/build/Dockerfile.jupyter b/build/Dockerfile.jupyter
index b87d49e0..e73abd4e 100644
--- a/build/Dockerfile.jupyter
+++ b/build/Dockerfile.jupyter
@@ -1,11 +1,16 @@
 ARG BASE_IMAGE=base-charmed-spark:latest
+ARG JUPYTERLAB_VERSION=4.0.0
 FROM $BASE_IMAGE
+ARG JUPYTERLAB_VERSION
 
 USER root
 
-RUN rm /var/lib/pebble/default/layers/*.yaml
-RUN python3 -m pip install "jupyterlab~=4.0"
+RUN rm /var/lib/pebble/default/layers/*.yaml /opt/pebble/*.sh
+
+RUN python3 -m pip install "jupyterlab==$JUPYTERLAB_VERSION"
 
 COPY ./files/jupyter/pebble/layers.yaml /var/lib/pebble/default/layers/001-charmed-jupyter.yaml
+COPY ./files/jupyter/bin/jupyterlab-server.sh /opt/pebble/jupyterlab-server.sh
+RUN chown _daemon_:_daemon_ /opt/pebble/jupyterlab-server.sh
 
 USER _daemon_
diff --git a/files/jupyter/bin/jupyterlab-server.sh b/files/jupyter/bin/jupyterlab-server.sh
new file mode 100755
index 00000000..05a960a1
--- /dev/null
+++ b/files/jupyter/bin/jupyterlab-server.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+sleep 5
+
+export PYSPARK_DRIVER_PYTHON=jupyter
+
+# This variable is injected when running a notebook from Kubeflow.
+if [ ! -z "${NB_PREFIX}" ]; then
+  NB_PREFIX_ARG="--NotebookApp.base_url '${NB_PREFIX}'"
+fi
+
+export PYSPARK_DRIVER_PYTHON_OPTS="lab --no-browser --port=8888 ${NB_PREFIX_ARG} --ip=0.0.0.0 --NotebookApp.token='' --notebook-dir=/var/lib/spark/notebook"
+
+echo "PYSPARK_DRIVER_PYTHON_OPTS: ${PYSPARK_DRIVER_PYTHON_OPTS}"
+
+spark-client.pyspark $*
diff --git a/files/jupyter/pebble/layers.yaml b/files/jupyter/pebble/layers.yaml
index d2fe4ba4..3ecb33a1 100644
--- a/files/jupyter/pebble/layers.yaml
+++ b/files/jupyter/pebble/layers.yaml
@@ -1,9 +1,6 @@
 services:
   jupyter:
-    command: "spark-client.pyspark"
+    command: "/opt/pebble/jupyterlab-server.sh"
     summary: "This is the Spark-powered Jupyter service"
     override: replace
     startup: enabled
-    environment:
-      PYSPARK_DRIVER_PYTHON: jupyter
-      PYSPARK_DRIVER_PYTHON_OPTS: "lab --no-browser --port=8888 --ip=0.0.0.0 --NotebookApp.token='' --notebook-dir=/var/lib/spark/notebook"
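
Reviewer note: the wrapper script replaces the static `environment:` block so that the Kubeflow-injected `NB_PREFIX` can be folded into the JupyterLab base URL at startup, which a fixed Pebble layer could not do. The resulting behaviour can be probed with plain curl against a running container (a sketch; the IP and prefix are illustrative, and the expected codes mirror `test_connection` in the new integration test below):

```shell
# With NB_PREFIX=jupyter-test set on the pod, the UI answers under the prefix
# while the bare root does not:
curl -o /dev/null --silent --head --write-out '%{http_code}\n' \
  "http://10.1.0.5:8888/jupyter-test/lab"    # expect 200
curl -o /dev/null --silent --head --write-out '%{http_code}\n' \
  "http://10.1.0.5:8888"                     # expect 404
```
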
diff --git a/rockcraft.yaml b/rockcraft.yaml
index 579f70d4..3a80677e 100644
--- a/rockcraft.yaml
+++ b/rockcraft.yaml
@@ -4,7 +4,11 @@ description: Spark ROCK
 license: Apache-2.0
 
 version: "3.4.2"
+# version:spark:3.4.2
+# version:jupyter:4.0.11
+
 base: ubuntu@22.04
+
 platforms:
   amd64:
@@ -17,7 +21,6 @@ environment:
   PYTHONPATH: /opt/spark/python:/opt/spark8t/python/dist:/usr/lib/python3.10/site-packages
   PATH: /usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/spark:/opt/spark/bin:/opt/spark/python/bin:/opt/spark-client/python/bin
   HOME: /var/lib/spark
-  KUBECONFIG: /var/lib/spark/.kube/config
   SPARK_USER_DATA: /var/lib/spark
   SPARK_LOG_DIR: /var/log/spark
@@ -133,7 +136,7 @@ parts:
       - python3-pip
     overlay-script: |
       mkdir -p $CRAFT_PART_INSTALL/opt/spark8t/python/dist
-      pip install --target=${CRAFT_PART_INSTALL}/opt/spark8t/python/dist https://github.com/canonical/spark-k8s-toolkit-py/releases/download/v0.0.2/spark8t-0.0.2-py3-none-any.whl
+      pip install --target=${CRAFT_PART_INSTALL}/opt/spark8t/python/dist https://github.com/canonical/spark-k8s-toolkit-py/releases/download/v0.0.3/spark8t-0.0.3-py3-none-any.whl
       rm usr/bin/pip*
     stage:
       - opt/spark8t/python/dist
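
Reviewer note: the commented `version:<flavour>:<value>` markers are machine-readable — the Makefile's `VERSION_FLAVOUR` recovers the value with the grep/sed/cut pipeline shown earlier. Checked standalone against the markers above:

```shell
# Extract the JupyterLab version from the marker line "# version:jupyter:4.0.11":
grep "version:jupyter" rockcraft.yaml | sed "s/^#//" | cut -d ":" -f3
# -> 4.0.11
```
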
diff --git a/tests/integration/integration-tests-jupyter.sh b/tests/integration/integration-tests-jupyter.sh
new file mode 100755
index 00000000..c6d8617e
--- /dev/null
+++ b/tests/integration/integration-tests-jupyter.sh
@@ -0,0 +1,190 @@
+#!/bin/bash
+
+# The integration tests are designed to test that Spark Jobs can be submitted and/or shell processes are
+# working properly with the restricted permissions of the service account starting the process. For this reason,
+# in the tests we spawn two pods:
+#
+# 1. Admin pod, that is used to create and delete service accounts
+# 2. User pod, that is used to start and execute Spark Jobs
+#
+# The Admin pod is created once at the beginning of the tests and it is used to manage Spark service accounts
+# throughout the integration tests. On the other hand, the User pod(s) are created together with the creation
+# of the Spark user (service accounts and secrets) at the beginning of each test, and they are destroyed at the
+# end of the test.
+
+NAMESPACE=tests
+
+get_spark_version(){
+  SPARK_VERSION=$(yq '(.version)' rockcraft.yaml)
+  echo "$SPARK_VERSION"
+}
+
+spark_image(){
+  echo "ghcr.io/canonical/test-charmed-spark-jupyterlab:$(get_spark_version)"
+}
+
+setup_jupyter() {
+  echo "setup_jupyter() ${1} ${2}"
+
+  USERNAME=$1
+  NAMESPACE=$2
+
+  kubectl -n $NAMESPACE exec testpod-admin -- env UU="$USERNAME" NN="$NAMESPACE" \
+    /bin/bash -c 'spark-client.service-account-registry create --username $UU --namespace $NN'
+
+  IMAGE=$(spark_image)
+  echo $IMAGE
+
+  # Create the pod with the Spark service account
+  sed -e "s%<IMAGE>%${IMAGE}%g" \
+      -e "s/<USERNAME>/${USERNAME}/g" \
+      -e "s/<NAMESPACE>/${NAMESPACE}/g" \
+      ./tests/integration/resources/jupyter.yaml | \
+    kubectl -n $NAMESPACE apply -f -
+
+  wait_for_pod charmed-spark-jupyter $NAMESPACE
+
+  # WAIT FOR SERVER TO BE UP AND RUNNING
+  sleep 10
+}
+
+cleanup_user() {
+  EXIT_CODE=$1
+  USERNAME=$2
+  NAMESPACE=$3
+
+  kubectl -n $NAMESPACE delete pod charmed-spark-jupyter --wait=true
+
+  kubectl -n $NAMESPACE exec testpod-admin -- env UU="$USERNAME" NN="$NAMESPACE" \
+    /bin/bash -c 'spark-client.service-account-registry delete --username $UU --namespace $NN'
+
+  OUTPUT=$(kubectl -n $NAMESPACE exec testpod-admin -- /bin/bash -c 'spark-client.service-account-registry list')
+
+  EXISTS=$(echo -e "$OUTPUT" | grep "$NAMESPACE:$USERNAME" | wc -l)
+
+  if [ "${EXISTS}" -ne "0" ]; then
+      exit 2
+  fi
+
+  if [ "${EXIT_CODE}" -ne "0" ]; then
+      kubectl delete ns $NAMESPACE
+      exit 1
+  fi
+}
+
+cleanup_user_success() {
+  echo "cleanup_user_success()......"
+  cleanup_user 0 spark $NAMESPACE
+}
+
+cleanup_user_failure() {
+  echo "cleanup_user_failure()......"
+  cleanup_user 1 spark $NAMESPACE
+}
+
+wait_for_pod() {
+
+  POD=$1
+  NAMESPACE=$2
+
+  SLEEP_TIME=1
+  for i in {1..5}
+  do
+    pod_status=$(kubectl -n ${NAMESPACE} get pod ${POD} | awk '{ print $3 }' | tail -n 1)
+    echo $pod_status
+    if [[ "${pod_status}" == "Running" ]]
+    then
+        echo "testpod is Running now!"
+        break
+    elif [[ "${i}" -le "5" ]]
+    then
+        echo "Waiting for the pod to come online..."
+        sleep $SLEEP_TIME
+    else
+        echo "testpod did not come up. Test Failed!"
+        exit 3
+    fi
+    SLEEP_TIME=$(expr $SLEEP_TIME \* 2);
+  done
+}
+
+setup_admin_test_pod() {
+  kubectl create ns $NAMESPACE
+
+  echo "Creating admin test-pod"
+
+  # Create a pod with admin service account
+  yq ea '.spec.containers[0].env[0].name = "KUBECONFIG" | .spec.containers[0].env[0].value = "/var/lib/spark/.kube/config" | .metadata.name = "testpod-admin"' \
+    ./tests/integration/resources/testpod.yaml | \
+    kubectl -n $NAMESPACE apply -f -
+
+  wait_for_pod testpod-admin $NAMESPACE
+
+  MY_KUBE_CONFIG=$(cat /home/${USER}/.kube/config)
+
+  kubectl -n $NAMESPACE exec testpod-admin -- /bin/bash -c 'mkdir -p ~/.kube'
+  kubectl -n $NAMESPACE exec testpod-admin -- env KCONFIG="$MY_KUBE_CONFIG" /bin/bash -c 'echo "$KCONFIG" > ~/.kube/config'
+}
+
+teardown_test_pod() {
+  kubectl -n $NAMESPACE delete pod testpod-admin
+  kubectl delete namespace $NAMESPACE
+}
+
+get_status_code() {
+  URL=$1
+
+  STATUS_CODE=$(curl -X GET -o /dev/null --silent --head --write-out '%{http_code}\n' "${URL}")
+
+  echo $STATUS_CODE
+}
+
+test_connection(){
+  SERVICE_IP=$(kubectl get svc jupyter-service -n $NAMESPACE -o yaml | yq .spec.clusterIP)
+
+  echo "Jupyter service IP: ${SERVICE_IP}"
+
+  STATUS_CODE=$(get_status_code "http://${SERVICE_IP}:8888/jupyter-test/lab")
+
+  if [[ "${STATUS_CODE}" -ne "200" ]]; then
+      echo "200 exit code NOT returned"
+      exit 1
+  fi
+
+  STATUS_CODE=$(get_status_code "http://${SERVICE_IP}:8888/jupyter-test")
+
+  if [[ "${STATUS_CODE}" -ne "302" ]]; then
+      echo "302 exit code NOT returned"
+      exit 1
+  fi
+
+  STATUS_CODE=$(get_status_code "http://${SERVICE_IP}:8888")
+
+  if [[ "${STATUS_CODE}" -ne "404" ]]; then
+      echo "404 exit code NOT returned"
+      exit 1
+  fi
+
+}
+
+echo -e "##################################"
+echo -e "SETUP TEST POD"
+echo -e "##################################"
+
+setup_admin_test_pod
+
+echo -e "##################################"
+echo -e "START JUPYTER SERVICE"
+echo -e "##################################"
+
+(setup_jupyter spark tests && test_connection && cleanup_user_success) || cleanup_user_failure
+
+echo -e "##################################"
+echo -e "TEARDOWN ADMIN POD"
+echo -e "##################################"
+
+teardown_test_pod
+
+echo -e "##################################"
+echo -e "END OF THE TEST"
+echo -e "##################################"
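
Reviewer note: both suites now drive service-account management through the admin pod rather than the host. Distilled, the calls `setup_jupyter()` and `cleanup_user()` make are:

```shell
# Create a Spark service account from inside the admin pod...
kubectl -n tests exec testpod-admin -- env UU="spark" NN="tests" \
  /bin/bash -c 'spark-client.service-account-registry create --username $UU --namespace $NN'

# ...and verify it is registered before handing it to the user pod.
kubectl -n tests exec testpod-admin -- \
  /bin/bash -c 'spark-client.service-account-registry list' | grep "tests:spark"
```
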
diff --git a/tests/integration/integration-tests.sh b/tests/integration/integration-tests.sh
index a9267613..ead36ffe 100755
--- a/tests/integration/integration-tests.sh
+++ b/tests/integration/integration-tests.sh
@@ -1,5 +1,20 @@
 #!/bin/bash
 
+# The integration tests are designed to test that Spark Jobs can be submitted and/or shell processes are
+# working properly with the restricted permissions of the service account starting the process. For this reason,
+# in the tests we spawn two pods:
+#
+# 1. Admin pod, that is used to create and delete service accounts
+# 2. User pod, that is used to start and execute Spark Jobs
+#
+# The Admin pod is created once at the beginning of the tests and it is used to manage Spark service accounts
+# throughout the integration tests. On the other hand, the User pod(s) are created together with the creation
+# of the Spark user (service accounts and secrets) at the beginning of each test, and they are destroyed at the
+# end of the test.
+
+
+NAMESPACE=tests
+
 get_spark_version(){
   SPARK_VERSION=$(yq '(.version)' rockcraft.yaml)
   echo "$SPARK_VERSION"
@@ -27,39 +42,30 @@ validate_metrics() {
   fi
 }
 
-test_restricted_account() {
-
-  kubectl config set-context spark-context --namespace=tests --cluster=prod --user=spark
-
-  run_example_job tests spark
-}
-
 setup_user() {
-  echo "setup_user() ${1} ${2} ${3}"
+  echo "setup_user() ${1} ${2}"
 
   USERNAME=$1
   NAMESPACE=$2
 
-  kubectl create namespace ${NAMESPACE}
+  kubectl -n $NAMESPACE exec testpod-admin -- env UU="$USERNAME" NN="$NAMESPACE" \
+    /bin/bash -c 'spark-client.service-account-registry create --username $UU --namespace $NN'
 
-  if [ "$#" -gt 2 ]
-  then
-    CONTEXT=$3
-    kubectl exec testpod -- env UU="$USERNAME" NN="$NAMESPACE" CX="$CONTEXT" \
-      /bin/bash -c 'spark-client.service-account-registry create --context $CX --username $UU --namespace $NN'
-  else
-    kubectl exec testpod -- env UU="$USERNAME" NN="$NAMESPACE" \
-      /bin/bash -c 'spark-client.service-account-registry create --username $UU --namespace $NN'
-  fi
+  # Create the pod with the Spark service account
+  yq ea ".spec.serviceAccountName = \"${USERNAME}\"" \
+    ./tests/integration/resources/testpod.yaml | \
+    kubectl -n $NAMESPACE apply -f -
 
-}
+  wait_for_pod testpod $NAMESPACE
 
-setup_user_admin_context() {
-  setup_user spark tests
+  TEST_POD_TEMPLATE=$(cat tests/integration/resources/podTemplate.yaml)
+
+  kubectl -n $NAMESPACE exec testpod -- /bin/bash -c 'cp -r /opt/spark/python /var/lib/spark/'
+  kubectl -n $NAMESPACE exec testpod -- env PTEMPLATE="$TEST_POD_TEMPLATE" /bin/bash -c 'echo "$PTEMPLATE" > /etc/spark/conf/podTemplate.yaml'
 }
 
-setup_user_restricted_context() {
-  setup_user spark tests microk8s
+setup_user_context() {
+  setup_user spark $NAMESPACE
 }
 
 cleanup_user() {
@@ -67,10 +73,12 @@ cleanup_user() {
   USERNAME=$2
   NAMESPACE=$3
 
-  kubectl exec testpod -- env UU="$USERNAME" NN="$NAMESPACE" \
+  kubectl -n $NAMESPACE delete pod testpod --wait=true
+
+  kubectl -n $NAMESPACE exec testpod-admin -- env UU="$USERNAME" NN="$NAMESPACE" \
     /bin/bash -c 'spark-client.service-account-registry delete --username $UU --namespace $NN'
 
-  OUTPUT=$(kubectl exec testpod -- /bin/bash -c 'spark-client.service-account-registry list')
+  OUTPUT=$(kubectl -n $NAMESPACE exec testpod-admin -- /bin/bash -c 'spark-client.service-account-registry list')
 
   EXISTS=$(echo -e "$OUTPUT" | grep "$NAMESPACE:$USERNAME" | wc -l)
 
@@ -78,8 +86,6 @@ cleanup_user() {
     exit 2
   fi
 
-  kubectl delete namespace ${NAMESPACE}
-
   if [ "${EXIT_CODE}" -ne "0" ]; then
     exit 1
   fi
@@ -87,27 +93,29 @@ cleanup_user() {
 
 cleanup_user_success() {
   echo "cleanup_user_success()......"
-  cleanup_user 0 spark tests
+  cleanup_user 0 spark $NAMESPACE
 }
 
 cleanup_user_failure() {
   echo "cleanup_user_failure()......"
-  cleanup_user 1 spark tests
+  cleanup_user 1 spark $NAMESPACE
 }
 
-setup_test_pod() {
-  kubectl apply -f ./tests/integration/resources/testpod.yaml
+wait_for_pod() {
+
+  POD=$1
+  NAMESPACE=$2
 
   SLEEP_TIME=1
   for i in {1..5}
   do
-    pod_status=$(kubectl get pod testpod | awk '{ print $3 }' | tail -n 1)
+    pod_status=$(kubectl -n ${NAMESPACE} get pod ${POD} | awk '{ print $3 }' | tail -n 1)
     echo $pod_status
-    if [ "${pod_status}" == "Running" ]
+    if [[ "${pod_status}" == "Running" ]]
     then
         echo "testpod is Running now!"
         break
-    elif [ "${i}" -le "5" ]
+    elif [[ "${i}" -le "5" ]]
     then
         echo "Waiting for the pod to come online..."
         sleep $SLEEP_TIME
@@ -117,29 +125,41 @@ setup_test_pod() {
   fi
   SLEEP_TIME=$(expr $SLEEP_TIME \* 2);
   done
+}
+
+setup_admin_test_pod() {
+  kubectl create ns $NAMESPACE
+
+  echo "Creating admin test-pod"
+
+  # Create a pod with admin service account
+  yq ea '.spec.containers[0].env[0].name = "KUBECONFIG" | .spec.containers[0].env[0].value = "/var/lib/spark/.kube/config" | .metadata.name = "testpod-admin"' \
+    ./tests/integration/resources/testpod.yaml | \
+    kubectl -n $NAMESPACE apply -f -
+
+  wait_for_pod testpod-admin $NAMESPACE
 
   MY_KUBE_CONFIG=$(cat /home/${USER}/.kube/config)
-  TEST_POD_TEMPLATE=$(cat tests/integration/resources/podTemplate.yaml)
 
-  kubectl exec testpod -- /bin/bash -c 'mkdir -p ~/.kube'
-  kubectl exec testpod -- env KCONFIG="$MY_KUBE_CONFIG" /bin/bash -c 'echo "$KCONFIG" > ~/.kube/config'
-  kubectl exec testpod -- /bin/bash -c 'cat ~/.kube/config'
-  kubectl exec testpod -- /bin/bash -c 'cp -r /opt/spark/python /var/lib/spark/'
-  kubectl exec testpod -- env PTEMPLATE="$TEST_POD_TEMPLATE" /bin/bash -c 'echo "$PTEMPLATE" > /etc/spark/conf/podTemplate.yaml'
+  kubectl -n $NAMESPACE exec testpod-admin -- /bin/bash -c 'mkdir -p ~/.kube'
+  kubectl -n $NAMESPACE exec testpod-admin -- env KCONFIG="$MY_KUBE_CONFIG" /bin/bash -c 'echo "$KCONFIG" > ~/.kube/config'
 }
 
 teardown_test_pod() {
-  kubectl delete pod testpod
+  kubectl -n $NAMESPACE delete pod testpod
+  kubectl -n $NAMESPACE delete pod testpod-admin
+
+  kubectl delete namespace $NAMESPACE
 }
 
 run_example_job_in_pod() {
   SPARK_EXAMPLES_JAR_NAME="spark-examples_2.12-$(get_spark_version).jar"
 
-  PREVIOUS_JOB=$(kubectl get pods | grep driver | tail -n 1 | cut -d' ' -f1)
+  PREVIOUS_JOB=$(kubectl -n $NAMESPACE get pods | grep driver | tail -n 1 | cut -d' ' -f1)
 
   NAMESPACE=$1
   USERNAME=$2
 
-  kubectl exec testpod -- env UU="$USERNAME" NN="$NAMESPACE" JJ="$SPARK_EXAMPLES_JAR_NAME" IM="$(spark_image)" \
+  kubectl -n $NAMESPACE exec testpod -- env UU="$USERNAME" NN="$NAMESPACE" JJ="$SPARK_EXAMPLES_JAR_NAME" IM="$(spark_image)" \
     /bin/bash -c 'spark-client.spark-submit \
       --username $UU --namespace $NN \
       --conf spark.kubernetes.driver.request.cores=100m \
@@ -169,11 +189,11 @@ run_example_job_in_pod() {
 run_example_job_in_pod_with_pod_templates() {
   SPARK_EXAMPLES_JAR_NAME="spark-examples_2.12-$(get_spark_version).jar"
 
-  PREVIOUS_JOB=$(kubectl get pods | grep driver | tail -n 1 | cut -d' ' -f1)
+  PREVIOUS_JOB=$(kubectl -n $NAMESPACE get pods | grep driver | tail -n 1 | cut -d' ' -f1)
 
   NAMESPACE=$1
   USERNAME=$2
 
-  kubectl exec testpod -- env UU="$USERNAME" NN="$NAMESPACE" JJ="$SPARK_EXAMPLES_JAR_NAME" IM="$(spark_image)" \
+  kubectl -n $NAMESPACE exec testpod -- env UU="$USERNAME" NN="$NAMESPACE" JJ="$SPARK_EXAMPLES_JAR_NAME" IM="$(spark_image)" \
     /bin/bash -c 'spark-client.spark-submit \
       --username $UU --namespace $NN \
       --conf spark.kubernetes.driver.request.cores=100m \
@@ -215,7 +235,7 @@ run_example_job_in_pod_with_metrics() {
   SPARK_EXAMPLES_JAR_NAME="spark-examples_2.12-$(get_spark_version).jar"
   LOG_FILE="/tmp/server.log"
   SERVER_PORT=9091
-  PREVIOUS_JOB=$(kubectl get pods | grep driver | tail -n 1 | cut -d' ' -f1)
+  PREVIOUS_JOB=$(kubectl -n $NAMESPACE get pods | grep driver | tail -n 1 | cut -d' ' -f1)
   # start simple http server
   python3 tests/integration/resources/test_web_server.py $SERVER_PORT > $LOG_FILE &
   HTTP_SERVER_PID=$!
@@ -224,7 +244,7 @@ run_example_job_in_pod_with_metrics() {
   echo "IP: $IP_ADDRESS"
   NAMESPACE=$1
   USERNAME=$2
-  kubectl exec testpod -- env PORT="$SERVER_PORT" IP="$IP_ADDRESS" UU="$USERNAME" NN="$NAMESPACE" JJ="$SPARK_EXAMPLES_JAR_NAME" IM="$(spark_image)" \
+  kubectl -n $NAMESPACE exec testpod -- env PORT="$SERVER_PORT" IP="$IP_ADDRESS" UU="$USERNAME" NN="$NAMESPACE" JJ="$SPARK_EXAMPLES_JAR_NAME" IM="$(spark_image)" \
     /bin/bash -c 'spark-client.spark-submit \
       --username $UU --namespace $NN \
       --conf spark.kubernetes.driver.request.cores=100m \
@@ -262,11 +282,11 @@ run_example_job_in_pod_with_metrics() {
 run_example_job_with_error_in_pod() {
   SPARK_EXAMPLES_JAR_NAME="spark-examples_2.12-$(get_spark_version).jar"
 
-  PREVIOUS_JOB=$(kubectl get pods | grep driver | tail -n 1 | cut -d' ' -f1)
+  PREVIOUS_JOB=$(kubectl -n $NAMESPACE get pods | grep driver | tail -n 1 | cut -d' ' -f1)
 
   NAMESPACE=$1
   USERNAME=$2
 
-  kubectl exec testpod -- env UU="$USERNAME" NN="$NAMESPACE" JJ="$SPARK_EXAMPLES_JAR_NAME" IM="$(spark_image)" \
+  kubectl -n $NAMESPACE exec testpod -- env UU="$USERNAME" NN="$NAMESPACE" JJ="$SPARK_EXAMPLES_JAR_NAME" IM="$(spark_image)" \
     /bin/bash -c 'spark-client.spark-submit \
       --username $UU --namespace $NN \
      --conf spark.kubernetes.driver.request.cores=100m \
@@ -303,21 +323,21 @@ run_example_job_with_error_in_pod() {
 }
 
 test_example_job_in_pod_with_errors() {
-  run_example_job_with_error_in_pod tests spark
+  run_example_job_with_error_in_pod $NAMESPACE spark
 }
 
 test_example_job_in_pod_with_templates() {
-  run_example_job_in_pod_with_pod_templates tests spark
+  run_example_job_in_pod_with_pod_templates $NAMESPACE spark
 }
 
 test_example_job_in_pod() {
-  run_example_job_in_pod tests spark
+  run_example_job_in_pod $NAMESPACE spark
 }
 
 test_example_job_in_pod_with_metrics() {
-  run_example_job_in_pod_with_metrics tests spark
+  run_example_job_in_pod_with_metrics $NAMESPACE spark
 }
 
@@ -334,7 +354,7 @@ run_spark_shell_in_pod() {
 
   # Sample output
   # "Pi is roughly 3.13956232343"
-  echo -e "$(kubectl exec testpod -- env UU="$USERNAME" NN="$NAMESPACE" CMDS="$SPARK_SHELL_COMMANDS" IM="$(spark_image)" /bin/bash -c 'echo "$CMDS" | spark-client.spark-shell --username $UU --namespace $NN --conf spark.kubernetes.container.image=$IM')" > spark-shell.out
+  echo -e "$(kubectl -n $NAMESPACE exec testpod -- env UU="$USERNAME" NN="$NAMESPACE" CMDS="$SPARK_SHELL_COMMANDS" IM="$(spark_image)" /bin/bash -c 'echo "$CMDS" | spark-client.spark-shell --username $UU --namespace $NN --conf spark.kubernetes.container.image=$IM')" > spark-shell.out
 
   pi=$(cat spark-shell.out | grep "^Pi is roughly" | rev | cut -d' ' -f1 | rev | cut -c 1-3)
   echo -e "Spark-shell Pi Job Output: \n ${pi}"
@@ -343,7 +363,7 @@ run_spark_shell_in_pod() {
 }
 
 test_spark_shell_in_pod() {
-  run_spark_shell_in_pod tests spark
+  run_spark_shell_in_pod $NAMESPACE spark
 }
 
 run_pyspark_in_pod() {
@@ -358,7 +378,7 @@ run_pyspark_in_pod() {
 
   # Sample output
   # "Pi is roughly 3.13956232343"
-  echo -e "$(kubectl exec testpod -- env UU="$USERNAME" NN="$NAMESPACE" CMDS="$PYSPARK_COMMANDS" IM="$(spark_image)" /bin/bash -c 'echo "$CMDS" | spark-client.pyspark --username $UU --namespace $NN --conf spark.kubernetes.container.image=$IM')" > pyspark.out
+  echo -e "$(kubectl -n $NAMESPACE exec testpod -- env UU="$USERNAME" NN="$NAMESPACE" CMDS="$PYSPARK_COMMANDS" IM="$(spark_image)" /bin/bash -c 'echo "$CMDS" | spark-client.pyspark --username $UU --namespace $NN --conf spark.kubernetes.container.image=$IM')" > pyspark.out
 
   cat pyspark.out
   pi=$(cat pyspark.out | grep "Pi is roughly" | tail -n 1 | rev | cut -d' ' -f1 | rev | cut -c 1-3)
roughly" | tail -n 1 | rev | cut -d' ' -f1 | rev | cut -c 1-3) @@ -368,14 +388,7 @@ run_pyspark_in_pod() { } test_pyspark_in_pod() { - run_pyspark_in_pod tests spark -} - -test_restricted_account_in_pod() { - - kubectl config set-context spark-context --namespace=tests --cluster=prod --user=spark - - run_example_job_in_pod tests spark + run_pyspark_in_pod $NAMESPACE spark } cleanup_user_failure_in_pod() { @@ -387,44 +400,44 @@ echo -e "##################################" echo -e "SETUP TEST POD" echo -e "##################################" -setup_test_pod +setup_admin_test_pod echo -e "##################################" echo -e "RUN EXAMPLE JOB" echo -e "##################################" -(setup_user_admin_context && test_example_job_in_pod && cleanup_user_success) || cleanup_user_failure_in_pod +(setup_user_context && test_example_job_in_pod && cleanup_user_success) || cleanup_user_failure_in_pod echo -e "##################################" echo -e "RUN SPARK SHELL IN POD" echo -e "##################################" -(setup_user_admin_context && test_spark_shell_in_pod && cleanup_user_success) || cleanup_user_failure_in_pod +(setup_user_context && test_spark_shell_in_pod && cleanup_user_success) || cleanup_user_failure_in_pod echo -e "##################################" echo -e "RUN PYSPARK IN POD" echo -e "##################################" -(setup_user_admin_context && test_pyspark_in_pod && cleanup_user_success) || cleanup_user_failure_in_pod +(setup_user_context && test_pyspark_in_pod && cleanup_user_success) || cleanup_user_failure_in_pod echo -e "##################################" echo -e "RUN EXAMPLE JOB WITH POD TEMPLATE" echo -e "##################################" -(setup_user_admin_context && test_example_job_in_pod_with_templates && cleanup_user_success) || cleanup_user_failure_in_pod +(setup_user_context && test_example_job_in_pod_with_templates && cleanup_user_success) || cleanup_user_failure_in_pod echo -e "########################################" echo -e "RUN EXAMPLE JOB WITH PROMETHEUS METRICS" echo -e "########################################" -(setup_user_admin_context && test_example_job_in_pod_with_metrics && cleanup_user_success) || cleanup_user_failure_in_pod +(setup_user_context && test_example_job_in_pod_with_metrics && cleanup_user_success) || cleanup_user_failure_in_pod echo -e "########################################" echo -e "RUN EXAMPLE JOB WITH ERRORS" echo -e "########################################" -(setup_user_admin_context && test_example_job_in_pod_with_errors && cleanup_user_success) || cleanup_user_failure_in_pod +(setup_user_context && test_example_job_in_pod_with_errors && cleanup_user_success) || cleanup_user_failure_in_pod echo -e "##################################" echo -e "TEARDOWN TEST POD" echo -e "##################################" diff --git a/tests/integration/resources/jupyter.yaml b/tests/integration/resources/jupyter.yaml new file mode 100644 index 00000000..44785433 --- /dev/null +++ b/tests/integration/resources/jupyter.yaml @@ -0,0 +1,34 @@ +apiVersion: v1 +kind: Pod +metadata: + name: charmed-spark-jupyter + labels: + app.kubernetes.io/name: charmed-spark-jupyter +spec: + serviceAccountName: "" + containers: + - image: + name: spark + # IT WOULD BE NICE IF THESE PARAMETERS COULD BE AUTO-INFERRED FROM THE + # SERVICE ACCOUNT USED TO RUN THE JOB + # (JIRA TICKET https://warthogs.atlassian.net/browse/DPE-3460) + args: ["--username", "", "--namespace", ""] + ports: + - containerPort: 8888 + env: + - name: NB_PREFIX + value: 
"jupyter-test" +--- +apiVersion: v1 +kind: Service +metadata: + name: jupyter-service + labels: + app: charmed-spark-jupyter +spec: + ports: + - port: 8888 + protocol: TCP + targetPort: 8888 + selector: + app.kubernetes.io/name: charmed-spark-jupyter \ No newline at end of file