fix: charts and deploy scripts

maint: add debug
FZJ-INM1-BDA · Feb 27, 2024 · 45c666c · 45c666c
1 parent 1c19673
commit 45c666c
Show file tree

Hide file tree

Showing 13 changed files with 212 additions and 53 deletions.
diff --git a/.github/workflows/composite-set-k8s-cred/action.yml b/.github/workflows/composite-set-k8s-cred/action.yml
@@ -0,0 +1,19 @@
+name: Set k8s cred
+author: 'Xiao Gui <[email protected]>'
+description: 'Populates the k8s secret'
+inputs:
+  secrets:
+    description: "k8s cfg string"
+    required: true
+runs:
+  using: composite
+  steps:
+  - id: 'set-id'
+    run: |
+      kubecfg_path=${{ runner.temp }}/.kube_config
+      # create the file empty with mode 0600 before any credential is written into it
+      install -m 600 /dev/null "$kubecfg_path"
+      # composite actions cannot read the secrets context; use the declared input instead
+      echo "${{ inputs.secrets }}" > "$kubecfg_path"
+      echo "KUBECONFIG=$kubecfg_path:$KUBECONFIG" >> $GITHUB_ENV
+    shell: bash
diff --git a/.github/workflows/deploy-helm.yml b/.github/workflows/deploy-helm.yml
@@ -10,36 +10,79 @@ on:
       KUBECONFIG:
         required: true
 
+# Ingress overrides, passed via --set-json only when the rc release is first installed
+env:
+  RC_INGRESS_HOST: '[{"host": "siibra-api-rc.apps.tc.humanbrainproject.eu", "paths": [{ "path": "/", "pathType": "Prefix" }]}]'
+  RC_INGRESS_TLS: '[{"secretName": "siibra-api-rc-secret", "hosts": ["siibra-api-rc.apps.tc.humanbrainproject.eu"]}]'
+
 jobs:
-  trigger-deploy:
+  set-vars:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v3
-    - name: 'Deploy'
+    - uses: actions/checkout@v4
+    - uses: ./.github/workflows/composite-set-k8s-cred
+      with:
+        secrets: ${{ secrets.KUBECONFIG }}
+
+    - name: 'Get status'
+      run: |
+        # capture the exit code via || so the runner's `bash -e` does not abort; non-zero selects the install branch below
+        HELM_STATUS=0
+        helm status ${{ inputs.DEPLOYMENT_NAME }} || HELM_STATUS=$?
+        echo "HELM_STATUS: $HELM_STATUS"
+        echo "HELM_STATUS=$HELM_STATUS" >> $GITHUB_ENV
+
+    - name: Set Vars
       run: |
-        kubecfg_path=${{ runner.temp }}/.kube_config
-        version=$(cat api/VERSION)
+        SAPI_VERSION=$(cat api/VERSION)
+        GIT_HASH=$(git rev-parse --short HEAD)
         
-        echo "${{ secrets.KUBECONFIG }}" > $kubecfg_path
-        helm --kubeconfig=$kubecfg_path status ${{ inputs.DEPLOYMENT_NAME }}
-        helm_status=$(echo $?)
+        echo "SAPI_VERSION: $SAPI_VERSION"
+        echo "GIT_HASH: $GIT_HASH"
 
-        if [[ $helm_status = "0" ]]
+        if [[ -z "$SAPI_VERSION" ]]
         then
-          echo "tag ${{ inputs.DEPLOYMENT_NAME }} found. Update"
-          helm --set sapiVersion=${version} \
-            --kubeconfig=$kubecfg_path \
-            --history-max=3 \
-            upgrade \
-            ${{ inputs.DEPLOYMENT_NAME }} \
-            .helm/siibra-api/
+          echo "SAPI_VERSION cannot be found $SAPI_VERSION"
+          exit 1
+        fi
+
+        if [[ -z "$GIT_HASH" ]]
+        then
+          echo "GIT_HASH cannot be found $GIT_HASH"
+          exit 1
+        fi
+
+        echo "SAPI_VERSION=$SAPI_VERSION" >> $GITHUB_ENV
+        echo "GIT_HASH=$GIT_HASH" >> $GITHUB_ENV
+
+    - name: 'deploy rc'
+      if: ${{ inputs.DEPLOYMENT_NAME == 'rc' }}
+      run: |
+        if [[ "$HELM_STATUS" == "0" ]]
+        then
+          helm --set sapiVersion=$SAPI_VERSION \
+            --reuse-values \
+            --set podLabels.hash="$GIT_HASH" \
+            --set image.pullPolicy=Always \
+            upgrade rc .helm/siibra-api/
         else
-          echo "tag ${{ inputs.DEPLOYMENT_NAME }} not found. Install"
-          helm --set sapiVersion=${version} \
-            --kubeconfig=$kubecfg_path \
-            install \
-            ${{ inputs.DEPLOYMENT_NAME }} \
-            .helm/siibra-api/
+          helm --set sapiVersion=$SAPI_VERSION \
+            --set sapiFlavor=rc --set-json ingress.hosts='${{ env.RC_INGRESS_HOST }}' \
+            --set-json ingress.tls='${{ env.RC_INGRESS_TLS }}' \
+            --set podLabels.hash="$GIT_HASH" \
+            --set image.pullPolicy=Always \
+            install rc .helm/siibra-api/
         fi
 
-        rm $kubecfg_path
+    - name: 'deploy prod'
+      if: ${{ inputs.DEPLOYMENT_NAME == 'prod' }}
+      run: |
+        if [[ "$HELM_STATUS" == "0" ]]
+        then
+          helm --set sapiVersion=$SAPI_VERSION \
+            --reuse-values \
+            upgrade prod .helm/siibra-api/
+        else
+          helm --set sapiVersion=$SAPI_VERSION \
+            install prod .helm/siibra-api/
+        fi
diff --git a/.github/workflows/docker-img.yml b/.github/workflows/docker-img.yml
@@ -38,7 +38,7 @@ jobs:
     steps:
     - name: "Sanity check github.ref"
       run: echo GITHUB_REF - $GITHUB_REF - github.ref - ${{ github.ref }}
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
     - name: "Build docker image"
       run: |
         GIT_HASH=$(git rev-parse --short HEAD)
@@ -104,15 +104,26 @@ jobs:
       version: ${{ steps.set-env-var.outputs.version }}
     needs: build-docker-img
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
     - uses: actions/setup-python@v4
       with:
         python-version: '3.10'
     - name: 'Setting env var'
       id: set-env-var
       run: |
-        echo queues=$(python -c 'import api.siibra_api_config as cfg; print(" ".join([f"{q!r}" for q in cfg._queues]))') >> "$GITHUB_OUTPUT"
+        echo queues=$(python -c 'import api.siibra_api_config as cfg; print(" ".join([f"{q!r}" for q in cfg.queues]))') >> "$GITHUB_OUTPUT"
         echo version=$(python -c 'import api.siibra_api_config as cfg; print(cfg.__version__)') >> "$GITHUB_OUTPUT"
+        # version/queues are not shell variables in this step, so validate what was actually written to $GITHUB_OUTPUT
+        if ! grep -q '^version=.' "$GITHUB_OUTPUT"
+        then
+          echo "Version population failed: 'version' output is empty"
+          exit 1
+        fi
+        if ! grep -q '^queues=.' "$GITHUB_OUTPUT"
+        then
+          echo "Queues population failed: 'queues' output is empty"
+          exit 1
+        fi
 
   deploy-latest-on-okd:
     needs: setup-envvar
@@ -196,32 +207,35 @@ jobs:
     secrets:
       okd_token: ${{ matrix.deploy-site == 'jsc' && secrets.OKD_JSC_SECRET || secrets.OKD_PROD_SECRET }}
 
+
+  # rc
   warmup-rc-at-helm:
     needs: setup-envvar
     if:  ${{ github.event_name == 'release' && contains(github.ref, 'rc') }}
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v4
+    - uses: ./.github/workflows/composite-set-k8s-cred
+      with:
+        secrets: ${{ secrets.KUBECONFIG }}
     - timeout-minutes: 15 # should not take more than 15 minutes to warmup cache
       run: |
-        kubecfg_path=${{ runner.temp }}/.kube_config
-        echo "${{ secrets.KUBECONFIG }}" > $kubecfg_path
-
-        kubectl --kubeconfig=$kubecfg_path delete pod/warmup-pod || echo "Pod pod/warmup-pod not found." 
+        # Delete pod at the beginning of the workflow
+        # This is so that logs can be inspected
+        kubectl delete pod/warmup-pod || echo "Pod pod/warmup-pod not found." 
 
         # TODO Flaky
         # see .helm/siibra-api/templates/_helpers.tpl how siibra-api.cache-dir is defined
 
-        SIIBRA_CACHEDIR=/siibra-api-volume/${{ needs.setup-envvar.outputs.version }}-rc/
-        WARM_CACHE_YML=$(envsubst < .helm/adhoc/warm-cache.yaml)
+        WARM_CACHE_YML=$(SIIBRA_CACHEDIR=/siibra-api-volume/${{ needs.setup-envvar.outputs.version }}-rc/ envsubst < .helm/adhoc/warm-cache.yaml)
         echo -e "WARM_CACHE_YML: \n$WARM_CACHE_YML"
 
-        echo -e "$WARM_CACHE_YML" | kubectl --kubeconfig=$kubecfg_path apply -f -
+        echo "$WARM_CACHE_YML" | kubectl apply -f -
         
         while true
         do
           sleep 10
-          POD_PHASE=$(kubectl --kubeconfig=$kubecfg_path get pod warmup-pod -o json | jq -r '.status.phase')
+          POD_PHASE=$(kubectl get pod warmup-pod -o json | jq -r '.status.phase')
 
           echo Possible phases: Pending, Running, Succeeded, Failed, Unknown
           echo Found phase: $POD_PHASE
@@ -238,16 +252,22 @@ jobs:
         done
   
   clear-rc-redis-cache:
-    runs-on: 'ubuntu'
+    runs-on: ubuntu-latest
     timeout-minutes: 1 # should not take more than 1 minute to clear the cache
     needs:
     - warmup-rc-at-helm
     - setup-envvar
     steps:
+    # kubectl below needs cluster credentials; the composite action exports KUBECONFIG for later steps
+    - uses: actions/checkout@v4
+    - uses: ./.github/workflows/composite-set-k8s-cred
+      with:
+        secrets: ${{ secrets.KUBECONFIG }}
     - run: |
         REDIS_POD=$(kubectl get pod -l app=cache-redis | grep Running | awk '{print $1}')
-        echo kubectl --kubeconfig=SECRET exec $REDIS_POD -- /bin/ash -c "redis-cli redis-cli --scan --pattern "*${{ needs.setup-envvar.outputs.version }}*" | while IFS= read -r line; do redis-cli del "$line"; done"
-        kubectl --kubeconfig=$kubecfg_path exec $REDIS_POD -- /bin/ash -c "redis-cli redis-cli --scan --pattern "*${{ needs.setup-envvar.outputs.version }}*" | while IFS= read -r line; do redis-cli del "$line"; done"
+        # \"\$line\" is escaped so it is expanded by the shell inside the pod, not by the runner shell
+        echo kubectl exec $REDIS_POD -- /bin/ash -c "redis-cli --scan --pattern '*\[${{ needs.setup-envvar.outputs.version }}\]*' | while IFS= read -r line; do redis-cli del \"\$line\"; done"
+        kubectl exec $REDIS_POD -- /bin/ash -c "redis-cli --scan --pattern '*\[${{ needs.setup-envvar.outputs.version }}\]*' | while IFS= read -r line; do redis-cli del \"\$line\"; done"
 
   deploy-rc-via-helm:
     needs: warmup-rc-at-helm
@@ -258,10 +272,65 @@ jobs:
     secrets:
       KUBECONFIG: ${{ secrets.KUBECONFIG }}
 
+  # prod
+  copy-by-helm:
+    needs: setup-envvar
+    if:  ${{ github.event_name == 'release' && !contains(github.ref, 'rc') }}
+    runs-on: ubuntu-latest
+    timeout-minutes: 15 # should not take more than 15 minutes to copy cache
+    steps:
+    - uses: actions/checkout@v4
+    - uses: ./.github/workflows/composite-set-k8s-cred
+      with:
+        secrets: ${{ secrets.KUBECONFIG }}
+    - name: 'set FROM_DIR TO_DIR'
+      run: |
+        VERSION=${{ needs.setup-envvar.outputs.version }}
+
+        # TODO use label exclusively in the future
+        POD=$(kubectl get pod -l role=server | grep rc-siibra-api | awk '{print $1}')
+        echo POD: $POD
+
+        cache_str=$(kubectl exec $POD -- env | grep SIIBRA_CACHEDIR)
+        FROM_DIR=${cache_str//SIIBRA_CACHEDIR=/}
+        TO_DIR=${FROM_DIR//-rc/}
+        POD_NAME=copy-cache
+
+        echo FROM_DIR: $FROM_DIR, TO_DIR: $TO_DIR, POD_NAME: $POD_NAME
+
+        echo "FROM_DIR=$FROM_DIR" >> $GITHUB_ENV
+        echo "TO_DIR=$TO_DIR" >> $GITHUB_ENV
+        echo "POD_NAME=$POD_NAME" >> $GITHUB_ENV
+
+    - name: 'start container'
+      run: |
+        # delete pod before workflow, so that logs can be inspected
+        kubectl delete pod/$POD_NAME || echo "Pod pod/$POD_NAME not found."
+        FROM_DIR=$FROM_DIR TO_DIR=$TO_DIR envsubst < .helm/adhoc/copy-cache.yaml | kubectl apply -f -
+    - name: 'Ensure copy completes'
+      run: |
+        while true
+        do
+          sleep 10
+          POD_PHASE=$(kubectl get pod $POD_NAME -o json | jq -r '.status.phase')
+
+          echo Possible phases: Pending, Running, Succeeded, Failed, Unknown
+          echo Found phase: $POD_PHASE
+
+          if [[ "$POD_PHASE" == "Failed" ]] || [[ "$POD_PHASE" == "Unknown" ]]
+          then
+            exit 1
+          fi
+
+          if [[ "$POD_PHASE" == "Succeeded" ]]
+          then
+            exit 0
+          fi
+        done
+
   deploy-prod-via-helm:
-    needs: setup-envvar
+    needs: copy-by-helm
     if:  ${{ github.event_name == 'release' && !contains(github.ref, 'rc') }}
-
     uses: ./.github/workflows/deploy-helm.yml
     with:
       DEPLOYMENT_NAME: prod

diff --git a/.github/workflows/ebrains_sync.yml b/.github/workflows/ebrains_sync.yml
@@ -9,7 +9,7 @@ jobs:
   sync:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
     - uses: wei/git-sync@v3
       with:
         source_repo: ${GITHUB_REPOSITORY}

diff --git a/.github/workflows/github-actions.yml b/.github/workflows/github-actions.yml
@@ -10,7 +10,7 @@ jobs:
   model-module-import:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
     - uses: actions/setup-python@v4
       with:
         python-version: '3.10'
@@ -34,7 +34,7 @@ jobs:
       SIIBRA_API_REDIS_PORT: 6379
       SIIBRA_API_ROLE: server
     steps:
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
     - uses: actions/setup-python@v4
       with:
         python-version: '3.10'
@@ -56,8 +56,7 @@ jobs:
       # only included here for clarity (if user would like to run tests locally)
       CI: true
     steps:
-      - name: Check out repository code
-        uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Set up Python 3.10
         uses: actions/setup-python@v4
         with:

diff --git a/.helm/adhoc/copy-cache.yaml b/.helm/adhoc/copy-cache.yaml
@@ -0,0 +1,31 @@
+# One-shot pod: runs `cp -r $FROM_DIR $TO_DIR` in a container that mounts data-volume-claim at /siibra-api-volume.
+# $FROM_DIR and $TO_DIR are placeholders; the workflow fills them in with envsubst before `kubectl apply`.
+apiVersion: v1
+kind: Pod
+metadata:
+  name: copy-cache
+  labels:
+    app: copy-cache
+spec:
+  restartPolicy: Never
+  volumes:
+  - name: data-volume
+    persistentVolumeClaim:
+      claimName: data-volume-claim
+  containers:
+  - name: copy-container
+    image: docker-registry.ebrains.eu/siibra/siibra-api:0.3-server
+    command: [/bin/ash]
+    args:
+    - -c
+    - "echo FROM_DIR: $FROM_DIR TO_DIR: $TO_DIR && cp -r $FROM_DIR $TO_DIR"
+    volumeMounts:
+    - name: data-volume
+      mountPath: /siibra-api-volume
+    resources:
+      requests:
+        cpu: 200m
+        memory: 200Mi
+      limits:
+        cpu: 200m
+        memory: 200Mi
diff --git a/.helm/adhoc/warm-cache.yaml b/.helm/adhoc/warm-cache.yaml
@@ -12,7 +12,7 @@ spec:
     - python
     args:
     - -c
-    - import siibra; siibra.cache.clear(); siibra.warm_cache()
+    - import siibra; siibra.cache.clear(); siibra.warm_cache(999)
     resources:
       limits:
         cpu: 900m

diff --git a/.helm/siibra-api/Chart.yaml b/.helm/siibra-api/Chart.yaml
@@ -15,7 +15,7 @@ type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 0.1.1
+version: 0.1.2
 
 # This is the version number of the application being deployed. This version number should be
 # incremented each time you make changes to the application. Versions are not expected to

diff --git a/.helm/siibra-api/templates/_helpers.tpl b/.helm/siibra-api/templates/_helpers.tpl
@@ -23,7 +23,7 @@ This is because, on deploy staging it will rm -rf cache-dir.
 This should prevent misconfiguration from deleting prod cache
 */}}
 {{- define "siibra-api.cache-dir" -}}
-{{- if eq .Values.sapiFlavor "rc" }}
+{{- if eq .Values.sapiFlavor "rc" -}}
 {{/*
 N.B. *any* update here *needs* to be reflected in
 .github/workflows/docker-img.yml#jobs>warmup-rc-at-helm
@@ -71,6 +71,7 @@ app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
 {{- end }}
 app.kubernetes.io/managed-by: {{ .Release.Service }}
 app: siibra-api
+app-flavor: {{ .Values.sapiFlavor }}
 {{- end }}
 
 {{/*