Skip to content

Commit

Permalink
fix metrics and worker
Browse files Browse the repository at this point in the history
  • Loading branch information
xgui3783 committed Feb 15, 2024
1 parent 5d94418 commit 00ab119
Show file tree
Hide file tree
Showing 7 changed files with 82 additions and 45 deletions.
55 changes: 28 additions & 27 deletions .github/workflows/docker-img.yml
Original file line number Diff line number Diff line change
Expand Up @@ -142,34 +142,35 @@ jobs:
secrets:
okd_token: ${{ secrets.OKD_PROD_SECRET }}


data-validation-config-hash:
if: ${{ github.event_name == 'release' && contains(github.ref, 'rc') }}
runs-on: ubuntu-latest
outputs:
CONFIG_SHORT_REV: ${{ steps.parse-rev.outputs.CONFIG_SHORT_REV }}
steps:
- id: parse-rev
name: Get short rev of HEAD at master
run: |
git clone https://jugit.fz-juelich.de/t.dickscheid/brainscapes-configurations.git
CONFIG_SHORT_REV=$(git -C brainscapes-configurations rev-parse --short=6 HEAD)
echo CONFIG_SHORT_REV=$CONFIG_SHORT_REV >> $GITHUB_OUTPUT
# Deployment on JSC is disabled: the JSC-specific jobs below are commented out.

# data-validation-config-hash:
# if: ${{ github.event_name == 'release' && contains(github.ref, 'rc') }}
# runs-on: ubuntu-latest
# outputs:
# CONFIG_SHORT_REV: ${{ steps.parse-rev.outputs.CONFIG_SHORT_REV }}
# steps:
# - id: parse-rev
# name: Get short rev of HEAD at master
# run: |
# git clone https://jugit.fz-juelich.de/t.dickscheid/brainscapes-configurations.git
# CONFIG_SHORT_REV=$(git -C brainscapes-configurations rev-parse --short=6 HEAD)
# echo CONFIG_SHORT_REV=$CONFIG_SHORT_REV >> $GITHUB_OUTPUT

deploy-rc-on-data-validation:
needs:
- setup-envvar
- data-validation-config-hash
if: ${{ github.event_name == 'release' && contains(github.ref, 'rc') }}
uses: ./.github/workflows/deploy-on-okd.yml
with:
okd_endpoint: https://okd.jsc.hbp.eu:443
flavor: rc
queues: ${{ needs.setup-envvar.outputs.queues }}
version: c.${{ needs.data-validation-config-hash.outputs.CONFIG_SHORT_REV }}
workerimage: docker-registry.ebrains.eu/siibra/siibra-api:rc-worker
secrets:
okd_token: ${{ secrets.OKD_JSC_SECRET }}
# deploy-rc-on-data-validation:
# needs:
# - setup-envvar
# - data-validation-config-hash
# if: ${{ github.event_name == 'release' && contains(github.ref, 'rc') }}
# uses: ./.github/workflows/deploy-on-okd.yml
# with:
# okd_endpoint: https://okd.jsc.hbp.eu:443
# flavor: rc
# queues: ${{ needs.setup-envvar.outputs.queues }}
# version: c.${{ needs.data-validation-config-hash.outputs.CONFIG_SHORT_REV }}
# workerimage: docker-registry.ebrains.eu/siibra/siibra-api:rc-worker
# secrets:
# okd_token: ${{ secrets.OKD_JSC_SECRET }}

deploy-prod-on-okd:
needs: setup-envvar
Expand Down
20 changes: 20 additions & 0 deletions .helm/adhoc/certificate-rc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
---
# cert-manager Certificate for the siibra-api release-candidate hostname.
apiVersion: cert-manager.io/v1
kind: Certificate
metadata:
  name: siibra-api-rc
spec:
  # Secret named here is referenced by secretName; renewal is requested
  # 120h before expiry.
  secretName: siibra-api-rc-secret
  renewBefore: 120h

  # Leaf (non-CA) certificate for a single hostname.
  isCA: false
  commonName: siibra-api-rc.apps.tc.humanbrainproject.eu
  dnsNames:
    - siibra-api-rc.apps.tc.humanbrainproject.eu
  usages:
    - server auth

  privateKey:
    algorithm: RSA
    encoding: PKCS1
    size: 2048

  issuerRef:
    kind: ClusterIssuer
    name: letsencrypt-production-issuer-1
4 changes: 2 additions & 2 deletions .helm/siibra-api/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.1.0
version: 0.1.1

# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "0.3.15"
appVersion: "0.3.17"
8 changes: 8 additions & 0 deletions .helm/siibra-api/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,14 @@ Expand the name of the chart.
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Root of the image tag: the literal "rc" when .Values.sapiFlavor is "rc",
otherwise .Values.sapiVersion. Callers append "-server" / "-worker".
*/}}
{{- define "siibra-api.root-img" -}}
{{- ternary "rc" .Values.sapiVersion (eq .Values.sapiFlavor "rc") -}}
{{- end }}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
Expand Down
4 changes: 3 additions & 1 deletion .helm/siibra-api/templates/deployment-server.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ spec:
- name: {{ .Chart.Name }}
securityContext:
{{- toYaml .Values.securityContext | nindent 12 }}
image: "{{ .Values.image.repository }}:{{ .Values.sapiVersion }}-server"
image: "{{ .Values.image.repository }}:{{ include "siibra-api.root-img" . }}-server"
imagePullPolicy: {{ .Values.image.pullPolicy }}
ports:
- name: http
Expand All @@ -59,6 +59,8 @@ spec:
env:
  - name: SIIBRA_CACHEDIR
    value: "/siibra-api-volume/{{ .Values.sapiVersion }}"
  # Env var values must be strings: quote so an empty or boolean/number-looking
  # flavor does not render as a YAML null/bool/int.
  - name: SIIBRA_API_NAMESPACE
    value: {{ $.Values.sapiFlavor | quote }}
resources:
{{- toYaml .Values.resourcesServerPod | nindent 12 }}
{{- with .Values.volumeMounts }}
Expand Down
4 changes: 3 additions & 1 deletion .helm/siibra-api/templates/deployment-worker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ spec:
- name: {{ $.Chart.Name }}
securityContext:
{{- toYaml $.Values.securityContext | nindent 12 }}
image: "{{ $.Values.image.repository }}:{{ $.Values.sapiVersion }}-worker"
image: "{{ $.Values.image.repository }}:{{ include "siibra-api.root-img" $ }}-worker"
imagePullPolicy: {{ $.Values.image.pullPolicy }}
command: ["celery"]
args:
Expand Down Expand Up @@ -75,6 +75,8 @@ spec:
env:
  - name: SIIBRA_CACHEDIR
    value: "/siibra-api-volume/{{ $.Values.sapiVersion }}"
  # Env var values must be strings: quote so an empty or boolean/number-looking
  # flavor does not render as a YAML null/bool/int.
  - name: SIIBRA_API_NAMESPACE
    value: {{ $.Values.sapiFlavor | quote }}
resources:
{{- toYaml $.Values.resourcesWorkerPod | nindent 12 }}
{{- with $.Values.volumeMounts }}
Expand Down
32 changes: 18 additions & 14 deletions api/server/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from subprocess import run
import os
from pathlib import Path
from collections import defaultdict
from api.siibra_api_config import ROLE, CELERY_CONFIG, NAME_SPACE, MONITOR_FIRSTLVL_DIR, queues
from api.common.timer import RepeatTimer
from api.common import general_logger
Expand Down Expand Up @@ -31,6 +32,8 @@ def timed_du():
return

for dir in dirs:
if dir == "lost+found":
continue
path_to_dir = Path(MONITOR_FIRSTLVL_DIR) / dir
try:
result = run(["du", "-s", str(path_to_dir)], capture_output=True, text=True)
Expand Down Expand Up @@ -83,7 +86,7 @@ def refresh_prom_metrics():
**common_kwargs)
num_worker_gauge = Gauge("num_workers",
"Number of workers",
labelnames=("hostname", "q_name", "ok"), **common_kwargs)
labelnames=("version", "namespace", "queue"), **common_kwargs)
scheduled_gauge = Gauge("scheduled_tasks","Number of scheduled tasks", labelnames=("hostname",), **common_kwargs)
active_gauge = Gauge("active_tasks", "Number of active tasks", labelnames=("hostname",), **common_kwargs)
reserved_gauge = Gauge("reserved_tasks", "Number of reserved tasks", labelnames=("hostname",), **common_kwargs)
Expand All @@ -103,19 +106,20 @@ def refresh_prom_metrics():
i = app.control.inspect()

# number of active workers
result = i.ping()
if result is None:
num_worker_gauge.set(0)
else:
for worker_hostname, resp in result.items():
for queue in queues:
if queue in worker_hostname:
break
else:
queue = "celery"
num_worker_gauge.labels(hostname=worker_hostname.replace("celery@", ""),
ok=resp.get("ok"),
q_name=queue).set(len(result))
# Number of active workers, tallied per (version, namespace, queue).
# active_queues() yields None when no worker replies, so fall back to an
# empty mapping rather than iterating over None.
result = app.control.inspect().active_queues() or {}

# defaultdict requires a callable factory; `int` gives 0 for unseen keys.
tally = defaultdict(int)
for worker_queues in result.values():
    for queue_info in worker_queues or []:
        routing_key = queue_info.get("routing_key")
        if not routing_key:
            continue
        # Routing keys are presumably "<version>.<namespace>.<queue>", where
        # the version itself may contain dots -- TODO confirm against `queues`.
        parts = routing_key.split(".")
        if len(parts) < 2:
            # Not enough dot-separated fields to recover namespace and queue;
            # skip it rather than fail the whole metrics refresh.
            continue
        *version_parts, namespace, queue_name = parts
        version = ".".join(version_parts)
        tally[(version, namespace, queue_name)] += 1

for (version, namespace, queue_name), total in tally.items():
    num_worker_gauge.labels(version=version,
                            namespace=namespace,
                            queue=queue_name).set(total)

for workername, queue in (i.scheduled() or {}).items():
scheduled_gauge.labels(hostname=workername).set(len(queue))
Expand Down

0 comments on commit 00ab119

Please sign in to comment.