From 9ebbfb6f65ad68715d2492f88fc1b7b079f27249 Mon Sep 17 00:00:00 2001 From: Mathew Wicks Date: Sat, 27 Mar 2021 02:15:56 +1100 Subject: [PATCH] release 8.0.0 - airflow 2.0 (#96) Signed-off-by: Mathew Wicks --- .github/workflows/lint-test.yaml | 30 +- .github/workflows/release.yaml | 4 +- charts/airflow/Chart.yaml | 4 +- charts/airflow/README.md | 1614 ++++++++++------- charts/airflow/UPGRADE.md | 194 +- .../examples/google-gke/custom-values.yaml | 288 ++- .../k8s_resources/configmap-webserver.yaml | 28 - ...t-git-keys.yaml => secret-git-secret.yaml} | 8 +- .../examples/minikube/custom-values.yaml | 150 +- .../files/pod_template.kubernetes-helm-yaml | 65 + charts/airflow/files/webserver_config.py | 8 + charts/airflow/templates/NOTES.txt | 50 +- charts/airflow/templates/_helpers.tpl | 193 -- charts/airflow/templates/_helpers/common.tpl | 106 ++ charts/airflow/templates/_helpers/pods.tpl | 332 ++++ .../templates/_helpers/validate-values.tpl | 92 + .../templates/config/configmap-env.yaml | 125 -- .../config/configmap-pod-template.yaml | 18 + .../config/configmap-scripts-git.yaml | 77 - .../templates/config/configmap-scripts.yaml | 60 - .../config/configmap-variables-pools.yaml | 20 - .../templates/config/secret-config.yaml | 165 ++ .../templates/config/secret-connections.yaml | 31 - .../templates/config/secret-known-hosts.yaml | 14 + .../config/secret-webserver-config.yaml | 18 + .../templates/flower/flower-deployment.yaml | 155 +- .../templates/flower/flower-ingress.yaml | 2 +- .../airflow/templates/flower/flower-pdb.yaml | 2 +- .../templates/flower/flower-service.yaml | 2 +- .../jobs/job-create-connections.yaml | 56 + .../templates/jobs/job-create-pools.yaml | 56 + .../templates/jobs/job-create-users.yaml | 55 + .../templates/jobs/job-create-variables.yaml | 55 + .../templates/jobs/job-upgrade-db.yaml | 47 + .../templates/jobs/secret-job-scripts.yaml | 274 +++ .../templates/{pvc.yaml => pvc-dags.yaml} | 2 +- .../airflow/templates/rbac/airflow-role.yaml | 1 + .../scheduler/scheduler-deployment.yaml | 329 +--- .../webserver/webserver-deployment.yaml | 247 +-- .../webserver/webserver-service-monitor.yaml | 6 +- .../templates/worker/worker-service.yaml | 2 +- .../templates/worker/worker-statefulset.yaml | 266 +-- charts/airflow/values.yaml | 1093 ++++++----- ct-config.yaml | 2 +- 44 files changed, 3602 insertions(+), 2744 deletions(-) delete mode 100644 charts/airflow/examples/google-gke/k8s_resources/configmap-webserver.yaml rename charts/airflow/examples/google-gke/k8s_resources/{secret-git-keys.yaml => secret-git-secret.yaml} (88%) create mode 100644 charts/airflow/files/pod_template.kubernetes-helm-yaml create mode 100644 charts/airflow/files/webserver_config.py delete mode 100644 charts/airflow/templates/_helpers.tpl create mode 100644 charts/airflow/templates/_helpers/common.tpl create mode 100644 charts/airflow/templates/_helpers/pods.tpl create mode 100644 charts/airflow/templates/_helpers/validate-values.tpl delete mode 100644 charts/airflow/templates/config/configmap-env.yaml create mode 100644 charts/airflow/templates/config/configmap-pod-template.yaml delete mode 100644 charts/airflow/templates/config/configmap-scripts-git.yaml delete mode 100644 charts/airflow/templates/config/configmap-scripts.yaml delete mode 100644 charts/airflow/templates/config/configmap-variables-pools.yaml create mode 100644 charts/airflow/templates/config/secret-config.yaml delete mode 100644 charts/airflow/templates/config/secret-connections.yaml create mode 100644 
charts/airflow/templates/config/secret-known-hosts.yaml create mode 100644 charts/airflow/templates/config/secret-webserver-config.yaml create mode 100644 charts/airflow/templates/jobs/job-create-connections.yaml create mode 100644 charts/airflow/templates/jobs/job-create-pools.yaml create mode 100644 charts/airflow/templates/jobs/job-create-users.yaml create mode 100644 charts/airflow/templates/jobs/job-create-variables.yaml create mode 100644 charts/airflow/templates/jobs/job-upgrade-db.yaml create mode 100644 charts/airflow/templates/jobs/secret-job-scripts.yaml rename charts/airflow/templates/{pvc.yaml => pvc-dags.yaml} (90%) diff --git a/.github/workflows/lint-test.yaml b/.github/workflows/lint-test.yaml index 14a94c0e..424f7875 100644 --- a/.github/workflows/lint-test.yaml +++ b/.github/workflows/lint-test.yaml @@ -14,21 +14,27 @@ jobs: - name: Install Helm uses: azure/setup-helm@v1 with: - version: v3.4.0 + version: v3.5.3 - - name: Run chart-testing (lint) - id: lint - uses: helm/chart-testing-action@v1.1.0 + - name: Install chart-testing + uses: helm/chart-testing-action@v2.0.1 with: - command: lint - config: ct-config.yaml + version: v3.3.1 + + - name: Run chart-testing (list-changed) + id: list-changed + run: | + changed=$(ct list-changed --config ct-config.yaml) + if [[ -n "$changed" ]]; then + echo "::set-output name=changed::true" + fi + + - name: Run chart-testing (lint) + run: ct lint --config ct-config.yaml - name: Create kind cluster - uses: helm/kind-action@v1.0.0 - if: steps.lint.outputs.changed == 'true' + uses: helm/kind-action@v1.1.0 + if: steps.list-changed.outputs.changed == 'true' - name: Run chart-testing (install) - uses: helm/chart-testing-action@v1.1.0 - with: - command: install - config: ct-config.yaml + run: ct install --config ct-config.yaml diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 23e51878..b5f0818d 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -22,13 +22,13 @@ jobs: - name: Install Helm uses: azure/setup-helm@v1 with: - version: v3.4.0 + version: v3.5.3 - name: Add dependency chart repos run: | helm repo add stable https://charts.helm.sh/stable - name: Run chart-releaser - uses: helm/chart-releaser-action@v1.0.0 + uses: helm/chart-releaser-action@v1.2.0 env: CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}" diff --git a/charts/airflow/Chart.yaml b/charts/airflow/Chart.yaml index ccc916d3..78b3c85c 100644 --- a/charts/airflow/Chart.yaml +++ b/charts/airflow/Chart.yaml @@ -1,8 +1,8 @@ apiVersion: v1 description: airflow is a platform to programmatically author, schedule, and monitor workflows name: airflow -version: 7.16.0 -appVersion: 1.10.12 +version: 8.0.0 +appVersion: 2.0.1 icon: https://airflow.apache.org/_images/pin_large.png home: https://airflow.apache.org/ maintainers: diff --git a/charts/airflow/README.md b/charts/airflow/README.md index 503380b8..2e148705 100644 --- a/charts/airflow/README.md +++ b/charts/airflow/README.md @@ -1,311 +1,595 @@ # Airflow Helm Chart -> ⚠️ this chart is the continuation of [stable/airflow](https://github.com/helm/charts/tree/master/stable/airflow), see [issue #6](https://github.com/airflow-helm/charts/issues/6) for upgrade guide +> ⚠️ this chart is the continuation of [stable/airflow](https://github.com/helm/charts/tree/master/stable/airflow), see [issue #6](https://github.com/airflow-helm/charts/issues/6) for upgrade guide from the old chart [Airflow](https://airflow.apache.org/) is a platform to programmatically author, schedule, and monitor 
workflows. -## Usage +--- -### 1 - Add the Repo +### 1 - Add the Repo: ```sh helm repo add airflow-stable https://airflow-helm.github.io/charts helm repo update ``` -### 2 - Install the Chart +### 2 - Install the Chart: + +> ⚠️ find values for `CHART_VERSION` under [GitHub Releases](https://github.com/airflow-helm/charts/releases) ```sh +export RELEASE_NAME=my-airflow-cluster # set a name! +export NAMESPACE=my-airflow-namespace # set a namespace! +export CHART_VERSION=8.X.X # set a version! +export VALUES_FILE=./custom-values.yaml # set your values file path! + # Helm 3 helm install \ - [RELEASE_NAME] \ + $RELEASE_NAME \ airflow-stable/airflow \ - --version [VERSION] \ - --namespace [NAMESPACE] \ - --values ./custom-values.yaml + --namespace $NAMESPACE \ + --version $CHART_VERSION \ + --values $VALUES_FILE # Helm 2 helm install \ airflow-stable/airflow \ - --name [RELEASE_NAME] \ - --version [VERSION] \ - --namespace [NAMESPACE] \ - --values ./custom-values.yaml + --name $RELEASE_NAME \ + --namespace $NAMESPACE \ + --version $CHART_VERSION \ + --values $VALUES_FILE ``` -### 3 - Run commands in Webserver Pod +### 3 - Access the WebUI + +> ⚠️ browse to http://localhost:8080 after running the following commands ```sh -kubectl exec \ - -it \ - --namespace [NAMESPACE] \ - --container airflow-web \ - Deployment/[RELEASE_NAME]-web \ - /bin/bash +export NAMESPACE=my-airflow-namespace # set a namespace! -# then run commands like -airflow create_user ... +export POD_NAME=$(kubectl get pods --namespace $NAMESPACE -l "component=web,app=airflow" -o jsonpath="{.items[0].metadata.name}") +kubectl port-forward --namespace $NAMESPACE $POD_NAME 8080:8080 ``` ---- +__NOTE:__ +- default credentials -- user: __admin__ - password: __admin__ +- [How to create airflow users?](#how-to-create-airflow-users) +- [How to authenticate airflow users with LDAP/OAUTH?](#how-to-authenticate-airflow-users-with-ldapoauth) -## Upgrade Steps - -Find chart version numbers under [GitHub Releases](https://github.com/airflow-helm/charts/releases): - -- [v7.15.X → v7.16.0](UPGRADE.md#v715x--v7160) -- [v7.14.X → v7.15.0](UPGRADE.md#v714x--v7150) -- [v7.13.X → v7.14.0](UPGRADE.md#v713x--v7140) -- [v7.12.X → v7.13.0](UPGRADE.md#v712x--v7130) -- [v7.11.X → v7.12.0](UPGRADE.md#v711x--v7120) -- [v7.10.X → v7.11.0](UPGRADE.md#v710x--v7110) -- [v7.9.X → v7.10.0](UPGRADE.md#v79x--v7100) -- [v7.8.X → v7.9.0](UPGRADE.md#v78x--v790) -- [v7.7.X → v7.8.0](UPGRADE.md#v77x--v780) -- [v7.6.X → v7.7.0](UPGRADE.md#v76x--v770) -- [v7.5.X → v7.6.0](UPGRADE.md#v75x--v760) -- [v7.4.X → v7.5.0](UPGRADE.md#v74x--v750) -- [v7.3.X → v7.4.0](UPGRADE.md#v73x--v740) -- [v7.2.X → v7.3.0](UPGRADE.md#v72x--v730) -- [v7.1.X → v7.2.0](UPGRADE.md#v71x--v720) -- [v7.0.X → v7.1.0](UPGRADE.md#v70x--v710) -- [v6.X.X → v7.0.0](UPGRADE.md#v6xx--v700) ---- +# Documentation -## Example Values +## Upgrade Guides -Here are some starting points for your `custom-values.yaml`: +Old Version | New Version | Upgrade Guide +--- | --- | --- +v7.15.X | v8.0.0 | [link](UPGRADE.md#v715x--v800) +v7.14.X | v7.15.0 | [link](UPGRADE.md#v714x--v7150) +v7.13.X | v7.14.0 | [link](UPGRADE.md#v713x--v7140) +v7.12.X | v7.13.0 | [link](UPGRADE.md#v712x--v7130) +v7.11.X | v7.12.0 | [link](UPGRADE.md#v711x--v7120) +v7.10.X | v7.11.0 | [link](UPGRADE.md#v710x--v7110) +v7.9.X | v7.10.0 | [link](UPGRADE.md#v79x--v7100) -| Name | File | Description | -| --- | --- | --- | -| (CeleryExecutor) Minimal | [examples/minikube/custom-values.yaml](examples/minikube/custom-values.yaml) | a __non-production__ 
starting point | -| (CeleryExecutor) Google Cloud | [examples/google-gke/custom-values.yaml](examples/google-gke/custom-values.yaml) | a __production__ starting point for GKE on Google Cloud | +## Examples ---- +Description | Example `values.yaml` +--- | --- +A __non-production__ starting point for use with minikube (CeleryExecutor) | [link](examples/minikube/custom-values.yaml) +A __production__ starting point for GKE on Google Cloud (CeleryExecutor) | [link](examples/google-gke/custom-values.yaml) + +## Airflow Configs -## Docs (Airflow) - Configs +### How to set airflow configs? +
-While we don't expose the `airflow.cfg` directly, you can use [environment variables](https://airflow.apache.org/docs/stable/howto/set-config.html) to set Airflow configs.
+While we don't expose the "airflow.cfg" file directly, you can use [environment variables](https://airflow.apache.org/docs/stable/howto/set-config.html) to set Airflow configs.
 
-We expose the `airflow.config` value to make this easier:
+The `airflow.config` value makes this easier: each key-value pair is mounted as an environment variable on each scheduler/web/worker/flower Pod:
 
 ```yaml
 airflow:
   config:
-    ## Security
-    AIRFLOW__CORE__SECURE_MODE: "True"
-    AIRFLOW__API__AUTH_BACKEND: "airflow.api.auth.backend.deny_all"
+    ## security
     AIRFLOW__WEBSERVER__EXPOSE_CONFIG: "False"
-    AIRFLOW__WEBSERVER__RBAC: "False"
-
-    ## DAGS
-    AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL: "30"
+
+    ## dags
     AIRFLOW__CORE__LOAD_EXAMPLES: "False"
-
-    ## Email (SMTP)
+    AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL: "30"
+
+    ## email
     AIRFLOW__EMAIL__EMAIL_BACKEND: "airflow.utils.email.send_email_smtp"
     AIRFLOW__SMTP__SMTP_HOST: "smtpmail.example.com"
-    AIRFLOW__SMTP__SMTP_STARTTLS: "False"
-    AIRFLOW__SMTP__SMTP_SSL: "False"
-    AIRFLOW__SMTP__SMTP_PORT: "25"
     AIRFLOW__SMTP__SMTP_MAIL_FROM: "admin@example.com"
-
-    ## Disable noisy "Handling signal: ttou" Gunicorn log messages
-    GUNICORN_CMD_ARGS: "--log-level WARNING"
+    AIRFLOW__SMTP__SMTP_PORT: "25"
+    AIRFLOW__SMTP__SMTP_SSL: "False"
+    AIRFLOW__SMTP__SMTP_STARTTLS: "False"
+
+    ## domain used in airflow emails
+    AIRFLOW__WEBSERVER__BASE_URL: "http://airflow.example.com"
+
+    ## other environment variables
+    HTTP_PROXY: "http://proxy.example.com:8080"
 ```
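To check what a running deployment actually picked up, you can run the Airflow 2 `airflow config list` command inside one of the airflow Pods; a minimal sketch, assuming the chart's default `component=scheduler,app=airflow` Pod labels:
```sh
export NAMESPACE=my-airflow-namespace  # set a namespace!

# NOTE: the label selector below is an assumption, adjust it to match your Pods
export POD_NAME=$(kubectl get pods --namespace $NAMESPACE -l "component=scheduler,app=airflow" -o jsonpath="{.items[0].metadata.name}")
kubectl exec --namespace $NAMESPACE $POD_NAME -- airflow config list
```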
+
-### Option 1 - Values.yaml +### How to store DAGs? +
-We expose the `scheduler.connections` value to specify [Airflow Connections](https://airflow.apache.org/docs/stable/concepts.html#connections), which will be automatically imported by the airflow-scheduler when it starts up. +

#### Option 1a - SSH git-sync sidecar (recommended)

-By default, we will delete and re-create connections each time the airflow-scheduler restarts. -(If you want to manually modify a connection in the WebUI, you should disable this behaviour by setting `scheduler.refreshConnections` to `false`) +This method uses an SSH git-sync sidecar to sync your git repo into the dag folder every `dags.gitSync.syncWait` seconds. -For example, to add a connection called `my_aws`: +For example: ```yaml -scheduler: - connections: - - id: my_aws - type: aws - extra: | - { - "aws_access_key_id": "XXXXXXXX", - "aws_secret_access_key": "XXXXXXXX", - "region_name":"eu-central-1" - } +airflow: + config: + AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL: 60 + +dags: + gitSync: + enabled: true + repo: "git@github.com:USERNAME/REPOSITORY.git" + branch: "master" + revision: "HEAD" + syncWait: 60 + sshSecret: "airflow-ssh-git-secret" + sshSecretKey: "id_rsa" + + # "known_hosts" verification can be disabled by setting to "" + sshKnownHosts: |- + github.com ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAq2A7hRGmdnm9tUDbO9IDSwBK6TbQa+PXYPCPy6rbTrTtw7PHkccKrpp0yVhp5HdEIcKr6pLlVDBfOLX9QUsyCOV0wzfjIJNlGEYsdlLJizHhbn2mUjvSAHQqZETYP81eFzLQNnPHt4EVVUh7VfDESU84KezmD5QlWpXLmvU31/yMf+Se8xhHTvKSCZIFImWwoG6mbUoWf9nzpIoaSjB+weqqUUmpaaasXVal72J+UX2B+2RPW3RcT0eOzQgqlJL3RKrTJvdsjE3JEAvGq3lGHSZXy28G3skua2SmVi/w4yCE6gbODqnTWlg7+wC604ydGXA8VJiS5ap43JXiUFFAaQ== +``` + +You can create the `airflow-ssh-git-secret` Secret using: +```console +kubectl create secret generic \ + airflow-ssh-git-secret \ + --from-file=id_rsa=$HOME/.ssh/id_rsa \ + --namespace my-airflow-namespace ``` -### Option 2 - Kubernetes Secret +

#### Option 1b - HTTP git-sync sidecar

-If you don't want to store connections in your `values.yaml`, use `scheduler.existingSecretConnections` to specify the name of an existing Kubernetes Secret containing an `add-connections.sh` script. -Note, your script will be run EACH TIME the airflow-scheduler Pod restarts, and `scheduler.connections` will not longer work. +This method uses an HTTP git sidecar to sync your git repo into the dag folder every `dags.gitSync.syncWait` seconds. -Here is an example Secret you might create: -```yaml -apiVersion: v1 -kind: Secret -metadata: - name: my-airflow-connections -type: Opaque -stringData: - add-connections.sh: | - #!/usr/bin/env bash - - # remove any existing connection - airflow connections --delete \ - --conn_id "my_aws" - - # re-add your custom connection - airflow connections --add \ - --conn_id "my_aws" \ - --conn_type "aws" \ - --conn_extra "{\"aws_access_key_id\": \"XXXXXXXX\", \"aws_secret_access_key\": \"XXXXXXXX\", \"region_name\":\"eu-central-1\"}" -``` - -## Docs (Airflow) - Variables - -We expose the `scheduler.variables` value to specify [Airflow Variables](https://airflow.apache.org/docs/stable/concepts.html#variables), which will be automatically imported by the airflow-scheduler when it starts up. - -For example, to specify a variable called `environment`: +For example: ```yaml -scheduler: - variables: | - { "environment": "dev" } +airflow: + config: + AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL: 60 + +dags: + gitSync: + enabled: true + repo: "https://github.com/USERNAME/REPOSITORY.git" + branch: "master" + revision: "HEAD" + syncWait: 60 + httpSecret: "airflow-http-git-secret" + httpSecretUsernameKey: username + httpSecretPasswordKey: password ``` -## Docs (Airflow) - Pools +You can create `airflow-http-git-secret` Secret using: +```console +kubectl create secret generic \ + airflow-http-git-secret \ + --from-literal=username=MY_GIT_USERNAME \ + --from-literal=password=MY_GIT_TOKEN \ + --namespace my-airflow-namespace +``` -We expose the `scheduler.pools` value to specify [Airflow Pools](https://airflow.apache.org/docs/stable/concepts.html#pools), which will be automatically imported by the Airflow scheduler when it starts up. +

#### Option 2 - shared volume

-For example, to create a pool called `example`: +With this method, you store your DAGs in a Kubernetes PersistentVolume, which is mounted to all scheduler/web/worker Pods. + +You must configure some external system to ensure this volume has your latest DAGs, for example, you could use your CI/CD pipeline system to preform a sync as changes are pushed to your DAGs git repo. + +> ⚠️ the PVC needs to have `accessMode` = `ReadOnlyMany` (or `ReadWriteMany`) +> +> Different StorageClasses support different [access-modes](https://kubernetes.io/docs/concepts/storage/persistent-volumes/#access-modes). +> For Kubernetes on public cloud, a persistent volume controller is likely built in, so check the available access-modes: [Amazon EKS](https://docs.aws.amazon.com/eks/latest/userguide/storage-classes.html), [Azure AKS](https://docs.microsoft.com/en-us/azure/aks/azure-files-dynamic-pv), [Google GKE](https://cloud.google.com/kubernetes-engine/docs/concepts/persistent-volumes) + +Example values to use a StorageClass called `default`: ```yaml -scheduler: - pools: | - { - "example": { - "description": "This is an example pool with 2 slots.", - "slots": 2 - } - } +dags: + persistence: + enabled: true + storageClass: default + accessMode: ReadOnlyMany + size: 1Gi ``` -## Docs (Airflow) - Environment Variables +

#### Option 3 - embedded into container image

+ +This method stores your DAGs inside the container image. + +> ⚠️ this chart uses the official [apache/airflow](https://hub.docker.com/r/apache/airflow) images, consult airflow's official [docs about custom images](https://airflow.apache.org/docs/apache-airflow/2.0.1/production-deployment.html#production-container-images) + +For example, extending `airflow:2.0.1-python3.8` with some dags: +```docker +FROM apache/airflow:2.0.1-python3.8 -We expose the `airflow.extraEnv` value to mount extra environment variables, this can be used to pass sensitive configs to Airflow. +# NOTE: dag path is set with the `dags.path` value +COPY ./my_dag_folder /opt/airflow/dags +``` -For example, passing a Fernet key and LDAP password, (the `airflow` and `ldap` Kubernetes Secrets must already exist): +Then use this container image with the chart: ```yaml airflow: - extraEnv: - - name: AIRFLOW__CORE__FERNET_KEY - valueFrom: - secretKeyRef: - name: airflow - key: fernet-key - - name: AIRFLOW__LDAP__BIND_PASSWORD - valueFrom: - secretKeyRef: - name: ldap - key: password + image: + repository: MY_REPO + tag: MY_TAG ``` -## Docs (Airflow) - ConfigMaps +
+
+ +### How to install extra pip packages? +
+ +

#### Option 1 - use init-containers

+ +> 🛑️️️ seriously consider the implications of having each Pod run `pip install` before using this feature in production -We expose the `airflow.extraConfigmapMounts` value to mount extra Kubernetes ConfigMaps. +You can use the `airflow.extraPipPackages` value to install pip packages on all Pods, you can also use the more specific `scheduler.extraPipPackages`, `web.extraPipPackages`, `worker.extraPipPackages` and `flower.extraPipPackages`. -For example, a `webserver_config.py` file: +Packages defined with the more specific values will take precedence over `airflow.extraPipPackages`, as they are listed at the end of the `pip install ...` command, and pip takes the package version which is __defined last__. + +For example, installing the `airflow-exporter` package on all scheduler/web/worker/flower Pods: ```yaml airflow: - extraConfigmapMounts: - - name: my-webserver-config - mountPath: /opt/airflow/webserver_config.py - configMap: my-airflow-webserver-config - readOnly: true - subPath: webserver_config.py + extraPipPackages: + - "airflow-exporter~=1.4.1" ``` -To create the `my-airflow-webserver-config` ConfigMap, you could use: -```console -kubectl create configmap \ - my-airflow-webserver-config \ - --from-file=webserver_config.py \ - --namespace airflow +For example, installing PyTorch on the scheduler/worker Pods only: +```yaml +scheduler: + extraPipPackages: + - "torch~=1.8.0" + +worker: + extraPipPackages: + - "torch~=1.8.0" +``` + +

#### Option 2 - embedded into container image (recommended)

+ +You can extend the airflow container image with your pip packages. + +> ⚠️ this chart uses the official [apache/airflow](https://hub.docker.com/r/apache/airflow) images, consult airflow's official [docs about custom images](https://airflow.apache.org/docs/apache-airflow/2.0.1/production-deployment.html#production-container-images) + +For example, extending `airflow:2.0.1-python3.8` with the `torch` package: +```docker +FROM apache/airflow:2.0.1-python3.8 + +# install your pip packages +RUN pip install torch~=1.8.0 ``` -## Docs (Airflow) - Install Python Packages +Then use this container image with the chart: +```yaml +airflow: + image: + repository: MY_REPO + tag: MY_TAG +``` -We expose the `airflow.extraPipPackages` and `web.extraPipPackages` values to install Python Pip packages, these will work with any pip package that you can install with `pip install XXXX`. +
+
-For example, enabling the airflow `airflow-exporter` package: +### How to create airflow users? +
You can use the `airflow.users` value to create airflow users with a post-install/post-upgrade helm hook Job.

> ⚠️ if you need to edit the users in the web-ui (for example, to change their password), you should set `airflow.usersUpdate` to `false`

For example, to create `admin` (with "Admin" RBAC role) and `user` (with "User" RBAC role):
```yaml
airflow:
  users:
    - username: admin
      password: admin
      role: Admin
      email: admin@example.com
      firstName: admin
      lastName: admin
    - username: user
      password: user123
      role: User
      email: user@example.com
      firstName: user
      lastName: user

  ## if we update users or just create them the first time (lookup by `username`)
  usersUpdate: true
```
+
+ +### How to authenticate airflow users with LDAP/OAUTH? +
You can use the `web.webserverConfig.*` values to adjust the Flask-AppBuilder `webserver_config.py` file; you can read Flask-AppBuilder's security docs [here](https://flask-appbuilder.readthedocs.io/en/latest/security.html).

> 🛑️️ if you set up LDAP/OAUTH, you should set `airflow.users` to `[]` (and delete any previously created users)

> ⚠️ the version of Flask-AppBuilder installed by airflow might not be the latest, but you can use `web.extraPipPackages` to install a newer version, if needed

For example, to integrate with a typical Microsoft Active Directory using `AUTH_LDAP`:
```yaml
web:
  extraPipPackages:
    ## the following configs require Flask-AppBuilder 3.2.0 (or later)
    - "Flask-AppBuilder~=3.2.0"
    ## the following configs require python-ldap
    - "python-ldap~=3.3.1"

  webserverConfig:
    stringOverride: |-
      from airflow import configuration as conf
      from flask_appbuilder.security.manager import AUTH_LDAP

      SQLALCHEMY_DATABASE_URI = conf.get('core', 'SQL_ALCHEMY_CONN')

      AUTH_TYPE = AUTH_LDAP
      AUTH_LDAP_SERVER = "ldap://ldap.example.com"
      AUTH_LDAP_USE_TLS = False

      # registration configs
      AUTH_USER_REGISTRATION = True  # allow users who are not already in the FAB DB
      AUTH_USER_REGISTRATION_ROLE = "Public"  # this role will be given in addition to any AUTH_ROLES_MAPPING
      AUTH_LDAP_FIRSTNAME_FIELD = "givenName"
      AUTH_LDAP_LASTNAME_FIELD = "sn"
      AUTH_LDAP_EMAIL_FIELD = "mail"  # if null in LDAP, email is set to: "{username}@email.notfound"

      # bind username (for password validation)
      AUTH_LDAP_USERNAME_FORMAT = "uid=%s,ou=users,dc=example,dc=com"  # %s is replaced with the provided username
      # AUTH_LDAP_APPEND_DOMAIN = "example.com"  # bind usernames will look like: {USERNAME}@example.com

      # search configs
      AUTH_LDAP_SEARCH = "ou=users,dc=example,dc=com"  # the LDAP search base (if non-empty, a search will ALWAYS happen)
      AUTH_LDAP_UID_FIELD = "uid"  # the username field

      # a mapping from LDAP DN to a list of FAB roles
      AUTH_ROLES_MAPPING = {
          "cn=airflow_users,ou=groups,dc=example,dc=com": ["User"],
          "cn=airflow_admins,ou=groups,dc=example,dc=com": ["Admin"],
      }

      # the LDAP user attribute which has their role DNs
      AUTH_LDAP_GROUP_FIELD = "memberOf"

      # if we should replace ALL the user's roles each login, or only on registration
      AUTH_ROLES_SYNC_AT_LOGIN = True

      # force users to re-auth after 30min of inactivity (to keep roles in sync)
      PERMANENT_SESSION_LIFETIME = 1800
```

For example, to integrate with Okta using `AUTH_OAUTH`:
```yaml
web:
  extraPipPackages:
    ## the following configs require Flask-AppBuilder 3.2.0 (or later)
    - "Flask-AppBuilder~=3.2.0"
    ## the following configs require Authlib
    - "Authlib~=0.15.3"

  webserverConfig:
    stringOverride: |-
      from airflow import configuration as conf
      from flask_appbuilder.security.manager import AUTH_OAUTH

      SQLALCHEMY_DATABASE_URI = conf.get('core', 'SQL_ALCHEMY_CONN')

      AUTH_TYPE = AUTH_OAUTH

      # registration configs
      AUTH_USER_REGISTRATION = True  # allow users who are not already in the FAB DB
      AUTH_USER_REGISTRATION_ROLE = "Public"  # this role will be given in addition to any AUTH_ROLES_MAPPING

      # the list of providers which the user can choose from
      OAUTH_PROVIDERS = [
          {
              'name': 'okta',
              'icon': 'fa-circle-o',
              'token_key': 'access_token',
              'remote_app': {
                  'client_id': 'OKTA_KEY',
                  'client_secret': 'OKTA_SECRET',
                  'api_base_url': 'https://OKTA_DOMAIN.okta.com/oauth2/v1/',
                  'client_kwargs': {
                      'scope': 'openid profile email groups'
                  },
                  'access_token_url': 'https://OKTA_DOMAIN.okta.com/oauth2/v1/token',
                  'authorize_url': 'https://OKTA_DOMAIN.okta.com/oauth2/v1/authorize',
              }
          }
      ]

      # a mapping from the values of `userinfo["role_keys"]` to a list of FAB roles
      AUTH_ROLES_MAPPING = {
          "FAB_USERS": ["User"],
          "FAB_ADMINS": ["Admin"],
      }

      # if we should replace ALL the user's roles each login, or only on registration
      AUTH_ROLES_SYNC_AT_LOGIN = True

      # force users to re-auth after 30min of inactivity (to keep roles in sync)
      PERMANENT_SESSION_LIFETIME = 1800
```
+
-## Docs (Kubernetes) - Ingress +### How to set a custom fernet (encryption) key? +
-This chart provides an optional Kubernetes Ingress resource, for accessing airflow-webserver and airflow-flower outside of the cluster. +

#### Option 1 - using value

-### URL Prefix: +You can customize the fernet encryption key using the `airflow.fernetKey` value, which sets the `AIRFLOW__CORE__FERNET_KEY` environment variable. -If you already have something hosted at the root of your domain, you might want to place airflow under a URL-prefix: -- http://example.com/airflow/ -- http://example.com/airflow/flower +For example: +```yaml +aiflow: + fernetKey: "7T512UXSSmBOkpWimFHIVb8jK6lfmSAvx4mO6Arehnc=" +``` -In this example, would set these values: +

#### Option 2 - using secret (recommended)

You can customize the fernet encryption key by pre-creating a Secret and referencing it with the `airflow.extraEnv` value.

For example, if the Secret `airflow-fernet-key` already exists, and contains a key called `value`:
```yaml
airflow:
  extraEnv:
    - name: AIRFLOW__CORE__FERNET_KEY
      valueFrom:
        secretKeyRef:
          name: airflow-fernet-key
          key: value
```
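For example, you could create the `airflow-fernet-key` Secret with:
```console
kubectl create secret generic \
  airflow-fernet-key \
  --from-literal=value=MY_FERNET_KEY \
  --namespace my-airflow-namespace
```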
+
-ingress: - web: - path: "/airflow" +### How to create airflow connections? +
- flower: - path: "/airflow/flower" +You can use the `airflow.connections` value to create airflow [Connections](https://airflow.apache.org/docs/apache-airflow/stable/concepts.html#connections) with a post-install/post-update helm hook Job. + +> ⚠️ if you need to edit the connections in the web-ui (for example, to add a sensitive password), you should set `airflow.connectionsUpdate` to `false` + +For example, to create connections called `my_aws`, `my_gcp`, `my_postgres`, and `my_ssh`: +```yaml +scheduler: + connections: + ## see docs: https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/connections/aws.html + - id: my_aws + type: aws + description: my AWS connection + extra: |- + { "aws_access_key_id": "XXXXXXXX", + "aws_secret_access_key": "XXXXXXXX", + "region_name":"eu-central-1" } + ## see docs: https://airflow.apache.org/docs/apache-airflow-providers-google/stable/connections/gcp.html + - id: my_gcp + type: google_cloud_platform + description: my GCP connection + extra: |- + { "extra__google_cloud_platform__keyfile_dict": "XXXXXXXX", + "extra__google_cloud_platform__num_retries: "XXXXXXXX" } + ## see docs: https://airflow.apache.org/docs/apache-airflow-providers-postgres/stable/connections/postgres.html + - id: my_postgres + type: postgres + description: my Postgres connection + host: postgres.example.com + port: 5432 + login: db_user + password: db_pass + schema: my_db + extra: |- + { "sslmode": "allow" } + ## see docs: https://airflow.apache.org/docs/apache-airflow-providers-ssh/stable/connections/ssh.html + - id: my_ssh + type: ssh + description: my SSH connection + host: ssh.example.com + port: 22 + login: ssh_user + password: ssh_pass + extra: |- + { "timeout": "15" } + + ## if we update connections or just create them the first time (lookup by `id`) + connectionsUpdate: true ``` -### Custom Paths: +
+
-We expose the `ingress.web.precedingPaths` and `ingress.web.succeedingPaths` values, which are __before__ and __after__ the default path respectively. +### How to create airflow variables? +
-A common use-case is enabling https with the `aws-alb-ingress-controller` [ssl-redirect](https://kubernetes-sigs.github.io/aws-alb-ingress-controller/guide/tasks/ssl_redirect/), which needs a redirect path to be hit before the airflow-webserver one. +You can use the `airflow.variables` value to create airflow [Variables](https://airflow.apache.org/docs/apache-airflow/stable/concepts.html#variables) with a post-install/post-update helm hook Job. -You would set the values of `precedingPaths` as the following: +> ⚠️ if you need to edit the variables in the web-ui, you should set `airflow.variablesUpdate` to `false` + +For example, to create variables called `var_1`, `var_2`: ```yaml -ingress: - web: - precedingPaths: - - path: "/*" - serviceName: "ssl-redirect" - servicePort: "use-annotation" +airflow: + variables: + - key: "var_1" + value: "my_value_1" + - key: "var_2" + value: "my_value_2" + + ## if we update variables or just create them the first time (lookup by `key`) + variablesUpdate: true ``` -## Docs (Kubernetes) - Worker Autoscaling +
+
-We use a Kubernetes StatefulSet for the Celery workers, this allows the webserver to requests logs from each workers individually, with a fixed DNS name. +### How to create airflow pools? +
-Celery workers can be scaled using the [Horizontal Pod Autoscaler](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/). -To enable autoscaling, you must set `workers.autoscaling.enabled=true`, then provide `workers.autoscaling.maxReplicas`, and `workers.replicas` for the minimum amount. +You can use the `airflow.pools` value to create airflow [Pools](https://airflow.apache.org/docs/apache-airflow/stable/concepts.html#pools) with a post-install/post-update helm hook Job. -Assume every task a worker executes consumes approximately `200Mi` memory, that means memory is a good metric for utilisation monitoring. -For a worker pod you can calculate it: `WORKER_CONCURRENCY * 200Mi`, so for `10 tasks` a worker will consume `~2Gi` of memory. +> ⚠️ if you need to edit the variables in the web-ui, you should set `airflow.poolsUpdate` to `false` + +For example, to create pools called `pool_1`, `pool_2`: +```yaml +airflow: + variables: + - name: "pool_1" + slots: 5 + description: "example pool with 5 slots" + - name: "pool_2" + slots: 10 + description: "example pool with 10 slots" + + ## if we update pools or just create them the first time (lookup by `name`) + poolsUpdate: true +``` +
+
+ +### How to set up celery worker autoscaling? +
The Airflow Celery Workers can be scaled using the [Horizontal Pod Autoscaler](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/). To enable autoscaling, you must set `workers.autoscaling.enabled=true`, then provide `workers.autoscaling.maxReplicas`.

Assume every task a worker executes consumes approximately `200Mi` memory; that makes memory a good metric for utilisation monitoring.
For a worker pod you can calculate it: `WORKER_CONCURRENCY * 200Mi`, so for `10 tasks` a worker will consume `~2Gi` of memory. In the following config, if a worker consumes `80%` of `2Gi` (which will happen if it runs 9-10 tasks at the same time), an autoscaling event will be triggered, and a new worker will be added. If you have many tasks in a queue, Kubernetes will keep adding workers until maxReplicas is reached, in this case `16`.
```yaml
@@ -338,8 +622,8 @@ workers:
   ## wait at most 9min for running tasks to complete before SIGTERM
   ## WARNING:
-  ##  - some cluster-autoscaler (GKE) will not respect graceful
-  ##    termination periods over 10min
+  ##  - some cloud cluster-autoscaler configs will not respect graceful termination
+  ##    longer than 10min, for example, Google Kubernetes Engine (GKE)
   gracefullTermination: true
   gracefullTerminationPeriod: 540
@@ -347,94 +631,125 @@ workers:
   terminationPeriod: 60
 
 dags:
-  git:
-    gitSync:
-      resources:
-        requests:
-          ## IMPORTANT! for autoscaling to work
-          memory: "64Mi"
+  gitSync:
+    resources:
+      requests:
+        ## IMPORTANT! for autoscaling to work with gitSync
+        memory: "64Mi"
 ```
+
-We expose the `workers.secrets` value to allow mounting secrets at `{workers.secretsDir}/` in airflow-worker Pods. +### How to persist Airflow logs (recommended)? +
-For example, mounting password Secrets: +> 🛑️️ you should persist logs in a production deployment using one of the following methods +> +> By default, logs from the airflow-web/scheduler/worker are written within the Docker container's filesystem, therefore any restart of the pod will wipe the logs. + +

#### Option 1 - Kubernetes PVC

Example using a 1Gi Kubernetes PVC:
```yaml
logs:
  persistence:
    enabled: true
    storageClass: ""  ## WARNING: your StorageClass MUST SUPPORT `ReadWriteMany`
    accessMode: ReadWriteMany
    size: 1Gi
```

#### Option 2 - Remote Bucket (recommended)

-def get_secret(secret_name): - secrets_dir = Path('/var/airflow/secrets') - secret_path = secrets_dir / secret_name - assert secret_path.exists(), f'could not find {secret_name} at {secret_path}' - secret_data = secret_path.read_text().strip() - return secret_data +You must give airflow credentials for it to read/write on the remote bucket, this can be achieved with `AIRFLOW__LOGGING__REMOTE_LOG_CONN_ID`, or by using something like [Workload Identity (GKE)](https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity), or [IAM Roles for Service Accounts (EKS)](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html). -redshift_user = get_secret('redshift-user') +Example, using `AIRFLOW__LOGGING__REMOTE_LOG_CONN_ID` (can be used with S3 + AWS connection too): +```yaml +airflow: + config: + AIRFLOW__LOGGING__REMOTE_LOGGING: "True" + AIRFLOW__LOGGING__REMOTE_BASE_LOG_FOLDER: "gs://<>/airflow/logs" + AIRFLOW__LOGGING__REMOTE_LOG_CONN_ID: "my_gcp" + + connections: + ## see docs: https://airflow.apache.org/docs/apache-airflow-providers-google/stable/connections/gcp.html + - id: my_gcp + type: google_cloud_platform + description: my GCP connection + extra: |- + { "extra__google_cloud_platform__keyfile_dict": "XXXXXXXX", + "extra__google_cloud_platform__keyfile_dict: "XXXXXXXX", + "extra__google_cloud_platform__num_retries": "5" } ``` + +Example, using [Workload Identity (GKE)](https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity): +```yaml +airflow: + config: + AIRFLOW__LOGGING__REMOTE_LOGGING: "True" + AIRFLOW__LOGGING__REMOTE_BASE_LOG_FOLDER: "gs://<>/airflow/logs" + AIRFLOW__LOGGING__REMOTE_LOG_CONN_ID: "google_cloud_default" -To create the `redshift-user` Secret, you could use: -```console -kubectl create secret generic \ - redshift-user \ - --from-literal=redshift-user=MY_REDSHIFT_USERNAME \ - --namespace airflow +serviceAccount: + annotations: + iam.gke.io/gcp-service-account: "<>@<>.iam.gserviceaccount.com" ``` -## Docs (Kubernetes) - Additional Manifests +Example, using [IAM Roles for Service Accounts (EKS)](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html): +```yaml +airflow: + config: + AIRFLOW__LOGGING__REMOTE_LOGGING: "True" + AIRFLOW__LOGGING__REMOTE_BASE_LOG_FOLDER: "s3://<>/airflow/logs" + AIRFLOW__LOGGING__REMOTE_LOG_CONN_ID: "aws_default" -We expose the `extraManifests.[]` value to add custom Kubernetes manifests to the chart. +scheduler: + securityContext: + fsGroup: 65534 -For example, adding a `BackendConfig` resource for GKE: -```yaml -extraManifests: - - apiVersion: cloud.google.com/v1beta1 - kind: BackendConfig - metadata: - name: "{{ .Release.Name }}-test" - spec: - securityPolicy: - name: "gcp-cloud-armor-policy-test" -``` +web: + securityContext: + fsGroup: 65534 ---- +workers: + securityContext: + fsGroup: 65534 -## Docs (Database) - DB Initialization +serviceAccount: + annotations: + eks.amazonaws.com/role-arn: "arn:aws:iam::XXXXXXXXXX:role/<>" +``` -If the value `scheduler.initdb` is set to `true` (this is the default), the airflow-scheduler container will run `airflow upgradedb || airflow db upgrade` as part of its startup script. +
+
-If the value `scheduler.preinitdb` is set to `true`, then we ALSO RUN `airflow upgradedb || airflow db upgrade` in an init-container (retrying 5 times). -This is unusually NOT necessary unless your synced DAGs include custom database hooks that prevent `airflow upgradedb || airflow db upgrade` from running. +## Database Configs -## Docs (Database) - Passwords +### How to use the embedded Postgres? +
-PostgreSQL is the default database in this chart, because we use insecure username/password combinations by default, you should create secure credentials before installing the Helm chart. +> 🛑️️ the embedded Postgres is NOT SUITABLE for production, you should configure one of the external databases -Example bash command to create the required Kubernetes Secrets: -```console +The embedded Postgres database has an insecure username/password by default, you should create secure credentials before using it. + +For example, to create the required Kubernetes Secrets: +```sh # set postgress password kubectl create secret generic \ airflow-postgresql \ --from-literal=postgresql-password=$(openssl rand -base64 13) \ - --namespace airflow + --namespace my-airflow-namespace # set redis password kubectl create secret generic \ airflow-redis \ --from-literal=redis-password=$(openssl rand -base64 13) \ - --namespace airflow + --namespace my-airflow-namespace ``` Example `values.yaml`, to use those secrets: @@ -446,11 +761,15 @@ redis: existingSecret: airflow-redis ``` -## Docs (Database) - External Database +
+
-> 🛑️️ the embedded PostgreSQL is NOT SUITABLE for production, you should configure one of the following external databases +### How to use an external database (recommended)? +
-### Option 1 - Postgres +

#### Option 1 - Postgres

Example values for an external Postgres database, with an existing `airflow_cluster1` database: ```yaml @@ -464,7 +783,7 @@ externalDatabase: passwordSecretKey: "postgresql-password" ``` -### Option 2 - MySQL +
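You could create the `airflow-cluster1-database-credentials` Secret referenced above with:
```console
kubectl create secret generic \
  airflow-cluster1-database-credentials \
  --from-literal=postgresql-password=MY_DATABASE_PASSWORD \
  --namespace my-airflow-namespace
```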

#### Option 2 - MySQL

> ⚠️ you must set `explicit_defaults_for_timestamp=1` in your MySQL instance, [see here](https://airflow.apache.org/docs/stable/howto/initialize-database.html) @@ -480,414 +799,451 @@ externalDatabase: passwordSecretKey: "mysql-password" ``` ---- - -## Docs (Other) - Log Persistence +
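If you manage the MySQL server yourself, this can be set in your `my.cnf` (managed databases like RDS or Cloud SQL expose it through their parameter/flag systems instead), for example:
```ini
[mysqld]
explicit_defaults_for_timestamp = 1
```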
+
-> 🛑️️ you should persist logs in a production deployment using one of the following methods +## Kubernetes Configs -By default, logs from the airflow-web/scheduler/worker are written within the Docker container's filesystem, therefore any restart of the pod will wipe the logs. +### How to mount ConfigMaps/Secrets as environment variables? +
-### Option 1 - S3/GCS bucket (Recommended) +You can use the `airflow.extraEnv` value to mount extra environment variables with the same structure as [EnvVar in ContainerSpec](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#envvar-v1-core). -You must give airflow credentials for it to read/write on the remote bucket, this can be achieved with `AIRFLOW__CORE__REMOTE_LOG_CONN_ID`, or by using something like [Workload Identity (GKE)](https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity), or [IAM Roles for Service Accounts (EKS)](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html). +This method can be used to pass sensitive configs to Airflow. -Example, using `AIRFLOW__CORE__REMOTE_LOG_CONN_ID` (can be used with AWS too): +For example, if the Secret `airflow-fernet-key` already exist, and contains a key called `value`: ```yaml airflow: - config: - AIRFLOW__CORE__REMOTE_LOGGING: "True" - AIRFLOW__CORE__REMOTE_BASE_LOG_FOLDER: "gs://<>/airflow/logs" - AIRFLOW__CORE__REMOTE_LOG_CONN_ID: "google_cloud_airflow" - -scheduler: - connections: - - id: google_cloud_airflow - type: google_cloud_platform - extra: |- - { - "extra__google_cloud_platform__num_retries": "5", - "extra__google_cloud_platform__keyfile_dict": "{...}" - } + extraEnv: + - name: AIRFLOW__CORE__FERNET_KEY + valueFrom: + secretKeyRef: + name: airflow-fernet-key + key: value ``` - -Example, using [Workload Identity (GKE)](https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity): -```yaml -airflow: - config: - AIRFLOW__CORE__REMOTE_LOGGING: "True" - AIRFLOW__CORE__REMOTE_BASE_LOG_FOLDER: "gs://<>/airflow/logs" - AIRFLOW__CORE__REMOTE_LOG_CONN_ID: "google_cloud_default" -serviceAccount: - annotations: - iam.gke.io/gcp-service-account: "<>@<>.iam.gserviceaccount.com" -``` +
+
-Example, using [IAM Roles for Service Accounts (EKS)](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html): -```yaml -airflow: - config: - AIRFLOW__CORE__REMOTE_LOGGING: "True" - AIRFLOW__CORE__REMOTE_BASE_LOG_FOLDER: "s3://<>/airflow/logs" - AIRFLOW__CORE__REMOTE_LOG_CONN_ID: "aws_default" +### How to mount Secrets/Configmaps as files on workers? +
-scheduler: - securityContext: - fsGroup: 65534 - -web: - securityContext: - fsGroup: 65534 +You can use the `workers.extraVolumeMounts` and `workers.extraVolumes` values to mount Secretes as files. +For example, if the Secret `redshift-creds` already exist, and has keys called `user` and `password`: +```yaml workers: - securityContext: - fsGroup: 65534 + extraVolumeMounts: + - name: redshift-creds + mountPath: /opt/airflow/secrets/redshift-creds + readOnly: true -serviceAccount: - annotations: - eks.amazonaws.com/role-arn: "arn:aws:iam::XXXXXXXXXX:role/<>" + extraVolumes: + - name: redshift-creds + secret: + secretName: redshift-creds ``` -### Option 2 - Kubernetes PVC - -```yaml -logs: - persistence: - enabled: true +You could then read the `/opt/airflow/secrets/redshift-creds` files from within a DAG Python function: +```python +from pathlib import Path +redis_user = Path("/opt/airflow/secrets/redshift-creds/user").read_text().strip() +redis_password = Path("/opt/airflow/secrets/redshift-creds/password").read_text().strip() ``` -## Docs (Other) - Prometheus ServiceMonitor - -A [ServiceMonitor](https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#servicemonitor) is a resource introduced by the [Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator). - -To be able to expose Airflow metrics to Prometheus you will need install a plugin, one option is [epoch8/airflow-exporter](https://github.com/epoch8/airflow-exporter), which exposes dag and task based metrics from Airflow. - -For more information, see the `serviceMonitor` section of `values.yaml`. - ---- +To create the `redshift-creds` Secret, you could use: +```console +kubectl create secret generic \ + redshift-creds \ + --from-literal=user=MY_REDSHIFT_USERNAME \ + --from-literal=password=MY_REDSHIFT_PASSWORD \ + --namespace my-airflow-namespace +``` -## Docs (Other) - DAG Storage +
+
-### Option 1 - Git Sidecar (Recommended) +### How to set up an Ingress? +
-> ⚠️ specifying `known_hosts` inside `dags.git.secret` reduces the possibility of a man-in-the-middle attack, however, if you want to implicitly trust all repo host signatures set `dags.git.sshKeyscan` to `true` +The chart provides the `ingress.*` values for deploying a Kubernetes Ingress to allow access to airflow outside the cluster. -This method places a git sidecar in each worker/scheduler/web Pod, that syncs your git repo into the dag folder every `dags.git.gitSync.refreshTime` seconds. +Consider the situation where you already have something hosted at the root of your domain, you might want to place airflow under a URL-prefix: +- http://example.com/airflow/ +- http://example.com/airflow/flower +In this example, would set these values: ```yaml -dags: - git: - #ssh://git@example.com:22/REPOSITORY.git - url: git@github.com:USERNAME/REPOSITORY.git - ref: master - secret: airflow-git-keys - privateKeyName: id_rsa - repoHost: github.com - repoPort: 22 - - gitSync: - enabled: true - refreshTime: 60 -``` +airflow: + config: + AIRFLOW__WEBSERVER__BASE_URL: "http://example.com/airflow/" + AIRFLOW__CELERY__FLOWER_URL_PREFIX: "/airflow/flower" -You can create the `dags.git.secret` from your local `$HOME/.ssh` folder using: -```console -kubectl create secret generic \ - airflow-git-keys \ - --from-file=id_rsa=$HOME/.ssh/id_rsa \ - --from-file=id_rsa.pub=$HOME/.ssh/id_rsa.pub \ - --from-file=known_hosts=$HOME/.ssh/known_hosts \ - --namespace airflow +ingress: + enabled: true + web: + path: "/airflow" + flower: + path: "/airflow/flower" ``` -### Option 2a - PersistentVolume - -> ⚠️️ this method requires a PersistentVolumeClaim which supports `accessMode = ReadOnlyMany or ReadWriteMany` - -This method stores your DAGs in a PersistentVolume. - -You must configure some external system to ensure this volume has your latest DAGs. -For example, you could use your CI/CD pipeline system to preform a sync as changes are pushed to a git repo. - -Since ALL Pods MUST HAVE the same collection of DAG files, it is recommended to create just one PVC that is shared. -To share a PVC with multiple Pods, the PVC needs to have `accessMode` set to `ReadOnlyMany` or `ReadWriteMany` (Note: different StorageClass support different [access modes](https://kubernetes.io/docs/concepts/storage/persistent-volumes/#access-modes)). +We expose the `ingress.web.precedingPaths` and `ingress.web.succeedingPaths` values, which are __before__ and __after__ the default path respectively. -If you are using Kubernetes on a public cloud, a persistent volume controller is likely built in: - - [Amazon EKS](https://docs.aws.amazon.com/eks/latest/userguide/storage-classes.html) - - [Azure AKS](https://docs.microsoft.com/en-us/azure/aks/azure-files-dynamic-pv) - - [Google GKE](https://cloud.google.com/kubernetes-engine/docs/concepts/persistent-volumes) +> ⚠️ A common use-case is [enabling SSL with the aws-alb-ingress-controller](https://kubernetes-sigs.github.io/aws-load-balancer-controller/v2.1/guide/tasks/ssl_redirect/), which needs a redirect path to be hit before the airflow-webserver one. -For example, to use the storage class called `default`: +For example, setting `ingress.web.precedingPaths` for an aws-alb-ingress-controller with SSL: ```yaml -dags: - persistence: - enabled: true - storageClass: default - accessMode: ReadOnlyMany - size: 1Gi +ingress: + web: + precedingPaths: + - path: "/*" + serviceName: "ssl-redirect" + servicePort: "use-annotation" ``` -### Option 2b - Shared PersistentVolume +
+
-> ⚠️ this method requires a PersistentVolumeClaim which supports `accessMode = ReadWriteMany` +### How to integrate airflow with Prometheus? +
-This method stores both DAGs and logs on the same PersistentVolume. +To be able to expose Airflow metrics to Prometheus you will need install a plugin, one option is [epoch8/airflow-exporter](https://github.com/epoch8/airflow-exporter) which exports DAG and task metrics from Airflow. -Here's an approach that achieves this: -- Configure `airflow.extraVolume` and `airflow.extraVolumeMount` to put a volume at `/opt/airflow/efs` -- Configure `dags.persistence.enabled` and `logs.persistence.enabled` to be `false` -- Configure `dags.path` to be `/opt/airflow/efs/dags` -- Configure `logs.path` to be `/opt/airflow/efs/logs` +A [ServiceMonitor](https://github.com/prometheus-operator/prometheus-operator/blob/master/Documentation/api.md#servicemonitor) is a resource introduced by the [Prometheus Operator](https://github.com/prometheus-operator/prometheus-operator), ror more information, see the `serviceMonitor` section of `values.yaml`. -### Option 3 - Container Image +
+
-This method stores your DAGs inside the container image. - -This chart uses the official [apache/airflow image](https://hub.docker.com/r/apache/airflow), extend this image and COPY your DAGs into the `dags.path` folder: -```docker -FROM apache/airflow:1.10.12-python3.6 +### How to add extra manifests? +
-# NOTE: dag path is set with the `dags.path` value -COPY ./my_dag_folder /opt/airflow/dags -``` +You can use the `extraManifests.[]` value to add custom Kubernetes manifests to the chart. -The following values can be used to specify the container image: +For example, adding a `BackendConfig` resource for GKE: ```yaml -airflow: - image: - repository: MY_REPO - tag: MY_TAG +extraManifests: + - apiVersion: cloud.google.com/v1beta1 + kind: BackendConfig + metadata: + name: "{{ .Release.Name }}-test" + spec: + securityPolicy: + name: "gcp-cloud-armor-policy-test" ``` -## Docs (Other) - requirements.txt - -> ⚠️ if you update the `requirements.txt`, you will have to restart each worker Pod for changes to take effect, you might consider using `airflow.extraPipPackages` instead - -We expose the `dags.installRequirements` value to pip install any `requirements.txt` found at the root of your `dags.path` folder as airflow-worker Pods start. - ---- - -## Helm Chart Values - -Full documentation can be found in the comments of the `values.yaml` file, but a high level overview is provided here. - -__Global Values:__ - -| Parameter | Description | Default | -| --- | --- | --- | -| `airflow.image.*` | configs for the docker image of the web/scheduler/worker | `` | -| `airflow.executor` | the airflow executor type to use | `CeleryExecutor` | -| `airflow.fernetKey` | the fernet key used to encrypt the connections/variables in the database | `7T512UXSSmBOkpWimFHIVb8jK6lfmSAvx4mO6Arehnc=` | -| `airflow.config` | environment variables for the web/scheduler/worker pods (for airflow configs) | `{}` | -| `airflow.podAnnotations` | extra annotations for the web/scheduler/worker/flower Pods | `{}` | -| `airflow.extraEnv` | extra environment variables for the web/scheduler/worker/flower Pods | `[]` | -| `airflow.extraConfigmapMounts` | extra configMap volumeMounts for the web/scheduler/worker/flower Pods | `[]` | -| `airflow.extraContainers` | extra containers for the web/scheduler/worker Pods | `[]` | -| `airflow.extraPipPackages` | extra pip packages to install in the web/scheduler/worker Pods | `[]` | -| `airflow.extraVolumeMounts` | extra volumeMounts for the web/scheduler/worker Pods | `[]` | -| `airflow.extraVolumes` | extra volumes for the web/scheduler/worker Pods | `[]` | - -__Airflow Scheduler values:__ - -| Parameter | Description | Default | -| --- | --- | --- | -| `scheduler.resources` | resource requests/limits for the scheduler Pods | `{}` | -| `scheduler.nodeSelector` | the nodeSelector configs for the scheduler Pods | `{}` | -| `scheduler.affinity` | the affinity configs for the scheduler Pods | `{}` | -| `scheduler.tolerations` | the toleration configs for the scheduler Pods | `[]` | -| `scheduler.securityContext` | the security context for the scheduler Pods | `{}` | -| `scheduler.labels` | labels for the scheduler Deployment | `{}` | -| `scheduler.podLabels` | Pod labels for the scheduler Deployment | `{}` | -| `scheduler.annotations` | annotations for the scheduler Deployment | `{}` | -| `scheduler.podAnnotations` | Pod Annotations for the scheduler Deployment | `{}` | -| `scheduler.safeToEvict` | if we should tell Kubernetes Autoscaler that its safe to evict these Pods | `true` | -| `scheduler.podDisruptionBudget.*` | configs for the PodDisruptionBudget of the scheduler | `` | -| `scheduler.connections` | custom airflow connections for the airflow scheduler | `[]` | -| `scheduler.refreshConnections` | if `scheduler.connections` are deleted and re-added after each scheduler restart | `true` | -| 
`scheduler.existingSecretConnections` | the name of an existing Secret containing an `add-connections.sh` script to run on scheduler start | `""` | -| `scheduler.variables` | custom airflow variables for the airflow scheduler | `"{}"` | -| `scheduler.pools` | custom airflow pools for the airflow scheduler | `"{}"` | -| `scheduler.numRuns` | the value of the `airflow --num_runs` parameter used to run the airflow scheduler | `-1` | -| `scheduler.initdb` | if we run `airflow upgradedb` when the scheduler starts | `true` | -| `scheduler.preinitdb` | if we run `airflow upgradedb` inside a special initContainer | `false` | -| `scheduler.initialStartupDelay` | the number of seconds to wait (in bash) before starting the scheduler container | `0` | -| `scheduler.livenessProbe.*` | configs for the scheduler liveness probe | `` | -| `scheduler.secretsDir` | the directory in which to mount secrets on scheduler containers | `/var/airflow/secrets` | -| `scheduler.secrets` | the names of existing Kubernetes Secrets to mount as files at `{workers.secretsDir}//` | `[]` | -| `scheduler.secretsMap` | the name of an existing Kubernetes Secret to mount as files to `{web.secretsDir}/` | `""` | -| `scheduler.extraInitContainers` | extra init containers to run before the scheduler pod | `[]` | - -__Airflow Webserver Values:__ - -| Parameter | Description | Default | -| --- | --- | --- | -| `web.resources` | resource requests/limits for the airflow web pods | `{}` | -| `web.replicas` | the number of web Pods to run | `1` | -| `web.nodeSelector` | the number of web Pods to run | `{}` | -| `web.affinity` | the affinity configs for the web Pods | `{}` | -| `web.tolerations` | the toleration configs for the web Pods | `[]` | -| `web.securityContext` | the security context for the web Pods | `{}` | -| `web.labels` | labels for the web Deployment | `{}` | -| `web.podLabels` | Pod labels for the web Deployment | `{}` | -| `web.annotations` | annotations for the web Deployment | `{}` | -| `web.podAnnotations` | Pod annotations for the web Deployment | `{}` | -| `web.safeToEvict` | if we should tell Kubernetes Autoscaler that its safe to evict these Pods | `true` | -| `web.podDisruptionBudget.*` | configs for the PodDisruptionBudget of the web Deployment | `` | -| `web.service.*` | configs for the Service of the web pods | `` | -| `web.baseUrl` | sets `AIRFLOW__WEBSERVER__BASE_URL` | `http://localhost:8080` | -| `web.serializeDAGs` | sets `AIRFLOW__CORE__STORE_SERIALIZED_DAGS` | `false` | -| `web.extraPipPackages` | extra pip packages to install in the web container | `[]` | -| `web.initialStartupDelay` | the number of seconds to wait (in bash) before starting the web container | `0` | -| `web.minReadySeconds` | the number of seconds to wait before declaring a new Pod available | `5` | -| `web.readinessProbe.*` | configs for the web Service readiness probe | `` | -| `web.livenessProbe.*` | configs for the web Service liveness probe | `` | -| `web.secretsDir` | the directory in which to mount secrets on web containers | `/var/airflow/secrets` | -| `web.secrets` | the names of existing Kubernetes Secrets to mount as files at `{workers.secretsDir}//` | `[]` | -| `web.secretsMap` | the name of an existing Kubernetes Secret to mount as files to `{web.secretsDir}/` | `""` | - -__Airflow Worker Values:__ - -| Parameter | Description | Default | -| --- | --- | --- | -| `workers.enabled` | if the airflow workers StatefulSet should be deployed | `true` | -| `workers.resources` | resource requests/limits for the airflow worker Pods | 
`{}` | -| `workers.replicas` | the number of workers Pods to run | `1` | -| `workers.nodeSelector` | the nodeSelector configs for the worker Pods | `{}` | -| `workers.affinity` | the affinity configs for the worker Pods | `{}` | -| `workers.tolerations` | the toleration configs for the worker Pods | `[]` | -| `workers.securityContext` | the security context for the worker Pods | `{}` | -| `workers.labels` | labels for the worker StatefulSet | `{}` | -| `workers.podLabels` | Pod labels for the worker StatefulSet | `{}` | -| `workers.annotations` | annotations for the worker StatefulSet | `{}` | -| `workers.podAnnotations` | Pod annotations for the worker StatefulSet | `{}` | -| `workers.safeToEvict` | if we should tell Kubernetes Autoscaler that its safe to evict these Pods | `true` | -| `workers.podDisruptionBudget.*` | configs for the PodDisruptionBudget of the worker StatefulSet | `` | -| `workers.autoscaling.*` | configs for the HorizontalPodAutoscaler of the worker Pods | `` | -| `workers.initialStartupDelay` | the number of seconds to wait (in bash) before starting each worker container | `0` | -| `workers.celery.*` | configs for the celery worker Pods | `` | -| `workers.terminationPeriod` | how many seconds to wait after SIGTERM before SIGKILL of the celery worker | `60` | -| `workers.secretsDir` | directory in which to mount secrets on worker containers | `/var/airflow/secrets` | -| `workers.secrets` | the names of existing Kubernetes Secrets to mount as files at `{workers.secretsDir}//` | `[]` | -| `workers.secretsMap` | the name of an existing Kubernetes Secret to mount as files to `{web.secretsDir}/` | `""` | - -__Airflow Flower Values:__ - -| Parameter | Description | Default | -| --- | --- | --- | -| `flower.enabled` | if the Flower UI should be deployed | `true` | -| `flower.resources` | resource requests/limits for the flower Pods | `{}` | -| `flower.affinity` | the affinity configs for the flower Pods | `{}` | -| `flower.tolerations` | the toleration configs for the flower Pods | `[]` | -| `flower.securityContext` | the security context for the flower Pods | `{}` | -| `flower.labels` | labels for the flower Deployment | `{}` | -| `flower.podLabels` | Pod labels for the flower Deployment | `{}` | -| `flower.annotations` | annotations for the flower Deployment | `{}` | -| `flower.podAnnotations` | Pod annotations for the flower Deployment | `{}` | -| `flower.safeToEvict` | if we should tell Kubernetes Autoscaler that its safe to evict these Pods | `true` | -| `flower.podDisruptionBudget.*` | configs for the PodDisruptionBudget of the flower Deployment | `` | -| `flower.oauthDomains` | the value of the flower `--auth` argument | `""` | -| `flower.basicAuthSecret` | the name of a pre-created secret containing the basic authentication value for flower | `""` | -| `flower.basicAuthSecretKey` | the key within `flower.basicAuthSecret` containing the basic authentication string | `""` | -| `flower.urlPrefix` | sets `AIRFLOW__CELERY__FLOWER_URL_PREFIX` | `""` | -| `flower.service.*` | configs for the Service of the flower Pods | `` | -| `flower.initialStartupDelay` | the number of seconds to wait (in bash) before starting the flower container | `0` | -| `flower.minReadySeconds` | the number of seconds to wait before declaring a new Pod available | `5` | -| `flower.extraConfigmapMounts` | extra ConfigMaps to mount on the flower Pods | `[]` | - -__Airflow Logs Values:__ - -| Parameter | Description | Default | -| --- | --- | --- | -| `logs.path` | the airflow logs folder | 
`/opt/airflow/logs` | -| `logs.persistence.*` | configs for the logs PVC | `` | - -__Airflow DAGs Values:__ - -| Parameter | Description | Default | -| --- | --- | --- | -| `dags.path` | the airflow dags folder | `/opt/airflow/dags` | -| `dags.doNotPickle` | whether to disable pickling dags from the scheduler to workers | `false` | -| `dags.installRequirements` | install any Python `requirements.txt` at the root of `dags.path` automatically | `false` | -| `dags.persistence.*` | configs for the dags PVC | `` | -| `dags.git.*` | configs for the DAG git repository & sync container | `` | -| `dags.initContainer.*` | configs for the git-clone container | `` | - -__Airflow Ingress Values:__ - -| Parameter | Description | Default | -| --- | --- | --- | -| `ingress.enabled` | if we should deploy Ingress resources | `false` | -| `ingress.web.*` | configs for the Ingress of the web Service | `` | -| `ingress.flower.*` | configs for the Ingress of the flower Service | `` | - -__Airflow Kubernetes Values:__ - -| Parameter | Description | Default | -| --- | --- | --- | -| `rbac.create` | if Kubernetes RBAC resources are created | `true` | -| `rbac.events` | if the created RBAR role has GET/LIST access to Event resources | `false` | -| `serviceAccount.create` | if a Kubernetes ServiceAccount is created | `true` | -| `serviceAccount.name` | the name of the ServiceAccount | `""` | -| `serviceAccount.annotations` | annotations for the ServiceAccount | `{}` | -| `extraManifests` | additional Kubernetes manifests to include with this chart | `[]` | - -__Airflow Database (Internal PostgreSQL) Values:__ - -| Parameter | Description | Default | -| --- | --- | --- | -| `postgresql.enabled` | if the `stable/postgresql` chart is used | `true` | -| `postgresql.postgresqlDatabase` | the postgres database to use | `airflow` | -| `postgresql.postgresqlUsername` | the postgres user to create | `postgres` | -| `postgresql.postgresqlPassword` | the postgres user's password | `airflow` | -| `postgresql.existingSecret` | the name of a pre-created secret containing the postgres password | `""` | -| `postgresql.existingSecretKey` | the key within `postgresql.passwordSecret` containing the password string | `postgresql-password` | -| `postgresql.persistence.*` | configs for the PVC of postgresql | `` | -| `postgresql.master.*` | configs for the postgres StatefulSet | `` | - -__Airflow Database (External) Values:__ - -| Parameter | Description | Default | -| --- | --- | --- | -| `externalDatabase.type` | the type of external database: {mysql,postgres} | `postgres` | -| `externalDatabase.host` | the host of the external database | `localhost` | -| `externalDatabase.port` | the port of the external database | `5432` | -| `externalDatabase.database` | the database/scheme to use within the the external database | `airflow` | -| `externalDatabase.user` | the user of the external database | `airflow` | -| `externalDatabase.passwordSecret` | the name of a pre-created secret containing the external database password | `""` | -| `externalDatabase.passwordSecretKey` | the key within `externalDatabase.passwordSecret` containing the password string | `postgresql-password` | -| `externalDatabase.properties` | the connection properties e.g. 
"?sslmode=require" | `""` | - -__Airflow Redis (Internal) Values:__ - -| Parameter | Description | Default | -| --- | --- | --- | -| `redis.enabled` | if the `stable/redis` chart is used | `true` | -| `redis.password` | the redis password | `airflow` | -| `redis.existingSecret` | the name of a pre-created secret containing the redis password | `""` | -| `redis.existingSecretPasswordKey` | the key within `redis.existingSecret` containing the password string | `redis-password` | -| `redis.cluster.*` | configs for redis cluster mode | `` | -| `redis.master.*` | configs for the redis master | `` | -| `redis.slave.*` | configs for the redis slaves | `` | - -__Airflow Redis (External) Values:__ - -| Parameter | Description | Default | -| --- | --- | --- | -| `externalRedis.host` | the host of the external redis | `localhost` | -| `externalRedis.port` | the port of the external redis | `6379` | -| `externalRedis.databaseNumber` | the database number to use within the the external redis | `1` | -| `externalRedis.passwordSecret` | the name of a pre-created secret containing the external redis password | `""` | -| `externalRedis.passwordSecretKey` | the key within `externalRedis.passwordSecret` containing the password string | `redis-password` | - -__Airflow Prometheus Values:__ - -| Parameter | Description | Default | -| --- | --- | --- | -| `serviceMonitor.enabled` | if the ServiceMonitor resources should be deployed | `false` | -| `serviceMonitor.selector` | labels for ServiceMonitor, so that Prometheus can select it | `{ prometheus: "kube-prometheus" }` | -| `serviceMonitor.path` | the ServiceMonitor web endpoint path | `/admin/metrics` | -| `serviceMonitor.interval` | the ServiceMonitor web endpoint path | `30s` | -| `prometheusRule.enabled` | if the PrometheusRule resources should be deployed | `false` | -| `prometheusRule.additionalLabels` | labels for PrometheusRule, so that Prometheus can select it | `{}` | -| `prometheusRule.groups` | alerting rules for Prometheus | `[]` | +
+
+ +## Chart Values + +### Global: +
+<details>
+<summary>Show More</summary>
+
+
+Parameter | Description | Default
+--- | --- | ---
+`airflow.legacyCommands` | if we use legacy 1.10 airflow commands | `false`
+`airflow.image.*` | configs for the airflow container image | ``
+`airflow.executor` | the airflow executor type to use | `CeleryExecutor`
+`airflow.fernetKey` | the fernet key used to encrypt the connections/variables in the database | `7T512UXSSmBOkpWimFHIVb8jK6lfmSAvx4mO6Arehnc=`
+`airflow.config` | environment variables for airflow configs | `{}`
+`airflow.users` | a list of initial users to create | ``
+`airflow.usersUpdate` | if we update users or just create them the first time (lookup by `username`) | `true`
+`airflow.connections` | a list of initial connections to create | ``
+`airflow.connectionsUpdate` | if we update connections or just create them the first time (lookup by `id`) | `true`
+`airflow.variables` | a list of initial variables to create | ``
+`airflow.variablesUpdate` | if we update variables or just create them the first time (lookup by `key`) | `true`
+`airflow.pools` | a list of initial pools to create | ``
+`airflow.poolsUpdate` | if we update pools or just create them the first time (lookup by `name`) | `true`
+`airflow.podAnnotations` | extra annotations for the web/scheduler/worker/flower Pods | `{}`
+`airflow.extraPipPackages` | extra pip packages to install in the web/scheduler/worker/flower Pods | `[]`
+`airflow.extraEnv` | extra environment variables for the web/scheduler/worker/flower Pods | `[]`
+`airflow.extraContainers` | extra containers for the web/scheduler/worker/flower Pods | `[]`
+`airflow.extraVolumeMounts` | extra VolumeMounts for the web/scheduler/worker/flower Pods | `[]`
+`airflow.extraVolumes` | extra Volumes for the web/scheduler/worker/flower Pods | `[]`
+`airflow.kubernetesPodTemplate.*` | configs to generate the AIRFLOW__KUBERNETES__POD_TEMPLATE_FILE | ``
+
+</details>
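+
+For example, a minimal sketch of these list values (all names and credentials below are placeholders, adapted from this chart's bundled examples):
+
+```yaml
+airflow:
+  ## WARNING: placeholder credentials, change before any real deployment
+  users:
+    - username: admin
+      password: admin
+      role: Admin
+      email: admin@example.com
+      firstName: admin
+      lastName: admin
+
+  connections:
+    - id: my_gcp
+      type: google_cloud_platform
+      description: my GCP connection
+      extra: |-
+        { "extra__google_cloud_platform__num_retries": "5" }
+
+  variables:
+    - key: "environment"
+      value: "prod"
+
+  pools:
+    - name: "pool_1"
+      slots: 5
+      description: "example pool with 5 slots"
+```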
+ +### Airflow Scheduler: +
+<details>
+<summary>Show More</summary>
+
+ +Parameter | Description | Default +--- | --- | --- +`scheduler.replicas` | the number of scheduler Pods to run | `1` +`scheduler.resources` | resource requests/limits for the scheduler Pods | `{}` +`scheduler.nodeSelector` | the nodeSelector configs for the scheduler Pods | `{}` +`scheduler.affinity` | the affinity configs for the scheduler Pods | `{}` +`scheduler.tolerations` | the toleration configs for the scheduler Pods | `[]` +`scheduler.securityContext` | the security context for the scheduler Pods | `{}` +`scheduler.labels` | labels for the scheduler Deployment | `{}` +`scheduler.podLabels` | Pod labels for the scheduler Deployment | `{}` +`scheduler.annotations` | annotations for the scheduler Deployment | `{}` +`scheduler.podAnnotations` | Pod annotations for the scheduler Deployment | `{}` +`scheduler.safeToEvict` | if we add the annotation: "cluster-autoscaler.kubernetes.io/safe-to-evict" = "true" | `true` +`scheduler.podDisruptionBudget.*` | configs for the PodDisruptionBudget of the scheduler | `` +`scheduler.numRuns` | the value of the `airflow --num_runs` parameter used to run the airflow scheduler | `-1` +`scheduler.extraPipPackages` | extra pip packages to install in the scheduler Pods | `[]` +`scheduler.extraVolumeMounts` | extra VolumeMounts for the scheduler Pods | `[]` +`scheduler.extraVolumes` | extra Volumes for the scheduler Pods | `[]` +`scheduler.livenessProbe.*` | configs for the scheduler Pods' liveness probe | `` +`scheduler.extraInitContainers` | extra init containers to run in the scheduler Pods | `[]` + +
+</details>
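+
+For example, a sketch enabling Airflow 2.0's HA scheduler by simply running more than one scheduler Pod (resource values are placeholders):
+
+```yaml
+scheduler:
+  ## run two schedulers for high availability (airflow 2.0+)
+  replicas: 2
+
+  resources:
+    requests:
+      cpu: "1000m"
+      memory: "512Mi"
+```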
+ +### Airflow Webserver: +
+<details>
+<summary>Show More</summary>
+
+
+Parameter | Description | Default
+--- | --- | ---
+`web.webserverConfig.*` | configs to generate webserver_config.py | ``
+`web.replicas` | the number of web Pods to run | `1`
+`web.resources` | resource requests/limits for the airflow web Pods | `{}`
+`web.nodeSelector` | the nodeSelector configs for the web Pods | `{}`
+`web.affinity` | the affinity configs for the web Pods | `{}`
+`web.tolerations` | the toleration configs for the web Pods | `[]`
+`web.securityContext` | the security context for the web Pods | `{}`
+`web.labels` | labels for the web Deployment | `{}`
+`web.podLabels` | Pod labels for the web Deployment | `{}`
+`web.annotations` | annotations for the web Deployment | `{}`
+`web.podAnnotations` | Pod annotations for the web Deployment | `{}`
+`web.safeToEvict` | if we add the annotation: "cluster-autoscaler.kubernetes.io/safe-to-evict" = "true" | `true`
+`web.podDisruptionBudget.*` | configs for the PodDisruptionBudget of the web Deployment | ``
+`web.service.*` | configs for the Service of the web Pods | ``
+`web.readinessProbe.*` | configs for the web Pods' readiness probe | ``
+`web.livenessProbe.*` | configs for the web Pods' liveness probe | ``
+`web.extraPipPackages` | extra pip packages to install in the web Pods | `[]`
+`web.extraVolumeMounts` | extra VolumeMounts for the web Pods | `[]`
+`web.extraVolumes` | extra Volumes for the web Pods | `[]`
+
+</details>
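+
+For example, a minimal `web.webserverConfig.stringOverride` that mounts an inline `webserver_config.py` (mirroring this chart's bundled GKE example):
+
+```yaml
+web:
+  webserverConfig:
+    stringOverride: |-
+      from flask_appbuilder.security.manager import AUTH_DB
+
+      # use embedded DB for auth
+      AUTH_TYPE = AUTH_DB
+```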
+ +### Airflow Celery Worker: +
+<details>
+<summary>Show More</summary>
+
+ +Parameter | Description | Default +--- | --- | --- +`workers.enabled` | if the airflow workers StatefulSet should be deployed | `true` +`workers.replicas` | the number of workers Pods to run | `1` +`workers.resources` | resource requests/limits for the airflow worker Pods | `{}` +`workers.nodeSelector` | the nodeSelector configs for the worker Pods | `{}` +`workers.affinity` | the affinity configs for the worker Pods | `{}` +`workers.tolerations` | the toleration configs for the worker Pods | `[]` +`workers.securityContext` | the security context for the worker Pods | `{}` +`workers.labels` | labels for the worker StatefulSet | `{}` +`workers.podLabels` | Pod labels for the worker StatefulSet | `{}` +`workers.annotations` | annotations for the worker StatefulSet | `{}` +`workers.podAnnotations` | Pod annotations for the worker StatefulSet | `{}` +`workers.safeToEvict` | if we add the annotation: "cluster-autoscaler.kubernetes.io/safe-to-evict" = "true" | `true` +`workers.podDisruptionBudget.*` | configs for the PodDisruptionBudget of the worker StatefulSet | `` +`workers.autoscaling.*` | configs for the HorizontalPodAutoscaler of the worker Pods | `` +`workers.celery.*` | configs for the celery worker Pods | `` +`workers.terminationPeriod` | how many seconds to wait after SIGTERM before SIGKILL of the celery worker | `60` +`workers.extraPipPackages` | extra pip packages to install in the worker Pods | `[]` +`workers.extraVolumeMounts` | extra VolumeMounts for the worker Pods | `[]` +`workers.extraVolumes` | extra Volumes for the worker Pods | `[]` + +
+</details>
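+
+For example, a sketch of a small celery worker pool (the pip package below is a placeholder):
+
+```yaml
+workers:
+  enabled: true
+  replicas: 2
+
+  ## seconds to wait after SIGTERM before SIGKILL of the celery worker
+  terminationPeriod: 60
+
+  extraPipPackages:
+    ## placeholder package, replace with your own
+    - "SomeProject==1.0.0"
+```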
+ +### Airflow Flower: +
+<details>
+<summary>Show More</summary>
+
+ +Parameter | Description | Default +--- | --- | --- +`flower.enabled` | if the Flower UI should be deployed | `true` +`flower.resources` | resource requests/limits for the flower Pods | `{}` +`flower.nodeSelector` | the nodeSelector configs for the flower Pods | `{}` +`flower.affinity` | the affinity configs for the flower Pods | `{}` +`flower.tolerations` | the toleration configs for the flower Pods | `[]` +`flower.securityContext` | the security context for the flower Pods | `{}` +`flower.labels` | labels for the flower Deployment | `{}` +`flower.podLabels` | Pod labels for the flower Deployment | `{}` +`flower.annotations` | annotations for the flower Deployment | `{}` +`flower.podAnnotations` | Pod annotations for the flower Deployment | `{}` +`flower.safeToEvict` | if we add the annotation: "cluster-autoscaler.kubernetes.io/safe-to-evict" = "true" | `true` +`flower.podDisruptionBudget.*` | configs for the PodDisruptionBudget of the flower Deployment | `` +`flower.oauthDomains` | the value of the flower `--auth` argument | `""` +`flower.basicAuthSecret` | the name of a pre-created secret containing the basic authentication value for flower | `""` +`flower.basicAuthSecretKey` | the key within `flower.basicAuthSecret` containing the basic authentication string | `""` +`flower.service.*` | configs for the Service of the flower Pods | `` +`flower.extraPipPackages` | extra pip packages to install in the flower Pod | `[]` +`flower.extraVolumeMounts` | extra VolumeMounts for the flower Pods | `[]` +`flower.extraVolumes` | extra Volumes for the flower Pods | `[]` + +
+</details>
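+
+For example, a sketch putting Flower behind basic-auth with a pre-created Secret (the Secret name and key here are hypothetical; the key must hold a `user:password` style string):
+
+```yaml
+flower:
+  enabled: true
+
+  ## hypothetical pre-created Secret holding the basic-auth string
+  basicAuthSecret: "my-flower-basic-auth"
+  basicAuthSecretKey: "value"
+```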
+ +### Logs: +
+<details>
+<summary>Show More</summary>
+
+ +Parameter | Description | Default +--- | --- | --- +`logs.path` | the airflow logs folder | `/opt/airflow/logs` +`logs.persistence.*` | configs for the logs PVC | `` + +
+</details>
+ +### DAGs: +
+<details>
+<summary>Show More</summary>
+
+ +Parameter | Description | Default +--- | --- | --- +`dags.path` | the airflow dags folder | `/opt/airflow/dags` +`dags.persistence.*` | configs for the dags PVC | `` +`dags.gitSync.*` | configs for the git-sync sidecar | `` + +
+</details>
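+
+For example, a sketch syncing DAGs from a private SSH repo (hostnames and Secret names are placeholders, adapted from this chart's bundled GKE example):
+
+```yaml
+dags:
+  gitSync:
+    enabled: true
+    repo: "git@repo.example.com:my-airflow-dags.git"
+    branch: master
+    revision: HEAD
+    syncWait: 60
+
+    ## pre-created Secret holding an `id_rsa` ssh-key file
+    sshSecret: "airflow-cluster1-git-secret"
+    sshSecretKey: id_rsa
+```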
+ +### Kubernetes (Ingress): +
+<details>
+<summary>Show More</summary>
+
+ +Parameter | Description | Default +--- | --- | --- +`ingress.enabled` | if we should deploy Ingress resources | `false` +`ingress.web.*` | configs for the Ingress of the web Service | `` +`ingress.flower.*` | configs for the Ingress of the flower Service | `` + +
+</details>
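+
+For example, a sketch of the Ingress hosts that NOTES.txt will print after install (hostnames are placeholders):
+
+```yaml
+ingress:
+  enabled: true
+  web:
+    host: "airflow.example.com"
+    path: ""
+  flower:
+    host: "airflow-flower.example.com"
+    path: ""
+```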
+ +### Kubernetes (Other): +
+<details>
+<summary>Show More</summary>
+
+
+Parameter | Description | Default
+--- | --- | ---
+`rbac.create` | if Kubernetes RBAC resources are created | `true`
+`rbac.events` | if the created RBAC role has GET/LIST access to Event resources | `false`
+`serviceAccount.create` | if a Kubernetes ServiceAccount is created | `true`
+`serviceAccount.name` | the name of the ServiceAccount | `""`
+`serviceAccount.annotations` | annotations for the ServiceAccount | `{}`
+`extraManifests` | extra Kubernetes manifests to include alongside this chart | `[]`
+
+</details>
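+
+For example, `extraManifests` renders raw manifests alongside the chart's own resources (this ConfigMap is purely illustrative):
+
+```yaml
+extraManifests:
+  - apiVersion: v1
+    kind: ConfigMap
+    metadata:
+      ## illustrative name, use your own
+      name: airflow-extra-configmap
+    data:
+      my-key: "my-value"
+```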
+ +### Database (Embedded - Postgres): +
+<details>
+<summary>Show More</summary>
+
+
+Parameter | Description | Default
+--- | --- | ---
+`postgresql.enabled` | if the `stable/postgresql` chart is used | `true`
+`postgresql.postgresqlDatabase` | the postgres database to use | `airflow`
+`postgresql.postgresqlUsername` | the postgres user to create | `postgres`
+`postgresql.postgresqlPassword` | the postgres user's password | `airflow`
+`postgresql.existingSecret` | the name of a pre-created secret containing the postgres password | `""`
+`postgresql.existingSecretKey` | the key within `postgresql.existingSecret` containing the password string | `postgresql-password`
+`postgresql.persistence.*` | configs for the PVC of postgresql | ``
+`postgresql.master.*` | configs for the postgres StatefulSet | ``
+
+</details>
+ +### Database (External - Postgres/MySQL): +
+<details>
+<summary>Show More</summary>
+
+
+Parameter | Description | Default
+--- | --- | ---
+`externalDatabase.type` | the type of external database: {mysql,postgres} | `postgres`
+`externalDatabase.host` | the host of the external database | `localhost`
+`externalDatabase.port` | the port of the external database | `5432`
+`externalDatabase.database` | the database/schema to use within the external database | `airflow`
+`externalDatabase.user` | the user of the external database | `airflow`
+`externalDatabase.passwordSecret` | the name of a pre-created secret containing the external database password | `""`
+`externalDatabase.passwordSecretKey` | the key within `externalDatabase.passwordSecret` containing the password string | `postgresql-password`
+`externalDatabase.properties` | the connection properties, e.g. "?sslmode=require" | `""`
+
+</details>
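+
+For example, a sketch pointing the chart at an external Postgres (host and Secret names are placeholders); remember to disable the embedded chart:
+
+```yaml
+postgresql:
+  enabled: false
+
+externalDatabase:
+  type: postgres
+  host: "postgres.example.com"
+  port: 5432
+  database: airflow
+  user: airflow
+
+  ## placeholder pre-created Secret holding the database password
+  passwordSecret: "airflow-postgres-password"
+  passwordSecretKey: "postgresql-password"
+  properties: "?sslmode=require"
+```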
+ +### Redis (Embedded): +
+<details>
+<summary>Show More</summary>
+
+ +Parameter | Description | Default +--- | --- | --- +`redis.enabled` | if the `stable/redis` chart is used | `true` +`redis.password` | the redis password | `airflow` +`redis.existingSecret` | the name of a pre-created secret containing the redis password | `""` +`redis.existingSecretPasswordKey` | the key within `redis.existingSecret` containing the password string | `redis-password` +`redis.cluster.*` | configs for redis cluster mode | `` +`redis.master.*` | configs for the redis master | `` +`redis.slave.*` | configs for the redis slaves | `` + +
+</details>
+ +### Redis (External): +
+<details>
+<summary>Show More</summary>
+
+
+Parameter | Description | Default
+--- | --- | ---
+`externalRedis.host` | the host of the external redis | `localhost`
+`externalRedis.port` | the port of the external redis | `6379`
+`externalRedis.databaseNumber` | the database number to use within the external redis | `1`
+`externalRedis.passwordSecret` | the name of a pre-created secret containing the external redis password | `""`
+`externalRedis.passwordSecretKey` | the key within `externalRedis.passwordSecret` containing the password string | `redis-password`
+
+</details>
+ +### Prometheus: +
+<details>
+<summary>Show More</summary>
+
+
+Parameter | Description | Default
+--- | --- | ---
+`serviceMonitor.enabled` | if ServiceMonitor resources should be deployed | `false`
+`serviceMonitor.selector` | labels for the ServiceMonitor, so that Prometheus can select it | `{ prometheus: "kube-prometheus" }`
+`serviceMonitor.path` | the ServiceMonitor web endpoint path | `/admin/metrics`
+`serviceMonitor.interval` | the ServiceMonitor scrape interval | `30s`
+`prometheusRule.enabled` | if the PrometheusRule resources should be deployed | `false`
+`prometheusRule.additionalLabels` | labels for the PrometheusRule, so that Prometheus can select it | `{}`
+`prometheusRule.groups` | alerting rules for Prometheus | `[]`
+
+</details>
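+
+For example, a sketch enabling the ServiceMonitor (the selector labels must match those watched by your Prometheus instance):
+
+```yaml
+serviceMonitor:
+  enabled: true
+  selector:
+    prometheus: "kube-prometheus"
+  path: /admin/metrics
+  interval: "30s"
+```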
+ +
+
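+
+For readers coming from older chart versions: the removed `XXX.secrets`/`XXX.secretsDir` values are replaced by plain Kubernetes volume syntax, e.g. mounting a TLS Secret into the webserver (Secret name and path are placeholders, adapted from this chart's bundled GKE example):
+
+```yaml
+web:
+  extraVolumeMounts:
+    - name: ssl-cert
+      mountPath: /opt/airflow/ssl-cert
+      readOnly: true
+
+  extraVolumes:
+    - name: ssl-cert
+      secret:
+        secretName: airflow-cluster1-cert
+```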
\ No newline at end of file
diff --git a/charts/airflow/UPGRADE.md b/charts/airflow/UPGRADE.md
index e6ea4011..184ac6dd 100644
--- a/charts/airflow/UPGRADE.md
+++ b/charts/airflow/UPGRADE.md
@@ -1,5 +1,197 @@
 # Upgrading Steps
 
+## `v7.15.X` → `v8.0.0`
+
+> 🛑 this is a MAJOR update, meaning there are BREAKING changes
+> - you might want to start your `values.yaml` file from scratch
+
+> ⚠️ the default version of airflow has changed to `2.0.1`
+> - check that your dags [are compatible](https://airflow.apache.org/docs/apache-airflow/stable/upgrading-to-2.html#step-5-upgrade-airflow-dags)
+> - note that you won't be able to downgrade your database back to the `1.10.X` schema
+> - the default version of python has changed to `3.8`
+
+### Feature Highlights:
+- native support for "KubernetesExecutor" and "CeleryKubernetesExecutor", see the new `airflow.kubernetesPodTemplate.*` values
+- native support for "webserver_config.py", see the new `web.webserverConfig.*` values
+- native support for [Airflow 2.0's HA scheduler](https://airflow.apache.org/docs/apache-airflow/stable/scheduler.html#running-more-than-one-scheduler), see the new `scheduler.replicas` value
+- significantly improved git-sync system by moving to [kubernetes/git-sync](https://github.com/kubernetes/git-sync)
+- significantly improved pip installs by moving them into an init-container
+- added a [guide for integrating airflow with your "Microsoft AD" or "OAUTH"](README.md#how-to-authenticate-airflow-users-with-ldapoauth)
+- general cleanup of almost every helm file
+- significant docs/README rewrite
+
+### Other Features:
+- added `airflow.users` to help you create/update airflow web users:
+  - __WARNING:__ default settings create an admin user (user: __admin__ - password: __admin__), disable by setting `airflow.users` to `[]`
+- added `airflow.connections` to help you create/update airflow connections
+- added `airflow.variables` to help you create/update airflow variables
+- added `airflow.pools` to help you create/update airflow pools
+- flower Pods are now affected by `airflow.extraPipPackages`, `airflow.extraContainers`, `airflow.extraVolumeMounts`, `airflow.extraVolumes`
+- you no longer need to set `web.readinessProbe.scheme` or `web.livenessProbe.scheme`; we now only use HTTPS if `AIRFLOW__WEBSERVER__WEB_SERVER_SSL_CERT` and `AIRFLOW__WEBSERVER__WEB_SERVER_SSL_KEY` are set
+- airflow db upgrades are now managed with a post "helm upgrade" Job, meaning they only run once per upgrade (rather than each time the scheduler starts)
+
+### Removed Features:
+- the `XXX.extraConfigmapMounts`, `XXX.secretsDir`, `XXX.secrets`, `XXX.secretsMap` values have been removed, and replaced with `XXX.extraVolumes` and `XXX.extraVolumeMounts`, which use typical Kubernetes volume-mount syntax
+- the `dags.installRequirements` value has been removed; please instead use the `XXX.extraPipPackages` values. This change was made for two main reasons:
+  1. it allowed us to move the pip-install commands into an init-container, which greatly simplifies pod-startup, and removes the need to set any kind of readiness-probe delay in Webserver/Flower Pods
+  2. 
the installRequirements command only ran at Pod start up, meaning you would have to restart all your pods if you updated the `requirements.txt` in your git repo (which isn't very declarative) + +### Known Issues: +- if you want to continue using airflow `1.10.X`, you must enable `airflow.legacyCommands`, but note that not all features of the chart will work (and there is no expectation of full support for `1.10.X`) +- if you were using `dags.persistence.enabled` but not explicitly setting `dags.persistence.existingClaim`, the name of the PVC will change (meaning your dags will disappear) + - to fix this, set `dags.persistence.existingClaim` to the value of the previous dags PVC (which will be your Helm RELEASE_NAME) + +### Recommendations: +- start your values.yaml from scratch (by looking at the new examples, and defaults) + +### Request for Contributions: +- improvements for the docs +- any feature you need to get the chart running in your environment (NOTE: we won't always implement every feature proposed) +- replace the `postgresql` and `redis` sub-charts (currently declared in `requirements.yaml`) with straight YAML in this chart +- implement a system where `XXX.extraPipPackages` only requires a single installation after each "helm upgrade" (probably using Jobs and PersistentVolumeClaims) + - This will be most beneficial for `airflow.kubernetesPodTemplate.extraPipPackages`, as those pip installs have to run for every task in "KubernetesExecutor" mode +- autoscaling using [KEDA](https://github.com/kedacore/keda) for the scheduler/worker replica counts (this will let us remove the largely useless HorizontalPodAutoscaler approach) + +### VALUES - Changed Defaults: +- `rbac.events` = `true` +- `scheduler.livenessProbe.initialDelaySeconds` = `10` +- `web.readinessProbe.enabled` = `true` +- `web.readinessProbe.timeoutSeconds` = `5` +- `web.livenessProbe.periodSeconds` = `10` +- `web.readinessProbe.failureThreshold` = `6` +- `web.livenessProbe.initialDelaySeconds` = `10` +- `web.livenessProbe.timeoutSeconds` = `5` +- `web.livenessProbe.failureThreshold` = `6` +- `scheduler.podDisruptionBudget.enabled` = `false` + +### VALUES - New: +- `airflow.legacyCommands` +- `airflow.image.uid` +- `airflow.image.gid` +- `airflow.users` +- `airflow.usersUpdate` +- `airflow.connections` +- `airflow.connectionsUpdate` +- `airflow.variables` +- `airflow.variablesUpdate` +- `airflow.pools` +- `airflow.poolsUpdate` +- `airflow.kubernetesPodTemplate.stringOverride` +- `airflow.kubernetesPodTemplate.nodeSelector` +- `airflow.kubernetesPodTemplate.affinity` +- `airflow.kubernetesPodTemplate.tolerations` +- `airflow.kubernetesPodTemplate.podAnnotations` +- `airflow.kubernetesPodTemplate.securityContext` +- `airflow.kubernetesPodTemplate.extraPipPackages` +- `airflow.kubernetesPodTemplate.extraVolumeMounts` +- `airflow.kubernetesPodTemplate.extraVolumes` +- `scheduler.replicas` +- `scheduler.livenessProbe.timeoutSeconds` +- `scheduler.extraPipPackages` +- `scheduler.extraVolumeMounts` +- `scheduler.extraVolumes` +- `web.webserverConfig.stringOverride` +- `web.webserverConfig.existingSecret` +- `web.extraVolumeMounts` +- `web.extraVolumes` +- `workers.extraPipPackages` +- `workers.extraVolumeMounts` +- `workers.extraVolumes` +- `flower.readinessProbe.enabled` +- `flower.readinessProbe.initialDelaySeconds` +- `flower.readinessProbe.periodSeconds` +- `flower.readinessProbe.timeoutSeconds` +- `flower.readinessProbe.failureThreshold` +- `flower.livenessProbe.enabled` +- `flower.livenessProbe.initialDelaySeconds` +- 
`flower.livenessProbe.periodSeconds` +- `flower.livenessProbe.timeoutSeconds` +- `flower.livenessProbe.failureThreshold` +- `flower.extraPipPackages` +- `flower.extraVolumeMounts` +- `flower.extraVolumes` +- `dags.gitSync.enabled` +- `dags.gitSync.image.repository` +- `dags.gitSync.image.tag` +- `dags.gitSync.image.pullPolicy` +- `dags.gitSync.image.uid` +- `dags.gitSync.image.gid` +- `dags.gitSync.resources` +- `dags.gitSync.repo` +- `dags.gitSync.repoSubPath` +- `dags.gitSync.branch` +- `dags.gitSync.revision` +- `dags.gitSync.depth` +- `dags.gitSync.syncWait` +- `dags.gitSync.syncTimeout` +- `dags.gitSync.httpSecret` +- `dags.gitSync.httpSecretUsernameKey` +- `dags.gitSync.httpSecretPasswordKey` +- `dags.gitSync.sshSecret` +- `dags.gitSync.sshSecretKey` +- `dags.gitSync.sshKnownHosts` + +### VALUES - Removed: +- `airflow.extraConfigmapMounts` +- `scheduler.initialStartupDelay` +- `scheduler.preinitdb` +- `scheduler.initdb` +- `scheduler.connections` +- `scheduler.refreshConnections` +- `scheduler.existingSecretConnections` +- `scheduler.pools` +- `scheduler.variables` +- `scheduler.secretsDir` +- `scheduler.secrets` +- `scheduler.secretsMap` +- `web.initialStartupDelay` +- `web.minReadySeconds` +- `web.baseUrl` +- `web.serializeDAGs` +- `web.readinessProbe.scheme` +- `web.readinessProbe.successThreshold` +- `web.livenessProbe.scheme` +- `web.livenessProbe.successThreshold` +- `web.secretsDir` +- `web.secrets` +- `web.secretsMap` +- `workers.celery.instances` +- `workers.initialStartupDelay` +- `workers.secretsDir` +- `workers.secrets` +- `workers.secretsMap` +- `flower.initialStartupDelay` +- `flower.minReadySeconds` +- `flower.extraConfigmapMounts` +- `flower.urlPrefix` +- `flower.secretsDir` +- `flower.secrets` +- `flower.secretsMap` +- `dags.doNotPickle` +- `dags.installRequirements` +- `dags.git.url` +- `dags.git.ref` +- `dags.git.secret` +- `dags.git.sshKeyscan` +- `dags.git.privateKeyName` +- `dags.git.repoHost` +- `dags.git.repoPort` +- `dags.git.gitSync.enabled` +- `dags.git.gitSync.resources` +- `dags.git.gitSync.image` +- `dags.git.gitSync.refreshTime` +- `dags.git.gitSync.mountPath` +- `dags.git.gitSync.syncSubPath` +- `dags.initContainer.enabled` +- `dags.initContainer.resources` +- `dags.initContainer.image.repository` +- `dags.initContainer.image.tag` +- `dags.initContainer.image.pullPolicy` +- `dags.initContainer.mountPath` +- `dags.initContainer.syncSubPath` +- `ingress.web.livenessPath` +- `ingress.flower.livenessPath` + ## `v7.14.X` → `v7.15.0` __The following IMPROVEMENTS have been made:__ @@ -115,8 +307,6 @@ __The following IMPROVEMENTS have been made:__ * You can now specify minReadySeconds for flower __The following values have CHANGED DEFAULTS:__ -* `workers.celery.instances`: - * Is now `16` by default (letting each worker take 16 tasks) * `postgresql.master.podAnnotations`: * Is now `{"cluster-autoscaler.kubernetes.io/safe-to-evict": "true"}` * `redis.master.podAnnotations`: diff --git a/charts/airflow/examples/google-gke/custom-values.yaml b/charts/airflow/examples/google-gke/custom-values.yaml index a1ff54cc..f050c7c3 100644 --- a/charts/airflow/examples/google-gke/custom-values.yaml +++ b/charts/airflow/examples/google-gke/custom-values.yaml @@ -27,25 +27,9 @@ # - Secret: airflow-cluster1-fernet-key # - Secret: airflow-cluster1-mysql-password # - Secret: airflow-cluster1-redis-password -# - Secret: airflow-cluster1-git-keys -# - ConfigMap: airflow-cluster1-webserver-config +# - Secret: airflow-cluster1-git-secret # - cert-manager.io/Certificate: 
airflow-cluster1-cert # -# Helm Install Commands: -# helm install stable/airflow \ -# --version "X.X.X" \ -# --name "airflow-cluster1" \ -# --namespace "airflow-cluster1" \ -# --values ./custom-values.yaml -# -# Run bash commands in the Scheduler Pod: (use to: `airflow create_user`) -# kubectl exec \ -# -it \ -# --namespace airflow-cluster1 \ -# --container airflow-scheduler \ -# Deployment/airflow--airflow-cluster1-scheduler \ -# /bin/bash -# ################################### # Airflow - Common Configs @@ -58,36 +42,71 @@ airflow: ## environment variables for the web/scheduler/worker Pods (for airflow configs) ## config: - ## Security - AIRFLOW__CORE__SECURE_MODE: "True" - AIRFLOW__API__AUTH_BACKEND: "airflow.api.auth.backend.deny_all" - AIRFLOW__WEBSERVER__EXPOSE_CONFIG: "False" - AIRFLOW__WEBSERVER__RBAC: "True" - - ## SSL - ## NOTE: This effectively disables HTTP, so `web.readinessProbe.scheme` and `web.livenessProbe.scheme` - ## need to be set accordingly - AIRFLOW__WEBSERVER__WEB_SERVER_SSL_CERT: "/var/airflow/secrets/airflow-cluster1-cert/tls.crt" - AIRFLOW__WEBSERVER__WEB_SERVER_SSL_KEY: "/var/airflow/secrets/airflow-cluster1-cert/tls.key" - - ## DAGS - AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL: "30" - - ## GCP Remote Logging - AIRFLOW__CORE__REMOTE_LOGGING: "True" - AIRFLOW__CORE__REMOTE_BASE_LOG_FOLDER: "gs://XXXXXXXX--airflow-cluster1/airflow/logs" - AIRFLOW__CORE__REMOTE_LOG_CONN_ID: "google_cloud_airflow" - - ## Email (SMTP) - AIRFLOW__EMAIL__EMAIL_BACKEND: "airflow.utils.email.send_email_smtp" - AIRFLOW__SMTP__SMTP_HOST: "smtpmail.example.com" - AIRFLOW__SMTP__SMTP_STARTTLS: "False" - AIRFLOW__SMTP__SMTP_SSL: "False" - AIRFLOW__SMTP__SMTP_PORT: "25" - AIRFLOW__SMTP__SMTP_MAIL_FROM: "admin@airflow-cluster1.example.com" - - ## Disable noisy "Handling signal: ttou" Gunicorn log messages - GUNICORN_CMD_ARGS: "--log-level WARNING" + ## security + AIRFLOW__WEBSERVER__EXPOSE_CONFIG: "False" + + ## enable SSL for webserver + AIRFLOW__WEBSERVER__WEB_SERVER_SSL_CERT: "/opt/airflow/ssl-cert/tls.crt" + AIRFLOW__WEBSERVER__WEB_SERVER_SSL_KEY: "/opt/airflow/ssl-cert/tls.key" + + ## dags + AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL: "30" + + ## remote log storage + AIRFLOW__LOGGING__REMOTE_LOGGING: "True" + AIRFLOW__LOGGING__REMOTE_BASE_LOG_FOLDER: "gs://XXXXXXXX--airflow-cluster1/airflow/logs" + AIRFLOW__LOGGING__REMOTE_LOG_CONN_ID: "my_gcp" + + ## email + AIRFLOW__EMAIL__EMAIL_BACKEND: "airflow.utils.email.send_email_smtp" + AIRFLOW__SMTP__SMTP_HOST: "smtpmail.example.com" + AIRFLOW__SMTP__SMTP_MAIL_FROM: "admin@airflow-cluster1.example.com" + AIRFLOW__SMTP__SMTP_PORT: "25" + AIRFLOW__SMTP__SMTP_SSL: "False" + AIRFLOW__SMTP__SMTP_STARTTLS: "False" + + ## domain used in airflow emails + AIRFLOW__WEBSERVER__BASE_URL: "https://airflow-cluster1.example.com/" + + ## a list of initial users to create + ## + users: + - username: admin + password: admin + role: Admin + email: admin@example.com + firstName: admin + lastName: admin + + ## if we update users or just create them the first time (lookup by `username`) + ## + usersUpdate: false + + ## a list of initial connections to create + ## + connections: + ## see docs: https://airflow.apache.org/docs/apache-airflow-providers-google/stable/connections/gcp.html + - id: my_gcp + type: google_cloud_platform + description: my GCP connection + extra: |- + { "extra__google_cloud_platform__num_retries": "5" } + + ## a list of initial variables to create + ## + variables: + - key: "environment" + value: "prod" + + ## a list of initial pools to create + ## 
+ pools: + - name: "pool_1" + slots: 5 + description: "example pool with 5 slots" + - name: "pool_2" + slots: 10 + description: "example pool with 10 slots" ## extra environment variables for the web/scheduler/worker (AND flower) Pods ## @@ -98,15 +117,6 @@ airflow: name: airflow-cluster1-fernet-key key: value - ## extra configMap volumeMounts for the web/scheduler/worker Pods - ## - extraConfigmapMounts: - - name: airflow-cluster1-webserver-config - mountPath: /opt/airflow/webserver_config.py - configMap: airflow-cluster1-webserver-config - readOnly: true - subPath: webserver_config.py - ################################### # Airflow - Scheduler Configs ################################### @@ -116,45 +126,34 @@ scheduler: resources: requests: cpu: "1000m" - memory: "1Gi" - - ## custom airflow connections for the airflow scheduler - ## - connections: - - id: google_cloud_airflow - type: google_cloud_platform - extra: '{"extra__google_cloud_platform__num_retries": "5"}' - - ## custom airflow variables for the airflow scheduler - ## - variables: | - { "environment": "prod" } - - ## custom airflow pools for the airflow scheduler - ## - pools: | - { - "example": { - "description": "This is an example pool with 2 slots.", - "slots": 2 - } - } + memory: "512Mi" ################################### # Airflow - WebUI Configs ################################### web: - ## resource requests/limits for the airflow web Pods + ## configs to generate webserver_config.py ## - resources: - requests: - cpu: "500m" - memory: "1Gi" + webserverConfig: + ## the full text value to mount as the webserver_config.py file + ## + stringOverride: |- + from flask_appbuilder.security.manager import AUTH_DB + + # use embedded DB for auth + AUTH_TYPE = AUTH_DB ## the number of web Pods to run ## replicas: 1 + ## resource requests/limits for the airflow web Pods + ## + resources: + requests: + cpu: "200m" + memory: "900Mi" + ## configs for the Service of the web Pods ## service: @@ -165,37 +164,19 @@ web: loadBalancerIP: XXX.XXX.XXX.XXX loadBalancerSourceRanges: [] - ## sets `AIRFLOW__WEBSERVER__BASE_URL` - ## - baseUrl: "https://airflow-cluster1.example.com/" - - ## extra pip packages to install in the web container - ## - extraPipPackages: [] - - ## configs for the web Service liveness probe + ## extra VolumeMounts for the web Pods ## - livenessProbe: - ## the scheme used in the liveness probe: {HTTP,HTTPS} - ## - scheme: HTTPS - - ## the number of seconds to wait before checking pod health - ## - ## NOTE: - ## - make larger if you are installing many packages with: - ## `airflow.extraPipPackages`, `web.extraPipPackages`, or `dags.installRequirements` - ## - initialDelaySeconds: 300 - - ## the directory in which to mount secrets on web containers - ## - secretsDir: /var/airflow/secrets + extraVolumeMounts: + - name: ssl-cert + mountPath: /opt/airflow/ssl-cert + readOnly: true - ## secret names which will be mounted as a file at `{web.secretsDir}/` + ## extra Volumes for the web Pods ## - secrets: - - airflow-cluster1-cert + extraVolumes: + - name: ssl-cert + secret: + secretName: airflow-cluster1-cert ################################### # Airflow - Worker Configs @@ -205,17 +186,17 @@ workers: ## enabled: true + ## the number of workers Pods to run + ## + replicas: 2 + ## resource requests/limits for the airflow worker Pods ## resources: requests: - cpu: "1000m" + cpu: "256m" memory: "2Gi" - ## the number of workers Pods to run - ## - replicas: 2 - ## configs for the PodDisruptionBudget of the worker StatefulSet ## 
podDisruptionBudget:
@@ -271,14 +252,6 @@ workers:
   ##
   terminationPeriod: 60
 
-  ## directory in which to mount secrets on worker containers
-  ##
-  secretsDir: /var/airflow/secrets
-
-  ## secret names which will be mounted as a file at `{workers.secretsDir}/`
-  ##
-  secrets: []
-
 ###################################
 # Airflow - Flower Configs
 ###################################
@@ -291,8 +264,8 @@ flower:
   ##
   resources:
     requests:
-      cpu: "100m"
-      memory: "126Mi"
+      cpu: "10m"
+      memory: "64Mi"
 
   ## configs for the Service of the flower Pods
   ##
@@ -318,57 +291,36 @@ logs:
 # Airflow - DAGs Configs
 ###################################
 dags:
-  ## configs for the DAG git repository & sync container
+  ## configs for the git-sync sidecar (https://github.com/kubernetes/git-sync)
   ##
-  git:
-    ## url of the git repository
+  gitSync:
+    ## if the git-sync sidecar container is enabled
     ##
-    url: "ssh://git@repo.example.com/my-airflow-dags.git"
+    enabled: true
 
-    ## the branch/tag/sha1 which we clone
+    ## the url of the git repo
     ##
-    ref: master
+    repo: "git@repo.example.com:my-airflow-dags.git"
 
-    ## the name of a pre-created secret containing files for ~/.ssh/
-    ##
-    ## NOTE:
-    ##  - this is ONLY RELEVANT for SSH git repos
-    ##  - the secret commonly includes files: id_rsa, id_rsa.pub, known_hosts
-    ##  - known_hosts is NOT NEEDED if `git.sshKeyscan` is true
+    ## the git branch to check out
     ##
-    secret: airflow-cluster1-git-keys
+    branch: master
 
-    ## the name of the private key file in your `git.secret`
+    ## the git revision (tag or hash) to check out
     ##
-    ## NOTE:
-    ##  - this is ONLY RELEVANT for PRIVATE SSH git repos
-    ##
-    privateKeyName: id_rsa
+    revision: HEAD
 
-    ## the host name of the git repo
+    ## the number of seconds between syncs
    ##
-    ## NOTE:
-    ##  - this is ONLY REQUIRED for SSH git repos
-    ##
-    repoHost: "repo.example.com"
+    syncWait: 60
 
-    ## the port of the git repo
+    ## the name of a pre-created Secret with git ssh credentials
     ##
-    ## NOTE:
-    ##  - this is ONLY REQUIRED for SSH git repos
-    ##
-    repoPort: 22
+    sshSecret: "airflow-cluster1-git-secret"
 
-    ## configs for the git-sync container
+    ## the key in `dags.gitSync.sshSecret` with your ssh-key file
     ##
-    gitSync:
-      ## enable the git-sync sidecar container
-      ##
-      enabled: true
-
-      ## the git sync interval in seconds
-      ##
-      refreshTime: 60
+    sshSecretKey: id_rsa
 
 ###################################
 # Kubernetes - RBAC
 ###################################
@@ -469,8 +421,8 @@ redis:
   ##
   resources:
     requests:
-      cpu: "100m"
-      memory: "256Mi"
+      cpu: "10m"
+      memory: "32Mi"
 
   ## configs for the PVC of the redis master
   ##
@@ -486,8 +438,8 @@
   ##
   resources:
     requests:
-      cpu: "100m"
-      memory: "256Mi"
+      cpu: "10m"
+      memory: "32Mi"
 
   ## configs for the PVC of the redis slaves
   ##
diff --git a/charts/airflow/examples/google-gke/k8s_resources/configmap-webserver.yaml b/charts/airflow/examples/google-gke/k8s_resources/configmap-webserver.yaml
deleted file mode 100644
index 7fc8dcec..00000000
--- a/charts/airflow/examples/google-gke/k8s_resources/configmap-webserver.yaml
+++ /dev/null
@@ -1,28 +0,0 @@
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: airflow-cluster1-webserver-config
-  namespace: airflow-cluster1
-data:
-  webserver_config.py: |
-    import os
-    from airflow.configuration import conf
-    from flask_appbuilder.security.manager import AUTH_DB
-
-    basedir = os.path.abspath(os.path.dirname(__file__))
-
-    # The SQLAlchemy connection string. 
- SQLALCHEMY_DATABASE_URI = conf.get("core", "SQL_ALCHEMY_CONN") - - # Flask-WTF flag for CSRF - CSRF_ENABLED = True - - # Force users to re-auth after 15min of inactivity - PERMANENT_SESSION_LIFETIME = 900 - - # Don't allow user self registration - AUTH_USER_REGISTRATION = False - AUTH_USER_REGISTRATION_ROLE = "Viewer" - - # Use Database authentication - AUTH_TYPE = AUTH_DB \ No newline at end of file diff --git a/charts/airflow/examples/google-gke/k8s_resources/secret-git-keys.yaml b/charts/airflow/examples/google-gke/k8s_resources/secret-git-secret.yaml similarity index 88% rename from charts/airflow/examples/google-gke/k8s_resources/secret-git-keys.yaml rename to charts/airflow/examples/google-gke/k8s_resources/secret-git-secret.yaml index 5d3c1328..c0d9fde9 100644 --- a/charts/airflow/examples/google-gke/k8s_resources/secret-git-keys.yaml +++ b/charts/airflow/examples/google-gke/k8s_resources/secret-git-secret.yaml @@ -1,10 +1,10 @@ apiVersion: v1 kind: Secret metadata: - name: airflow-cluster1-git-keys + name: airflow-cluster1-git-secret namespace: airflow-cluster1 stringData: - git_key: | + id_rsa: | -----BEGIN OPENSSH PRIVATE KEY----- XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX @@ -32,7 +32,3 @@ stringData: XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX -----END OPENSSH PRIVATE KEY----- - git_key.pub: | - ssh-rsa XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX airflow-cluster1@gke-cluster - known_hosts: | - repo.example.com, ssh-rsa XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX diff --git a/charts/airflow/examples/minikube/custom-values.yaml b/charts/airflow/examples/minikube/custom-values.yaml index 751c8317..5cebd126 100644 --- a/charts/airflow/examples/minikube/custom-values.yaml +++ b/charts/airflow/examples/minikube/custom-values.yaml @@ -3,7 +3,7 @@ # - This is intended to be a `custom-values.yaml` starting point for non-production deployment (like minikube) # External Dependencies: -# - A PUBLIC git repo for DAGs: ssh://git@repo.example.com:my-airflow-dags.git +# - A PUBLIC git repo for DAGs: https://github.com/USERNAME/REPOSITORY.git # ################################### @@ -22,10 +22,7 @@ airflow: ## config: # Security - AIRFLOW__CORE__SECURE_MODE: "True" - AIRFLOW__API__AUTH_BACKEND: "airflow.api.auth.backend.deny_all" AIRFLOW__WEBSERVER__EXPOSE_CONFIG: "False" - AIRFLOW__WEBSERVER__RBAC: "False" # DAGS AIRFLOW__CORE__LOAD_EXAMPLES: "True" @@ -33,37 +30,49 @@ airflow: ## Disable noisy "Handling signal: ttou" Gunicorn log messages GUNICORN_CMD_ARGS: "--log-level WARNING" -################################### -# Airflow - Scheduler Configs -################################### -scheduler: - - ## custom airflow connections for the airflow scheduler + ## a list of initial users to create + ## + users: + - username: admin + password: admin + role: Admin + email: admin@example.com + firstName: admin + lastName: admin + + ## a list of initial connections to create ## connections: - - id: my_aws - type: aws - extra: | - { - "aws_access_key_id": "XXXXXXXXXXXXXXXXXXX", - "aws_secret_access_key": "XXXXXXXXXXXXXXX", - "region_name":"eu-central-1" - } - - ## custom airflow variables for the airflow scheduler + ## see docs: https://airflow.apache.org/docs/apache-airflow-providers-google/stable/connections/gcp.html + - id: my_gcp + type: google_cloud_platform + description: my GCP connection + extra: |- + 
{ "extra__google_cloud_platform__num_retries": "5" } + + ## a list of initial variables to create + ## + variables: + - key: "environment" + value: "dev" + + ## a list of initial pools to create ## - variables: | - { "environment": "dev" } + pools: + - name: "pool_1" + slots: 5 + description: "example pool with 5 slots" + - name: "pool_2" + slots: 10 + description: "example pool with 10 slots" - ## custom airflow pools for the airflow scheduler +################################### +# Airflow - Scheduler Configs +################################### +scheduler: + ## the number of scheduler Pods to run ## - pools: | - { - "example": { - "description": "This is an example pool with 2 slots.", - "slots": 2 - } - } + replicas: 1 ################################### # Airflow - WebUI Configs @@ -78,68 +87,71 @@ web: # Airflow - Worker Configs ################################### workers: + ## if the airflow workers StatefulSet should be deployed + ## + enabled: true + ## the number of workers Pods to run ## replicas: 1 +################################### +# Airflow - Flower Configs +################################### +flower: + ## if the Flower UI should be deployed + ## + enabled: true + +################################### +# Airflow - Logs Configs +################################### +logs: + ## configs for the logs PVC + ## + persistence: + ## if a persistent volume is mounted at `logs.path` + ## + enabled: false + ################################### # Airflow - DAGs Configs ################################### dags: - ## configs for the DAG git repository & sync container + ## configs for the git-sync sidecar (https://github.com/kubernetes/git-sync) ## - git: - ## url of the git repository + gitSync: + ## if the git-sync sidecar container is enabled ## - url: "ssh://git@repo.example.com/my-airflow-dags.git" + enabled: false - ## the branch/tag/sha1 which we clone + ## the url of the git repo ## - ref: master + repo: "https://github.com/USERNAME/REPOSITORY.git" - ## the name of a pre-created secret containing files for ~/.ssh/ + ## the git branch to check out ## - ## NOTE: - ## - this is ONLY RELEVANT for SSH git repos - ## - the secret commonly includes files: id_rsa, id_rsa.pub, known_hosts - ## - known_hosts is NOT NEEDED if `git.sshKeyscan` is true - ## - secret: "" + branch: master - ## the name of the private key file in your `git.secret` - ## - ## NOTE: - ## - this is ONLY RELEVANT for PRIVATE SSH git repos + ## the git revision (tag or hash) to check out ## - privateKeyName: id_rsa + revision: HEAD - ## the host name of the git repo + ## the number of seconds between syncs ## - ## NOTE: - ## - this is ONLY REQUIRED for SSH git repos - ## - ## EXAMPLE: - ## repoHost: "github.com" - ## - repoHost: "repo.example.com" + syncWait: 60 - ## the port of the git repo + ## the name of a pre-created Secret with git http credentials ## - ## NOTE: - ## - this is ONLY REQUIRED for SSH git repos + httpSecret: "" + + ## the key in `dags.gitSync.httpSecret` with your git username ## - repoPort: 22 + httpSecretUsernameKey: username - ## configs for the git-sync container + ## the key in `dags.gitSync.httpSecret` with your git password/token ## - gitSync: - ## enable the git-sync sidecar container - ## - enabled: true - - ## the git sync interval in seconds - ## - refreshTime: 60 + httpSecretPasswordKey: password ################################### # Database - PostgreSQL Chart diff --git a/charts/airflow/files/pod_template.kubernetes-helm-yaml b/charts/airflow/files/pod_template.kubernetes-helm-yaml new 
file mode 100644 index 00000000..12dd5fe2 --- /dev/null +++ b/charts/airflow/files/pod_template.kubernetes-helm-yaml @@ -0,0 +1,65 @@ +{{- $extraPipPackages := .Values.airflow.kubernetesPodTemplate.extraPipPackages }} +apiVersion: v1 +kind: Pod +metadata: + name: dummy-name + {{- if .Values.airflow.kubernetesPodTemplate.podAnnotations }} + annotations: + {{- toYaml .Values.airflow.kubernetesPodTemplate.podAnnotations | nindent 4 }} + {{- end }} +spec: + restartPolicy: Never + {{- if .Values.airflow.image.pullSecret }} + imagePullSecrets: + - name: {{ .Values.airflow.image.pullSecret }} + {{- end }} + serviceAccountName: {{ include "airflow.serviceAccountName" . }} + {{- if .Values.airflow.kubernetesPodTemplate.nodeSelector }} + nodeSelector: + {{- toYaml .Values.airflow.kubernetesPodTemplate.nodeSelector | nindent 4 }} + {{- end }} + {{- if .Values.airflow.kubernetesPodTemplate.affinity }} + affinity: + {{- toYaml .Values.airflow.kubernetesPodTemplate.affinity | nindent 4 }} + {{- end }} + {{- if .Values.airflow.kubernetesPodTemplate.tolerations }} + tolerations: + {{- toYaml .Values.airflow.kubernetesPodTemplate.tolerations | nindent 4 }} + {{- end }} + {{- if .Values.airflow.kubernetesPodTemplate.securityContext }} + securityContext: + {{- toYaml .Values.airflow.kubernetesPodTemplate.securityContext | nindent 4 }} + {{- end }} + {{- if or ($extraPipPackages) (.Values.dags.gitSync.enabled) }} + initContainers: + {{- if $extraPipPackages }} + {{- include "airflow.init_container.install_pip_packages" (dict "Values" .Values "extraPipPackages" $extraPipPackages) | indent 4 }} + {{- end }} + {{- include "airflow.container.git_sync" (dict "Values" .Values "sync_one_time" "true") | indent 4 }} + {{- end }} + containers: + - name: base + image: dummy_image + imagePullPolicy: IfNotPresent + envFrom: + - configMapRef: + name: "{{ include "airflow.fullname" . }}-env" + env: + {{- include "airflow.env" . | indent 8 }} + - name: AIRFLOW__CORE__EXECUTOR + value: LocalExecutor + ports: [] + command: [] + args: [] + {{- $extraVolumeMounts := .Values.airflow.kubernetesPodTemplate.extraVolumeMounts }} + {{- $volumeMounts := include "airflow.volumeMounts" (dict "Values" .Values "extraPipPackages" $extraPipPackages "extraVolumeMounts" $extraVolumeMounts) }} + {{- if $volumeMounts }} + volumeMounts: + {{- $volumeMounts | indent 8 }} + {{- end }} + {{- $extraVolumes := .Values.airflow.kubernetesPodTemplate.extraVolumes }} + {{- $volumes := include "airflow.volumes" (dict "Values" .Values "extraPipPackages" $extraPipPackages "extraVolumes" $extraVolumes) }} + {{- if $volumes }} + volumes: + {{- $volumes | indent 4 }} + {{- end }} diff --git a/charts/airflow/files/webserver_config.py b/charts/airflow/files/webserver_config.py new file mode 100644 index 00000000..d02e2b40 --- /dev/null +++ b/charts/airflow/files/webserver_config.py @@ -0,0 +1,8 @@ +from airflow import configuration as conf +from flask_appbuilder.security.manager import AUTH_DB + +# the SQLAlchemy connection string +SQLALCHEMY_DATABASE_URI = conf.get('core', 'SQL_ALCHEMY_CONN') + +# use embedded DB for auth +AUTH_TYPE = AUTH_DB diff --git a/charts/airflow/templates/NOTES.txt b/charts/airflow/templates/NOTES.txt index 6612bf6b..e8682fc1 100644 --- a/charts/airflow/templates/NOTES.txt +++ b/charts/airflow/templates/NOTES.txt @@ -1,30 +1,34 @@ -Congratulations. You have just deployed Apache Airflow! +================ +Congratulations, you have just deployed Apache Airflow! 
{{- if .Values.ingress.enabled }} -Ingress URL to Airflow and Flower: - - - Airflow: http://{{ .Values.ingress.web.host }}{{ .Values.ingress.web.path }}/ - {{- if .Values.flower.enabled }} - - Flower: http://{{ .Values.ingress.flower.host }}{{ .Values.ingress.flower.path }}/ - {{- end }} - +---------------- +You have deployed an Ingress (for public-internet access) +---------------- +- Webserver: http://{{ .Values.ingress.web.host }}{{ .Values.ingress.web.path }}/ +{{- if .Values.flower.enabled }} +- Flower: http://{{ .Values.ingress.flower.host }}{{ .Values.ingress.flower.path }}/ +{{- end }} {{- end }} -1. Get the Airflow Service URL by running these commands: - +---------------- +Access the Webserver Service with your browser +---------------- {{- if contains "NodePort" .Values.web.service.type }} - export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "airflow.fullname" . }}-web) - export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") - echo http://$NODE_IP:$NODE_PORT/ + export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "airflow.fullname" . }}-web) + export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo "URL: http://$NODE_IP:$NODE_PORT/" + {{- else if contains "LoadBalancer" .Values.web.service.type }} - NOTE: It may take a few minutes for the LoadBalancer IP to be available. - You can watch the status of the service by running 'kubectl get svc -w {{ include "airflow.fullname" . }}' - export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "airflow.fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}') - echo http://$SERVICE_IP:{{ .Values.web.service.externalPort }}/ -{{- else if contains "ClusterIP" .Values.web.service.type }} - export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "component=web,app={{ include "airflow.labels.app" . }}" -o jsonpath="{.items[0].metadata.name}") - echo http://127.0.0.1:8080 - kubectl port-forward --namespace {{ .Release.Namespace }} $POD_NAME 8080:8080 -{{- end }} + export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "airflow.fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}') + echo "URL: http://$SERVICE_IP:{{ .Values.web.service.externalPort }}/" -2. Open Airflow in your web browser +NOTE: it may take a few minutes for the LoadBalancer IP to become available + kubectl get svc -w {{ include "airflow.fullname" . }} + +{{- else if contains "ClusterIP" .Values.web.service.type }} + export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "component=web,app={{ include "airflow.labels.app" . }}" -o jsonpath="{.items[0].metadata.name}") + echo "URL: http://127.0.0.1:8080" + kubectl port-forward --namespace {{ .Release.Namespace }} $POD_NAME 8080:8080 +================ +{{- end }} \ No newline at end of file diff --git a/charts/airflow/templates/_helpers.tpl b/charts/airflow/templates/_helpers.tpl deleted file mode 100644 index c4eb22c4..00000000 --- a/charts/airflow/templates/_helpers.tpl +++ /dev/null @@ -1,193 +0,0 @@ -{{/* vim: set filetype=mustache: */}} - -{{/* -Construct the base name for all resources in this chart. -We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 
-*/}} -{{- define "airflow.fullname" -}} -{{- printf "%s" .Release.Name | trunc 63 | trimSuffix "-" -}} -{{- end -}} - -{{/* -Construct the `labels.app` for used by all resources in this chart. -*/}} -{{- define "airflow.labels.app" -}} -{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" -}} -{{- end -}} - -{{/* -Construct the `labels.chart` for used by all resources in this chart. -*/}} -{{- define "airflow.labels.chart" -}} -{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} -{{- end -}} - -{{/* -Construct the name of the airflow ServiceAccount. -*/}} -{{- define "airflow.serviceAccountName" -}} -{{- if .Values.serviceAccount.create -}} -{{- .Values.serviceAccount.name | default (include "airflow.fullname" .) -}} -{{- else -}} -{{- .Values.serviceAccount.name | default "default" -}} -{{- end -}} -{{- end -}} - -{{/* -Construct the `postgresql.fullname` of the postgresql sub-chat chart. -Used to discover the Service and Secret name created by the sub-chart. -*/}} -{{- define "airflow.postgresql.fullname" -}} -{{- if .Values.postgresql.fullnameOverride -}} -{{- .Values.postgresql.fullnameOverride | trunc 63 | trimSuffix "-" -}} -{{- else -}} -{{- $name := default "postgresql" .Values.postgresql.nameOverride -}} -{{- if contains $name .Release.Name -}} -{{- .Release.Name | trunc 63 | trimSuffix "-" -}} -{{- else -}} -{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} -{{- end -}} -{{- end -}} -{{- end -}} - -{{/* -Construct the `redis.fullname` of the redis sub-chat chart. -Used to discover the master Service and Secret name created by the sub-chart. -*/}} -{{- define "airflow.redis.fullname" -}} -{{- if .Values.redis.fullnameOverride -}} -{{- .Values.redis.fullnameOverride | trunc 63 | trimSuffix "-" -}} -{{- else -}} -{{- $name := default "redis" .Values.redis.nameOverride -}} -{{- if contains $name .Release.Name -}} -{{- .Release.Name | trunc 63 | trimSuffix "-" -}} -{{- else -}} -{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} -{{- end -}} -{{- end -}} -{{- end -}} - -{{/* -Bash command which echos the DB connection string in SQLAlchemy format. -NOTE: - - used by `AIRFLOW__CORE__SQL_ALCHEMY_CONN_CMD` - - the `DATABASE_PASSWORD_CMD` sub-command is set in `configmap-env` -*/}} -{{- define "DATABASE_SQLALCHEMY_CMD" -}} -{{- if .Values.postgresql.enabled -}} -echo -n "postgresql+psycopg2://${DATABASE_USER}:$(eval $DATABASE_PASSWORD_CMD)@${DATABASE_HOST}:${DATABASE_PORT}/${DATABASE_DB}" -{{- else if and (not .Values.postgresql.enabled) (eq "postgres" .Values.externalDatabase.type) -}} -echo -n "postgresql+psycopg2://${DATABASE_USER}:$(eval $DATABASE_PASSWORD_CMD)@${DATABASE_HOST}:${DATABASE_PORT}/${DATABASE_DB}${DATABASE_PROPERTIES}" -{{- else if and (not .Values.postgresql.enabled) (eq "mysql" .Values.externalDatabase.type) -}} -echo -n "mysql+mysqldb://${DATABASE_USER}:$(eval $DATABASE_PASSWORD_CMD)@${DATABASE_HOST}:${DATABASE_PORT}/${DATABASE_DB}${DATABASE_PROPERTIES}" -{{- end -}} -{{- end -}} - -{{/* -Bash command which echos the DB connection string in Celery result_backend format. 
-NOTE: - - used by `AIRFLOW__CELERY__RESULT_BACKEND_CMD` - - the `DATABASE_PASSWORD_CMD` sub-command is set in `configmap-env` -*/}} -{{- define "DATABASE_CELERY_CMD" -}} -{{- if .Values.postgresql.enabled -}} -echo -n "db+postgresql://${DATABASE_USER}:$(eval $DATABASE_PASSWORD_CMD)@${DATABASE_HOST}:${DATABASE_PORT}/${DATABASE_DB}" -{{- else if and (not .Values.postgresql.enabled) (eq "postgres" .Values.externalDatabase.type) -}} -echo -n "db+postgresql://${DATABASE_USER}:$(eval $DATABASE_PASSWORD_CMD)@${DATABASE_HOST}:${DATABASE_PORT}/${DATABASE_DB}${DATABASE_PROPERTIES}" -{{- else if and (not .Values.postgresql.enabled) (eq "mysql" .Values.externalDatabase.type) -}} -echo -n "db+mysql://${DATABASE_USER}:$(eval $DATABASE_PASSWORD_CMD)@${DATABASE_HOST}:${DATABASE_PORT}/${DATABASE_DB}${DATABASE_PROPERTIES}" -{{- end -}} -{{- end -}} - -{{/* -Bash command which echos the Redis connection string. -NOTE: - - used by `AIRFLOW__CELERY__BROKER_URL_CMD` - - the `REDIS_PASSWORD_CMD` sub-command is set in `configmap-env` -*/}} -{{- define "REDIS_CONNECTION_CMD" -}} -echo -n "redis://$(eval $REDIS_PASSWORD_CMD)${REDIS_HOST}:${REDIS_PORT}/${REDIS_DBNUM}" -{{- end -}} - -{{/* -Construct a set of secret environment variables to be mounted in web, scheduler, worker, and flower pods. -When applicable, we use the secrets created by the postgres/redis charts (which have fixed names and secret keys). -*/}} -{{- define "airflow.mapenvsecrets" -}} -{{- /* ------------------------------ */ -}} -{{- /* ---------- POSTGRES ---------- */ -}} -{{- /* ------------------------------ */ -}} -{{- if .Values.postgresql.enabled }} -{{- if .Values.postgresql.existingSecret }} -- name: DATABASE_PASSWORD - valueFrom: - secretKeyRef: - name: {{ .Values.postgresql.existingSecret }} - key: {{ .Values.postgresql.existingSecretKey }} -{{- else }} -- name: DATABASE_PASSWORD - valueFrom: - secretKeyRef: - name: {{ include "airflow.postgresql.fullname" . }} - key: postgresql-password -{{- end }} -{{- else }} -{{- if .Values.externalDatabase.passwordSecret }} -- name: DATABASE_PASSWORD - valueFrom: - secretKeyRef: - name: {{ .Values.externalDatabase.passwordSecret }} - key: {{ .Values.externalDatabase.passwordSecretKey }} -{{- else }} -- name: DATABASE_PASSWORD - value: "" -{{- end }} -{{- end }} -{{- /* --------------------------- */ -}} -{{- /* ---------- REDIS ---------- */ -}} -{{- /* --------------------------- */ -}} -{{- if eq .Values.airflow.executor "CeleryExecutor" }} -{{- if .Values.redis.enabled }} -{{- if .Values.redis.existingSecret }} -- name: REDIS_PASSWORD - valueFrom: - secretKeyRef: - name: {{ .Values.redis.existingSecret }} - key: {{ .Values.redis.existingSecretPasswordKey }} -{{- else }} -- name: REDIS_PASSWORD - valueFrom: - secretKeyRef: - name: {{ include "airflow.redis.fullname" . 
}}
-  key: redis-password
-{{- end }}
-{{- else }}
-{{- if .Values.externalRedis.passwordSecret }}
-- name: REDIS_PASSWORD
-  valueFrom:
-    secretKeyRef:
-      name: {{ .Values.externalRedis.passwordSecret }}
-      key: {{ .Values.externalRedis.passwordSecretKey }}
-{{- else }}
-- name: REDIS_PASSWORD
-  value: ""
-{{- end }}
-{{- end }}
-{{- end }}
-{{- /* ---------------------------- */ -}}
-{{- /* ---------- FLOWER ---------- */ -}}
-{{- /* ---------------------------- */ -}}
-{{- if and (.Values.flower.basicAuthSecret) (not .Values.airflow.config.AIRFLOW__CELERY__FLOWER_BASIC_AUTH) }}
-- name: AIRFLOW__CELERY__FLOWER_BASIC_AUTH
-  valueFrom:
-    secretKeyRef:
-      name: {{ .Values.flower.basicAuthSecret }}
-      key: {{ .Values.flower.basicAuthSecretKey }}
-{{- end }}
-{{- /* ---------------------------- */ -}}
-{{- /* ---------- EXTRAS ---------- */ -}}
-{{- /* ---------------------------- */ -}}
-{{- if .Values.airflow.extraEnv }}
-{{ toYaml .Values.airflow.extraEnv }}
-{{- end }}
-{{- end }}
diff --git a/charts/airflow/templates/_helpers/common.tpl b/charts/airflow/templates/_helpers/common.tpl
new file mode 100644
index 00000000..56947899
--- /dev/null
+++ b/charts/airflow/templates/_helpers/common.tpl
@@ -0,0 +1,106 @@
+{{/*
+Construct the base name for all resources in this chart.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+*/}}
+{{- define "airflow.fullname" -}}
+{{- printf "%s" .Release.Name | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Construct the `labels.app` used by all resources in this chart.
+*/}}
+{{- define "airflow.labels.app" -}}
+{{- printf "%s" .Chart.Name | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Construct the `labels.chart` used by all resources in this chart.
+*/}}
+{{- define "airflow.labels.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Construct the name of the airflow ServiceAccount.
+*/}}
+{{- define "airflow.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create -}}
+{{- .Values.serviceAccount.name | default (include "airflow.fullname" .) -}}
+{{- else -}}
+{{- .Values.serviceAccount.name | default "default" -}}
+{{- end -}}
+{{- end -}}
+
+{{/*
+A flag indicating if a celery-like executor is selected (empty if false)
+*/}}
+{{- define "airflow.executor.celery_like" -}}
+{{- if or (eq .Values.airflow.executor "CeleryExecutor") (eq .Values.airflow.executor "CeleryKubernetesExecutor") -}}
+true
+{{- end -}}
+{{- end -}}
+
+{{/*
+A flag indicating if a kubernetes-like executor is selected (empty if false)
+*/}}
+{{- define "airflow.executor.kubernetes_like" -}}
+{{- if or (eq .Values.airflow.executor "KubernetesExecutor") (eq .Values.airflow.executor "CeleryKubernetesExecutor") -}}
+true
+{{- end -}}
+{{- end -}}
+
+{{/*
+The scheme (HTTP, HTTPS) used by the webserver
+*/}}
+{{- define "airflow.web.scheme" -}}
+{{- if and (.Values.airflow.config.AIRFLOW__WEBSERVER__WEB_SERVER_SSL_CERT) (.Values.airflow.config.AIRFLOW__WEBSERVER__WEB_SERVER_SSL_KEY) -}}
+HTTPS
+{{- else -}}
+HTTP
+{{- end -}}
+{{- end -}}
+
+{{/*
+The path containing DAG files
+*/}}
+{{- define "airflow.dags.path" -}}
+{{- if .Values.dags.gitSync.enabled -}}
+{{- printf "%s/repo/%s" (.Values.dags.path | trimSuffix "/") (.Values.dags.gitSync.repoSubPath | trimAll "/") -}}
+{{- else -}}
+{{- printf .Values.dags.path -}}
+{{- end -}}
+{{- end -}}
+
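+{{/*
+EXAMPLE USAGE: {{ include "airflow.dags.path" . }}
+NOTE: as a sketch of the branches above, with `dags.path=/opt/airflow/dags` this
+renders `/opt/airflow/dags` when git-sync is disabled, and something like
+`/opt/airflow/dags/repo/<repoSubPath>` when `dags.gitSync.enabled=true`
+*/}}
+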
+{{/*
+Construct the `postgresql.fullname` of the postgresql sub-chart.
+Used to discover the Service and Secret name created by the sub-chart.
+*/}}
+{{- define "airflow.postgresql.fullname" -}}
+{{- if .Values.postgresql.fullnameOverride -}}
+{{- .Values.postgresql.fullnameOverride | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- $name := default "postgresql" .Values.postgresql.nameOverride -}}
+{{- if contains $name .Release.Name -}}
+{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+{{- end -}}
+
+{{/*
+Construct the `redis.fullname` of the redis sub-chart.
+Used to discover the master Service and Secret name created by the sub-chart.
+*/}}
+{{- define "airflow.redis.fullname" -}}
+{{- if .Values.redis.fullnameOverride -}}
+{{- .Values.redis.fullnameOverride | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- $name := default "redis" .Values.redis.nameOverride -}}
+{{- if contains $name .Release.Name -}}
+{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+{{- end -}}
diff --git a/charts/airflow/templates/_helpers/pods.tpl b/charts/airflow/templates/_helpers/pods.tpl
new file mode 100644
index 00000000..c2abaec0
--- /dev/null
+++ b/charts/airflow/templates/_helpers/pods.tpl
@@ -0,0 +1,332 @@
+{{/*
+Define the image configs for airflow containers
+*/}}
+{{- define "airflow.image" }}
+image: {{ .Values.airflow.image.repository }}:{{ .Values.airflow.image.tag }}
+imagePullPolicy: {{ .Values.airflow.image.pullPolicy }}
+securityContext:
+  runAsUser: {{ .Values.airflow.image.uid }}
+  runAsGroup: {{ .Values.airflow.image.gid }}
+{{- end }}
+
+{{/*
+Define an init-container which checks the DB status
+*/}}
+{{- define "airflow.init_container.check_db" }}
+- name: check-db
+  {{- include "airflow.image" . | indent 2 }}
+  envFrom:
+    {{- include "airflow.envFrom" . | indent 4 }}
+  env:
+    {{- include "airflow.env" . | indent 4 }}
+  command:
+    - "/usr/bin/dumb-init"
+    - "--"
+  args:
+    - "bash"
+    - "-c"
+    {{- if .Values.airflow.legacyCommands }}
+    - "exec airflow checkdb"
+    {{- else }}
+    - "exec airflow db check"
+    {{- end }}
+{{- end }}
+
+{{/*
+Define an init-container which waits for DB migrations
+*/}}
+{{- define "airflow.init_container.wait_for_db_migrations" }}
+- name: wait-for-db-migrations
+  {{- include "airflow.image" . | indent 2 }}
+  envFrom:
+    {{- include "airflow.envFrom" . | indent 4 }}
+  env:
+    {{- include "airflow.env" . | indent 4 }}
+  command:
+    - "/usr/bin/dumb-init"
+    - "--"
+  args:
+    - "bash"
+    - "-c"
+    {{- if .Values.airflow.legacyCommands }}
+    ## airflow 1.10 has no check-migrations command
+    - "exec sleep 5"
+    {{- else }}
+    - "exec airflow db check-migrations -t 60"
+    {{- end }}
+{{- end }}
+
+{{/*
+Define an init-container which installs a list of pip packages
+EXAMPLE USAGE: {{ include "airflow.init_container.install_pip_packages" (dict "Values" .Values "extraPipPackages" $extraPipPackages) }}
+*/}}
+{{- define "airflow.init_container.install_pip_packages" }}
+- name: install-pip-packages
+  {{- include "airflow.image" . | indent 2 }}
+  command:
+    - "/usr/bin/dumb-init"
+    - "--"
+  args:
+    - "pip"
+    - "install"
+    - "--target"
+    - "/opt/python/site-packages"
+    {{- range .extraPipPackages }}
+    - {{ . 
| quote }} + {{- end }} + volumeMounts: + - name: python-site-packages + mountPath: /opt/python/site-packages +{{- end }} + +{{/* +Define a container which regularly syncs a git-repo +EXAMPLE USAGE: {{ include "airflow.container.git_sync" (dict "Values" .Values "sync_one_time" "true") }} +*/}} +{{- define "airflow.container.git_sync" }} +- name: dags-git-sync + image: {{ .Values.dags.gitSync.image.repository }}:{{ .Values.dags.gitSync.image.tag }} + imagePullPolicy: {{ .Values.dags.gitSync.image.pullPolicy }} + securityContext: + runAsUser: {{ .Values.dags.gitSync.image.uid }} + runAsGroup: {{ .Values.dags.gitSync.image.gid }} + resources: + {{- toYaml .Values.dags.gitSync.resources | nindent 4 }} + env: + {{- if .sync_one_time }} + - name: GIT_SYNC_ONE_TIME + value: "true" + {{- end }} + - name: GIT_SYNC_ROOT + value: "/dags" + - name: GIT_SYNC_DEST + value: "repo" + - name: GIT_SYNC_REPO + value: {{ .Values.dags.gitSync.repo | quote }} + - name: GIT_SYNC_BRANCH + value: {{ .Values.dags.gitSync.branch | quote }} + - name: GIT_SYNC_REV + value: {{ .Values.dags.gitSync.revision | quote }} + - name: GIT_SYNC_DEPTH + value: {{ .Values.dags.gitSync.depth | quote }} + - name: GIT_SYNC_WAIT + value: {{ .Values.dags.gitSync.syncWait | quote }} + - name: GIT_SYNC_TIMEOUT + value: {{ .Values.dags.gitSync.syncTimeout | quote }} + - name: GIT_SYNC_ADD_USER + value: "true" + {{- if .Values.dags.gitSync.sshSecret }} + - name: GIT_SYNC_SSH + value: "true" + - name: GIT_SSH_KEY_FILE + value: "/etc/git-secret/id_rsa" + {{- end }} + {{- if .Values.dags.gitSync.sshKnownHosts }} + - name: GIT_KNOWN_HOSTS + value: "true" + - name: GIT_SSH_KNOWN_HOSTS_FILE + value: "/etc/git-secret/known_hosts" + {{- else }} + - name: GIT_KNOWN_HOSTS + value: "false" + {{- end }} + {{- if .Values.dags.gitSync.httpSecret }} + - name: GIT_SYNC_USERNAME + valueFrom: + secretKeyRef: + name: {{ .Values.dags.gitSync.httpSecret }} + key: {{ .Values.dags.gitSync.httpSecretUsernameKey }} + - name: GIT_SYNC_PASSWORD + valueFrom: + secretKeyRef: + name: {{ .Values.dags.gitSync.httpSecret }} + key: {{ .Values.dags.gitSync.httpSecretPasswordKey }} + {{- end }} + volumeMounts: + - name: dags-data + mountPath: /dags + {{- if .Values.dags.gitSync.sshSecret }} + - name: git-secret + mountPath: /etc/git-secret/id_rsa + readOnly: true + subPath: {{ .Values.dags.gitSync.sshSecretKey }} + {{- end }} + {{- if .Values.dags.gitSync.sshKnownHosts }} + - name: git-known-hosts + mountPath: /etc/git-secret/known_hosts + readOnly: true + subPath: known_hosts + {{- end }} +{{- end }} + +{{/* +The list of `volumeMounts` for web/scheduler/worker/flower container +EXAMPLE USAGE: {{ include "airflow.volumeMounts" (dict "Values" .Values "extraPipPackages" $extraPipPackages "extraVolumeMounts" $extraVolumeMounts) }} +*/}} +{{- define "airflow.volumeMounts" }} +{{- /* dags */ -}} +{{- if .Values.dags.persistence.enabled }} +- name: dags-data + mountPath: {{ .Values.dags.path }} + subPath: {{ .Values.dags.persistence.subPath }} +{{- else if .Values.dags.gitSync.enabled }} +- name: dags-data + mountPath: {{ .Values.dags.path }} +{{- end }} + +{{- /* logs */ -}} +{{- if .Values.logs.persistence.enabled }} +- name: logs-data + mountPath: {{ .Values.logs.path }} + subPath: {{ .Values.logs.persistence.subPath }} +{{- end }} + +{{- /* pip-packages */ -}} +{{- if .extraPipPackages }} +- name: python-site-packages + mountPath: /opt/python/site-packages +{{- end }} + +{{- /* user-defined (global) */ -}} +{{- if .Values.airflow.extraVolumeMounts }} +{{- toYaml 
.Values.airflow.extraVolumeMounts }}
+{{- end }}
+
+{{- /* user-defined */ -}}
+{{- if .extraVolumeMounts }}
+{{- toYaml .extraVolumeMounts }}
+{{- end }}
+{{- end }}
+
+{{/*
+The list of `volumes` for web/scheduler/worker/flower Pods
+EXAMPLE USAGE: {{ include "airflow.volumes" (dict "Values" .Values "extraPipPackages" $extraPipPackages "extraVolumes" $extraVolumes) }}
+*/}}
+{{- define "airflow.volumes" }}
+{{- /* dags */ -}}
+{{- if .Values.dags.persistence.enabled }}
+- name: dags-data
+  persistentVolumeClaim:
+    {{- if .Values.dags.persistence.existingClaim }}
+    claimName: {{ .Values.dags.persistence.existingClaim }}
+    {{- else }}
+    claimName: {{ printf "%s-dags" (include "airflow.fullname" . | trunc 58) }}
+    {{- end }}
+{{- else if .Values.dags.gitSync.enabled }}
+- name: dags-data
+  emptyDir: {}
+{{- end }}
+
+{{- /* logs */ -}}
+{{- if .Values.logs.persistence.enabled }}
+- name: logs-data
+  persistentVolumeClaim:
+    {{- if .Values.logs.persistence.existingClaim }}
+    claimName: {{ .Values.logs.persistence.existingClaim }}
+    {{- else }}
+    claimName: {{ printf "%s-logs" (include "airflow.fullname" . | trunc 58) }}
+    {{- end }}
+{{- end }}
+
+{{- /* git-sync */ -}}
+{{- if .Values.dags.gitSync.enabled }}
+{{- if .Values.dags.gitSync.sshSecret }}
+- name: git-secret
+  secret:
+    secretName: {{ .Values.dags.gitSync.sshSecret }}
+    defaultMode: 0644
+{{- end }}
+{{- if .Values.dags.gitSync.sshKnownHosts }}
+- name: git-known-hosts
+  secret:
+    secretName: {{ include "airflow.fullname" . }}-known-hosts
+    defaultMode: 0644
+{{- end }}
+{{- end }}
+
+{{- /* pip-packages */ -}}
+{{- if .extraPipPackages }}
+- name: python-site-packages
+  emptyDir: {}
+{{- end }}
+
+{{- /* user-defined (global) */ -}}
+{{- if .Values.airflow.extraVolumes }}
+{{- toYaml .Values.airflow.extraVolumes }}
+{{- end }}
+
+{{- /* user-defined */ -}}
+{{- if .extraVolumes }}
+{{- toYaml .extraVolumes }}
+{{- end }}
+{{- end }}
+
+{{/*
+The list of `envFrom` for web/scheduler/worker/flower Pods
+*/}}
+{{- define "airflow.envFrom" }}
+- secretRef:
+    name: "{{ include "airflow.fullname" . }}-config"
+{{- end }}
+
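+{{/*
+EXAMPLE USAGE: {{ include "airflow.envFrom" . | indent 12 }}
+NOTE: this exposes each key of the chart's `-config` Secret (see
+config/secret-config.yaml) as an environment variable in the container
+*/}}
+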
+{{/*
+The list of `env` for web/scheduler/worker/flower Pods
+*/}}
+{{- define "airflow.env" }}
+{{- /* postgres environment variables */ -}}
+{{- if .Values.postgresql.enabled }}
+{{- if .Values.postgresql.existingSecret }}
+- name: DATABASE_PASSWORD
+  valueFrom:
+    secretKeyRef:
+      name: {{ .Values.postgresql.existingSecret }}
+      key: {{ .Values.postgresql.existingSecretKey }}
+{{- else }}
+- name: DATABASE_PASSWORD
+  valueFrom:
+    secretKeyRef:
+      name: {{ include "airflow.postgresql.fullname" . }}
+      key: postgresql-password
+{{- end }}
+{{- else }}
+{{- if .Values.externalDatabase.passwordSecret }}
+- name: DATABASE_PASSWORD
+  valueFrom:
+    secretKeyRef:
+      name: {{ .Values.externalDatabase.passwordSecret }}
+      key: {{ .Values.externalDatabase.passwordSecretKey }}
+{{- else }}
+- name: DATABASE_PASSWORD
+  value: ""
+{{- end }}
+{{- end }}
+
+{{- /* redis environment variables */ -}}
+{{- if .Values.redis.enabled }}
+{{- if .Values.redis.existingSecret }}
+- name: REDIS_PASSWORD
+  valueFrom:
+    secretKeyRef:
+      name: {{ .Values.redis.existingSecret }}
+      key: {{ .Values.redis.existingSecretPasswordKey }}
+{{- else }}
+- name: REDIS_PASSWORD
+  valueFrom:
+    secretKeyRef:
+      name: {{ include "airflow.redis.fullname" . }}
+      key: redis-password
+{{- end }}
+{{- else }}
+{{- if .Values.externalRedis.passwordSecret }}
+- name: REDIS_PASSWORD
+  valueFrom:
+    secretKeyRef:
+      name: {{ .Values.externalRedis.passwordSecret }}
+      key: {{ .Values.externalRedis.passwordSecretKey }}
+{{- else }}
+- name: REDIS_PASSWORD
+  value: ""
+{{- end }}
+{{- end }}
+
+{{- /* user-defined environment variables */ -}}
+{{- if .Values.airflow.extraEnv }}
+{{ toYaml .Values.airflow.extraEnv }}
+{{- end }}
+{{- end }}
\ No newline at end of file
diff --git a/charts/airflow/templates/_helpers/validate-values.tpl b/charts/airflow/templates/_helpers/validate-values.tpl
new file mode 100644
index 00000000..2a082212
--- /dev/null
+++ b/charts/airflow/templates/_helpers/validate-values.tpl
@@ -0,0 +1,92 @@
+{{/* Checks for `.Release.Name` */}}
+{{- if gt (len .Release.Name) 43 }}
+  {{ required "The `.Release.Name` must be 43 characters or fewer (due to the 63 character limit for names in Kubernetes)!" nil }}
+{{- end }}
+
+{{/* Checks for `airflow.legacyCommands` */}}
+{{- if .Values.airflow.legacyCommands }}
+  {{- if not (eq "1" (.Values.scheduler.replicas | toString)) }}
+    {{ required "If `airflow.legacyCommands=true`, then `scheduler.replicas` must be set to `1`!" nil }}
+  {{- end }}
+{{- end }}
+
+{{/* Checks for `airflow.executor` */}}
+{{- if not (has .Values.airflow.executor (list "CeleryExecutor" "CeleryKubernetesExecutor" "KubernetesExecutor")) }}
+  {{ required "The `airflow.executor` must be one of: [CeleryExecutor, CeleryKubernetesExecutor, KubernetesExecutor]!" nil }}
+{{- end }}
+{{- if eq .Values.airflow.executor "CeleryExecutor" }}
+  {{- if or (not .Values.workers.enabled) (not .Values.redis.enabled) }}
+    {{ required "If `airflow.executor=CeleryExecutor`, all of [`workers.enabled`, `redis.enabled`] should be `true`!" nil }}
+  {{- end }}
+{{- end }}
+{{- if eq .Values.airflow.executor "CeleryKubernetesExecutor" }}
+  {{- if or (not .Values.workers.enabled) (not .Values.redis.enabled) }}
+    {{ required "If `airflow.executor=CeleryKubernetesExecutor`, all of [`workers.enabled`, `redis.enabled`] should be `true`!" nil }}
+  {{- end }}
+{{- end }}
+{{- if eq .Values.airflow.executor "KubernetesExecutor" }}
+  {{- if or (.Values.workers.enabled) (.Values.flower.enabled) (.Values.redis.enabled) }}
+    {{ required "If `airflow.executor=KubernetesExecutor`, none of [`workers.enabled`, `flower.enabled`, `redis.enabled`] should be `true`!" nil }}
+  {{- end }}
+{{- end }}
+
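+{{/*
+EXAMPLE (a values.yaml sketch that passes the executor checks above):
+  airflow:
+    executor: KubernetesExecutor
+  workers:
+    enabled: false
+  flower:
+    enabled: false
+  redis:
+    enabled: false
+*/}}
+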
+{{/* Checks for `airflow.config` */}}
+{{- if .Values.airflow.config.AIRFLOW__CORE__EXECUTOR }}
+  {{ required "Don't define `airflow.config.AIRFLOW__CORE__EXECUTOR`, it will be automatically set by the chart!" nil }}
+{{- end }}
+{{- if .Values.airflow.config.AIRFLOW__CORE__DAGS_FOLDER }}
+  {{ required "Don't define `airflow.config.AIRFLOW__CORE__DAGS_FOLDER`, it will be automatically set by the chart!" nil }}
+{{- end }}
+{{- if or (.Values.airflow.config.AIRFLOW__CELERY__BROKER_URL) (.Values.airflow.config.AIRFLOW__CELERY__BROKER_URL_CMD) }}
+  {{ required "Don't define `airflow.config.AIRFLOW__CELERY__BROKER_URL`, it will be automatically set by the chart!" nil }}
+{{- end }}
+{{- if or (.Values.airflow.config.AIRFLOW__CELERY__RESULT_BACKEND) (.Values.airflow.config.AIRFLOW__CELERY__RESULT_BACKEND_CMD) }}
+  {{ required "Don't define `airflow.config.AIRFLOW__CELERY__RESULT_BACKEND`, it will be automatically set by the chart!" nil }}
+{{- end }}
+{{- if or (.Values.airflow.config.AIRFLOW__CORE__SQL_ALCHEMY_CONN) (.Values.airflow.config.AIRFLOW__CORE__SQL_ALCHEMY_CONN_CMD) }}
+  {{ required "Don't define `airflow.config.AIRFLOW__CORE__SQL_ALCHEMY_CONN`, it will be automatically set by the chart!" nil }}
+{{- end }}
+
+{{/* Checks for `dags.gitSync` */}}
+{{- if .Values.dags.gitSync.enabled }}
+  {{- if .Values.dags.persistence.enabled }}
+    {{ required "If `dags.gitSync.enabled=true`, then `dags.persistence.enabled` must be `false`!" nil }}
+  {{- end }}
+  {{- if not .Values.dags.gitSync.repo }}
+    {{ required "If `dags.gitSync.enabled=true`, then `dags.gitSync.repo` must be non-empty!" nil }}
+  {{- end }}
+  {{- if and (.Values.dags.gitSync.sshSecret) (.Values.dags.gitSync.httpSecret) }}
+    {{ required "At most one of `dags.gitSync.sshSecret` and `dags.gitSync.httpSecret` can be defined!" nil }}
+  {{- end }}
+  {{- if and (.Values.dags.gitSync.repo | lower | hasPrefix "git@github.com") (not .Values.dags.gitSync.sshSecret) }}
+    {{ required "You must define `dags.gitSync.sshSecret` when using GitHub with SSH for `dags.gitSync.repo`!" nil }}
+  {{- end }}
+{{- end }}
+
+{{/* Checks for `ingress` */}}
+{{- if .Values.ingress }}
+  {{/* Checks for `ingress.web.path` */}}
+  {{- if .Values.ingress.web.path }}
+    {{- if not (.Values.ingress.web.path | hasPrefix "/") }}
+      {{ required "The `ingress.web.path` should start with a '/'!" nil }}
+    {{- end }}
+    {{- if .Values.ingress.web.path | hasSuffix "/" }}
+      {{ required "The `ingress.web.path` should NOT include a trailing '/'!" nil }}
+    {{- end }}
+  {{- end }}
+
+  {{/* Checks for `ingress.flower.path` */}}
+  {{- if .Values.ingress.flower.path }}
+    {{- if not (.Values.ingress.flower.path | hasPrefix "/") }}
+      {{ required "The `ingress.flower.path` should start with a '/'!" nil }}
+    {{- end }}
+    {{- if .Values.ingress.flower.path | hasSuffix "/" }}
+      {{ required "The `ingress.flower.path` should NOT include a trailing '/'!" nil }}
+    {{- end }}
+    {{- if .Values.airflow.config.AIRFLOW__CELERY__FLOWER_URL_PREFIX }}
+      {{- if not (eq .Values.ingress.flower.path .Values.airflow.config.AIRFLOW__CELERY__FLOWER_URL_PREFIX) }}
+        {{ required "The `ingress.flower.path` should be the same as `airflow.config.AIRFLOW__CELERY__FLOWER_URL_PREFIX`!" nil }}
+      {{- end }}
+    {{- end }}
+  {{- end }}
+{{- end }}
\ No newline at end of file
diff --git a/charts/airflow/templates/config/configmap-env.yaml b/charts/airflow/templates/config/configmap-env.yaml
deleted file mode 100644
index b2953f0a..00000000
--- a/charts/airflow/templates/config/configmap-env.yaml
+++ /dev/null
@@ -1,125 +0,0 @@
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: {{ include "airflow.fullname" . }}-env
-  labels:
-    app: {{ include "airflow.labels.app" . }}
-    chart: {{ include "airflow.labels.chart" . }}
-    release: {{ .Release.Name }}
-    heritage: {{ .Release.Service }}
-data:
-  ## Force UTC timezone
-  TZ: Etc/UTC
-
-  ## ----------------
-  ## Database
-  ## ----------------
-  {{- if .Values.postgresql.enabled }}
-  DATABASE_HOST: "{{ include "airflow.postgresql.fullname" . 
}}" - DATABASE_PORT: "5432" - DATABASE_USER: "{{ .Values.postgresql.postgresqlUsername }}" - DATABASE_DB: "{{ .Values.postgresql.postgresqlDatabase }}" - {{- else }} - DATABASE_HOST: "{{ .Values.externalDatabase.host }}" - DATABASE_PORT: "{{ .Values.externalDatabase.port }}" - DATABASE_USER: "{{ .Values.externalDatabase.user }}" - DATABASE_DB: "{{ .Values.externalDatabase.database }}" - DATABASE_PROPERTIES: |- - {{ .Values.externalDatabase.properties }} - {{- end }} - # bash command which echos the URL encoded value of $DATABASE_PASSWORD - DATABASE_PASSWORD_CMD: |- - echo ${DATABASE_PASSWORD} | python3 -c "import urllib.parse; encoded_pass = urllib.parse.quote(input()); print(encoded_pass)" - # bash command which echos the DB connection string in SQLAlchemy format - DATABASE_SQLALCHEMY_CMD: |- - {{ include "DATABASE_SQLALCHEMY_CMD" . }} - # bash command which echos the DB connection string in Celery result_backend format - DATABASE_CELERY_CMD: |- - {{ include "DATABASE_CELERY_CMD" . }} - - {{- if eq .Values.airflow.executor "CeleryExecutor" }} - ## ---------------- - ## Redis - ## ---------------- - {{- if .Values.redis.enabled }} - REDIS_HOST: "{{ include "airflow.redis.fullname" . }}-master" - REDIS_PORT: "6379" - REDIS_DBNUM: "1" - {{- else }} - REDIS_HOST: "{{ .Values.externalRedis.host }}" - REDIS_PORT: "{{ .Values.externalRedis.port }}" - REDIS_DBNUM: "{{ .Values.externalRedis.databaseNumber }}" - {{- end }} - # a bash command which echos the URL encoded value of $REDIS_PASSWORD - # NOTE: if $REDIS_PASSWORD is non-empty, prints `:${REDIS_PASSWORD}@`, else `` - REDIS_PASSWORD_CMD: |- - echo ${REDIS_PASSWORD} | python3 -c "import urllib.parse; encoded_pass = urllib.parse.quote(input()); print(f\":{encoded_pass}@\") if len(encoded_pass) > 0 else None" - # a bash command which echos the Redis connection string - REDIS_CONNECTION_CMD: |- - {{ include "REDIS_CONNECTION_CMD" . 
}} - {{- end }} - - ## ---------------- - ## Airflow - ## ---------------- - AIRFLOW__CORE__BASE_LOG_FOLDER: "{{ .Values.logs.path }}" - AIRFLOW__CORE__DAGS_FOLDER: "{{ .Values.dags.path }}" - AIRFLOW__CORE__DAG_PROCESSOR_MANAGER_LOG_LOCATION: "{{ .Values.logs.path }}/dag_processor_manager/dag_processor_manager.log" - AIRFLOW__CORE__DONOT_PICKLE: "{{ .Values.dags.doNotPickle }}" - AIRFLOW__CORE__ENABLE_XCOM_PICKLING: "false" # for forward compatibility with 2.0 - AIRFLOW__CORE__EXECUTOR: "{{ .Values.airflow.executor }}" - {{- if .Values.airflow.fernetKey }} - AIRFLOW__CORE__FERNET_KEY: "{{ .Values.airflow.fernetKey }}" - {{- end }} - AIRFLOW__CORE__SQL_ALCHEMY_CONN_CMD: |- - bash -c 'eval "$DATABASE_SQLALCHEMY_CMD"' - {{- if .Values.web.serializeDAGs }} - AIRFLOW__CORE__STORE_SERIALIZED_DAGS: "{{ .Values.web.serializeDAGs }}" - {{- end }} - AIRFLOW__SCHEDULER__CHILD_PROCESS_LOG_DIRECTORY: "{{ .Values.logs.path }}/scheduler" - AIRFLOW__WEBSERVER__BASE_URL: "{{ .Values.web.baseUrl }}" - AIRFLOW__WEBSERVER__WEB_SERVER_PORT: "8080" - - {{- if (eq .Values.airflow.executor "CeleryExecutor") }} - ## ---------------- - ## Airflow - CeleryExecutor - ## ---------------- - AIRFLOW__CELERY__BROKER_URL_CMD: |- - bash -c 'eval "$REDIS_CONNECTION_CMD"' - {{- if .Values.flower.enabled }} - AIRFLOW__CELERY__FLOWER_PORT: "5555" - AIRFLOW__CELERY__FLOWER_URL_PREFIX: "{{ .Values.flower.urlPrefix }}" - {{- end }} - AIRFLOW__CELERY__RESULT_BACKEND_CMD: |- - bash -c 'eval "$DATABASE_CELERY_CMD"' - AIRFLOW__CELERY__WORKER_CONCURRENCY: "{{ .Values.workers.celery.instances }}" - AIRFLOW__CELERY__WORKER_LOG_SERVER_PORT: "8793" - {{- end }} - - {{- if (eq .Values.airflow.executor "KubernetesExecutor") }} - ## ---------------- - ## Airflow - KubernetesExecutor - ## ---------------- - {{- if (not .Values.airflow.config.AIRFLOW__KUBERNETES__ENV_FROM_CONFIGMAP_REF) }} - AIRFLOW__KUBERNETES__ENV_FROM_CONFIGMAP_REF: "{{ include "airflow.fullname" . }}-env" - {{- end }} - {{- if (not .Values.airflow.config.AIRFLOW__KUBERNETES__NAMESPACE) }} - AIRFLOW__KUBERNETES__NAMESPACE: "{{ .Release.Namespace }}" - {{- end }} - {{- if (not .Values.airflow.config.AIRFLOW__KUBERNETES__WORKER_SERVICE_ACCOUNT_NAME) }} - AIRFLOW__KUBERNETES__WORKER_SERVICE_ACCOUNT_NAME: "{{ include "airflow.serviceAccountName" . }}" - {{- end }} - {{- if (not .Values.airflow.config.AIRFLOW__KUBERNETES__WORKER_CONTAINER_REPOSITORY) }} - AIRFLOW__KUBERNETES__WORKER_CONTAINER_REPOSITORY: {{ .Values.airflow.image.repository }} - {{- end }} - {{- if (not .Values.airflow.config.AIRFLOW__KUBERNETES__WORKER_CONTAINER_TAG) }} - AIRFLOW__KUBERNETES__WORKER_CONTAINER_TAG: {{ .Values.airflow.image.tag }} - {{- end }} - {{- end }} - - {{- if .Values.airflow.config }} - ## ---------------- - ## Airflow - User Configs - ## ---------------- - {{- toYaml .Values.airflow.config | nindent 2 }} - {{- end }} diff --git a/charts/airflow/templates/config/configmap-pod-template.yaml b/charts/airflow/templates/config/configmap-pod-template.yaml new file mode 100644 index 00000000..5b2dbe2e --- /dev/null +++ b/charts/airflow/templates/config/configmap-pod-template.yaml @@ -0,0 +1,18 @@ +{{- if include "airflow.executor.kubernetes_like" . }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "airflow.fullname" . }}-pod-template + labels: + app: {{ include "airflow.labels.app" . }} + chart: {{ include "airflow.labels.chart" . 
}}
+    release: {{ .Release.Name }}
+    heritage: {{ .Release.Service }}
+data:
+  pod_template.yaml: |-
+    {{- if .Values.airflow.kubernetesPodTemplate.stringOverride }}
+    {{- .Values.airflow.kubernetesPodTemplate.stringOverride | nindent 4 }}
+    {{- else }}
+    {{- tpl (.Files.Get "files/pod_template.kubernetes-helm-yaml") . | nindent 4 }}
+    {{- end }}
{{- end }}
\ No newline at end of file
diff --git a/charts/airflow/templates/config/configmap-scripts-git.yaml b/charts/airflow/templates/config/configmap-scripts-git.yaml
deleted file mode 100644
index ebb341e1..00000000
--- a/charts/airflow/templates/config/configmap-scripts-git.yaml
+++ /dev/null
@@ -1,77 +0,0 @@
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: {{ include "airflow.fullname" . }}-scripts-git
-  labels:
-    app: {{ include "airflow.labels.app" . }}
-    chart: {{ include "airflow.labels.chart" . }}
-    release: {{ .Release.Name }}
-    heritage: {{ .Release.Service }}
-data:
-  git-clone.sh: |
-    #!/bin/sh -e
-    REPO=$1
-    REF=$2
-    DIR=$3
-    REPO_HOST=$4
-    REPO_PORT=$5
-    PRIVATE_KEY=$6
-
-    mkdir -p ~/.ssh/
-
-    {{- if .Values.dags.git.sshKeyscan }}
-    ssh-keyscan -p $REPO_PORT $REPO_HOST >> ~/.ssh/known_hosts
-    {{- end }}
-
-    {{- if .Values.dags.git.secret }}
-    cp -rL /keys/* ~/.ssh/
-    chmod 600 ~/.ssh/*
-    echo -e "Host $REPO_HOST\n Port $REPO_PORT\n IdentityFile ~/.ssh/$PRIVATE_KEY" > ~/.ssh/config
-    {{- end }}
-
-    # ensure the git directory is empty, so we can safely clone
-    if [ -d "$DIR" ]; then
-      rm -rf $( find $DIR -mindepth 1 )
-    fi
-
-    git clone $REPO -b $REF $DIR
-  git-sync.sh: |
-    #!/bin/sh -e
-    REPO=$1
-    REF=$2
-    DIR=$3
-    REPO_HOST=$4
-    REPO_PORT=$5
-    PRIVATE_KEY=$6
-    SYNC_TIME=$7
-
-    mkdir -p ~/.ssh/
-
-    {{- if .Values.dags.git.sshKeyscan }}
-    ssh-keyscan -p $REPO_PORT $REPO_HOST >> ~/.ssh/known_hosts
-    {{- end }}
-
-    {{- if .Values.dags.git.secret }}
-    cp -rL /keys/* ~/.ssh/
-    chmod 600 ~/.ssh/*
-    echo -e "Host $REPO_HOST\n Port $REPO_PORT\n IdentityFile ~/.ssh/$PRIVATE_KEY" > ~/.ssh/config
-    {{- end }}
-
-    {{- if and (.Values.dags.git.gitSync.enabled) (not .Values.dags.initContainer.enabled) }}
-    if [ -d "$DIR" ]; then
-      rm -rf $( find $DIR -mindepth 1 )
-    fi
-    git clone $REPO -b $REF $DIR
-    {{- end }}
-
-    # to break the infinite loop when we receive SIGTERM
-    trap "exit 0" SIGTERM
-
-    cd $DIR
-    while true; do
-      git fetch origin $REF;
-      git reset --hard origin/$REF;
-      git clean -fd;
-      date;
-      sleep $SYNC_TIME;
-    done
diff --git a/charts/airflow/templates/config/configmap-scripts.yaml b/charts/airflow/templates/config/configmap-scripts.yaml
deleted file mode 100644
index 0f13497b..00000000
--- a/charts/airflow/templates/config/configmap-scripts.yaml
+++ /dev/null
@@ -1,60 +0,0 @@
-apiVersion: v1
-kind: ConfigMap
-metadata:
-  name: {{ include "airflow.fullname" . }}-scripts
-  labels:
-    app: {{ include "airflow.labels.app" . }}
-    chart: {{ include "airflow.labels.chart" . }}
-    release: {{ .Release.Name }}
-    heritage: {{ .Release.Service }}
-data:
-  install-requirements.sh: |
-    #!/bin/bash -e
-    if [ ! 
-d {{ .Values.dags.path | quote }} ]; then - echo 'No folder {{ .Values.dags.path | quote }}' - exit 0 - fi - - cd {{ .Values.dags.path | quote }} - if [ -f requirements.txt ]; then - pip install --user -r requirements.txt - else - exit 0 - fi - graceful-stop-celery-worker.sh: | - #!/bin/bash -e - echo "*** starting graceful worker shutdown" - - # set the required environment variables - export AIRFLOW__CELERY__BROKER_URL=$(eval $AIRFLOW__CELERY__BROKER_URL_CMD) - - # prevent the worker accepting new tasks - echo "*** preventing worker accepting new tasks" - celery control --broker $AIRFLOW__CELERY__BROKER_URL --destination celery@$HOSTNAME cancel_consumer default - sleep 5 - - # loop until all active task are finished - echo "*** waiting for active tasks to finish" - while (( celery inspect --broker $AIRFLOW__CELERY__BROKER_URL --destination celery@$HOSTNAME --json active | python3 -c "import json; active_tasks = json.loads(input())['celery@$HOSTNAME']; print(len(active_tasks))" > 0 )); do - sleep 10 - done - preinit-db.sh: | - #!/bin/bash - echo "*** Waiting 10s for database" - sleep 10 - - COUNT=0 - while [ "${COUNT}" -lt 5 ]; do - echo "*** upgrading airflow db" - if airflow upgradedb || airflow db upgrade; then - echo "*** upgradedb succeeded" - exit 0 - else - ((COUNT++)) - echo "*** upgradedb failed: waiting 5s before retry #${COUNT}" - sleep 5 - fi - done - - echo "*** upgradedb failed after ${COUNT} retries; failed." - exit 1 diff --git a/charts/airflow/templates/config/configmap-variables-pools.yaml b/charts/airflow/templates/config/configmap-variables-pools.yaml deleted file mode 100644 index 1f7c1179..00000000 --- a/charts/airflow/templates/config/configmap-variables-pools.yaml +++ /dev/null @@ -1,20 +0,0 @@ -{{- if or (.Values.scheduler.variables) (.Values.scheduler.pools) }} -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "airflow.fullname" . }}-variables-pools - labels: - app: {{ include "airflow.labels.app" . }} - chart: {{ include "airflow.labels.chart" . }} - release: {{ .Release.Name }} - heritage: {{ .Release.Service }} -data: - {{- if .Values.scheduler.variables }} - variables.json: | - {{- .Values.scheduler.variables | nindent 4 }} - {{- end }} - {{- if .Values.scheduler.pools }} - pools.json: | - {{- .Values.scheduler.pools | nindent 4 }} - {{- end }} -{{- end }} diff --git a/charts/airflow/templates/config/secret-config.yaml b/charts/airflow/templates/config/secret-config.yaml new file mode 100644 index 00000000..4d99c5b5 --- /dev/null +++ b/charts/airflow/templates/config/secret-config.yaml @@ -0,0 +1,165 @@ +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "airflow.fullname" . }}-config + labels: + app: {{ include "airflow.labels.app" . }} + chart: {{ include "airflow.labels.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +stringData: + ## ================ + ## Linux Configs + ## ================ + TZ: "Etc/UTC" + + ## ================ + ## Database Configs + ## ================ + ## connection string components + {{- if .Values.postgresql.enabled }} + DATABASE_HOST: {{ (include "airflow.postgresql.fullname" .) 
| quote }}
+  DATABASE_PORT: "5432"
+  DATABASE_USER: {{ .Values.postgresql.postgresqlUsername | quote }}
+  DATABASE_DB: {{ .Values.postgresql.postgresqlDatabase | quote }}
+  {{- else }}
+  DATABASE_HOST: {{ .Values.externalDatabase.host | quote }}
+  DATABASE_PORT: {{ .Values.externalDatabase.port | quote }}
+  DATABASE_USER: {{ .Values.externalDatabase.user | quote }}
+  DATABASE_DB: {{ .Values.externalDatabase.database | quote }}
+  DATABASE_PROPERTIES: {{ .Values.externalDatabase.properties | quote }}
+  {{- end }}
+
+  ## bash command which echoes the URL encoded value of $DATABASE_PASSWORD
+  DATABASE_PASSWORD_CMD: |-
+    echo ${DATABASE_PASSWORD} | python3 -c "import urllib.parse; encoded_pass = urllib.parse.quote(input()); print(encoded_pass)"
+
+  ## bash command which echoes the DB connection string in SQLAlchemy format
+  DATABASE_SQLALCHEMY_CMD: |-
+    {{- if .Values.postgresql.enabled }}
+    echo -n "postgresql+psycopg2://${DATABASE_USER}:$(eval $DATABASE_PASSWORD_CMD)@${DATABASE_HOST}:${DATABASE_PORT}/${DATABASE_DB}"
+    {{- else if and (not .Values.postgresql.enabled) (eq "postgres" .Values.externalDatabase.type) }}
+    echo -n "postgresql+psycopg2://${DATABASE_USER}:$(eval $DATABASE_PASSWORD_CMD)@${DATABASE_HOST}:${DATABASE_PORT}/${DATABASE_DB}${DATABASE_PROPERTIES}"
+    {{- else if and (not .Values.postgresql.enabled) (eq "mysql" .Values.externalDatabase.type) }}
+    echo -n "mysql+mysqldb://${DATABASE_USER}:$(eval $DATABASE_PASSWORD_CMD)@${DATABASE_HOST}:${DATABASE_PORT}/${DATABASE_DB}${DATABASE_PROPERTIES}"
+    {{- end }}
+
+  ## bash command which echoes the DB connection string in Celery result_backend format
+  DATABASE_CELERY_CMD: |-
+    {{- if .Values.postgresql.enabled }}
+    echo -n "db+postgresql://${DATABASE_USER}:$(eval $DATABASE_PASSWORD_CMD)@${DATABASE_HOST}:${DATABASE_PORT}/${DATABASE_DB}"
+    {{- else if and (not .Values.postgresql.enabled) (eq "postgres" .Values.externalDatabase.type) }}
+    echo -n "db+postgresql://${DATABASE_USER}:$(eval $DATABASE_PASSWORD_CMD)@${DATABASE_HOST}:${DATABASE_PORT}/${DATABASE_DB}${DATABASE_PROPERTIES}"
+    {{- else if and (not .Values.postgresql.enabled) (eq "mysql" .Values.externalDatabase.type) }}
+    echo -n "db+mysql://${DATABASE_USER}:$(eval $DATABASE_PASSWORD_CMD)@${DATABASE_HOST}:${DATABASE_PORT}/${DATABASE_DB}${DATABASE_PROPERTIES}"
+    {{- end }}
+
+  ## ================
+  ## Redis Configs
+  ## ================
+  {{- if include "airflow.executor.celery_like" . }}
+  ## connection string components
+  {{- if .Values.redis.enabled }}
+  REDIS_HOST: "{{ include "airflow.redis.fullname" . }}-master"
+  REDIS_PORT: "6379"
+  REDIS_DBNUM: "1"
+  {{- else }}
+  REDIS_HOST: {{ .Values.externalRedis.host | quote }}
+  REDIS_PORT: {{ .Values.externalRedis.port | quote }}
+  REDIS_DBNUM: {{ .Values.externalRedis.databaseNumber | quote }}
+  {{- end }}
+
+  ## a bash command which echoes the URL encoded value of $REDIS_PASSWORD
+  ## NOTE: if $REDIS_PASSWORD is non-empty, prints `:${REDIS_PASSWORD}@`, else ``
+  REDIS_PASSWORD_CMD: |-
+    echo ${REDIS_PASSWORD} | python3 -c "import urllib.parse; encoded_pass = urllib.parse.quote(input()); print(f\":{encoded_pass}@\") if len(encoded_pass) > 0 else None"
+
+  ## a bash command which echoes the Redis connection string
+  REDIS_CONNECTION_CMD: |-
+    echo -n "redis://$(eval $REDIS_PASSWORD_CMD)${REDIS_HOST}:${REDIS_PORT}/${REDIS_DBNUM}"
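+  ## for example, with the bundled redis sub-chart and an empty $REDIS_PASSWORD,
+  ## this evaluates to something like: `redis://RELEASE-NAME-redis-master:6379/1`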
+  {{- end }}
+
+  ## ================
+  ## Airflow Configs (General)
+  ## ================
+  AIRFLOW__CORE__DAGS_FOLDER: {{ (include "airflow.dags.path" .) | quote }}
+  AIRFLOW__CORE__EXECUTOR: {{ .Values.airflow.executor | quote }}
+  AIRFLOW__CORE__FERNET_KEY: {{ .Values.airflow.fernetKey | quote }}
+  AIRFLOW__CORE__SQL_ALCHEMY_CONN_CMD: |-
+    bash -c 'eval "$DATABASE_SQLALCHEMY_CMD"'
+  AIRFLOW__WEBSERVER__WEB_SERVER_PORT: "8080"
+  AIRFLOW__CELERY__FLOWER_PORT: "5555"
+
+  {{- if and (.Values.dags.gitSync.enabled) (not .Values.airflow.config.AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL) }}
+  ## refresh the dags folder at the same frequency as git-sync
+  AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL: {{ .Values.dags.gitSync.syncWait | quote }}
+  {{- end }}
+
+  {{- if and (.Values.ingress.enabled) (.Values.ingress.flower.path) (not .Values.airflow.config.AIRFLOW__CELERY__FLOWER_URL_PREFIX) }}
+  ## ensure the flower URL prefix is aligned with the ingress path
+  AIRFLOW__CELERY__FLOWER_URL_PREFIX: {{ .Values.ingress.flower.path | quote }}
+  {{- end }}
+
+  {{- if .Values.airflow.legacyCommands }}
+  {{- if not .Values.airflow.config.AIRFLOW__WEBSERVER__RBAC }}
+  ## default to the RBAC UI when in legacy mode
+  AIRFLOW__WEBSERVER__RBAC: "true"
+  {{- end }}
+  {{- end }}
+
+  ## ================
+  ## Airflow Configs (Logging)
+  ## ================
+  {{- if .Values.airflow.legacyCommands }}
+  AIRFLOW__CORE__BASE_LOG_FOLDER: {{ .Values.logs.path | quote }}
+  AIRFLOW__CORE__DAG_PROCESSOR_MANAGER_LOG_LOCATION: {{ (printf "%s/dag_processor_manager/dag_processor_manager.log" .Values.logs.path) | quote }}
+  {{- else }}
+  AIRFLOW__LOGGING__BASE_LOG_FOLDER: {{ .Values.logs.path | quote }}
+  AIRFLOW__LOGGING__DAG_PROCESSOR_MANAGER_LOG_LOCATION: {{ (printf "%s/dag_processor_manager/dag_processor_manager.log" .Values.logs.path) | quote }}
+  {{- end }}
+  AIRFLOW__SCHEDULER__CHILD_PROCESS_LOG_DIRECTORY: {{ (printf "%s/scheduler" .Values.logs.path) | quote }}
+
+  ## ================
+  ## Airflow Configs (Celery)
+  ## ================
+  {{- if include "airflow.executor.celery_like" . }}
+  AIRFLOW__CELERY__WORKER_LOG_SERVER_PORT: "8793"
+  AIRFLOW__CELERY__BROKER_URL_CMD: |-
+    bash -c 'eval "$REDIS_CONNECTION_CMD"'
+  AIRFLOW__CELERY__RESULT_BACKEND_CMD: |-
+    bash -c 'eval "$DATABASE_CELERY_CMD"'
+  {{- end }}
+
+  ## ================
+  ## Airflow Configs (Kubernetes)
+  ## ================
+  {{- if include "airflow.executor.kubernetes_like" . }}
+  {{- if not .Values.airflow.config.AIRFLOW__KUBERNETES__NAMESPACE }}
+  AIRFLOW__KUBERNETES__NAMESPACE: {{ .Release.Namespace | quote }}
+  {{- end }}
+  {{- if not .Values.airflow.config.AIRFLOW__KUBERNETES__WORKER_CONTAINER_REPOSITORY }}
+  AIRFLOW__KUBERNETES__WORKER_CONTAINER_REPOSITORY: {{ .Values.airflow.image.repository | quote }}
+  {{- end }}
+  {{- if not .Values.airflow.config.AIRFLOW__KUBERNETES__WORKER_CONTAINER_TAG }}
+  AIRFLOW__KUBERNETES__WORKER_CONTAINER_TAG: {{ .Values.airflow.image.tag | quote }}
+  {{- end }}
+  {{- if not .Values.airflow.config.AIRFLOW__KUBERNETES__POD_TEMPLATE_FILE }}
+  AIRFLOW__KUBERNETES__POD_TEMPLATE_FILE: "/opt/airflow/pod_templates/pod_template.yaml"
+  {{- end }}
+
+  {{- if .Values.airflow.legacyCommands }}
+  {{- if not .Values.airflow.config.AIRFLOW__KUBERNETES__ENV_FROM_SECRET_REF }}
+  AIRFLOW__KUBERNETES__ENV_FROM_SECRET_REF: "{{ include "airflow.fullname" . }}-config"
+  {{- end }}
+  {{- if (not .Values.airflow.config.AIRFLOW__KUBERNETES__WORKER_SERVICE_ACCOUNT_NAME) }}
+  AIRFLOW__KUBERNETES__WORKER_SERVICE_ACCOUNT_NAME: {{ (include "airflow.serviceAccountName" .) 
| quote }} + {{- end }} + {{- end }} + {{- end }} + + ## ================ + ## User Configs + ## ================ + {{- if .Values.airflow.config }} + {{- toYaml .Values.airflow.config | nindent 2 }} + {{- end }} \ No newline at end of file diff --git a/charts/airflow/templates/config/secret-connections.yaml b/charts/airflow/templates/config/secret-connections.yaml deleted file mode 100644 index 4f7e7de1..00000000 --- a/charts/airflow/templates/config/secret-connections.yaml +++ /dev/null @@ -1,31 +0,0 @@ -{{- if not .Values.scheduler.existingSecretConnections }} -{{- if .Values.scheduler.connections }} -apiVersion: v1 -kind: Secret -metadata: - name: {{ include "airflow.fullname" . }}-connections - labels: - app: {{ include "airflow.labels.app" . }} - chart: {{ include "airflow.labels.chart" . }} - release: {{ .Release.Name }} - heritage: {{ .Release.Service }} -type: Opaque -stringData: - add-connections.sh: | - #!/usr/bin/env bash - {{- range .Values.scheduler.connections }} - {{- if $.Values.scheduler.refreshConnections }} - airflow connections --delete --conn_id {{ .id | quote | replace `$` `\$` }} - {{- end }} - airflow connections --add --conn_id {{ .id | quote | replace `$` `\$` }} - {{- if .type }} --conn_type {{ .type | quote | replace `$` `\$` }} {{ end -}} - {{- if .uri }} --conn_uri {{ .uri | quote | replace `$` `\$` }} {{ end -}} - {{- if .host }} --conn_host {{ .host | quote | replace `$` `\$` }} {{ end -}} - {{- if .login }} --conn_login {{ .login | quote | replace `$` `\$` }} {{ end -}} - {{- if .password }} --conn_password {{ .password | quote | replace `$` `\$` }} {{ end -}} - {{- if .schema }} --conn_schema {{ .schema | quote | replace `$` `\$` }} {{ end -}} - {{- if .port }} --conn_port {{ .port | quote | replace `$` `\$` }} {{ end -}} - {{- if .extra }} --conn_extra {{ ( regexReplaceAll "[\r\n]+" .extra "" ) | quote | replace `$` `\$` }} {{ end -}} - {{- end }} -{{- end }} -{{- end }} diff --git a/charts/airflow/templates/config/secret-known-hosts.yaml b/charts/airflow/templates/config/secret-known-hosts.yaml new file mode 100644 index 00000000..4ebba50e --- /dev/null +++ b/charts/airflow/templates/config/secret-known-hosts.yaml @@ -0,0 +1,14 @@ +{{- if and (.Values.dags.gitSync.enabled) (.Values.dags.gitSync.sshKnownHosts) }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "airflow.fullname" . }}-known-hosts + labels: + app: {{ include "airflow.labels.app" . }} + chart: {{ include "airflow.labels.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +stringData: + known_hosts: |- + {{- .Values.dags.gitSync.sshKnownHosts | nindent 4 }} +{{- end }} \ No newline at end of file diff --git a/charts/airflow/templates/config/secret-webserver-config.yaml b/charts/airflow/templates/config/secret-webserver-config.yaml new file mode 100644 index 00000000..319a6879 --- /dev/null +++ b/charts/airflow/templates/config/secret-webserver-config.yaml @@ -0,0 +1,18 @@ +{{- if not .Values.web.webserverConfig.existingSecret }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ include "airflow.fullname" . }}-webserver-config + labels: + app: {{ include "airflow.labels.app" . }} + chart: {{ include "airflow.labels.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +stringData: + webserver_config.py: |- + {{- if .Values.web.webserverConfig.stringOverride }} + {{- .Values.web.webserverConfig.stringOverride | nindent 4 }} + {{- else }} + {{- tpl (.Files.Get "files/webserver_config.py") . 
| nindent 4 }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/charts/airflow/templates/flower/flower-deployment.yaml b/charts/airflow/templates/flower/flower-deployment.yaml index 00ebd434..c9e23052 100644 --- a/charts/airflow/templates/flower/flower-deployment.yaml +++ b/charts/airflow/templates/flower/flower-deployment.yaml @@ -1,4 +1,5 @@ -{{- if and (.Values.flower.enabled) (eq .Values.airflow.executor "CeleryExecutor") }} +{{- $extraPipPackages := concat .Values.airflow.extraPipPackages .Values.flower.extraPipPackages }} +{{- if .Values.flower.enabled }} apiVersion: apps/v1 kind: Deployment metadata: @@ -18,11 +19,10 @@ metadata: {{- end }} spec: replicas: {{ .Values.flower.replicas }} - minReadySeconds: {{ .Values.flower.minReadySeconds }} strategy: - # this is safe - multiple flower pods can run concurrently type: RollingUpdate rollingUpdate: + ## multiple flower pods can safely run concurrently maxSurge: 25% maxUnavailable: 0 selector: @@ -33,7 +33,10 @@ spec: template: metadata: annotations: - checksum/config-env: {{ include (print $.Template.BasePath "/config/configmap-env.yaml") . | sha256sum }} + checksum/secret-config: {{ include (print $.Template.BasePath "/config/secret-config.yaml") . | sha256sum }} + {{- if .Values.airflow.podAnnotations }} + {{- toYaml .Values.airflow.podAnnotations | nindent 8 }} + {{- end }} {{- if .Values.flower.podAnnotations }} {{- toYaml .Values.flower.podAnnotations | nindent 8 }} {{- end }} @@ -48,11 +51,11 @@ spec: {{- toYaml .Values.flower.podLabels | nindent 8 }} {{- end }} spec: + restartPolicy: Always {{- if .Values.airflow.image.pullSecret }} imagePullSecrets: - name: {{ .Values.airflow.image.pullSecret }} {{- end }} - restartPolicy: Always {{- if .Values.flower.nodeSelector }} nodeSelector: {{- toYaml .Values.flower.nodeSelector | nindent 8 }} @@ -70,91 +73,93 @@ spec: securityContext: {{- toYaml .Values.flower.securityContext | nindent 8 }} {{- end }} + initContainers: + {{- if $extraPipPackages }} + {{- include "airflow.init_container.install_pip_packages" (dict "Values" .Values "extraPipPackages" $extraPipPackages) | indent 8 }} + {{- end }} + {{- include "airflow.init_container.check_db" . | indent 8 }} + {{- include "airflow.init_container.wait_for_db_migrations" . | indent 8 }} containers: - - name: {{ .Chart.Name }}-flower - image: {{ .Values.airflow.image.repository }}:{{ .Values.airflow.image.tag }} - imagePullPolicy: {{ .Values.airflow.image.pullPolicy }} + - name: airflow-flower + {{- include "airflow.image" . | indent 10 }} + resources: + {{- toYaml .Values.flower.resources | nindent 12 }} envFrom: - - configMapRef: - name: "{{ include "airflow.fullname" . }}-env" + {{- include "airflow.envFrom" . | indent 12 }} env: - {{- include "airflow.mapenvsecrets" . | indent 12 }} + {{- include "airflow.env" . 
| indent 12 }} + {{- if $extraPipPackages }} + - name: PYTHONPATH + value: /opt/python/site-packages + {{- end }} + {{- if .Values.flower.basicAuthSecret }} + - name: AIRFLOW__CELERY__FLOWER_BASIC_AUTH + valueFrom: + secretKeyRef: + name: {{ .Values.flower.basicAuthSecret }} + key: {{ .Values.flower.basicAuthSecretKey }} + {{- end }} ports: - name: flower containerPort: 5555 protocol: TCP - {{- if .Values.flower.extraConfigmapMounts }} - volumeMounts: - {{- range .Values.flower.extraConfigmapMounts }} - - name: {{ .name }} - mountPath: {{ .mountPath }} - readOnly: {{ .readOnly }} - {{- if .subPath }} - subPath: {{ .subPath }} - {{- end }} - {{- end }} - {{- end }} command: - "/usr/bin/dumb-init" - "--" - args: - "bash" - "-c" - - > - true \ - {{- if gt .Values.flower.initialStartupDelay 0.0 }} - && echo "*** waiting {{ .Values.flower.initialStartupDelay }}s..." \ - && sleep {{ .Values.flower.initialStartupDelay }} \ - {{- end }} - && mkdir -p /home/airflow/.local/bin \ - && export PATH="/home/airflow/.local/bin:$PATH" \ - && echo "*** running flower..." \ - {{- if .Values.flower.oauthDomains }} - && exec airflow flower --auth={{ .Values.flower.oauthDomains | quote }} - {{- else }} - && exec airflow flower - {{- end }} - + args: + {{- if .Values.airflow.legacyCommands }} + - "exec airflow flower" + {{- else }} + - "exec airflow celery flower" + {{- end }} + {{- if .Values.flower.oauthDomains }} + - "--auth" + - {{ .Values.flower.oauthDomains | quote }} + {{- end }} + {{- if .Values.flower.readinessProbe.enabled }} + readinessProbe: + initialDelaySeconds: {{ .Values.flower.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.flower.readinessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.flower.readinessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.flower.readinessProbe.failureThreshold }} + exec: + command: + - "/bin/bash" + - "-c" + {{- if or (.Values.flower.basicAuthSecret) (.Values.airflow.config.AIRFLOW__CELERY__FLOWER_BASIC_AUTH) }} + - "exec curl --user $AIRFLOW__CELERY__FLOWER_BASIC_AUTH 'http://localhost:5555{{ .Values.airflow.config.AIRFLOW__CELERY__FLOWER_URL_PREFIX }}'" + {{- else }} + - "exec curl 'http://localhost:5555{{ .Values.airflow.config.AIRFLOW__CELERY__FLOWER_URL_PREFIX }}'" + {{- end }} + {{- end }} + {{- if .Values.flower.livenessProbe.enabled }} livenessProbe: - {{- if and (.Values.flower.basicAuthSecret) (not .Values.airflow.config.AIRFLOW__CELERY__FLOWER_BASIC_AUTH) }} + initialDelaySeconds: {{ .Values.flower.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.flower.livenessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.flower.livenessProbe.timeoutSeconds }} + failureThreshold: {{ .Values.flower.livenessProbe.failureThreshold }} exec: command: - - /bin/sh - - -c - - "curl -H 'Authorization: Basic $(echo -n $AIRFLOW__CELERY__FLOWER_BASIC_AUTH | base64)' 'http://localhost:5555 - {{- if .Values.ingress.flower.livenessPath -}} - {{ .Values.ingress.flower.livenessPath }} - {{- else -}} - {{ .Values.ingress.flower.path }}/ - {{- end -}} - '" - {{- else }} - httpGet: - {{- if .Values.ingress.flower.livenessPath }} - path: "{{ .Values.ingress.flower.livenessPath }}" - {{- else }} - path: "{{ .Values.ingress.flower.path }}/" - {{- end }} - port: flower - {{- if .Values.airflow.config.AIRFLOW__CELERY__FLOWER_BASIC_AUTH }} - httpHeaders: - - name: Authorization - value: Basic {{ .Values.airflow.config.AIRFLOW__CELERY__FLOWER_BASIC_AUTH | b64enc }} - {{- end }} - {{- end }} - initialDelaySeconds: 60 - periodSeconds: 30 - 
timeoutSeconds: 1 - successThreshold: 1 - failureThreshold: 3 - resources: - {{- toYaml .Values.flower.resources | nindent 12 }} - {{- if .Values.flower.extraConfigmapMounts }} + - "/bin/bash" + - "-c" + {{- if or (.Values.flower.basicAuthSecret) (.Values.airflow.config.AIRFLOW__CELERY__FLOWER_BASIC_AUTH) }} + - "exec curl --user $AIRFLOW__CELERY__FLOWER_BASIC_AUTH 'http://localhost:5555{{ .Values.airflow.config.AIRFLOW__CELERY__FLOWER_URL_PREFIX }}'" + {{- else }} + - "exec curl 'http://localhost:5555{{ .Values.airflow.config.AIRFLOW__CELERY__FLOWER_URL_PREFIX }}'" + {{- end }} + {{- end }} + {{- $extraVolumeMounts := .Values.flower.extraVolumeMounts }} + {{- $volumeMounts := include "airflow.volumeMounts" (dict "Values" .Values "extraPipPackages" $extraPipPackages "extraVolumeMounts" $extraVolumeMounts) }} + {{- if $volumeMounts }} + volumeMounts: + {{- $volumeMounts | indent 12 }} + {{- end }} + {{- $extraVolumes := .Values.flower.extraVolumes }} + {{- $volumes := include "airflow.volumes" (dict "Values" .Values "extraPipPackages" $extraPipPackages "extraVolumes" $extraVolumes) }} + {{- if $volumes }} volumes: - {{- range .Values.flower.extraConfigmapMounts }} - - name: {{ .name }} - configMap: - name: {{ .configMap }} - {{- end }} + {{- $volumes | indent 8 }} {{- end }} {{- end }} diff --git a/charts/airflow/templates/flower/flower-ingress.yaml b/charts/airflow/templates/flower/flower-ingress.yaml index d11018d7..e37e557d 100644 --- a/charts/airflow/templates/flower/flower-ingress.yaml +++ b/charts/airflow/templates/flower/flower-ingress.yaml @@ -1,4 +1,4 @@ -{{- if and (.Values.flower.enabled) (eq .Values.airflow.executor "CeleryExecutor") (.Values.ingress.enabled) }} +{{- if and (.Values.flower.enabled) (.Values.ingress.enabled) }} apiVersion: networking.k8s.io/v1beta1 kind: Ingress metadata: diff --git a/charts/airflow/templates/flower/flower-pdb.yaml b/charts/airflow/templates/flower/flower-pdb.yaml index 0aaff54c..1dc58470 100644 --- a/charts/airflow/templates/flower/flower-pdb.yaml +++ b/charts/airflow/templates/flower/flower-pdb.yaml @@ -1,4 +1,4 @@ -{{- if and (.Values.flower.enabled) (eq .Values.airflow.executor "CeleryExecutor") (.Values.flower.podDisruptionBudget.enabled) }} +{{- if and (.Values.flower.enabled) (.Values.flower.podDisruptionBudget.enabled) }} apiVersion: policy/v1beta1 kind: PodDisruptionBudget metadata: diff --git a/charts/airflow/templates/flower/flower-service.yaml b/charts/airflow/templates/flower/flower-service.yaml index 69f82592..013e6487 100644 --- a/charts/airflow/templates/flower/flower-service.yaml +++ b/charts/airflow/templates/flower/flower-service.yaml @@ -1,4 +1,4 @@ -{{- if and (.Values.flower.enabled) (eq .Values.airflow.executor "CeleryExecutor") }} +{{- if .Values.flower.enabled }} apiVersion: v1 kind: Service metadata: diff --git a/charts/airflow/templates/jobs/job-create-connections.yaml b/charts/airflow/templates/jobs/job-create-connections.yaml new file mode 100644 index 00000000..cfcbe35f --- /dev/null +++ b/charts/airflow/templates/jobs/job-create-connections.yaml @@ -0,0 +1,56 @@ +{{- if .Values.airflow.connections }} +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ include "airflow.fullname" . }}-create-connections + annotations: + helm.sh/hook: post-install,post-upgrade + helm.sh/hook-weight: "-1" + helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded + labels: + app: {{ include "airflow.labels.app" . }} + component: jobs + chart: {{ include "airflow.labels.chart" . 
}} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +spec: + ttlSecondsAfterFinished: 300 + template: + metadata: + labels: + app: {{ include "airflow.labels.app" . }} + component: jobs + chart: {{ include "airflow.labels.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + spec: + restartPolicy: OnFailure + {{- if .Values.airflow.image.pullSecret }} + imagePullSecrets: + - name: {{ .Values.airflow.image.pullSecret }} + {{- end }} + containers: + - name: create-connections + {{- include "airflow.image" . | indent 10 }} + envFrom: + {{- include "airflow.envFrom" . | indent 12 }} + env: + {{- include "airflow.env" . | indent 12 }} + command: + - "/usr/bin/dumb-init" + - "--" + args: + - "python" + - "-Wignore" + - "/job_scripts/create_connections.py" + volumeMounts: + - name: job-scripts + mountPath: /job_scripts + readOnly: true + volumes: + - name: job-scripts + secret: + secretName: {{ include "airflow.fullname" . }}-job-scripts + defaultMode: 0755 + +{{- end }} \ No newline at end of file diff --git a/charts/airflow/templates/jobs/job-create-pools.yaml b/charts/airflow/templates/jobs/job-create-pools.yaml new file mode 100644 index 00000000..255cac17 --- /dev/null +++ b/charts/airflow/templates/jobs/job-create-pools.yaml @@ -0,0 +1,56 @@ +{{- if .Values.airflow.pools }} +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ include "airflow.fullname" . }}-create-pools + annotations: + helm.sh/hook: post-install,post-upgrade + helm.sh/hook-weight: "-1" + helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded + labels: + app: {{ include "airflow.labels.app" . }} + component: jobs + chart: {{ include "airflow.labels.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +spec: + ttlSecondsAfterFinished: 300 + template: + metadata: + labels: + app: {{ include "airflow.labels.app" . }} + component: jobs + chart: {{ include "airflow.labels.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + spec: + restartPolicy: OnFailure + {{- if .Values.airflow.image.pullSecret }} + imagePullSecrets: + - name: {{ .Values.airflow.image.pullSecret }} + {{- end }} + containers: + - name: create-pools + {{- include "airflow.image" . | indent 10 }} + envFrom: + {{- include "airflow.envFrom" . | indent 12 }} + env: + {{- include "airflow.env" . | indent 12 }} + command: + - "/usr/bin/dumb-init" + - "--" + args: + - "python" + - "-Wignore" + - "/job_scripts/create_pools.py" + volumeMounts: + - name: job-scripts + mountPath: /job_scripts + readOnly: true + volumes: + - name: job-scripts + secret: + secretName: {{ include "airflow.fullname" . }}-job-scripts + defaultMode: 0755 + +{{- end }} \ No newline at end of file diff --git a/charts/airflow/templates/jobs/job-create-users.yaml b/charts/airflow/templates/jobs/job-create-users.yaml new file mode 100644 index 00000000..847c4e8a --- /dev/null +++ b/charts/airflow/templates/jobs/job-create-users.yaml @@ -0,0 +1,55 @@ +{{- if .Values.airflow.users }} +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ include "airflow.fullname" . }}-create-users + annotations: + helm.sh/hook: post-install,post-upgrade + helm.sh/hook-weight: "-1" + helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded + labels: + app: {{ include "airflow.labels.app" . }} + component: jobs + chart: {{ include "airflow.labels.chart" . 
}}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+spec:
+ ttlSecondsAfterFinished: 300
+ template:
+ metadata:
+ labels:
+ app: {{ include "airflow.labels.app" . }}
+ component: jobs
+ chart: {{ include "airflow.labels.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+ spec:
+ restartPolicy: OnFailure
+ {{- if .Values.airflow.image.pullSecret }}
+ imagePullSecrets:
+ - name: {{ .Values.airflow.image.pullSecret }}
+ {{- end }}
+ containers:
+ - name: create-users
+ {{- include "airflow.image" . | indent 10 }}
+ envFrom:
+ {{- include "airflow.envFrom" . | indent 12 }}
+ env:
+ {{- include "airflow.env" . | indent 12 }}
+ command:
+ - "/usr/bin/dumb-init"
+ - "--"
+ args:
+ - "python"
+ - "-Wignore"
+ - "/job_scripts/create_users.py"
+ volumeMounts:
+ - name: job-scripts
+ mountPath: /job_scripts
+ readOnly: true
+ volumes:
+ - name: job-scripts
+ secret:
+ secretName: {{ include "airflow.fullname" . }}-job-scripts
+ defaultMode: 0755
+{{- end }}
\ No newline at end of file
diff --git a/charts/airflow/templates/jobs/job-create-variables.yaml b/charts/airflow/templates/jobs/job-create-variables.yaml
new file mode 100644
index 00000000..0f35b91d
--- /dev/null
+++ b/charts/airflow/templates/jobs/job-create-variables.yaml
@@ -0,0 +1,55 @@
+{{- if .Values.airflow.variables }}
+apiVersion: batch/v1
+kind: Job
+metadata:
+ name: {{ include "airflow.fullname" . }}-create-variables
+ annotations:
+ helm.sh/hook: post-install,post-upgrade
+ helm.sh/hook-weight: "-1"
+ helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded
+ labels:
+ app: {{ include "airflow.labels.app" . }}
+ component: jobs
+ chart: {{ include "airflow.labels.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+spec:
+ ttlSecondsAfterFinished: 300
+ template:
+ metadata:
+ labels:
+ app: {{ include "airflow.labels.app" . }}
+ component: jobs
+ chart: {{ include "airflow.labels.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+ spec:
+ restartPolicy: OnFailure
+ {{- if .Values.airflow.image.pullSecret }}
+ imagePullSecrets:
+ - name: {{ .Values.airflow.image.pullSecret }}
+ {{- end }}
+ containers:
+ - name: create-variables
+ {{- include "airflow.image" . | indent 10 }}
+ envFrom:
+ {{- include "airflow.envFrom" . | indent 12 }}
+ env:
+ {{- include "airflow.env" . | indent 12 }}
+ command:
+ - "/usr/bin/dumb-init"
+ - "--"
+ args:
+ - "python"
+ - "-Wignore"
+ - "/job_scripts/create_variables.py"
+ volumeMounts:
+ - name: job-scripts
+ mountPath: /job_scripts
+ readOnly: true
+ volumes:
+ - name: job-scripts
+ secret:
+ secretName: {{ include "airflow.fullname" . }}-job-scripts
+ defaultMode: 0755
+{{- end }}
\ No newline at end of file
diff --git a/charts/airflow/templates/jobs/job-upgrade-db.yaml b/charts/airflow/templates/jobs/job-upgrade-db.yaml
new file mode 100644
index 00000000..eeb780ee
--- /dev/null
+++ b/charts/airflow/templates/jobs/job-upgrade-db.yaml
@@ -0,0 +1,47 @@
+apiVersion: batch/v1
+kind: Job
+metadata:
+ name: {{ include "airflow.fullname" . }}-upgrade-db
+ ## this job can't be a post-install hook, because this job must run BEFORE other
+ ## resources can become ready, meaning if you use the --wait flag the install would
+ ## never finish (which is important because chart-testing uses --wait)
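+ ## (our Pods use init-containers to wait for db migrations, so a post-install hook would deadlock under --wait)
+ labels:
+ app: {{ include "airflow.labels.app" . }}
+ component: jobs
+ chart: {{ include "airflow.labels.chart" . 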
}}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+spec:
+ ttlSecondsAfterFinished: 300
+ template:
+ metadata:
+ labels:
+ app: {{ include "airflow.labels.app" . }}
+ component: jobs
+ release: {{ .Release.Name }}
+ spec:
+ restartPolicy: OnFailure
+ {{- if .Values.airflow.image.pullSecret }}
+ imagePullSecrets:
+ - name: {{ .Values.airflow.image.pullSecret }}
+ {{- end }}
+ initContainers:
+ {{- include "airflow.init_container.check_db" . | indent 8 }}
+ containers:
+ - name: upgrade-db
+ {{- include "airflow.image" . | indent 10 }}
+ envFrom:
+ {{- include "airflow.envFrom" . | indent 12 }}
+ env:
+ {{- include "airflow.env" . | indent 12 }}
+ command:
+ - "/usr/bin/dumb-init"
+ - "--"
+ args:
+ - "bash"
+ - "-c"
+ {{- if .Values.airflow.legacyCommands }}
+ - "exec airflow upgradedb"
+ {{- else }}
+ - "exec airflow db upgrade"
+ {{- end }}
\ No newline at end of file
diff --git a/charts/airflow/templates/jobs/secret-job-scripts.yaml b/charts/airflow/templates/jobs/secret-job-scripts.yaml
new file mode 100644
index 00000000..8b7320e9
--- /dev/null
+++ b/charts/airflow/templates/jobs/secret-job-scripts.yaml
@@ -0,0 +1,274 @@
+apiVersion: v1
+kind: Secret
+metadata:
+ name: {{ include "airflow.fullname" . }}-job-scripts
+ annotations:
+ helm.sh/hook: post-install,post-upgrade
+ helm.sh/hook-weight: "-5"
+ helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded
+ labels:
+ app: {{ include "airflow.labels.app" . }}
+ component: jobs
+ chart: {{ include "airflow.labels.chart" . }}
+ release: {{ .Release.Name }}
+ heritage: {{ .Release.Service }}
+stringData:
+ create_connections.py: |-
+ from airflow.models import Connection
+ from airflow.utils.db import create_session
+
+ {{- if .Values.airflow.connectionsUpdate }}
+ {{ "" }}
+ def compare_connections(c1: Connection, c2: Connection) -> bool:
+ return c1.conn_id == c2.conn_id \
+ and c1.conn_type == c2.conn_type \
+ {{- if not .Values.airflow.legacyCommands }}
+ and c1.description == c2.description \
+ {{- end }}
+ and c1.host == c2.host \
+ and c1.login == c2.login \
+ and c1.password == c2.password \
+ and c1.schema == c2.schema \
+ and c1.port == c2.port \
+ and c1.extra == c2.extra
+ {{- end }}
+
+ connections = [
+ {{- range .Values.airflow.connections }}
+ Connection(
+ conn_id={{ (required "each `id` in `airflow.connections` must be non-empty!" .id) | quote }},
+ conn_type={{ (required "each `type` in `airflow.connections` must be non-empty!" .type) | quote }},
+ {{- if .description }}
+ {{- if not $.Values.airflow.legacyCommands }}
+ description={{ .description | quote }},
+ {{- end }}
+ {{- end }}
+ {{- if .host }}
+ host={{ .host | quote }},
+ {{- end }}
+ {{- if .login }}
+ login={{ .login | quote }},
+ {{- end }}
+ {{- if .password }}
+ password={{ .password | quote }},
+ {{- end }}
+ {{- if .schema }}
+ schema={{ .schema | quote }},
+ {{- end }}
+ {{- if .port }}
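+ {{- /* NOTE: helm parses values.yaml numbers as float64, hence this type check for int values */}}
+ {{- if not (typeIs "float64" .port) }}
+ {{ required "each `port` in `airflow.connections` must be int-type!" 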
nil }} + {{- end }} + port={{ .port }}, + {{- end }} + {{- if .extra }} + extra={{ .extra | quote }}, + {{- end }} + ), + {{- end }} + ] + + for c in connections: + with create_session() as session: + query_result = session.query(Connection).filter(Connection.conn_id == c.conn_id).first() + if not query_result: + session.add(c) + print(f"Connection=`{c.conn_id}` was successfully added.") + else: + {{- if .Values.airflow.connectionsUpdate }} + if compare_connections(c, query_result): + print(f"Connection=`{c.conn_id}` already exists as defined, skipping.") + else: + print(f"Connection=`{c.conn_id}` already exists but has changed, updating...") + query_result.conn_type = c.conn_type + {{- if not .Values.airflow.legacyCommands }} + query_result.description = c.description + {{- end }} + query_result.host = c.host + query_result.login = c.login + query_result.password = c.password + query_result.schema = c.schema + query_result.port = c.port + query_result.extra = c.extra + print(f"Connection=`{c.conn_id}` was successfully updated.") + {{- else }} + print(f"Connection=`{c.conn_id}` already exists, skipping.") + {{- end }} + + create_pools.py: |- + from airflow.models import Pool + from airflow.utils.db import create_session + + def create_pool(name: str, slots: int, description: str) -> Pool: + pool = Pool() + pool.pool = name + pool.slots = slots + pool.description = description + return pool + + {{- if .Values.airflow.poolsUpdate }} + {{ "" }} + def compare_pools(p1: Pool, p2: Pool) -> bool: + return p1.pool == p2.pool \ + and p1.slots == p2.slots \ + and p1.description == p2.description + {{- end }} + + pools = [ + {{- range .Values.airflow.pools }} + create_pool( + name={{ (required "each `name` in `airflow.pools` must be non-empty!" .name) | quote }}, + {{- if not (typeIs "float64" .slots) }} + {{ required "each `slots` in `airflow.pools` must be int-type!" nil }} + {{- end }} + slots={{ (required "each `slots` in `airflow.pools` must be non-empty!" .slots) }}, + description={{ (required "each `description` in `airflow.pools` must be non-empty!" 
.description) | quote }}, + ), + {{- end }} + ] + + for p in pools: + with create_session() as session: + query_result = session.query(Pool).filter(Pool.pool == p.pool).first() + if not query_result: + session.add(p) + print(f"Pool=`{p.pool}` was successfully added.") + else: + {{- if .Values.airflow.poolsUpdate }} + if compare_pools(p, query_result): + print(f"Pool=`{p.pool}` already exists as defined, skipping.") + else: + print(f"Pool=`{p.pool}` already exists but has changed, updating...") + query_result.slots = p.slots + query_result.description = p.description + print(f"Pool=`{p.pool}` was successfully updated.") + {{- else }} + print(f"Pool=`{p.pool}` already exists, skipping.") + {{- end }} + + create_users.py: |- + import sys + from flask_appbuilder.security.sqla.models import User, Role + from typing import Dict + from werkzeug.security import check_password_hash, generate_password_hash + {{- if .Values.airflow.legacyCommands }} + from airflow.www_rbac.app import cached_appbuilder + appbuilder = cached_appbuilder() + {{- else }} + from airflow.www.app import cached_app + appbuilder = cached_app().appbuilder + {{- end }} + + {{- if .Values.airflow.usersUpdate }} + {{ "" }} + def compare_users(user_dict: Dict, user_model: User) -> bool: + return user_dict["username"] == user_model.username \ + and user_dict["first_name"] == user_model.first_name \ + and user_dict["last_name"] == user_model.last_name \ + and user_dict["email"] == user_model.email \ + and [user_dict["role"]] == user_model.roles \ + and check_password_hash(pwhash=user_model.password, password=user_dict["password"]) + {{- end }} + + def find_role(role_name: str) -> Role: + found_role = appbuilder.sm.find_role(role_name) + if found_role: + return found_role + else: + valid_roles = appbuilder.sm.get_all_roles() + print(f"Failed to find role=`{role_name}`, valid roles are: {valid_roles}") + sys.exit(1) + + user_dicts = [ + {{- range .Values.airflow.users }} + { + "username": {{ (required "each `username` in `airflow.users` must be non-empty!" .username) | quote }}, + "first_name": {{ (required "each `firstName` in `airflow.users` must be non-empty!" .firstName) | quote }}, + "last_name": {{ (required "each `lastName` in `airflow.users` must be non-empty!" .lastName) | quote }}, + "email": {{ (required "each `email` in `airflow.users` must be non-empty!" .email) | quote }}, + "role": find_role({{ (required "each `role` in `airflow.users` must be non-empty!" .role) | quote }}), + "password": {{ (required "each `password` in `airflow.users` must be non-empty!" 
.password) | quote }},
+ },
+ {{- end }}
+ ]
+
+ for u in user_dicts:
+ existing_user = appbuilder.sm.find_user(username=u["username"])
+ if not existing_user:
+ if appbuilder.sm.add_user(
+ username=u["username"],
+ first_name=u["first_name"],
+ last_name=u["last_name"],
+ email=u["email"],
+ role=u["role"],
+ password=u["password"]
+ ):
+ print(f"User=`{u['username']}` was successfully added.")
+ else:
+ print(f"Failed to add User=`{u['username']}`")
+ sys.exit(1)
+ else:
+ {{- if .Values.airflow.usersUpdate }}
+ if compare_users(u, existing_user):
+ print(f"User=`{u['username']}` already exists as defined, skipping.")
+ else:
+ print(f"User=`{u['username']}` already exists but has changed, updating...")
+ existing_user.first_name = u["first_name"]
+ existing_user.last_name = u["last_name"]
+ existing_user.email = u["email"]
+ existing_user.roles = [u["role"]]
+ existing_user.password = generate_password_hash(u["password"])
+ # the `is False` check is because update_user() returns None on success,
+ # but in the future it might return the updated User model
+ if not (appbuilder.sm.update_user(existing_user) is False):
+ print(f"User=`{u['username']}` was successfully updated.")
+ else:
+ print(f"Failed to update User=`{u['username']}`")
+ sys.exit(1)
+ {{- else }}
+ print(f"User=`{u['username']}` already exists, skipping.")
+ {{- end }}
+
+ create_variables.py: |-
+ from airflow.models import Variable
+ from airflow.utils.db import create_session
+
+ def create_variable(key: str, val: str) -> Variable:
+ variable = Variable()
+ variable.key = key
+ variable.val = val
+ return variable
+
+ {{- if .Values.airflow.variablesUpdate }}
+ {{ "" }}
+ def compare_variables(v1: Variable, v2: Variable) -> bool:
+ return v1.key == v2.key \
+ and v1.val == v2.val
+ {{- end }}
+
+ variables = [
+ {{- range .Values.airflow.variables }}
+ create_variable(
+ key={{ (required "each `key` in `airflow.variables` must be non-empty!" .key) | quote }},
+ val={{ (required "each `value` in `airflow.variables` must be non-empty!" .value) | quote }},
+ ),
+ {{- end }}
+ ]
+
+ for v in variables:
+ with create_session() as session:
+ query_result = session.query(Variable).filter(Variable.key == v.key).first()
+ if not query_result:
+ session.add(v)
+ print(f"Variable=`{v.key}` was successfully added.")
+ else:
+ {{- if .Values.airflow.variablesUpdate }}
+ if compare_variables(v, query_result):
+ print(f"Variable=`{v.key}` already exists as defined, skipping.")
+ else:
+ print(f"Variable=`{v.key}` already exists but has changed, updating...")
+ query_result.val = v.val
+ print(f"Variable=`{v.key}` was successfully updated.")
+ {{- else }}
+ print(f"Variable=`{v.key}` already exists, skipping.")
+ {{- end }}
\ No newline at end of file
diff --git a/charts/airflow/templates/pvc.yaml b/charts/airflow/templates/pvc-dags.yaml
similarity index 90%
rename from charts/airflow/templates/pvc.yaml
rename to charts/airflow/templates/pvc-dags.yaml
index fae50cf9..c37685b8 100644
--- a/charts/airflow/templates/pvc.yaml
+++ b/charts/airflow/templates/pvc-dags.yaml
@@ -2,7 +2,7 @@
 kind: PersistentVolumeClaim
 apiVersion: v1
 metadata:
- name: {{ include "airflow.fullname" . }}
+ name: {{ printf "%s-dags" (include "airflow.fullname" . | trunc 58) }}
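+ ## NOTE: the fullname is truncated to 58 chars to leave room for the "-dags" suffix
 labels:
 app: {{ include "airflow.labels.app" . }}
 chart: {{ include "airflow.labels.chart" . 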
}} diff --git a/charts/airflow/templates/rbac/airflow-role.yaml b/charts/airflow/templates/rbac/airflow-role.yaml index 709df0c4..3676f421 100644 --- a/charts/airflow/templates/rbac/airflow-role.yaml +++ b/charts/airflow/templates/rbac/airflow-role.yaml @@ -27,6 +27,7 @@ rules: - "get" - "delete" - "list" + - "patch" - "watch" - apiGroups: - "" diff --git a/charts/airflow/templates/scheduler/scheduler-deployment.yaml b/charts/airflow/templates/scheduler/scheduler-deployment.yaml index 2406b782..95b55a67 100644 --- a/charts/airflow/templates/scheduler/scheduler-deployment.yaml +++ b/charts/airflow/templates/scheduler/scheduler-deployment.yaml @@ -1,3 +1,4 @@ +{{- $extraPipPackages := concat .Values.airflow.extraPipPackages .Values.scheduler.extraPipPackages }} apiVersion: apps/v1 kind: Deployment metadata: @@ -16,13 +17,13 @@ metadata: {{- toYaml .Values.scheduler.labels | nindent 4 }} {{- end }} spec: - replicas: 1 + replicas: {{ .Values.scheduler.replicas }} strategy: - # this is safe as long as `maxSurge` is 0 type: RollingUpdate rollingUpdate: - maxSurge: 0 - maxUnavailable: 100% + ## multiple scheduler pods can safely run concurrently (in airflow 2.0+) + maxSurge: 25% + maxUnavailable: 0 selector: matchLabels: app: {{ include "airflow.labels.app" . }} @@ -31,14 +32,8 @@ spec: template: metadata: annotations: - checksum/config-env: {{ include (print $.Template.BasePath "/config/configmap-env.yaml") . | sha256sum }} - checksum/config-git-clone: {{ include (print $.Template.BasePath "/config/configmap-scripts-git.yaml") . | sha256sum }} - checksum/config-scripts: {{ include (print $.Template.BasePath "/config/configmap-scripts.yaml") . | sha256sum }} - checksum/config-variables-pools: {{ include (print $.Template.BasePath "/config/configmap-variables-pools.yaml") . | sha256sum }} - checksum/secret-connections: {{ include (print $.Template.BasePath "/config/secret-connections.yaml") . | sha256sum }} - {{- if and (.Values.dags.git.url) (.Values.dags.git.ref) }} - checksum/dags-git-ref: {{ .Values.dags.git.ref | sha256sum }} - {{- end }} + checksum/secret-config: {{ include (print $.Template.BasePath "/config/secret-config.yaml") . | sha256sum }} + checksum/config-pod-template: {{ include (print $.Template.BasePath "/config/configmap-pod-template.yaml") . | sha256sum }} {{- if .Values.airflow.podAnnotations }} {{- toYaml .Values.airflow.podAnnotations | nindent 8 }} {{- end }} @@ -56,11 +51,11 @@ spec: {{- toYaml .Values.scheduler.podLabels | nindent 8 }} {{- end }} spec: + restartPolicy: Always {{- if .Values.airflow.image.pullSecret }} imagePullSecrets: - name: {{ .Values.airflow.image.pullSecret }} {{- end }} - restartPolicy: Always {{- if .Values.scheduler.nodeSelector }} nodeSelector: {{- toYaml .Values.scheduler.nodeSelector | nindent 8 }} @@ -78,151 +73,27 @@ spec: {{- toYaml .Values.scheduler.securityContext | nindent 8 }} {{- end }} serviceAccountName: {{ include "airflow.serviceAccountName" . }} - {{- if or (.Values.dags.initContainer.enabled) (.Values.scheduler.preinitdb) (.Values.scheduler.extraInitContainers) }} initContainers: - {{- if .Values.scheduler.preinitdb }} - - name: {{ .Chart.Name }}-preinitdb - image: {{ .Values.airflow.image.repository }}:{{ .Values.airflow.image.tag }} - imagePullPolicy: {{ .Values.airflow.image.pullPolicy}} - command: - - "/usr/bin/dumb-init" - - "--" - args: - - "/bin/bash" - - "-c" - - "/home/airflow/scripts/preinit-db.sh" - envFrom: - - configMapRef: - name: "{{ include "airflow.fullname" . }}-env" - env: - {{- include "airflow.mapenvsecrets" . 
| indent 12 }} - resources: - {{- toYaml .Values.scheduler.resources | nindent 12 }} - volumeMounts: - - name: scripts - mountPath: /home/airflow/scripts - {{- end }} - {{- if .Values.dags.initContainer.enabled }} - - name: git-clone - image: {{ .Values.dags.initContainer.image.repository }}:{{ .Values.dags.initContainer.image.tag }} - imagePullPolicy: {{ .Values.dags.initContainer.image.pullPolicy }} - envFrom: - - configMapRef: - name: "{{ include "airflow.fullname" . }}-env" - env: - {{- include "airflow.mapenvsecrets" . | indent 12 }} - resources: - {{- toYaml .Values.dags.initContainer.resources | nindent 12 }} - command: - - /home/airflow/git/git-clone.sh - args: - - "{{ .Values.dags.git.url }}" - - "{{ .Values.dags.git.ref }}" - - "{{ .Values.dags.initContainer.mountPath }}{{ .Values.dags.initContainer.syncSubPath }}" - - "{{ .Values.dags.git.repoHost }}" - - "{{ .Values.dags.git.repoPort }}" - - "{{ .Values.dags.git.privateKeyName }}" - volumeMounts: - - name: git-clone - mountPath: /home/airflow/git - - name: dags-data - mountPath: "{{ .Values.dags.initContainer.mountPath }}" - {{- if .Values.dags.git.secret }} - - name: git-clone-secret - mountPath: /keys - {{- end }} + {{- if $extraPipPackages }} + {{- include "airflow.init_container.install_pip_packages" (dict "Values" .Values "extraPipPackages" $extraPipPackages) | indent 8 }} {{- end }} + {{- include "airflow.init_container.check_db" . | indent 8 }} + {{- include "airflow.init_container.wait_for_db_migrations" . | indent 8 }} {{- if .Values.scheduler.extraInitContainers }} {{- toYaml .Values.scheduler.extraInitContainers | nindent 8 }} {{- end }} - {{- end }} containers: - {{- if .Values.dags.git.gitSync.enabled }} - - name: git-sync - image: {{ .Values.dags.git.gitSync.image.repository }}:{{ .Values.dags.git.gitSync.image.tag }} - imagePullPolicy: {{ .Values.dags.git.gitSync.image.pullPolicy }} - envFrom: - - configMapRef: - name: "{{ include "airflow.fullname" . }}-env" - env: - {{- include "airflow.mapenvsecrets" . | indent 12 }} - command: - - /home/airflow/git/git-sync.sh - args: - - "{{ .Values.dags.git.url }}" - - "{{ .Values.dags.git.ref }}" - - "{{ .Values.dags.initContainer.mountPath }}{{ .Values.dags.initContainer.syncSubPath }}" - - "{{ .Values.dags.git.repoHost }}" - - "{{ .Values.dags.git.repoPort }}" - - "{{ .Values.dags.git.privateKeyName }}" - - "{{ .Values.dags.git.gitSync.refreshTime }}" - volumeMounts: - - name: git-clone - mountPath: /home/airflow/git - - name: dags-data - mountPath: "{{ .Values.dags.initContainer.mountPath }}" - {{- if .Values.dags.git.secret }} - - name: git-clone-secret - mountPath: /keys - {{- end }} + - name: airflow-scheduler + {{- include "airflow.image" . | indent 10 }} resources: - {{- toYaml .Values.dags.git.gitSync.resources | nindent 12 }} - {{- end }} - - name: {{ .Chart.Name }}-scheduler - image: {{ .Values.airflow.image.repository }}:{{ .Values.airflow.image.tag }} - imagePullPolicy: {{ .Values.airflow.image.pullPolicy}} + {{- toYaml .Values.scheduler.resources | nindent 12 }} envFrom: - - configMapRef: - name: "{{ include "airflow.fullname" . }}-env" + {{- include "airflow.envFrom" . | indent 12 }} env: - {{- include "airflow.mapenvsecrets" . 
| indent 12 }} - resources: - {{- toYaml .Values.scheduler.resources | nindent 12 }} - volumeMounts: - - name: scripts - mountPath: /home/airflow/scripts - {{- if .Values.scheduler.secretsMap }} - - name: {{ .Values.scheduler.secretsMap }}-volume - readOnly: true - mountPath: {{ $.Values.scheduler.secretsDir }} - {{- else }} - {{- range .Values.scheduler.secrets }} - - name: {{ . }}-volume - readOnly: true - mountPath: {{ $.Values.scheduler.secretsDir }}/{{ . }} - {{- end }} - {{- end }} - {{- if .Values.dags.persistence.enabled }} - - name: dags-data - mountPath: {{ .Values.dags.path }} - subPath: {{ .Values.dags.persistence.subPath }} - {{- else if or (.Values.dags.initContainer.enabled) (.Values.dags.git.gitSync.enabled) }} - - name: dags-data - mountPath: {{ .Values.dags.path }} - {{- end }} - {{- if .Values.logs.persistence.enabled }} - - name: logs-data - mountPath: {{ .Values.logs.path }} - subPath: {{ .Values.logs.persistence.subPath }} - {{- end }} - {{- if or (.Values.scheduler.connections) (.Values.scheduler.existingSecretConnections) }} - - name: connections - mountPath: /home/airflow/connections - {{- end}} - {{- if or (.Values.scheduler.variables) (.Values.scheduler.pools) }} - - name: variables-pools - mountPath: /home/airflow/variables-pools/ - {{- end}} - {{- range .Values.airflow.extraConfigmapMounts }} - - name: {{ .name }} - mountPath: {{ .mountPath }} - readOnly: {{ .readOnly }} - {{- if .subPath }} - subPath: {{ .subPath }} - {{- end }} - {{- end }} - {{- if .Values.airflow.extraVolumeMounts }} - {{- toYaml .Values.airflow.extraVolumeMounts | nindent 12 }} + {{- include "airflow.env" . | indent 12 }} + {{- if $extraPipPackages }} + - name: PYTHONPATH + value: /opt/python/site-packages {{- end }} command: - "/usr/bin/dumb-init" @@ -230,126 +101,68 @@ spec: args: - "bash" - "-c" - - > - true \ - {{- if gt .Values.scheduler.initialStartupDelay 0.0 }} - && echo "*** waiting {{ .Values.scheduler.initialStartupDelay }}s..." \ - && sleep {{ .Values.scheduler.initialStartupDelay }} \ - {{- end }} - && mkdir -p /home/airflow/.local/bin \ - && export PATH="/home/airflow/.local/bin:$PATH" \ - {{- if .Values.dags.installRequirements }} - && echo "*** installing requirements..." \ - && /home/airflow/scripts/install-requirements.sh \ - {{- end }} - {{- if .Values.airflow.extraPipPackages }} - && echo "*** installing global extra pip packages..." \ - && pip install --user {{ range .Values.airflow.extraPipPackages }} {{ . | quote }} {{ end }} \ - {{- end }} - {{- if .Values.scheduler.initdb }} - && echo "*** upgrading airflow db..." \ - && airflow upgradedb || airflow db upgrade \ - {{- end }} - {{- if .Values.scheduler.variables }} - && echo "*** adding Airflow variables..." \ - && airflow variables -i /home/airflow/variables-pools/variables.json \ - {{- end }} - {{- if or (.Values.scheduler.connections) (.Values.scheduler.existingSecretConnections) }} - && echo "*** adding Airflow connections..." \ - && /home/airflow/connections/add-connections.sh \ - {{- end }} - {{- if .Values.scheduler.pools }} - && echo "*** adding Airflow pools..." \ - && airflow pool -i /home/airflow/variables-pools/pools.json \ - {{- end }} - && echo "*** running scheduler..." 
\
- && exec airflow scheduler -n {{ .Values.scheduler.numRuns }}
+ - "exec airflow scheduler -n {{ .Values.scheduler.numRuns }}"
 {{- if .Values.scheduler.livenessProbe.enabled }}
 livenessProbe:
 initialDelaySeconds: {{ .Values.scheduler.livenessProbe.initialDelaySeconds }}
 periodSeconds: {{ .Values.scheduler.livenessProbe.periodSeconds }}
 failureThreshold: {{ .Values.scheduler.livenessProbe.failureThreshold }}
+ timeoutSeconds: {{ .Values.scheduler.livenessProbe.timeoutSeconds }}
 exec:
 command:
- - python
- - -Wignore
- - -c
- - |
- import os
- os.environ['AIRFLOW__CORE__LOGGING_LEVEL'] = 'ERROR'
- os.environ['AIRFLOW__LOGGING__LOGGING_LEVEL'] = 'ERROR'
- from airflow.jobs.scheduler_job import SchedulerJob
- from airflow.utils.net import get_hostname
- import sys
- job = SchedulerJob.most_recent_job()
- sys.exit(0 if job.is_alive() and job.hostname == get_hostname() else 1)
+ - "python"
+ - "-Wignore"
+ - "-c"
+ - |
+ from typing import List
+ from airflow.jobs.scheduler_job import SchedulerJob
+ from airflow.utils.db import create_session
+ from airflow.utils.net import get_hostname
+ from airflow.utils.state import State
+
+ with create_session() as session:
+ hostname = get_hostname()
+ query = session \
+ .query(SchedulerJob) \
+ .filter_by(state=State.RUNNING, hostname=hostname) \
+ .order_by(SchedulerJob.latest_heartbeat.desc())
+ jobs: List[SchedulerJob] = query.all()
+ alive_jobs = [job for job in jobs if job.is_alive()]
+ count_alive_jobs = len(alive_jobs)
+
+ if count_alive_jobs == 1:
+ print(f"HEALTHY - {count_alive_jobs} alive SchedulerJob for: {hostname}")
+ elif count_alive_jobs == 0:
+ # raise (rather than just construct) SystemExit, so the probe exits non-zero
+ raise SystemExit(f"UNHEALTHY - 0 alive SchedulerJob for: {hostname}")
+ else:
+ raise SystemExit(f"UNHEALTHY - {count_alive_jobs} (more than 1) alive SchedulerJob for: {hostname}")
 {{- end }}
+ {{- $extraVolumeMounts := .Values.scheduler.extraVolumeMounts }}
+ {{- $volumeMounts := include "airflow.volumeMounts" (dict "Values" .Values "extraPipPackages" $extraPipPackages "extraVolumeMounts" $extraVolumeMounts) }}
+ {{- if or ($volumeMounts) (include "airflow.executor.kubernetes_like" .) }}
+ volumeMounts:
+ {{- $volumeMounts | indent 12 }}
+ {{- if include "airflow.executor.kubernetes_like" . }}
+ - name: pod-template
+ mountPath: /opt/airflow/pod_templates/pod_template.yaml
+ subPath: pod_template.yaml
+ readOnly: true
+ {{- end }}
 {{- end }}
+ {{- if .Values.dags.gitSync.enabled }}
+ {{- include "airflow.container.git_sync" . | indent 8 }}
+ {{- end }}
 {{- if .Values.airflow.extraContainers }}
 {{- toYaml .Values.airflow.extraContainers | nindent 8 }}
 {{- end }}
+ {{- $extraVolumes := .Values.scheduler.extraVolumes }}
+ {{- $volumes := include "airflow.volumes" (dict "Values" .Values "extraPipPackages" $extraPipPackages "extraVolumes" $extraVolumes) }}
+ {{- if or ($volumes) (include "airflow.executor.kubernetes_like" .) }}
 volumes:
- - name: scripts
- configMap:
- name: {{ include "airflow.fullname" . }}-scripts
- defaultMode: 0755
- {{- if .Values.scheduler.secretsMap }}
- - name: {{ .Values.scheduler.secretsMap }}-volume
- secret:
- secretName: {{ .Values.scheduler.secretsMap }}
- {{- else }}
- {{- range .Values.scheduler.secrets }}
- - name: {{ . }}-volume
- secret:
- secretName: {{ . }}
- {{- end }}
- {{- end }}
- {{- if .Values.dags.persistence.enabled }}
- - name: dags-data
- persistentVolumeClaim:
- claimName: {{ .Values.dags.persistence.existingClaim | default (include "airflow.fullname" . 
) }} - {{- else if or (.Values.dags.initContainer.enabled) (.Values.dags.git.gitSync.enabled) }} - - name: dags-data - emptyDir: {} - {{- end }} - {{- if .Values.logs.persistence.enabled }} - - name: logs-data - persistentVolumeClaim: - claimName: {{ .Values.logs.persistence.existingClaim | default (printf "%s-logs" (include "airflow.fullname" . | trunc 58 )) }} - {{- end }} - {{- if or (.Values.dags.initContainer.enabled) (.Values.dags.git.gitSync.enabled) }} - - name: git-clone - configMap: - name: {{ include "airflow.fullname" . }}-scripts-git - defaultMode: 0755 - {{- if .Values.dags.git.secret }} - - name: git-clone-secret - secret: - secretName: {{ .Values.dags.git.secret }} - defaultMode: 0700 - {{- end }} - {{- end }} - {{- if or (.Values.scheduler.connections) (.Values.scheduler.existingSecretConnections) }} - - name: connections - secret: - secretName: |- - {{- if .Values.scheduler.existingSecretConnections }} - {{ .Values.scheduler.existingSecretConnections }} - {{- else }} - {{ include "airflow.fullname" . }}-connections - {{- end }} - defaultMode: 0755 - {{- end }} - {{- if or (.Values.scheduler.variables) (.Values.scheduler.pools) }} - - name: variables-pools - configMap: - name: {{ include "airflow.fullname" . }}-variables-pools - defaultMode: 0755 - {{- end }} - {{- range .Values.airflow.extraConfigmapMounts }} - - name: {{ .name }} + {{- $volumes | indent 8 }} + {{- if include "airflow.executor.kubernetes_like" . }} + - name: pod-template configMap: - name: {{ .configMap }} + name: {{ include "airflow.fullname" . }}-pod-template {{- end }} - {{- if .Values.airflow.extraVolumes }} - {{- toYaml .Values.airflow.extraVolumes | nindent 8 }} - {{- end }} + {{- end }} \ No newline at end of file diff --git a/charts/airflow/templates/webserver/webserver-deployment.yaml b/charts/airflow/templates/webserver/webserver-deployment.yaml index 9d457dce..5b837c03 100644 --- a/charts/airflow/templates/webserver/webserver-deployment.yaml +++ b/charts/airflow/templates/webserver/webserver-deployment.yaml @@ -1,3 +1,4 @@ +{{- $extraPipPackages := concat .Values.airflow.extraPipPackages .Values.web.extraPipPackages }} apiVersion: apps/v1 kind: Deployment metadata: @@ -17,11 +18,10 @@ metadata: {{- end }} spec: replicas: {{ .Values.web.replicas }} - minReadySeconds: {{ .Values.web.minReadySeconds }} strategy: - # this is safe - multiple web pods can run concurrently type: RollingUpdate rollingUpdate: + ## multiple web pods can safely run concurrently maxSurge: 25% maxUnavailable: 0 selector: @@ -32,12 +32,8 @@ spec: template: metadata: annotations: - checksum/config-env: {{ include (print $.Template.BasePath "/config/configmap-env.yaml") . | sha256sum }} - checksum/config-git-clone: {{ include (print $.Template.BasePath "/config/configmap-scripts-git.yaml") . | sha256sum }} - checksum/config-scripts: {{ include (print $.Template.BasePath "/config/configmap-scripts.yaml") . | sha256sum }} - {{- if and (.Values.dags.git.url) (.Values.dags.git.ref) }} - checksum/dags-git-ref: {{ .Values.dags.git.ref | sha256sum }} - {{- end }} + checksum/secret-config: {{ include (print $.Template.BasePath "/config/secret-config.yaml") . | sha256sum }} + checksum/config-webserver-config: {{ include (print $.Template.BasePath "/config/secret-webserver-config.yaml") . 
| sha256sum }} {{- if .Values.airflow.podAnnotations }} {{- toYaml .Values.airflow.podAnnotations | nindent 8 }} {{- end }} @@ -55,11 +51,11 @@ spec: {{- toYaml .Values.web.podLabels | nindent 8 }} {{- end }} spec: + restartPolicy: Always {{- if .Values.airflow.image.pullSecret }} imagePullSecrets: - name: {{ .Values.airflow.image.pullSecret }} {{- end }} - restartPolicy: Always {{- if .Values.web.nodeSelector }} nodeSelector: {{- toYaml .Values.web.nodeSelector | nindent 8 }} @@ -77,120 +73,28 @@ spec: securityContext: {{- toYaml .Values.web.securityContext | nindent 8 }} {{- end }} - {{- if and ( .Values.dags.initContainer.enabled ) ( not .Values.web.serializeDAGs ) }} initContainers: - - name: git-clone - image: {{ .Values.dags.initContainer.image.repository }}:{{ .Values.dags.initContainer.image.tag }} - imagePullPolicy: {{ .Values.dags.initContainer.image.pullPolicy }} - envFrom: - - configMapRef: - name: "{{ include "airflow.fullname" . }}-env" - env: - {{- include "airflow.mapenvsecrets" . | indent 12 }} - resources: - {{- toYaml .Values.dags.initContainer.resources | nindent 12 }} - command: - - /home/airflow/git/git-clone.sh - args: - - "{{ .Values.dags.git.url }}" - - "{{ .Values.dags.git.ref }}" - - "{{ .Values.dags.initContainer.mountPath }}{{ .Values.dags.initContainer.syncSubPath }}" - - "{{ .Values.dags.git.repoHost }}" - - "{{ .Values.dags.git.repoPort }}" - - "{{ .Values.dags.git.privateKeyName }}" - volumeMounts: - - name: git-clone - mountPath: /home/airflow/git - - name: dags-data - mountPath: "{{ .Values.dags.initContainer.mountPath }}" - {{- if .Values.dags.git.secret }} - - name: git-clone-secret - mountPath: /keys - {{- end }} - {{- end }} + {{- if $extraPipPackages }} + {{- include "airflow.init_container.install_pip_packages" (dict "Values" .Values "extraPipPackages" $extraPipPackages) | indent 8 }} + {{- end }} + {{- include "airflow.init_container.check_db" . | indent 8 }} + {{- include "airflow.init_container.wait_for_db_migrations" . | indent 8 }} containers: - {{- if and ( .Values.dags.git.gitSync.enabled ) ( not .Values.web.serializeDAGs ) }} - - name: git-sync - image: {{ .Values.dags.git.gitSync.image.repository }}:{{ .Values.dags.git.gitSync.image.tag }} - imagePullPolicy: {{ .Values.dags.git.gitSync.image.pullPolicy }} - envFrom: - - configMapRef: - name: "{{ include "airflow.fullname" . }}-env" - env: - {{- include "airflow.mapenvsecrets" . | indent 12 }} - command: - - /home/airflow/git/git-sync.sh - args: - - "{{ .Values.dags.git.url }}" - - "{{ .Values.dags.git.ref }}" - - "{{ .Values.dags.initContainer.mountPath }}{{ .Values.dags.initContainer.syncSubPath }}" - - "{{ .Values.dags.git.repoHost }}" - - "{{ .Values.dags.git.repoPort }}" - - "{{ .Values.dags.git.privateKeyName }}" - - "{{ .Values.dags.git.gitSync.refreshTime }}" - volumeMounts: - - name: git-clone - mountPath: /home/airflow/git - - name: dags-data - mountPath: "{{ .Values.dags.initContainer.mountPath }}" - {{- if .Values.dags.git.secret }} - - name: git-clone-secret - mountPath: /keys - {{- end }} + - name: airflow-web + {{- include "airflow.image" . | indent 10 }} resources: - {{- toYaml .Values.dags.git.gitSync.resources | nindent 12 }} - {{- end }} - - name: {{ .Chart.Name }}-web - image: {{ .Values.airflow.image.repository }}:{{ .Values.airflow.image.tag }} - imagePullPolicy: {{ .Values.airflow.image.pullPolicy}} + {{- toYaml .Values.web.resources | nindent 12 }} ports: - name: web containerPort: 8080 protocol: TCP envFrom: - - configMapRef: - name: "{{ include "airflow.fullname" . 
}}-env" + {{- include "airflow.envFrom" . | indent 12 }} env: - {{- include "airflow.mapenvsecrets" . | indent 12 }} - resources: - {{- toYaml .Values.web.resources | nindent 12 }} - volumeMounts: - - name: scripts - mountPath: /home/airflow/scripts - {{- if .Values.web.secretsMap }} - - name: {{ .Values.web.secretsMap }}-volume - readOnly: true - mountPath: {{ $.Values.web.secretsDir }} - {{- else }} - {{- range .Values.web.secrets }} - - name: {{ . }}-volume - readOnly: true - mountPath: {{ $.Values.web.secretsDir }}/{{ . }} - {{- end }} - {{- end }} - {{- if .Values.dags.persistence.enabled }} - - name: dags-data - mountPath: {{ .Values.dags.path }} - subPath: {{ .Values.dags.persistence.subPath }} - {{- else if or (.Values.dags.initContainer.enabled) (.Values.dags.git.gitSync.enabled) }} - - name: dags-data - mountPath: {{ .Values.dags.path }} - {{- end }} - {{- if .Values.logs.persistence.enabled }} - - name: logs-data - mountPath: {{ .Values.logs.path }} - subPath: {{ .Values.logs.persistence.subPath }} - {{- end }} - {{- range .Values.airflow.extraConfigmapMounts }} - - name: {{ .name }} - mountPath: {{ .mountPath }} - readOnly: {{ .readOnly }} - {{- if .subPath }} - subPath: {{ .subPath }} - {{- end }} - {{- end }} - {{- if .Values.airflow.extraVolumeMounts }} - {{- toYaml .Values.airflow.extraVolumeMounts | nindent 12 }} + {{- include "airflow.env" . | indent 12 }} + {{- if $extraPipPackages }} + - name: PYTHONPATH + value: /opt/python/site-packages {{- end }} command: - "/usr/bin/dumb-init" @@ -198,105 +102,52 @@ spec: args: - "bash" - "-c" - - > - true \ - {{- if gt .Values.web.initialStartupDelay 0.0 }} - && echo "*** waiting {{ .Values.web.initialStartupDelay }}s..." \ - && sleep {{ .Values.web.initialStartupDelay }} \ - {{- end }} - && mkdir -p /home/airflow/.local/bin \ - && export PATH="/home/airflow/.local/bin:$PATH" \ - {{- if .Values.dags.installRequirements }} - && echo "*** installing requirements..." \ - && /home/airflow/scripts/install-requirements.sh \ - {{- end }} - {{- if .Values.airflow.extraPipPackages }} - && echo "*** installing global extra pip packages..." \ - && pip install --user {{ range .Values.airflow.extraPipPackages }} {{ . | quote }} {{ end }} \ - {{- end }} - {{- if .Values.web.extraPipPackages }} - && echo "*** installing extra pip packages..." \ - && pip install --user {{ range .Values.web.extraPipPackages }} {{ . | quote }} {{ end }} \ - {{- end }} - && echo "*** running webserver..." \ - && exec airflow webserver + - "exec airflow webserver" {{- if .Values.web.livenessProbe.enabled }} livenessProbe: - httpGet: - scheme: {{ .Values.web.livenessProbe.scheme }} - {{- if .Values.ingress.web.livenessPath }} - path: "{{ .Values.ingress.web.livenessPath }}" - {{- else }} - path: "{{ .Values.ingress.web.path }}/health" - {{- end }} - port: web initialDelaySeconds: {{ .Values.web.livenessProbe.initialDelaySeconds }} periodSeconds: {{ .Values.web.livenessProbe.periodSeconds }} timeoutSeconds: {{ .Values.web.livenessProbe.timeoutSeconds }} - successThreshold: {{ .Values.web.livenessProbe.successThreshold }} failureThreshold: {{ .Values.web.livenessProbe.failureThreshold }} + httpGet: + scheme: {{ include "airflow.web.scheme" . 
}} + path: "/health" + port: web {{- end }} {{- if .Values.web.readinessProbe.enabled }} readinessProbe: - httpGet: - scheme: {{ .Values.web.readinessProbe.scheme }} - path: "{{ .Values.ingress.web.path }}/health" - port: web initialDelaySeconds: {{ .Values.web.readinessProbe.initialDelaySeconds }} periodSeconds: {{ .Values.web.readinessProbe.periodSeconds }} timeoutSeconds: {{ .Values.web.readinessProbe.timeoutSeconds }} - successThreshold: {{ .Values.web.readinessProbe.successThreshold }} failureThreshold: {{ .Values.web.readinessProbe.failureThreshold }} + httpGet: + scheme: {{ include "airflow.web.scheme" . }} + path: "/health" + port: web {{- end }} + {{- $extraVolumeMounts := .Values.web.extraVolumeMounts }} + {{- $volumeMounts := include "airflow.volumeMounts" (dict "Values" .Values "extraPipPackages" $extraPipPackages "extraVolumeMounts" $extraVolumeMounts) }} + volumeMounts: + {{- $volumeMounts | indent 12 }} + - name: webserver-config + mountPath: /opt/airflow/webserver_config.py + subPath: webserver_config.py + readOnly: true + {{- if .Values.dags.gitSync.enabled }} + {{- include "airflow.container.git_sync" . | indent 8 }} + {{- end }} {{- if .Values.airflow.extraContainers }} - {{- toYaml .Values.airflow.extraContainers | nindent 8 }} + {{- toYaml .Values.airflow.extraContainers | indent 8 }} {{- end }} + {{- $extraVolumes := .Values.web.extraVolumes }} + {{- $volumes := include "airflow.volumes" (dict "Values" .Values "extraPipPackages" $extraPipPackages "extraVolumes" $extraVolumes) }} volumes: - - name: scripts - configMap: - name: {{ include "airflow.fullname" . }}-scripts - defaultMode: 0755 - {{- if .Values.web.secretsMap }} - - name: {{ .Values.web.secretsMap }}-volume - secret: - secretName: {{ .Values.web.secretsMap }} - {{- else }} - {{- range .Values.web.secrets }} - - name: {{ . }}-volume - secret: - secretName: {{ . }} - {{- end }} - {{- end }} - {{- if .Values.dags.persistence.enabled }} - - name: dags-data - persistentVolumeClaim: - claimName: {{ .Values.dags.persistence.existingClaim | default (include "airflow.fullname" . ) }} - {{- else if or (.Values.dags.initContainer.enabled) (.Values.dags.git.gitSync.enabled) }} - - name: dags-data - emptyDir: {} - {{- end }} - {{- if .Values.logs.persistence.enabled }} - - name: logs-data - persistentVolumeClaim: - claimName: {{ .Values.logs.persistence.existingClaim | default (printf "%s-logs" (include "airflow.fullname" . | trunc 58 )) }} - {{- end }} - {{- if or (.Values.dags.initContainer.enabled) (.Values.dags.git.gitSync.enabled) }} - - name: git-clone - configMap: - name: {{ include "airflow.fullname" . }}-scripts-git - defaultMode: 0755 - {{- if .Values.dags.git.secret }} - - name: git-clone-secret + {{- $volumes | indent 8 }} + - name: webserver-config secret: - secretName: {{ .Values.dags.git.secret }} - defaultMode: 0700 - {{- end }} - {{- end }} - {{- range .Values.airflow.extraConfigmapMounts }} - - name: {{ .name }} - configMap: - name: {{ .configMap }} - {{- end }} - {{- if .Values.airflow.extraVolumes }} - {{- toYaml .Values.airflow.extraVolumes | nindent 8 }} - {{- end }} + {{- if .Values.web.webserverConfig.existingSecret }} + secretName: {{ .Values.web.webserverConfig.existingSecret }} + {{- else }} + secretName: {{ include "airflow.fullname" . 
}}-webserver-config + {{- end }} + defaultMode: 0644 \ No newline at end of file diff --git a/charts/airflow/templates/webserver/webserver-service-monitor.yaml b/charts/airflow/templates/webserver/webserver-service-monitor.yaml index d2fc660d..65fd0e53 100644 --- a/charts/airflow/templates/webserver/webserver-service-monitor.yaml +++ b/charts/airflow/templates/webserver/webserver-service-monitor.yaml @@ -19,7 +19,7 @@ spec: component: web release: {{ .Release.Name }} endpoints: - - port: web - path: {{ .Values.serviceMonitor.path }} - interval: {{ .Values.serviceMonitor.interval }} + - port: web + path: {{ .Values.serviceMonitor.path }} + interval: {{ .Values.serviceMonitor.interval }} {{- end }} diff --git a/charts/airflow/templates/worker/worker-service.yaml b/charts/airflow/templates/worker/worker-service.yaml index a7dda49d..3367ce49 100644 --- a/charts/airflow/templates/worker/worker-service.yaml +++ b/charts/airflow/templates/worker/worker-service.yaml @@ -1,6 +1,6 @@ {{- if .Values.workers.enabled }} -# Headless service for stable DNS entries of StatefulSet members. apiVersion: v1 +## this Service gives stable DNS entries for workers, used by webserver for logs kind: Service metadata: name: {{ include "airflow.fullname" . }}-worker diff --git a/charts/airflow/templates/worker/worker-statefulset.yaml b/charts/airflow/templates/worker/worker-statefulset.yaml index 9c8e2615..b1e3cabf 100644 --- a/charts/airflow/templates/worker/worker-statefulset.yaml +++ b/charts/airflow/templates/worker/worker-statefulset.yaml @@ -1,7 +1,7 @@ +{{- $extraPipPackages := concat .Values.airflow.extraPipPackages .Values.workers.extraPipPackages }} {{- if .Values.workers.enabled }} -## A StatefulSet is used to give workers consistent names for DNS, -## allowing the web server to access the log files. apiVersion: apps/v1 +## StatefulSet gives workers consistent DNS names, allowing webserver access to log files kind: StatefulSet metadata: name: {{ include "airflow.fullname" . }}-worker @@ -33,12 +33,7 @@ spec: template: metadata: annotations: - checksum/config-env: {{ include (print $.Template.BasePath "/config/configmap-env.yaml") . | sha256sum }} - checksum/config-git-clone: {{ include (print $.Template.BasePath "/config/configmap-scripts-git.yaml") . | sha256sum }} - checksum/config-scripts: {{ include (print $.Template.BasePath "/config/configmap-scripts.yaml") . | sha256sum }} - {{- if and (.Values.dags.git.url) (.Values.dags.git.ref) }} - checksum/dags-git-ref: {{ .Values.dags.git.ref | sha256sum }} - {{- end }} + checksum/secret-config: {{ include (print $.Template.BasePath "/config/secret-config.yaml") . 
| sha256sum }} {{- if .Values.airflow.podAnnotations }} {{- toYaml .Values.airflow.podAnnotations | nindent 8 }} {{- end }} @@ -56,11 +51,11 @@ spec: {{- toYaml .Values.workers.podLabels | nindent 8 }} {{- end }} spec: + restartPolicy: Always {{- if .Values.airflow.image.pullSecret }} imagePullSecrets: - name: {{ .Values.airflow.image.pullSecret }} {{- end }} - restartPolicy: Always {{- if .Values.workers.celery.gracefullTermination }} terminationGracePeriodSeconds: {{ add .Values.workers.terminationPeriod .Values.workers.celery.gracefullTerminationPeriod }} {{- else }} @@ -83,205 +78,98 @@ spec: securityContext: {{- toYaml .Values.workers.securityContext | nindent 8 }} {{- end }} - {{- if .Values.dags.initContainer.enabled }} initContainers: - - name: git-clone - image: {{ .Values.dags.initContainer.image.repository }}:{{ .Values.dags.initContainer.image.tag }} - imagePullPolicy: {{ .Values.dags.initContainer.image.pullPolicy }} - envFrom: - - configMapRef: - name: "{{ include "airflow.fullname" . }}-env" - env: - {{- include "airflow.mapenvsecrets" . | indent 12 }} - resources: - {{- toYaml .Values.dags.initContainer.resources | nindent 12 }} - command: - - /home/airflow/git/git-clone.sh - args: - - "{{ .Values.dags.git.url }}" - - "{{ .Values.dags.git.ref }}" - - "{{ .Values.dags.initContainer.mountPath }}{{ .Values.dags.initContainer.syncSubPath }}" - - "{{ .Values.dags.git.repoHost }}" - - "{{ .Values.dags.git.repoPort }}" - - "{{ .Values.dags.git.privateKeyName }}" - volumeMounts: - - name: git-clone - mountPath: /home/airflow/git - - name: dags-data - mountPath: "{{ .Values.dags.initContainer.mountPath }}" - {{- if .Values.dags.git.secret }} - - name: git-clone-secret - mountPath: /keys - {{- end }} - {{- end }} + {{- if $extraPipPackages }} + {{- include "airflow.init_container.install_pip_packages" (dict "Values" .Values "extraPipPackages" $extraPipPackages) | indent 8 }} + {{- end }} + {{- include "airflow.init_container.check_db" . | indent 8 }} + {{- include "airflow.init_container.wait_for_db_migrations" . | indent 8 }} containers: - {{- if .Values.dags.git.gitSync.enabled }} - - name: git-sync - image: {{ .Values.dags.git.gitSync.image.repository }}:{{ .Values.dags.git.gitSync.image.tag }} - imagePullPolicy: {{ .Values.dags.git.gitSync.image.pullPolicy }} + - name: airflow-worker + {{- include "airflow.image" . | indent 10 }} + resources: + {{- toYaml .Values.workers.resources | nindent 12 }} envFrom: - - configMapRef: - name: "{{ include "airflow.fullname" . }}-env" + {{- include "airflow.envFrom" . | indent 12 }} env: - {{- include "airflow.mapenvsecrets" . | indent 12 }} - command: - - /home/airflow/git/git-sync.sh - args: - - "{{ .Values.dags.git.url }}" - - "{{ .Values.dags.git.ref }}" - - "{{ .Values.dags.initContainer.mountPath }}{{ .Values.dags.initContainer.syncSubPath }}" - - "{{ .Values.dags.git.repoHost }}" - - "{{ .Values.dags.git.repoPort }}" - - "{{ .Values.dags.git.privateKeyName }}" - - "{{ .Values.dags.git.gitSync.refreshTime }}" - volumeMounts: - - name: git-clone - mountPath: /home/airflow/git - - name: dags-data - mountPath: "{{ .Values.dags.initContainer.mountPath }}" - {{- if .Values.dags.git.secret }} - - name: git-clone-secret - mountPath: /keys + {{- include "airflow.env" . 
| indent 12 }} + {{- if $extraPipPackages }} + - name: PYTHONPATH + value: /opt/python/site-packages {{- end }} - resources: - {{- toYaml .Values.dags.git.gitSync.resources | nindent 12 }} - {{- end }} - - name: {{ .Chart.Name }}-worker - imagePullPolicy: {{ .Values.airflow.image.pullPolicy }} - image: "{{ .Values.airflow.image.repository }}:{{ .Values.airflow.image.tag }}" - {{- if and (eq .Values.airflow.executor "CeleryExecutor") (.Values.workers.celery.gracefullTermination) }} + {{- if .Values.workers.celery.gracefullTermination }} lifecycle: preStop: exec: command: - "timeout" - "{{ .Values.workers.celery.gracefullTerminationPeriod }}s" - - "/home/airflow/scripts/graceful-stop-celery-worker.sh" - {{- end}} - envFrom: - - configMapRef: - name: "{{ include "airflow.fullname" . }}-env" - env: - {{- include "airflow.mapenvsecrets" . | indent 12 }} - volumeMounts: - - name: scripts - mountPath: /home/airflow/scripts - {{- $secretsDir := .Values.workers.secretsDir }} - {{- if .Values.workers.secretsMap }} - - name: {{ .Values.workers.secretsMap }}-volume - readOnly: true - mountPath: {{ $secretsDir }} - {{- else }} - {{- range .Values.workers.secrets }} - - name: {{ . }}-volume - readOnly: true - mountPath: {{ $secretsDir }}/{{ . }} - {{- end }} - {{- end }} - {{- if .Values.dags.persistence.enabled }} - - name: dags-data - mountPath: {{ .Values.dags.path }} - subPath: {{ .Values.dags.persistence.subPath }} - {{- else if or (.Values.dags.initContainer.enabled) (.Values.dags.git.gitSync.enabled) }} - - name: dags-data - mountPath: {{ .Values.dags.path }} - {{- end }} - {{- if .Values.logs.persistence.enabled }} - - name: logs-data - mountPath: {{ .Values.logs.path }} - subPath: {{ .Values.logs.persistence.subPath }} - {{- end }} - {{- range .Values.airflow.extraConfigmapMounts }} - - name: {{ .name }} - mountPath: {{ .mountPath }} - readOnly: {{ .readOnly }} - {{- if .subPath }} - subPath: {{ .subPath }} - {{- end }} - {{- end }} - {{- if .Values.airflow.extraVolumeMounts }} - {{- toYaml .Values.airflow.extraVolumeMounts | nindent 12 }} - {{- end }} + - "python" + - "-Wignore" + - "-c" + - | + import os + import time + import subprocess + from celery import Celery + from celery.app.control import Inspect + from typing import List + + def run_command(cmd: List[str]) -> str: + process = subprocess.Popen(cmd, stdout=subprocess.PIPE) + output, error = process.communicate() + if error is not None: + raise Exception(error) + else: + return output.decode(encoding="utf-8") + + broker_url = run_command(["bash", "-c", "eval $AIRFLOW__CELERY__BROKER_URL_CMD"]) + local_celery_host = f"celery@{os.environ['HOSTNAME']}" + app = Celery(broker=broker_url) + + # prevent the worker accepting new tasks + print(f"canceling celery consumer for {local_celery_host}...") + app.control.cancel_consumer("default", destination=[local_celery_host]) + + # wait until the worker finishes its current tasks + i = Inspect(app=app, destination=[local_celery_host]) + active_tasks = i.active()[local_celery_host] + while len(active_tasks) > 0: + print(f"waiting [10 sec] for remaining tasks to finish: {[task.get('name') for task in active_tasks]}") + time.sleep(10) + active_tasks = i.active()[local_celery_host] + {{- end }} + ports: + - name: wlog + containerPort: 8793 + protocol: TCP command: - "/usr/bin/dumb-init" - "--" args: - "bash" - "-c" - - > - true \ - {{- if gt .Values.workers.initialStartupDelay 0.0 }} - && echo "*** waiting {{ .Values.workers.initialStartupDelay }}s..." 
\ - && sleep {{ .Values.workers.initialStartupDelay }} \ - {{- end }} - && mkdir -p /home/airflow/.local/bin \ - && export PATH="/home/airflow/.local/bin:$PATH" \ - {{- if .Values.dags.installRequirements }} - && echo "*** installing requirements..." \ - && /home/airflow/scripts/install-requirements.sh \ - {{- end }} - {{- if .Values.airflow.extraPipPackages }} - && echo "*** installing global extra pip packages..." \ - && pip install --user {{ range .Values.airflow.extraPipPackages }} {{ . | quote }} {{ end }} \ - {{- end }} - && echo "*** running worker..." \ - && exec airflow worker - ports: - - name: wlog - containerPort: 8793 - protocol: TCP - resources: - {{- toYaml .Values.workers.resources | nindent 12 }} + {{- if .Values.airflow.legacyCommands }} + - "exec airflow worker" + {{- else }} + - "exec airflow celery worker" + {{- end }} + {{- $extraVolumeMounts := .Values.workers.extraVolumeMounts }} + {{- $volumeMounts := include "airflow.volumeMounts" (dict "Values" .Values "extraPipPackages" $extraPipPackages "extraVolumeMounts" $extraVolumeMounts) }} + {{- if $volumeMounts }} + volumeMounts: + {{- $volumeMounts | indent 12 }} + {{- end }} + {{- if .Values.dags.gitSync.enabled }} + {{- include "airflow.container.git_sync" . | indent 8 }} + {{- end }} {{- if .Values.airflow.extraContainers }} {{- toYaml .Values.airflow.extraContainers | nindent 8 }} {{- end }} + {{- $extraVolumes := .Values.workers.extraVolumes }} + {{- $volumes := include "airflow.volumes" (dict "Values" .Values "extraPipPackages" $extraPipPackages "extraVolumes" $extraVolumes) }} + {{- if $volumes }} volumes: - - name: scripts - configMap: - name: {{ include "airflow.fullname" . }}-scripts - defaultMode: 0755 - {{- if .Values.workers.secretsMap }} - - name: {{ .Values.workers.secretsMap }}-volume - secret: - secretName: {{ .Values.workers.secretsMap }} - {{- else }} - {{- range .Values.workers.secrets }} - - name: {{ . }}-volume - secret: - secretName: {{ . }} - {{- end }} - {{- end }} - {{- if .Values.dags.persistence.enabled }} - - name: dags-data - persistentVolumeClaim: - claimName: {{ .Values.dags.persistence.existingClaim | default (include "airflow.fullname" . ) }} - {{- else if or (.Values.dags.initContainer.enabled) (.Values.dags.git.gitSync.enabled) }} - - name: dags-data - emptyDir: {} - {{- end }} - {{- if .Values.logs.persistence.enabled }} - - name: logs-data - persistentVolumeClaim: - claimName: {{ .Values.logs.persistence.existingClaim | default (printf "%s-logs" (include "airflow.fullname" . | trunc 58 )) }} - {{- end }} - {{- if or (.Values.dags.initContainer.enabled) (.Values.dags.git.gitSync.enabled) }} - - name: git-clone - configMap: - name: {{ include "airflow.fullname" . 
}}-scripts-git - defaultMode: 0755 - {{- if .Values.dags.git.secret }} - - name: git-clone-secret - secret: - secretName: {{ .Values.dags.git.secret }} - defaultMode: 0700 - {{- end }} - {{- end }} - {{- range .Values.airflow.extraConfigmapMounts }} - - name: {{ .name }} - configMap: - name: {{ .configMap }} - {{- end }} - {{- if .Values.airflow.extraVolumes }} - {{- toYaml .Values.airflow.extraVolumes | nindent 8 }} - {{- end }} + {{- $volumes | indent 8 }} + {{- end }} {{- end }} diff --git a/charts/airflow/values.yaml b/charts/airflow/values.yaml index e778a8c5..6c860995 100644 --- a/charts/airflow/values.yaml +++ b/charts/airflow/values.yaml @@ -2,30 +2,32 @@ # Airflow - Common Configs ################################### airflow: - ## configs for the docker image of the web/scheduler/worker + ## if we use legacy 1.10 airflow commands + ## + legacyCommands: false + + ## configs for the airflow container image ## image: repository: apache/airflow - tag: 1.10.12-python3.6 + tag: 2.0.1-python3.8 ## values: Always or IfNotPresent pullPolicy: IfNotPresent pullSecret: "" + uid: 50000 + gid: 50000 ## the airflow executor type to use ## ## NOTE: - ## - this should be `CeleryExecutor` or `KubernetesExecutor` - ## - if set to `KubernetesExecutor`: - ## - ensure that `workers.enabled` is `false` - ## - ensure that `flower.enabled` is `false` - ## - ensure that `redis.enabled` is `false` - ## - ensure that K8S configs are set in `airflow.config` - ## - we set these configs automatically: - ## - `AIRFLOW__KUBERNETES__NAMESPACE` - ## - `AIRFLOW__KUBERNETES__WORKER_SERVICE_ACCOUNT_NAME` - ## - `AIRFLOW__KUBERNETES__ENV_FROM_CONFIGMAP_REF` - ## - `AIRFLOW__KUBERNETES__WORKER_CONTAINER_REPOSITORY` - ## - `AIRFLOW__KUBERNETES__WORKER_CONTAINER_TAG` + ## - allowed values: "CeleryExecutor", "CeleryKubernetesExecutor", "KubernetesExecutor" + ## - if you set KubernetesExecutor or CeleryKubernetesExecutor, we automatically set: + ## - AIRFLOW__KUBERNETES__ENV_FROM_CONFIGMAP_REF [unused from Airflow 2.0+] + ## - AIRFLOW__KUBERNETES__NAMESPACE + ## - AIRFLOW__KUBERNETES__POD_TEMPLATE_FILE + ## - AIRFLOW__KUBERNETES__WORKER_CONTAINER_REPOSITORY + ## - AIRFLOW__KUBERNETES__WORKER_CONTAINER_TAG + ## - AIRFLOW__KUBERNETES__WORKER_SERVICE_ACCOUNT_NAME [unused from Airflow 2.0+] ## executor: CeleryExecutor @@ -33,160 +35,300 @@ airflow: ## ## WARNING: ## - you MUST customise this value, otherwise the encryption will be somewhat pointless - ## - ## NOTE: - ## - to prevent this value being stored in your values.yaml (and airflow-env ConfigMap), - ## consider using `airflow.extraEnv` to define it from a pre-created secret + ## - consider using `airflow.extraEnv` with a pre-created Secret rather than this config ## ## GENERATE: ## python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print(FERNET_KEY)" ## fernetKey: "7T512UXSSmBOkpWimFHIVb8jK6lfmSAvx4mO6Arehnc=" - ## environment variables for the web/scheduler/worker Pods (for airflow configs) - ## - ## WARNING: - ## - don't include sensitive variables in here, instead make use of `airflow.extraEnv` with Secrets - ## - don't specify `AIRFLOW__CORE__SQL_ALCHEMY_CONN`, `AIRFLOW__CELERY__RESULT_BACKEND`, - ## or `AIRFLOW__CELERY__BROKER_URL`, they are dynamically created from chart values + ## environment variables for airflow configs ## ## NOTE: - ## - airflow allows environment configs to be set as environment variables - ## - they take the form: AIRFLOW__
   ##
   ## GENERATE:
   ##    python -c "from cryptography.fernet import Fernet; FERNET_KEY = Fernet.generate_key().decode(); print(FERNET_KEY)"
   ##
   fernetKey: "7T512UXSSmBOkpWimFHIVb8jK6lfmSAvx4mO6Arehnc="

-  ## environment variables for the web/scheduler/worker Pods (for airflow configs)
-  ##
-  ## WARNING:
-  ## - don't include sensitive variables in here, instead make use of `airflow.extraEnv` with Secrets
-  ## - don't specify `AIRFLOW__CORE__SQL_ALCHEMY_CONN`, `AIRFLOW__CELERY__RESULT_BACKEND`,
-  ##   or `AIRFLOW__CELERY__BROKER_URL`, they are dynamically created from chart values
+  ## environment variables for airflow configs
   ##
   ## NOTE:
-  ## - airflow allows environment configs to be set as environment variables
-  ## - they take the form: AIRFLOW__<section>__<key>
-  ## - see the Airflow documentation: https://airflow.apache.org/docs/stable/howto/set-config.html
+  ## - config docs: https://airflow.apache.org/docs/apache-airflow/stable/configurations-ref.html
+  ## - airflow configs env-vars are structured: "AIRFLOW__{config_section}__{config_name}"
   ##
   ## EXAMPLE:
   ##   config:
-  ##     ## Security
-  ##     AIRFLOW__CORE__SECURE_MODE: "True"
-  ##     AIRFLOW__API__AUTH_BACKEND: "airflow.api.auth.backend.deny_all"
-  ##     AIRFLOW__WEBSERVER__EXPOSE_CONFIG: "False"
-  ##     AIRFLOW__WEBSERVER__RBAC: "False"
-  ##
-  ##     ## DAGS
-  ##     AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL: "30"
+  ##     ## dags
   ##     AIRFLOW__CORE__LOAD_EXAMPLES: "False"
+  ##     AIRFLOW__SCHEDULER__DAG_DIR_LIST_INTERVAL: "30"
   ##
-  ##     ## Email (SMTP)
+  ##     ## email
   ##     AIRFLOW__EMAIL__EMAIL_BACKEND: "airflow.utils.email.send_email_smtp"
   ##     AIRFLOW__SMTP__SMTP_HOST: "smtpmail.example.com"
-  ##     AIRFLOW__SMTP__SMTP_STARTTLS: "False"
-  ##     AIRFLOW__SMTP__SMTP_SSL: "False"
-  ##     AIRFLOW__SMTP__SMTP_PORT: "25"
   ##     AIRFLOW__SMTP__SMTP_MAIL_FROM: "admin@example.com"
+  ##     AIRFLOW__SMTP__SMTP_PORT: "25"
+  ##     AIRFLOW__SMTP__SMTP_SSL: "False"
+  ##     AIRFLOW__SMTP__SMTP_STARTTLS: "False"
   ##
-  ##     ## Disable noisy "Handling signal: ttou" Gunicorn log messages
-  ##     GUNICORN_CMD_ARGS: "--log-level WARNING"
+  ##     ## domain used in airflow emails
+  ##     AIRFLOW__WEBSERVER__BASE_URL: "http://airflow.example.com"
   ##
-  ##     ## Proxy Config
+  ##     ## other environment variables
   ##     HTTP_PROXY: "http://proxy.example.com:8080"
   ##
   config: {}

-  ## extra annotations for the web/scheduler/worker Pods
+  ## a list of initial users to create
+  ##
+  users:
+    - username: admin
+      password: admin
+      role: Admin
+      email: admin@example.com
+      firstName: admin
+      lastName: admin
+
+  ## if we update users or just create them the first time (lookup by `username`)
+  ##
+  ## NOTE:
+  ## - if enabled, the chart will revert any changes made in the web-ui to users defined
+  ##   in `users` (including passwords)
+  ##
+  usersUpdate: true
+
+  ## a list of initial connections to create
   ##
   ## EXAMPLE:
-  ##   podAnnotations:
-  ##     iam.amazonaws.com/role: "airflow-Role"
+  ##   connections:
+  ##     ## see docs: https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/connections/aws.html
+  ##     - id: my_aws
+  ##       type: aws
+  ##       description: my AWS connection
+  ##       extra: |-
+  ##         { "aws_access_key_id": "XXXXXXXX",
+  ##           "aws_secret_access_key": "XXXXXXXX",
+  ##           "region_name":"eu-central-1" }
+  ##     ## see docs: https://airflow.apache.org/docs/apache-airflow-providers-google/stable/connections/gcp.html
+  ##     - id: my_gcp
+  ##       type: google_cloud_platform
+  ##       description: my GCP connection
+  ##       extra: |-
+  ##         { "extra__google_cloud_platform__keyfile_dict": "XXXXXXXX",
+  ##           "extra__google_cloud_platform__num_retries": "5" }
   ##
-  podAnnotations: {}
+  connections: []

-  ## extra environment variables for the web/scheduler/worker (AND flower) Pods
+  ## if we update connections or just create them the first time (lookup by `id`)
+  ##
+  ## NOTE:
+  ## - if enabled, the chart will revert any changes made in the web-ui to connections
+  ##   defined in `connections`
+  ##
+  connectionsUpdate: true
+
+  ## a list of initial variables to create
   ##
   ## EXAMPLE:
-  ##   extraEnv:
-  ##     - name: AIRFLOW__CORE__FERNET_KEY
-  ##       valueFrom:
-  ##         secretKeyRef:
-  ##           name: airflow-fernet-key
-  ##           key: value
-  ##     - name: AIRFLOW__LDAP__BIND_PASSWORD
-  ##       valueFrom:
-  ##         secretKeyRef:
-  ##           name: airflow-ldap-password
-  ##           key: value
+  ##   variables:
+  ##     - key: "var_1"
+
## value: "my_value_1" + ## - key: "var_2" + ## value: "my_value_2" ## - extraEnv: [] + variables: [] + + ## if we update variables or just create them the first time (lookup by `key`) + ## + ## NOTE: + ## - if enabled, the chart will revert any changes made in the web-ui to variables + ## defined in `variables` + ## + variablesUpdate: true - ## extra configMap volumeMounts for the web/scheduler/worker Pods + ## a list of initial pools to create ## ## EXAMPLE: - ## extraConfigmapMounts: - ## - name: airflow-webserver-config - ## mountPath: /opt/airflow/webserver_config.py - ## configMap: airflow-webserver-config - ## readOnly: true - ## subPath: webserver_config.py + ## pools: + ## - name: "pool_1" + ## slots: 5 + ## description: "example pool with 5 slots" + ## - name: "pool_2" + ## slots: 10 + ## description: "example pool with 10 slots" ## - extraConfigmapMounts: [] + pools: [] - ## extra containers for the web/scheduler/worker Pods + ## if we update pools or just create them the first time (lookup by `name`) ## - ## EXAMPLE: (a sidecar that syncs DAGs from object storage) - ## extraContainers: - ## - name: s3-sync - ## image: my-user/s3sync:latest - ## volumeMounts: - ## - name: synchronised-dags - ## mountPath: /dags + ## NOTE: + ## - if enabled, the chart will revert any changes made in the web-ui to pools + ## defined in `pools` ## - extraContainers: [] + poolsUpdate: true - ## extra pip packages to install in the web/scheduler/worker Pods + ## extra annotations for the web/scheduler/worker/flower Pods + ## + podAnnotations: {} + + ## extra pip packages to install in the web/scheduler/worker/flower Pods ## ## EXAMPLE: ## extraPipPackages: - ## - "airflow-exporter==1.3.1" + ## - "SomeProject==1.0.0" ## extraPipPackages: [] - ## extra volumeMounts for the web/scheduler/worker Pods + ## extra environment variables for the web/scheduler/worker/flower Pods ## - ## EXAMPLE: - ## extraVolumeMounts: - ## - name: synchronised-dags - ## mountPath: /opt/airflow/dags + ## SPEC - EnvVar: + ## https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#envvar-v1-core + ## + extraEnv: [] + + ## extra containers for the web/scheduler/worker/flower Pods + ## + ## SPEC - Container: + ## https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#container-v1-core + ## + extraContainers: [] + + ## extra VolumeMounts for the web/scheduler/worker/flower Pods + ## + ## SPEC - VolumeMount: + ## https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#volumemount-v1-core ## extraVolumeMounts: [] - ## extra volumes for the web/scheduler/worker Pods + ## extra Volumes for the web/scheduler/worker/flower Pods ## - ## EXAMPLE: - ## extraVolumes: - ## - name: synchronised-dags - ## emptyDir: {} + ## SPEC - Volume: + ## https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#volume-v1-core ## extraVolumes: [] + ## configs to generate the AIRFLOW__KUBERNETES__POD_TEMPLATE_FILE + ## + ## NOTE: + ## - the generated "pod_template.yaml" is only mounted if `airflow.executor` is: + ## "CeleryKubernetesExecutor" or "KubernetesExecutor" + ## - values like `dags.gitSync.enabled` are respected by including the required sidecar + ## containers in the template + ## - the global `airflow.extraPipPackages` will NOT be installed in any circumstance + ## - read the airflow docs for pod-template-file: + ## https://airflow.apache.org/docs/apache-airflow/stable/executor/kubernetes.html#pod-template-file + ## + kubernetesPodTemplate: + ## the full text value to mount as the 
"pod_template.yaml" file + ## + ## NOTE: + ## - if set, will override all other values + ## + ## EXAMPLE: + ## stringOverride: |- + ## apiVersion: v1 + ## kind: Pod + ## metadata: + ## name: dummy-name + ## spec: + ## containers: + ## - name: base + ## ... + ## ... + ## volumes: [] + ## + stringOverride: "" + + ## the nodeSelector configs for the Pod template + ## + ## DOCS: + ## https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector + ## + nodeSelector: {} + + ## the affinity configs for the Pod template + ## + ## SPEC - Affinity: + ## https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#affinity-v1-core + ## + affinity: {} + + ## the toleration configs for the Pod template + ## + ## SPEC - Toleration: + ## https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#toleration-v1-core + ## + tolerations: [] + + ## annotations for the Pod template + ## + podAnnotations: {} + + ## the security context for the Pod template + ## + ## SPEC - SecurityContext: + ## https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#securitycontext-v1-core + ## + securityContext: {} + + ## extra pip packages to install in the Pod template + ## + ## EXAMPLE: + ## extraPipPackages: + ## - "SomeProject==1.0.0" + ## + extraPipPackages: [] + + ## extra VolumeMounts for the Pod template + ## + ## SPEC - VolumeMount: + ## https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#volumemount-v1-core + ## + extraVolumeMounts: [] + + ## extra Volumes for the Pod template + ## + ## SPEC - Volume: + ## https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#volume-v1-core + ## + extraVolumes: [] ################################### # Airflow - Scheduler Configs ################################### scheduler: + ## the number of scheduler Pods to run + ## + ## NOTE: + ## - if you set this >1 we recommend defining a `scheduler.podDisruptionBudget` + ## + replicas: 1 + ## resource requests/limits for the scheduler Pod ## - ## EXAMPLE: - ## resources: - ## requests: - ## cpu: "1000m" - ## memory: "1Gi" + ## SPEC - ResourceRequirements: + ## https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#resourcerequirements-v1-core ## resources: {} ## the nodeSelector configs for the scheduler Pods ## + ## DOCS: + ## https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector + ## nodeSelector: {} ## the affinity configs for the scheduler Pods ## + ## SPEC - Affinity: + ## https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#affinity-v1-core + ## affinity: {} ## the toleration configs for the scheduler Pods ## + ## SPEC - Toleration: + ## https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#toleration-v1-core + ## tolerations: [] ## the security context for the scheduler Pods ## + ## SPEC - SecurityContext: + ## https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#securitycontext-v1-core + ## securityContext: {} ## labels for the scheduler Deployment @@ -201,11 +343,11 @@ scheduler: ## annotations: {} - ## Pod Annotations for the scheduler Deployment + ## Pod annotations for the scheduler Deployment ## podAnnotations: {} - ## if we should tell Kubernetes Autoscaler that its safe to evict these Pods + ## if we add the annotation: "cluster-autoscaler.kubernetes.io/safe-to-evict" = "true" ## safeToEvict: true @@ -214,198 +356,130 @@ scheduler: podDisruptionBudget: ## if a PodDisruptionBudget resource is created for the scheduler ## - enabled: true + enabled: 
false

    ## the maximum unavailable pods/percentage for the scheduler
    ##
-    ## NOTE:
-    ## - as there is only ever a single scheduler Pod,
-    ##   this must be 100% for Kubernetes to be able to migrate it
-    ##
-    maxUnavailable: "100%"
+    maxUnavailable: ""

    ## the minimum available pods/percentage for the scheduler
    ##
    minAvailable: ""

-  ## custom airflow connections for the airflow scheduler
+  ## sets the `airflow --num_runs` parameter used to run the airflow scheduler
   ##
-  ## EXAMPLE:
-  ##   connections:
-  ##     - id: my_aws
-  ##       type: aws
-  ##       extra: |
-  ##         {
-  ##           "aws_access_key_id": "XXXXXXXX",
-  ##           "aws_secret_access_key": "XXXXXXXX",
-  ##           "region_name":"eu-central-1"
-  ##         }
-  ##
-  connections: []
-
-  ## if `scheduler.connections` are deleted and re-added after each scheduler restart
-  ##
-  refreshConnections: true
+  numRuns: -1

-  ## the name of an existing Secret containing an `add-connections.sh` script to run on scheduler start
+  ## configs for the scheduler Pods' liveness probe
   ##
   ## NOTE:
-  ## - if this is non-empty, `scheduler.connections` will be ignored
-  ## - use this if you don't want to store connections in your values.yaml
+  ## - `periodSeconds` x `failureThreshold` = max seconds a scheduler can be unhealthy
   ##
-  ## EXAMPLE SECRET:
-  ##   apiVersion: v1
-  ##   kind: Secret
-  ##   metadata:
-  ##     name: my-airflow-connections
-  ##   type: Opaque
-  ##   stringData:
-  ##     add-connections.sh: |
-  ##       #!/usr/bin/env bash
-  ##
-  ##       # remove any existing connection
-  ##       airflow connections --delete \
-  ##         --conn_id "my_aws"
-  ##
-  ##       # re-add your custom connection
-  ##       airflow connections --add \
-  ##         --conn_id "my_aws" \
-  ##         --conn_type "aws" \
-  ##         --conn_extra "{\"region_name\":\"eu-central-1\"}"
-  ##
-  existingSecretConnections: ""
+  livenessProbe:
+    enabled: true
+    initialDelaySeconds: 10
+    periodSeconds: 30
+    timeoutSeconds: 10
+    failureThreshold: 5

-  ## custom airflow variables for the airflow scheduler
-  ##
-  ## NOTE:
-  ## - THIS IS A STRING, containing a JSON object, with your variables in it
+  ## extra pip packages to install in the scheduler Pods
   ##
   ## EXAMPLE:
-  ##   variables: |
-  ##     { "environment": "dev" }
+  ##   extraPipPackages:
+  ##     - "SomeProject==1.0.0"
   ##
-  variables: |
-    {}
+  extraPipPackages: []

-  ## custom airflow pools for the airflow scheduler
+  ## extra VolumeMounts for the scheduler Pods
   ##
-  ## NOTE:
-  ## - THIS IS A STRING, containing a JSON object, with your pools in it
+  ## SPEC - VolumeMount:
+  ##   https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#volumemount-v1-core
   ##
-  ## EXAMPLE:
-  ##   pools: |
-  ##     {
-  ##       "example": {
-  ##         "description": "This is an example pool with 2 slots.",
-  ##         "slots": 2
-  ##       }
-  ##     }
-  ##
-  pools: |
-    {}
+  extraVolumeMounts: []

-  ## the value of the `airflow --num_runs` parameter used to run the airflow scheduler
+  ## extra Volumes for the scheduler Pods
   ##
-  ## NOTE:
-  ## - this is the number of 'dag refreshes' before the airflow scheduler process will exit
-  ## - if not set to `-1`, the scheduler Pod will restart regularly
-  ## - for most environments, `-1` will be an acceptable value
-  ##
-  numRuns: -1
-
-  ## if we run `airflow upgradedb || airflow db upgrade` when the scheduler starts
+  ## SPEC - Volume:
+  ##   https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#volume-v1-core
   ##
-  initdb: true
+  extraVolumes: []

-  ## if we run `airflow upgradedb || airflow db upgrade` inside a special initContainer
+  ## extra init containers to run in the scheduler Pods
   ##
-  ## NOTE:
-  ## - may be needed if you have custom database hooks configured that will be pulled in by git-sync
+  ## SPEC - Container:
+  ##   https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#container-v1-core
   ##
-  preinitdb: false
-
-  ## the number of seconds to wait (in bash) before starting the scheduler container
-  ##
-  initialStartupDelay: 0
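+  ## EXAMPLE: (a sketch only; the container name is illustrative, and the chown targets
+  ##           the default `airflow.image.uid`/`gid` of 50000, assuming your logs volume
+  ##           is named "logs-data" as in this chart's templates)
+  ##   extraInitContainers:
+  ##     - name: volume-mount-hack
+  ##       image: busybox
+  ##       command: ["sh", "-c", "chown -R 50000:50000 /opt/airflow/logs"]
+  ##       volumeMounts:
+  ##         - name: logs-data
+  ##           mountPath: /opt/airflow/logs
+  ##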
+  extraInitContainers: []

-  ## configs for the scheduler liveness probe
-  ##
-  ## NOTE:
-  ## - the maximum number of seconds the scheduler can be unhealthy =
-  ##   `livenessProbe.periodSeconds` x `livenessProbe.failureThreshold`
+###################################
+# Airflow - WebUI Configs
+###################################
+web:
+  ## configs to generate webserver_config.py
   ##
-  livenessProbe:
-    enabled: true
-    ## the number of seconds to wait before checking pod health
+  webserverConfig:
+    ## the full text value to mount as the webserver_config.py file
     ##
     ## NOTE:
-    ## - make larger if you are installing many packages with:
-    ##   `airflow.extraPipPackages`, `web.extraPipPackages`, or `dags.installRequirements`
+    ## - if set, will override all values except `webserverConfig.existingSecret`
     ##
-    initialDelaySeconds: 300
-    periodSeconds: 30
-    failureThreshold: 5
-
-  ## the directory in which to mount secrets on scheduler containers
-  ##
-  secretsDir: /var/airflow/secrets
+    ## EXAMPLE:
+    ##    stringOverride: |-
+    ##      from airflow import configuration as conf
+    ##      from flask_appbuilder.security.manager import AUTH_DB
+    ##
+    ##      # the SQLAlchemy connection string
+    ##      SQLALCHEMY_DATABASE_URI = conf.get('core', 'SQL_ALCHEMY_CONN')
+    ##
+    ##      # use embedded DB for auth
+    ##      AUTH_TYPE = AUTH_DB
+    ##
+    stringOverride: ""

-  ## the names of existing Kubernetes Secrets to mount as files at `{workers.secretsDir}/<secret_name>/`
-  ##
-  secrets: []
+    ## the name of a pre-created Secret containing a `webserver_config.py` key
+    ##
+    existingSecret: ""

-  ## the name of an existing Kubernetes Secret to mount as files to `{web.secretsDir}/`
+  ## the number of web Pods to run
   ##
   ## NOTE:
-  ## - overrides `scheduler.secrets`
+  ## - if you set this >1 we recommend defining a `web.podDisruptionBudget`
   ##
-  secretsMap: ""
+  replicas: 1

-  ## extra init containers to run before the scheduler Pod
+  ## resource requests/limits for the web Pod
   ##
-  ## EXAMPLE:
-  ##   extraInitContainers:
-  ##     - name: volume-mount-hack
-  ##       image: busybox
-  ##       command: ["sh", "-c", "chown -R 1000:1000 logs"]
-  ##       volumeMounts:
-  ##         - mountPath: /opt/airflow/logs
-  ##           name: logs-data
-  ##
-  extraInitContainers: []
-
-###################################
-# Airflow - WebUI Configs
-###################################
-web:
-  ## resource requests/limits for the airflow web Pods
-  ##
-  ## EXAMPLE:
-  ##   resources:
-  ##     requests:
-  ##       cpu: "500m"
-  ##       memory: "1Gi"
+  ## SPEC - ResourceRequirements:
+  ##   https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#resourcerequirements-v1-core
   ##
   resources: {}

-  ## the number of web Pods to run
-  ##
-  replicas: 1
-
   ## the nodeSelector configs for the web Pods
   ##
+  ## DOCS:
+  ##   https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector
+  ##
   nodeSelector: {}

   ## the affinity configs for the web Pods
   ##
+  ## SPEC - Affinity:
+  ##   https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#affinity-v1-core
+  ##
   affinity: {}

   ## the toleration configs for the web Pods
   ##
+  ## SPEC - Toleration:
+  ##   https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#toleration-v1-core
+  ##
   tolerations: []

   ## the security context for the web Pods
   ##
+  ## SPEC - 
SecurityContext:
+  ##   https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#securitycontext-v1-core
+  ##
   securityContext: {}

   ## labels for the web Deployment
@@ -424,7 +498,7 @@ web:
   ##
   podAnnotations: {}

-  ## if we should tell Kubernetes Autoscaler that its safe to evict these Pods
+  ## if we add the annotation: "cluster-autoscaler.kubernetes.io/safe-to-evict" = "true"
   ##
   safeToEvict: true

@@ -433,10 +507,6 @@ web:
   podDisruptionBudget:
     ## if a PodDisruptionBudget resource is created for the web Deployment
     ##
-    ## WARNING:
-    ## - if you enable a PodDisruptionBudget, you should set `web.replicas` to a value
-    ##   large enough for Kubernetes to evict at least 1 Pod at a time
-    ##
     enabled: false

     ## the maximum unavailable pods/percentage for the web Deployment
@@ -460,119 +530,95 @@ web:
     nodePort:
       http: ""

-  ## sets `AIRFLOW__WEBSERVER__BASE_URL`
-  ##
-  ## NOTE:
-  ## - should be compatible with `ingress.web.path` config
-  ##
-  baseUrl: "http://localhost:8080"
-
-  ## sets `AIRFLOW__CORE__STORE_SERIALIZED_DAGS`
-  ##
-  ## NOTE:
-  ## - setting true will disable `git-sync` and `git-clone` containers in the web Pod
-  ## - Docs: https://airflow.apache.org/docs/stable/dag-serialization.html
-  ##
-  serializeDAGs: false
-
-  ## extra pip packages to install in the web container
-  ##
-  ## EXAMPLE: ( packages used by RBAC UI for OAuth )
-  ##   extraPipPackages:
-  ##     - "apache-airflow[google_auth]==1.10.10"
-  ##
-  extraPipPackages: []
-
-  ## the number of seconds to wait (in bash) before starting the web container
-  ##
-  initialStartupDelay: 0
-
-  ## the number of seconds to wait before declaring a new Pod available
-  ##
-  minReadySeconds: 5
-
-  ## configs for the web Service readiness probe
+  ## configs for the web Pods' readiness probe
   ##
   readinessProbe:
-    enabled: false
-    scheme: HTTP
+    enabled: true
     initialDelaySeconds: 10
     periodSeconds: 10
-    timeoutSeconds: 1
-    successThreshold: 1
-    failureThreshold: 3
+    timeoutSeconds: 5
+    failureThreshold: 6

-  ## configs for the web Service liveness probe
+  ## configs for the web Pods' liveness probe
   ##
   livenessProbe:
     enabled: true
-    scheme: HTTP
-    ## the number of seconds to wait before checking pod health
-    ##
-    ## NOTE:
-    ## - make larger if you are installing many packages with:
-    ##   `airflow.extraPipPackages`, `web.extraPipPackages`, or `dags.installRequirements`
-    ##
-    initialDelaySeconds: 300
-    periodSeconds: 30
-    timeoutSeconds: 3
-    successThreshold: 1
-    failureThreshold: 2
+    initialDelaySeconds: 10
+    periodSeconds: 10
+    timeoutSeconds: 5
+    failureThreshold: 6

-  ## the directory in which to mount secrets on web containers
+  ## extra pip packages to install in the web Pods
   ##
-  secretsDir: /var/airflow/secrets
+  ## EXAMPLE:
+  ##   extraPipPackages:
+  ##     - "SomeProject==1.0.0"
+  ##
+  extraPipPackages: []

-  ## the names of existing Kubernetes Secrets to mount as files at `{workers.secretsDir}/<secret_name>/`
+  ## extra VolumeMounts for the web Pods
+  ##
+  ## SPEC - VolumeMount:
+  ##   https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#volumemount-v1-core
   ##
-  secrets: []
+  extraVolumeMounts: []

-  ## the name of an existing Kubernetes Secret to mount as files to `{web.secretsDir}/`
+  ## extra Volumes for the web Pods
   ##
-  ## NOTE:
-  ## - overrides `web.secrets`
+  ## SPEC - Volume:
+  ##   https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#volume-v1-core
   ##
-  secretsMap: ""
+  extraVolumes: []

 ###################################
-# Airflow - Worker Configs
+# Airflow - Celery Worker Configs
 ###################################
 workers:
## if the airflow workers StatefulSet should be deployed ## enabled: true - ## resource requests/limits for the airflow worker Pods + ## the number of worker Pods to run ## - ## EXAMPLE: - ## resources: - ## requests: - ## cpu: "1000m" - ## memory: "2Gi" + ## NOTE: + ## - if you set this >1 we recommend defining a `workers.podDisruptionBudget` + ## - this is the minimum when `workers.autoscaling.enabled` is true ## - resources: {} + replicas: 1 - ## the number of workers Pods to run + ## resource requests/limits for the worker Pod ## - ## NOTE: - ## - when `workers.autoscaling.enabled` is true, this is the minimum + ## SPEC - ResourceRequirements: + ## https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#resourcerequirements-v1-core ## - replicas: 1 + resources: {} ## the nodeSelector configs for the worker Pods ## + ## DOCS: + ## https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector + ## nodeSelector: {} ## the affinity configs for the worker Pods ## + ## SPEC - Affinity: + ## https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#affinity-v1-core + ## affinity: {} ## the toleration configs for the worker Pods ## + ## SPEC - Toleration: + ## https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#toleration-v1-core + ## tolerations: [] ## the security context for the worker Pods ## + ## SPEC - SecurityContext: + ## https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#securitycontext-v1-core + ## securityContext: {} ## labels for the worker StatefulSet @@ -591,7 +637,7 @@ workers: ## podAnnotations: {} - ## if we should tell Kubernetes Autoscaler that its safe to evict these Pods + ## if we add the annotation: "cluster-autoscaler.kubernetes.io/safe-to-evict" = "true" ## safeToEvict: true @@ -600,13 +646,6 @@ workers: podDisruptionBudget: ## if a PodDisruptionBudget resource is created for the worker StatefulSet ## - ## WARNING: - ## - if you enable a PodDisruptionBudget, you should set `workers.replicas` to a value - ## large enough for Kubernetes to evict at least 1 Pod at a time - ## - if you enable `workers.celery.gracefullTermination`, you should consider - ## specifying a `workers.podDisruptionBudget.minAvailable` to prevent there - ## not being enough available workers during graceful termination waiting periods - ## enabled: false ## the maximum unavailable pods/percentage for the worker StatefulSet @@ -619,6 +658,9 @@ workers: ## configs for the HorizontalPodAutoscaler of the worker Pods ## + ## NOTE: + ## - if using git-sync, ensure `dags.gitSync.resources` is set + ## ## EXAMPLE: ## autoscaling: ## enabled: true @@ -636,40 +678,26 @@ workers: maxReplicas: 2 metrics: [] - ## the number of seconds to wait (in bash) before starting each worker container - ## - initialStartupDelay: 0 - ## configs for the celery worker Pods ## - ## NOTE: - ## - only takes effect if `airflow.executor` is `CeleryExecutor` - ## celery: - ## the number of tasks each celery worker can run at a time + ## if celery worker Pods are gracefully terminated ## - ## NOTE: - ## - sets AIRFLOW__CELERY__WORKER_CONCURRENCY - ## - instances: 16 - - ## if we should wait for tasks to finish before SIGTERM of the celery worker + ## graceful termination process: + ## 1. prevent worker accepting new tasks + ## 2. wait AT MOST `workers.celery.gracefullTerminationPeriod` for tasks to finish + ## 3. send SIGTERM to worker + ## 4. wait AT MOST `workers.terminationPeriod` for kill to finish + ## 5. 
send SIGKILL to worker
   ##
-  ## WARNING:
-  ## - consider using `workers.podDisruptionBudget.*` to prevent there not being
+  ## NOTE:
+  ## - consider defining a `workers.podDisruptionBudget` to prevent there not being
   ##   enough available workers during graceful termination waiting periods
   ##
   gracefullTermination: false

   ## how many seconds to wait for tasks to finish before SIGTERM of the celery worker
   ##
-  ## graceful shutdown lifecycle:
-  ## 1. prevent worker accepting new tasks
-  ## 2. wait AT MOST `workers.celery.gracefullTerminationPeriod` for tasks to finish
-  ## 3. send SIGTERM to worker
-  ## 4. wait AT MOST `workers.terminationPeriod` for kill to finish
-  ## 5. send SIGKILL to worker
-  ##
   gracefullTerminationPeriod: 600

   ## how many seconds to wait after SIGTERM before SIGKILL of the celery worker
@@ -680,60 +708,76 @@ workers:
   ##
   terminationPeriod: 60

-  ## directory in which to mount secrets on worker containers
+  ## extra pip packages to install in the worker Pod
   ##
-  secretsDir: /var/airflow/secrets
+  ## EXAMPLE:
+  ##   extraPipPackages:
+  ##     - "SomeProject==1.0.0"
+  ##
+  extraPipPackages: []

-  ## the names of existing Kubernetes Secrets to mount as files at `{workers.secretsDir}/<secret_name>/`
+  ## extra VolumeMounts for the worker Pods
+  ##
+  ## SPEC - VolumeMount:
+  ##   https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#volumemount-v1-core
   ##
-  secrets: []
+  extraVolumeMounts: []

-  ## the name of an existing Kubernetes Secret to mount as files to `{web.secretsDir}/`
+  ## extra Volumes for the worker Pods
   ##
-  ## NOTE:
-  ## - overrides `worker.secrets`
+  ## SPEC - Volume:
+  ##   https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#volume-v1-core
   ##
-  secretsMap: ""
+  extraVolumes: []

 ###################################
 # Airflow - Flower Configs
 ###################################
 flower:
-  ## if the Flower UI should be deployed
-  ##
-  ## NOTE:
-  ## - only takes effect if `airflow.executor` is `CeleryExecutor`
+  ## if the airflow flower UI should be deployed
   ##
   enabled: true

-  ## resource requests/limits for the flower Pods
+  ## the number of flower Pods to run
   ##
-  ## EXAMPLE:
-  ##   resources:
-  ##     requests:
-  ##       cpu: "100m"
-  ##       memory: "126Mi"
+  ## NOTE:
+  ## - if you set this >1 we recommend defining a `flower.podDisruptionBudget`
   ##
-  resources: {}
+  replicas: 1

-  ## the number of flower Pods to run
+  ## resource requests/limits for the flower Pod
   ##
-  replicas: 1
+  ## SPEC - ResourceRequirements:
+  ##   https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#resourcerequirements-v1-core
+  ##
+  resources: {}

   ## the nodeSelector configs for the flower Pods
   ##
+  ## DOCS:
+  ##   https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#nodeselector
+  ##
   nodeSelector: {}

   ## the affinity configs for the flower Pods
   ##
+  ## SPEC - Affinity:
+  ##   https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#affinity-v1-core
+  ##
   affinity: {}

   ## the toleration configs for the flower Pods
   ##
+  ## SPEC - Toleration:
+  ##   https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#toleration-v1-core
+  ##
   tolerations: []

   ## the security context for the flower Pods
   ##
+  ## SPEC - SecurityContext:
+  ##   https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#securitycontext-v1-core
+  ##
   securityContext: {}

   ## labels for the flower Deployment
@@ -752,7 +796,7 @@ flower:
   ##
   podAnnotations: {}

-  ## if we should tell Kubernetes Autoscaler that its safe to evict these Pods
+  ## if we add the annotation: "cluster-autoscaler.kubernetes.io/safe-to-evict" = "true"
   ##
   safeToEvict: true

@@ -761,10 +805,6 @@ flower:
   podDisruptionBudget:
     ## if a PodDisruptionBudget resource is created for the flower Deployment
     ##
-    ## WARNING:
-    ## - if you enable a PodDisruptionBudget, you should set `flower.replicas` to a value
-    ##   large enough for Kubernetes to evict at least 1 Pod at a time
-    ##
     enabled: false

     ## the maximum unavailable pods/percentage for the flower Deployment
@@ -778,14 +818,14 @@ flower:
   ## the value of the flower `--auth` argument
   ##
   ## NOTE:
-  ## - see flower docs: https://flower.readthedocs.io/en/latest/auth.html#google-oauth-2-0
+  ## - see flower docs: https://flower.readthedocs.io/en/latest/auth.html#google-oauth-2-0
   ##
   oauthDomains: ""

   ## the name of a pre-created secret containing the basic authentication value for flower
   ##
   ## NOTE:
-  ## - This sets `AIRFLOW__CELERY__FLOWER_BASIC_AUTH`
+  ## - this will override any value of `config.AIRFLOW__CELERY__FLOWER_BASIC_AUTH`
   ##
   basicAuthSecret: ""

@@ -793,13 +833,6 @@ flower:
   ##
   basicAuthSecretKey: ""

-  ## sets `AIRFLOW__CELERY__FLOWER_URL_PREFIX`
-  ##
-  ## NOTE:
-  ## - should match `ingress.flower.path` config
-  ##
-  urlPrefix: ""
-
   ## configs for the Service of the flower Pods
   ##
   service:
@@ -809,27 +842,47 @@ flower:
     loadBalancerIP: ""
     loadBalancerSourceRanges: []
     nodePort:
-      http: ""
+      http:

-  ## the number of seconds to wait (in bash) before starting the flower container
+  ## configs for the flower Pods' readiness probe
   ##
-  initialStartupDelay: 0
+  readinessProbe:
+    enabled: true
+    initialDelaySeconds: 10
+    periodSeconds: 10
+    timeoutSeconds: 5
+    failureThreshold: 6

-  ## the number of seconds to wait before declaring a new Pod available
+  ## configs for the flower Pods' liveness probe
   ##
-  minReadySeconds: 5
+  livenessProbe:
+    enabled: true
+    initialDelaySeconds: 10
+    periodSeconds: 10
+    timeoutSeconds: 5
+    failureThreshold: 6

-  ## extra ConfigMaps to mount on the flower Pods
+  ## extra pip packages to install in the flower Pod
   ##
   ## EXAMPLE:
-  ##   extraConfigmapMounts:
-  ##     - name: extra-cert
-  ##       mountPath: /etc/ssl/certs/extra-cert.pem
-  ##       configMap: extra-certificates
-  ##       readOnly: true
-  ##       subPath: extra-cert.pem
+  ##   extraPipPackages:
+  ##     - "SomeProject==1.0.0"
   ##
-  extraConfigmapMounts: []
+  extraPipPackages: []
+
+  ## extra VolumeMounts for the flower Pods
+  ##
+  ## SPEC - VolumeMount:
+  ##   https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#volumemount-v1-core
+  ##
+  extraVolumeMounts: []
+
+  ## extra Volumes for the flower Pods
+  ##
+  ## SPEC - Volume:
+  ##   https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.20/#volume-v1-core
+  ##
+  extraVolumes: []

 ###################################
 # Airflow - Logs Configs
@@ -865,10 +918,8 @@ logs:
   ## the access mode of the PVC
   ##
   ## WARNING:
-  ## - must be: `ReadWriteMany`
-  ##
-  ## NOTE:
-  ## - different StorageClass support different access modes:
+  ## - must be "ReadWriteMany" or airflow pods will fail to start
+  ## - different StorageClass types support different access modes:
   ##   https://kubernetes.io/docs/concepts/storage/persistent-volumes/#access-modes
   ##
   accessMode: ReadWriteMany
@@ -885,21 +936,6 @@ dags:
   ##
   path: /opt/airflow/dags

-  ## whether to disable pickling dags from the scheduler to workers
-  ##
-  ## NOTE:
-  ## - sets AIRFLOW__CORE__DONOT_PICKLE
-  ##
-  doNotPickle: false
-
-  ## install any Python `requirements.txt` at the root of `dags.path` automatically
-  ##
-  ## WARNING:
-  ## - if set to true, and you are using 
`dags.git.gitSync`, you must also enable - ## `dags.initContainer` to ensure the requirements.txt is available at Pod start - ## - installRequirements: false - ## configs for the dags PVC ## persistence: @@ -925,11 +961,9 @@ dags: ## the access mode of the PVC ## - ## WARNING: - ## - must be one of: `ReadOnlyMany` or `ReadWriteMany` - ## ## NOTE: - ## - different StorageClass support different access modes: + ## - must be "ReadOnlyMany" or "ReadWriteMany" or airflow pods will fail to start + ## - different StorageClass types support different access modes: ## https://kubernetes.io/docs/concepts/storage/persistent-volumes/#access-modes ## accessMode: ReadOnlyMany @@ -938,155 +972,102 @@ dags: ## size: 1Gi - ## configs for the DAG git repository & sync container + ## configs for the git-sync sidecar (https://github.com/kubernetes/git-sync) ## - git: - ## url of the git repository - ## - ## EXAMPLE: (HTTP) - ## url: "https://github.com/USERNAME/REPOSITORY.git" - ## - ## EXAMPLE: (HTTP - Token) - ## url: "https://USERNAME:TOKEN@github.com/USERNAME/REPOSITORY.git" + gitSync: + ## if the git-sync sidecar container is enabled ## - ## EXAMPLE: (SSH) - ## url: "ssh://git@example.com:22/REPOSITORY.git" - ## - ## EXAMPLE: (SSH - GitHub) - ## url: "git@github.com:USERNAME/REPOSITORY.git" - ## - url: "" + enabled: false - ## the branch/tag/sha1 which we clone + ## the git-sync container image ## - ref: master + image: + repository: k8s.gcr.io/git-sync/git-sync + tag: v3.2.2 + ## values: Always or IfNotPresent + pullPolicy: IfNotPresent + uid: 65533 + gid: 65533 - ## the name of a pre-created secret containing files for ~/.ssh/ + ## resource requests/limits for the git-sync container ## - ## NOTE: - ## - this is ONLY RELEVANT for SSH git repos - ## - the secret commonly includes files: id_rsa, id_rsa.pub, known_hosts - ## - known_hosts is NOT NEEDED if `git.sshKeyscan` is true + ## EXAMPLE: + ## resources: + ## requests: + ## cpu: "50m" + ## memory: "64Mi" ## - secret: "" + resources: {} - ## if we should implicitly trust [git.repoHost]:git.repoPort, by auto creating a ~/.ssh/known_hosts + ## the url of the git repo ## - ## WARNING: - ## - setting true will increase your vulnerability to a repo spoofing attack + ## EXAMPLE - HTTPS: + ## repo: "https://github.com/USERNAME/REPOSITORY.git" ## - ## NOTE: - ## - this is ONLY RELEVANT for SSH git repos - ## - this is not needed if known_hosts is provided in `git.secret` - ## - git.repoHost and git.repoPort ARE REQUIRED for this to work + ## EXAMPLE - SSH: + ## repo: "git@github.com:USERNAME/REPOSITORY.git" ## - sshKeyscan: false + repo: "" - ## the name of the private key file in your `git.secret` + ## the sub-path (within your repo) where dags are located ## ## NOTE: - ## - this is ONLY RELEVANT for PRIVATE SSH git repos + ## - only dags under this path (within your repo) will be seen by airflow, + ## but the full repo will be cloned ## - privateKeyName: id_rsa + repoSubPath: "" - ## the host name of the git repo - ## - ## NOTE: - ## - this is ONLY REQUIRED for SSH git repos + ## the git branch to check out ## - ## EXAMPLE: - ## repoHost: "github.com" - ## - repoHost: "" + branch: master - ## the port of the git repo + ## the git revision (tag or hash) to check out ## - ## NOTE: - ## - this is ONLY REQUIRED for SSH git repos - ## - repoPort: 22 + revision: HEAD - ## configs for the git-sync container + ## shallow clone with a history truncated to the specified number of commits ## - gitSync: - ## enable the git-sync sidecar container - ## - enabled: false - 
-    ## resource requests/limits for the git-sync container
-    ##
-    ## NOTE:
-    ## - when `workers.autoscaling` is true, YOU MUST SPECIFY a resource request
-    ##
-    ## EXAMPLE:
-    ##   resources:
-    ##     requests:
-    ##       cpu: "50m"
-    ##       memory: "64Mi"
-    ##
-    resources: {}
+    depth: 1

-    ## the docker image for the git-sync container
-    image:
-      repository: alpine/git
-      tag: latest
-      ## values: Always or IfNotPresent
-      pullPolicy: IfNotPresent
-
-    ## the git sync interval in seconds
-    ##
-    refreshTime: 60
-
-  ## configs for the git-clone container
-  ##
-  ## NOTE:
-  ## - use this container if you want to only clone the external git repo
-  ##   at Pod start-time, and not keep it synchronised afterwards
-  ##
-  initContainer:
-    ## enable the git-clone sidecar container
+    ## the number of seconds between syncs
     ##
-    ## NOTE:
-    ## - this is NOT required for the git-sync sidecar to work
-    ## - this is mostly used for when `dags.installRequirements` is true to ensure that
-    ##   requirements.txt is available at Pod start
+    syncWait: 60
+
+    ## the max number of seconds allowed for a complete sync
     ##
-    enabled: false
+    syncTimeout: 120

-    ## resource requests/limits for the git-clone container
+    ## the name of a pre-created Secret with git http credentials
     ##
-    ## EXAMPLE:
-    ##   resources:
-    ##     requests:
-    ##       cpu: "50m"
-    ##       memory: "64Mi"
+    httpSecret: ""
+
+    ## the key in `dags.gitSync.httpSecret` with your git username
     ##
-    resources: {}
+    httpSecretUsernameKey: username

-    ## the docker image for the git-clone container
-    image:
-      repository: alpine/git
-      tag: latest
-      ## values: Always or IfNotPresent
-      pullPolicy: IfNotPresent
+    ## the key in `dags.gitSync.httpSecret` with your git password/token
+    ##
+    httpSecretPasswordKey: password

-    ## path to mount dags-data volume to
+    ## the name of a pre-created Secret with git ssh credentials
     ##
-    ## WARNING:
-    ## - this path is also used by the git-sync container
+    sshSecret: ""
+
+    ## the key in `dags.gitSync.sshSecret` with your ssh-key file
     ##
-    mountPath: "/dags"
+    sshSecretKey: id_rsa

-    ## sub-path under `dags.initContainer.mountPath` to sync dags to
+    ## the string value of a "known_hosts" file (for SSH only)
     ##
     ## WARNING:
-    ## - this path is also used by the git-sync container
-    ## - this MUST INCLUDE the leading /
+    ## - known_hosts verification will be disabled if left empty, making you more
+    ##   vulnerable to repo spoofing attacks
     ##
     ## EXAMPLE:
-    ##   syncSubPath: "/subdirWithDags"
+    ##   sshKnownHosts: |-
+    ##     <HOST_NAME> ssh-rsa <HOST_KEY>
     ##
-    syncSubPath: ""
+    sshKnownHosts: ""

 ###################################
 # Kubernetes - Ingress Configs
@@ -1094,10 +1075,6 @@ dags:
 ingress:
   ## if we should deploy Ingress resources
   ##
-  ## NOTE:
-  ## - if you want to change url prefix for web ui or flower (even if you do not use this Ingress),
-  ##   you can change `web.baseUrl` and `flower.urlPrefix`
-  ##
  enabled: false

  ## configs for the Ingress of the web Service
@@ -1116,9 +1093,6 @@ ingress:
      ## WARNING:
      ## - do NOT include the trailing slash (for root, set an empty string)
      ##
-      ## NOTE:
-      ## - should be compatible with `web.baseUrl` config
-      ##
      ## EXAMPLE: (if set to "/airflow")
      ##   - UI: http://example.com/airflow/admin
      ##   - API: http://example.com/airflow/api
@@ -1130,13 +1104,6 @@ ingress:
    ##
    host: ""

-    ## the livenessPath for the web Ingress
-    ##
-    ## NOTE:
-    ## - if set to "", defaults to: `{ingress.web.path}/health`
-    ##
-    livenessPath: ""
-
    ## configs for web Ingress TLS
    ##
    tls:
@@ -1147,7 +1114,7 @@ ingress:
      ## the name of a pre-created Secret containing a TLS private key and certificate
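+      ##
+      ## EXAMPLE: (one way to create such a Secret, outside of this chart; the name
+      ##          "airflow-web-tls" is only illustrative)
+      ##   kubectl create secret tls airflow-web-tls --cert=tls.crt --key=tls.key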
## ## NOTE: - ## - this MUST be specified if `ingress.web.tls.enabled` is true + ## - this MUST be specified if `tls.enabled` is true ## secretName: "" @@ -1156,8 +1123,8 @@ ingress: ## EXAMPLE: ## precedingPaths: ## - path: "/*" - ## serviceName: "ssl-redirect" - ## servicePort: "use-annotation" + ## serviceName: "my-service" + ## servicePort: "port-name" ## precedingPaths: [] @@ -1166,8 +1133,8 @@ ingress: ## EXAMPLE: ## succeedingPaths: ## - path: "/extra-service" - ## serviceName: "extra-service" - ## servicePort: "use-annotation" + ## serviceName: "my-service" + ## servicePort: "port-name" ## succeedingPaths: [] @@ -1187,9 +1154,6 @@ ingress: ## WARNING: ## - do NOT include the trailing slash (for root, set an empty string) ## - ## NOTE: - ## - should match `flower.urlPrefix` config - ## ## EXAMPLE: (if set to "/airflow/flower") ## - UI: http://example.com/airflow/flower ## @@ -1199,16 +1163,6 @@ ingress: ## host: "" - ## the livenessPath for the flower Ingress - ## - ## WARNING: - ## - keep the trailing slash - ## - ## NOTE: - ## - if set to "", defaults to: `{ingress.flower.path}/` - ## - livenessPath: "" - ## configs for flower Ingress TLS ## tls: @@ -1219,7 +1173,7 @@ ingress: ## the name of a pre-created Secret containing a TLS private key and certificate ## ## NOTE: - ## - this MUST be specified if `ingress.flower.tls.enabled` is true + ## - this MUST be specified if `tls.enabled` is true ## secretName: "" @@ -1228,8 +1182,8 @@ ingress: ## EXAMPLE: ## precedingPaths: ## - path: "/*" - ## serviceName: "ssl-redirect" - ## servicePort: "use-annotation" + ## serviceName: "my-service" + ## servicePort: "port-name" ## precedingPaths: [] @@ -1238,8 +1192,8 @@ ingress: ## EXAMPLE: ## succeedingPaths: ## - path: "/extra-service" - ## serviceName: "extra-service" - ## servicePort: "use-annotation" + ## serviceName: "my-service" + ## servicePort: "port-name" ## succeedingPaths: [] @@ -1260,7 +1214,7 @@ rbac: ## NOTE: ## - this is needed for KubernetesPodOperator() to use `log_events_on_failure=True` ## - events: false + events: true ################################### # Kubernetes - Service Account @@ -1269,7 +1223,7 @@ serviceAccount: ## if a Kubernetes ServiceAccount is created ## ## NOTE: - ## - if false, you must create the service account outside of this helm chart, + ## - if false, you must create the service account outside of this chart, ## with the name: `serviceAccount.name` ## create: true @@ -1277,7 +1231,7 @@ serviceAccount: ## the name of the ServiceAccount ## ## NOTE: - ## - by default the name is generated using the `airflow.serviceAccountName` template in `_helpers.tpl` + ## - by default the name is generated using the `airflow.serviceAccountName` template in `_helpers/common.tpl` ## name: "" @@ -1292,7 +1246,10 @@ serviceAccount: ################################### # Kubernetes - Extra Manifests ################################### -## additional Kubernetes manifests to include with this chart +## extra Kubernetes manifests to include alongside this chart +## +## NOTE: +## - this can be used to include ANY Kubernetes YAML resource ## ## EXAMPLE: ## extraManifests: @@ -1553,10 +1510,10 @@ externalRedis: passwordSecretKey: "redis-password" ################################### -# Prometheus - ServiceMonitor +# Prometheus Operator - ServiceMonitor ################################### serviceMonitor: - ## if the ServiceMonitor resources should be deployed + ## if ServiceMonitor resources should be deployed for airflow webserver ## ## WARNING: ## - you will need an exporter in 
your airflow docker container, for example: @@ -1564,7 +1521,7 @@ serviceMonitor: ## ## NOTE: ## - you can install pip packages with `airflow.extraPipPackages` - ## - ServiceMonitor is a resource from: https://github.com/coreos/prometheus-operator + ## - ServiceMonitor is a resource from: https://github.com/prometheus-operator/prometheus-operator ## enabled: false @@ -1582,10 +1539,10 @@ serviceMonitor: interval: "30s" ################################### -# Prometheus - PrometheusRule +# Prometheus Operator - PrometheusRule ################################### prometheusRule: - ## if the PrometheusRule resources should be deployed + ## if PrometheusRule resources should be deployed for airflow webserver ## ## WARNING: ## - you will need an exporter in your airflow docker container, for example: @@ -1593,7 +1550,7 @@ prometheusRule: ## ## NOTE: ## - you can install pip packages with `airflow.extraPipPackages` - ## - PrometheusRule a resource from: https://github.com/coreos/prometheus-operator + ## - PrometheusRule is a resource from: https://github.com/prometheus-operator/prometheus-operator ## enabled: false diff --git a/ct-config.yaml b/ct-config.yaml index 0b0efb24..f856f0be 100644 --- a/ct-config.yaml +++ b/ct-config.yaml @@ -4,4 +4,4 @@ chart-dirs: - charts chart-repos: - stable=https://charts.helm.sh/stable -helm-extra-args: --timeout 600s +helm-extra-args: --timeout=600s \ No newline at end of file