# Skip to content

# Test warehouse platform #80

# Test warehouse platform

# Test warehouse platform #80

# Workflow file for this run

name: Test warehouse platform

# Two entry points expose the same set of inputs:
#   - workflow_dispatch: manual run; warehouse-type is a fixed choice list.
#   - workflow_call: invoked from another workflow; warehouse-type is a
#     free-form string supplied by the caller.
on:
  workflow_dispatch:
    inputs:
      warehouse-type:
        type: choice
        required: true
        description: Type of warehouse platform
        options:
          - postgres
          - snowflake
          - bigquery
          - redshift
          - databricks
          - databricks_catalog
          - spark
      elementary-ref:
        type: string
        required: false
        description: Branch or tag to checkout for 'elementary' repository
      dbt-data-reliability-ref:
        type: string
        required: false
        description: Branch or tag to checkout for 'dbt-data-reliability' repository
      dbt-version:
        type: string
        required: false
        description: dbt's version to test with
      should-run-tests:
        type: boolean
        required: false
        default: true
        description: Whether to run E2E tests
      clear-tests:
        type: boolean
        required: false
        default: true
        description: Whether to clean test environment
      generate-data:
        type: boolean
        required: false
        default: false
        description: Whether to generate new data

  workflow_call:
    inputs:
      warehouse-type:
        type: string
        required: true
      should-run-tests:
        type: boolean
        required: false
        default: true
      elementary-ref:
        type: string
        required: false
      dbt-data-reliability-ref:
        type: string
        required: false
      dbt-version:
        type: string
        required: false
      clear-tests:
        type: boolean
        required: false
        default: true
      generate-data:
        type: boolean
        required: false
        default: false
# Workflow-level environment shared by every step of every job.
env:
  # Head branch when one is set (pull requests), otherwise the triggering ref.
  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
  # Deprecated integration-test project inside the dbt-data-reliability checkout.
  DBT_PKG_INTEG_TESTS_DIR: ${{ github.workspace }}/dbt-data-reliability/integration_tests/deprecated_tests
  # dbt project bundled inside the elementary checkout.
  # NOTE(review): "ELMENTARY" is misspelled; the spelling is kept because a
  # step in the `test` job reads env.ELMENTARY_INTERNAL_DBT_PKG_DIR.
  ELMENTARY_INTERNAL_DBT_PKG_DIR: ${{ github.workspace }}/elementary/elementary/monitor/dbt_project
jobs:
  # Single job: installs dbt + Elementary for the requested warehouse, runs the
  # Python and dbt test suites, then exercises the `edr` CLI (monitor, report,
  # send-report) and uploads the resulting artifacts.
  test:
    # NOTE(review): confirm this runner image is still offered by GitHub;
    # changing it may affect the docker-compose and apt-get steps below.
    runs-on: ubuntu-20.04
    defaults:
      run:
        # `run` steps execute from the elementary checkout unless overridden.
        working-directory: elementary
    concurrency:
      # This is what eventually defines the schema name in the data platform.
      group: tests_${{ inputs.warehouse-type }}_dbt_${{ inputs.dbt-version }}_${{ github.head_ref || github.ref_name }}
      cancel-in-progress: true
    steps:
      - name: Checkout Elementary
        uses: actions/checkout@v3
        with:
          path: elementary
          ref: ${{ inputs.elementary-ref }}

      - name: Checkout dbt package
        uses: actions/checkout@v3
        with:
          repository: elementary-data/dbt-data-reliability
          path: dbt-data-reliability
          ref: ${{ inputs.dbt-data-reliability-ref }}

      - name: Start Postgres
        if: inputs.warehouse-type == 'postgres'
        working-directory: ${{ env.DBT_PKG_INTEG_TESTS_DIR }}
        run: docker-compose up -d postgres

      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.8"

      # -y: the runner is non-interactive, so apt-get must not wait for a
      # confirmation prompt.
      - name: Install Spark requirements
        if: inputs.warehouse-type == 'spark'
        run: sudo apt-get install -y python-dev libsasl2-dev gcc

      # dbt-core is pinned with == and the adapter capped with <= when
      # dbt-version is given; otherwise no version constraint is applied.
      # databricks_catalog installs the dbt-databricks adapter.
      - name: Install dbt
        run: >
          pip install --pre
          "dbt-core${{ inputs.dbt-version && format('=={0}', inputs.dbt-version) }}"
          "dbt-${{ (inputs.warehouse-type == 'databricks_catalog' && 'databricks') || inputs.warehouse-type }}${{ inputs.dbt-version && format('<={0}', inputs.dbt-version) }}"

      - name: Install Elementary
        run: |
          pip install -r dev-requirements.txt
          pip install ".[${{ (inputs.warehouse-type == 'databricks_catalog' && 'databricks') || inputs.warehouse-type }}]"

      # Decodes the base64 profiles secret and substitutes <SCHEMA_NAME> with
      # "py_" + a name built from the installed dbt-core version and the
      # branch name (truncated to 32 characters, dashes replaced by
      # underscores).
      - name: Write dbt profiles
        env:
          PROFILES_YML: ${{ secrets.CI_PROFILES_YML }}
        run: |
          mkdir -p ~/.dbt
          DBT_VERSION=$(pip show dbt-core | grep -i version | awk '{print $2}' | sed 's/\.//g')
          UNDERSCORED_REF_NAME=$(echo "dbt_${DBT_VERSION}_${BRANCH_NAME}" | head -c 32 | sed "s/-/_/g")
          echo "$PROFILES_YML" | base64 -d | sed "s/<SCHEMA_NAME>/py_$UNDERSCORED_REF_NAME/g" > ~/.dbt/profiles.yml

      - name: Run Python package unit tests
        run: pytest -vv tests/unit --warehouse-type ${{ inputs.warehouse-type }}

      - name: Run Python package integration tests
        run: pytest -vv tests/integration --warehouse-type ${{ inputs.warehouse-type }}

      # Replaces the `elementary` dbt package resolved by `dbt deps` with a
      # symlink to the dbt-data-reliability checkout.
      - name: Install dbt package
        run: |
          ELEMENTARY_PKG_LOCATION=$(pip show elementary-data | grep -i location | awk '{print $2}')
          DBT_PROJECT_PATH="$ELEMENTARY_PKG_LOCATION/elementary/monitor/dbt_project"
          DBT_PKGS_PATH="$DBT_PROJECT_PATH/dbt_packages"
          dbt deps --project-dir "$DBT_PROJECT_PATH"
          rm -rf "$DBT_PKGS_PATH/elementary"
          ln -vs "$GITHUB_WORKSPACE/dbt-data-reliability" "$DBT_PKGS_PATH/elementary"

      # Skipped only when the triggering event is workflow_dispatch and
      # should-run-tests is false. The -g flag is true for postgres or when
      # generate-data is set.
      - name: Run dbt package integration tests
        if: github.event_name != 'workflow_dispatch' || inputs.should-run-tests
        working-directory: ${{ env.DBT_PKG_INTEG_TESTS_DIR }}
        run: |
          dbt deps
          python run_e2e_tests.py -t "${{ inputs.warehouse-type }}" -g "${{ inputs.warehouse-type == 'postgres' || inputs.generate-data }}" --clear-tests "${{ inputs.clear-tests }}"

      - name: Run help
        run: edr --help

      - name: Run monitor
        env:
          SLACK_WEBHOOK: ${{ secrets.CI_SLACK_WEBHOOK }}
        run: >
          edr monitor
          -t "${{ inputs.warehouse-type }}"
          --group-by table
          --project-dir "${{ env.DBT_PKG_INTEG_TESTS_DIR }}"
          --project-profile-target "${{ inputs.warehouse-type }}"
          --slack-webhook "$SLACK_WEBHOOK"

      - name: Validate alerts statuses were updated
        working-directory: ${{ env.ELMENTARY_INTERNAL_DBT_PKG_DIR }}
        run: |
          dbt deps
          dbt run-operation validate_alert_statuses_are_updated -t "${{ inputs.warehouse-type }}"

      - name: Run report
        run: >
          edr monitor report
          -t "${{ inputs.warehouse-type }}"
          --project-dir "${{ env.DBT_PKG_INTEG_TESTS_DIR }}"
          --project-profile-target "${{ inputs.warehouse-type }}"

      - name: Upload report artifact
        uses: actions/upload-artifact@v3
        with:
          name: report_${{ inputs.warehouse-type }}_${{ env.BRANCH_NAME }}.html
          path: elementary/edr_target/elementary_report.html

      - name: Write GCS keyfile
        env:
          GCS_KEYFILE: ${{ secrets.GCS_KEYFILE }}
        run: echo "$GCS_KEYFILE" | base64 -d > /tmp/gcs_keyfile.json

      # The branch name is read from the BRANCH_NAME shell variable rather than
      # expanded via a workflow expression: it can originate from
      # github.head_ref, and expanding it into the script text would let a
      # crafted branch name inject shell commands.
      - name: Run send report
        env:
          SLACK_TOKEN: ${{ secrets.CI_SLACK_TOKEN }}
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          AZURE_CONNECTION_STRING: ${{ secrets.AZURE_CONNECTION_STRING }}
        run: >
          edr monitor send-report
          -t "${{ inputs.warehouse-type }}"
          --project-dir "${{ env.DBT_PKG_INTEG_TESTS_DIR }}"
          --project-profile-target "${{ inputs.warehouse-type }}"
          --slack-file-name "report_${{ inputs.warehouse-type }}_${BRANCH_NAME}.html"
          --slack-token "$SLACK_TOKEN"
          --slack-channel-name data-ops
          --bucket-file-path "ci_reports/report_${{ inputs.warehouse-type }}_${BRANCH_NAME}.html"
          --aws-access-key-id "$AWS_ACCESS_KEY_ID"
          --aws-secret-access-key "$AWS_SECRET_ACCESS_KEY"
          --s3-bucket-name elementary-ci-artifacts
          --google-service-account-path /tmp/gcs_keyfile.json
          --gcs-bucket-name elementary_ci_artifacts
          --azure-connection-string "$AZURE_CONNECTION_STRING"
          --azure-container-name reports
          --update-bucket-website true

      # always(): the log is uploaded even when an earlier step failed.
      - name: Upload edr log
        if: ${{ always() }}
        uses: actions/upload-artifact@v3
        with:
          name: edr.log
          path: elementary/edr_target/edr.log

      # Same skip condition as the dbt package integration tests above.
      - name: Run Python package e2e tests
        if: github.event_name != 'workflow_dispatch' || inputs.should-run-tests
        run: pytest -vv tests/e2e --warehouse-type ${{ inputs.warehouse-type }}