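# Non-YAML page text (apparently copied along with the workflow from a web view),
# kept here as comments so that the file can be parsed as YAML:
#   Skip to content
#   adrianisk - Gable Check Contracts & Assets #82
#   adrianisk - Gable Check Contracts & Assets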

# Workflow metadata, trigger and token permissions for the Gable contract and
# data asset checks. Triggered by pull_request events.
name: Gable Check Contracts & Assets
run-name: ${{ github.actor }} - Gable Check Contracts & Assets
on: pull_request
# The scopes below must be nested under `permissions`; at the top level they
# would be treated as unrelated workflow keys and `permissions` would be empty.
permissions:
  # Required to checkout the data assets from the repo
  contents: read
  # Required to post messages to the PR
  pull-requests: write
jobs:
  # Validates the data contract files under ./contracts, skipping validation
  # when the directory contains no *.yaml files.
  validate-data-contracts:
    runs-on: ubuntu-latest
    name: 'Validate Data Contracts'
    steps:
      - name: Check out repository code
        uses: actions/checkout@v3
      # By default, the Gable CLI will exit with a non-zero exit code if no contracts are found when calling the
      # validate or publish command. For this tutorial, there won't be any contracts in the repo to start, so we
      # need this extra step to check if we should skip validation
      - name: Check if Contracts Exist
        shell: bash
        run: |
          # A command tested directly by `if` never aborts the script on
          # failure, so no `set +e` / `$?` bookkeeping is needed here.
          if ls ./contracts/*.yaml; then
            echo "Found contract files, running validation..."
          else
            echo "No contract files found, skipping contract validation..."
            # Written to the GitHub environment file so the `if:` condition
            # of the next step can read it as env.SKIP_VALIDATION
            echo "SKIP_VALIDATION=true" >> "$GITHUB_ENV"
          fi
      - name: Validate Contracts
        if: ${{ env.SKIP_VALIDATION != 'true' }}
        # NOTE(review): `@latest` is a mutable ref; consider pinning to a
        # release tag or commit SHA.
        uses: gabledata/cicd/github-actions/validate-contracts@latest
        with:
          # Provide the API key and API endpoint, both read from repository secrets
          gable-api-key: ${{ secrets.GABLE_API_KEY_UNSTABLE_SANDBOX }}
          gable-api-endpoint: ${{ secrets.GABLE_API_ENDPOINT_UNSTABLE_SANDBOX }}
          # List of paths to contract files that should be validated, with support for glob syntax.
          # Can either be specified as a space separated list ('contract1.yml contract2.yml'), or
          # a multiline string
          contract-paths: |
            ./contracts/*.yaml

  # Checks the PySpark, Protobuf, Avro and Postgres data assets in the repo
  # against the contracts, using a Postgres service container for the
  # database-backed steps.
  check-data-assets-against-contracts:
    runs-on: ubuntu-latest
    name: 'Check Data Assets Against Contracts'
    services:
      postgres:
        image: postgres:14
        env:
          POSTGRES_USER: postgres
          POSTGRES_PASSWORD: postgres
          POSTGRES_DB: tutorial
        ports:
          # Quoted so the digits-and-colon value is always read as a string
          - '5432:5432'
        # Health check options passed to the container runtime
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
    steps:
      - name: Check out repository code
        uses: actions/checkout@v3
      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          # Quoted so the version is never interpreted as a number
          python-version: '3.10.11'
      - name: Install Poetry
        run: curl -sSL https://install.python-poetry.org | python3 -
        shell: bash
      - name: Install dependencies
        shell: bash
        run: |
          # Install root level dependencies
          poetry install
          # Create a virtual environment for the PySpark project, install dependencies
          python3 -m venv "pyspark/.venv"
          pyspark/.venv/bin/pip install -r pyspark/requirements.txt
          # List the virtual environment's executables in the job log
          ls -la pyspark/.venv/bin
      - name: Run migrations from db_migrations directory
        working-directory: ./db_migrations
        run: poetry run alembic upgrade head
        shell: bash
      - name: Check PySpark Data Assets
        # NOTE(review): `@python_path` looks like a feature branch rather than
        # a release ref, unlike the `@latest` used by the other steps - confirm
        # this is intentional.
        uses: gabledata/cicd/github-actions/check-data-assets@python_path
        with:
          # Provide API key and endpoint secrets
          gable-api-key: ${{ secrets.GABLE_API_KEY_UNSTABLE_SANDBOX }}
          gable-api-endpoint: ${{ secrets.GABLE_API_ENDPOINT_UNSTABLE_SANDBOX }}
          allow-gable-pre-release: true
          # Interpreter from the virtual environment created in the
          # "Install dependencies" step
          python-path: pyspark/.venv/bin/python
          # Options for the PySpark check: the project root, a CSV schema file and the
          # Spark job entrypoint with its arguments, given as a multiline string
          data-asset-options: |
            --source-type pyspark \
            --project-root ${{ github.workspace }}/pyspark \
            --csv-schema-file ${{ github.workspace }}/pyspark/schemas.csv \
            --spark-job-entrypoint 'job.py --final_output_table pnw_bookings_30_days' --debug
      - name: Check Protobuf Data Assets
        # NOTE(review): `@latest` is a mutable ref; consider pinning.
        uses: gabledata/cicd/github-actions/check-data-assets@latest
        with:
          # Provide the API key and API endpoint, both read from repository secrets
          gable-api-key: ${{ secrets.GABLE_API_KEY_UNSTABLE_SANDBOX }}
          gable-api-endpoint: ${{ secrets.GABLE_API_ENDPOINT_UNSTABLE_SANDBOX }}
          # List of paths to Protobuf files that should be checked with support for glob syntax.
          # Can either be specified as a space separated list ('event1.proto event2.proto'), or
          # a multiline string
          data-asset-options: |
            --source-type protobuf \
            --files ./event_schemas/*.proto
      - name: Check Avro Data Assets
        # NOTE(review): `@latest` is a mutable ref; consider pinning.
        uses: gabledata/cicd/github-actions/check-data-assets@latest
        with:
          # Provide the API key and API endpoint, both read from repository secrets
          gable-api-key: ${{ secrets.GABLE_API_KEY_UNSTABLE_SANDBOX }}
          gable-api-endpoint: ${{ secrets.GABLE_API_ENDPOINT_UNSTABLE_SANDBOX }}
          # List of paths to Avro files that should be checked with support for glob syntax.
          # Can either be specified as a space separated list ('event1.avsc event2.avsc'), or
          # a multiline string
          data-asset-options: |
            --source-type avro \
            --files ./event_schemas/*.avsc
      - name: Check Postgres Data Assets
        # NOTE(review): `@latest` is a mutable ref; consider pinning.
        uses: gabledata/cicd/github-actions/check-data-assets@latest
        with:
          # Provide the API key and API endpoint, both read from repository secrets
          gable-api-key: ${{ secrets.GABLE_API_KEY_UNSTABLE_SANDBOX }}
          gable-api-endpoint: ${{ secrets.GABLE_API_ENDPOINT_UNSTABLE_SANDBOX }}
          # Connection options for the Postgres check, given as a multiline string.
          # The --proxy-* values match the `postgres` service container declared for this
          # job (localhost:5432, database `tutorial`, user/password `postgres`).
          # NOTE(review): --host/--port/--db/--schema presumably name the real database
          # the proxy stands in for - confirm against the Gable CLI documentation.
          data-asset-options: |
            --source-type postgres \
            --host prod.store.com \
            --port 5432 \
            --db tutorial \
            --schema public \
            --proxy-host localhost \
            --proxy-port 5432 \
            --proxy-db tutorial \
            --proxy-schema public \
            --proxy-user postgres \
            --proxy-password postgres