# Page-banner text captured with this file (not part of the workflow definition):
# adrianisk - Gable Check Contracts & Assets #82
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow metadata: display name, per-run title, trigger, and the token
# permissions granted to the jobs in this workflow.
name: Gable Check Contracts & Assets
run-name: ${{ github.actor }} - Gable Check Contracts & Assets
# Runs on pull_request events
on: pull_request
permissions:
  # Required to checkout the data assets from the repo
  contents: read
  # Required to post messages to the PR
  pull-requests: write
jobs:
  # Validates the contract files matching ./contracts/*.yaml, or skips
  # validation when no such files exist.
  validate-data-contracts:
    runs-on: ubuntu-latest
    name: 'Validate Data Contracts'
    steps:
      - name: Check out repository code
        uses: actions/checkout@v4
      # By default, the Gable CLI will exit with a non-zero exit code if no contracts are found when calling the
      # validate or publish command. For this tutorial, there won't be any contracts in the repo to start, so we
      # need this extra step to check if we should skip validation
      - name: Check if Contracts Exist
        shell: bash
        run: |
          # `ls` fails when the glob matches nothing; testing it directly in the
          # `if` keeps the failure from aborting the step.
          if ls ./contracts/*.yaml; then
            echo "Found contract files, running validation..."
          else
            echo "No contract files found, skipping contract validation..."
            # Exported to later steps through the job environment file
            echo "SKIP_VALIDATION=true" >> "$GITHUB_ENV"
          fi
      - name: Validate Contracts
        # Skipped when the previous step set SKIP_VALIDATION=true
        if: ${{ env.SKIP_VALIDATION != 'true' }}
        # NOTE(review): `@latest` is a mutable ref; consider pinning to a release tag or commit SHA.
        uses: gabledata/cicd/github-actions/validate-contracts@latest
        with:
          # Provide API key and endpoint secrets
          gable-api-key: ${{ secrets.GABLE_API_KEY_UNSTABLE_SANDBOX }}
          gable-api-endpoint: ${{ secrets.GABLE_API_ENDPOINT_UNSTABLE_SANDBOX }}
          # List of paths to contract files that should be validated, with support for glob syntax.
          # Can either be specified as a space separated list ('contract1.yml contract2.yml'), or
          # a multiline string
          contract-paths: |
            ./contracts/*.yaml
# Job: check-data-assets-against-contracts (a child of the top-level `jobs:` key)
  # Checks PySpark, Protobuf, Avro and Postgres data assets with the Gable
  # check-data-assets action.
  check-data-assets-against-contracts:
    runs-on: ubuntu-latest
    name: 'Check Data Assets Against Contracts'
    services:
      # Postgres service container. The "Check Postgres Data Assets" step reaches it
      # through its --proxy-* options (localhost:5432, db "tutorial", user "postgres").
      # Presumably also the target of the alembic migrations below - TODO confirm
      # against the db_migrations configuration.
      postgres:
        image: postgres:14
        env:
          POSTGRES_USER: postgres
          POSTGRES_PASSWORD: postgres
          POSTGRES_DB: tutorial
        ports:
          - '5432:5432'
        # Container health check: poll pg_isready every 10s, up to 5 retries
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
    steps:
      - name: Check out repository code
        uses: actions/checkout@v4
      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          # Quoted so the version is always read as a string
          python-version: '3.10.11'
      - name: Install Poetry
        shell: bash
        run: curl -sSL https://install.python-poetry.org | python3 -
      - name: Install dependencies
        shell: bash
        run: |
          # Install root level dependencies
          poetry install
          # Create a virtual environment for the PySpark project, install dependencies
          python3 -m venv "pyspark/.venv"
          pyspark/.venv/bin/pip install -r pyspark/requirements.txt
          # List the venv executables in the job log
          ls -la pyspark/.venv/bin
      - name: Run migrations from db_migrations directory
        working-directory: ./db_migrations
        shell: bash
        run: poetry run alembic upgrade head
      - name: Check PySpark Data Assets
        # NOTE(review): `@python_path` differs from the `@latest` ref used by the other
        # check-data-assets steps and looks like a branch name - confirm it is intentional.
        uses: gabledata/cicd/github-actions/check-data-assets@python_path
        with:
          # Provide API key and endpoint secrets
          gable-api-key: ${{ secrets.GABLE_API_KEY_UNSTABLE_SANDBOX }}
          gable-api-endpoint: ${{ secrets.GABLE_API_ENDPOINT_UNSTABLE_SANDBOX }}
          allow-gable-pre-release: true
          # Interpreter from the virtual environment created in "Install dependencies"
          python-path: pyspark/.venv/bin/python
          # Options for the PySpark check: project root, CSV schema file, and the
          # Spark job entrypoint with its arguments, given as a multiline string
          data-asset-options: |
            --source-type pyspark \
            --project-root ${{ github.workspace }}/pyspark \
            --csv-schema-file ${{ github.workspace }}/pyspark/schemas.csv \
            --spark-job-entrypoint 'job.py --final_output_table pnw_bookings_30_days' --debug
      - name: Check Protobuf Data Assets
        uses: gabledata/cicd/github-actions/check-data-assets@latest
        with:
          # Provide API key and endpoint secrets
          gable-api-key: ${{ secrets.GABLE_API_KEY_UNSTABLE_SANDBOX }}
          gable-api-endpoint: ${{ secrets.GABLE_API_ENDPOINT_UNSTABLE_SANDBOX }}
          # List of paths to Protobuf files that should be checked with support for glob syntax.
          # Can either be specified as a space separated list ('event1.proto event2.proto'), or
          # a multiline string
          data-asset-options: |
            --source-type protobuf \
            --files ./event_schemas/*.proto
      - name: Check Avro Data Assets
        uses: gabledata/cicd/github-actions/check-data-assets@latest
        with:
          # Provide API key and endpoint secrets
          gable-api-key: ${{ secrets.GABLE_API_KEY_UNSTABLE_SANDBOX }}
          gable-api-endpoint: ${{ secrets.GABLE_API_ENDPOINT_UNSTABLE_SANDBOX }}
          # List of paths to Avro files that should be checked with support for glob syntax.
          # Can either be specified as a space separated list ('event1.avsc event2.avsc'), or
          # a multiline string
          data-asset-options: |
            --source-type avro \
            --files ./event_schemas/*.avsc
      - name: Check Postgres Data Assets
        uses: gabledata/cicd/github-actions/check-data-assets@latest
        with:
          # Provide API key and endpoint secrets
          gable-api-key: ${{ secrets.GABLE_API_KEY_UNSTABLE_SANDBOX }}
          gable-api-endpoint: ${{ secrets.GABLE_API_ENDPOINT_UNSTABLE_SANDBOX }}
          # Connection options for the Postgres check, given as a multiline string.
          # NOTE(review): --host/--port/--db/--schema appear to identify the production
          # database, while the --proxy-* options point at the local service container
          # that is actually queried - confirm against the Gable CLI documentation.
          data-asset-options: |
            --source-type postgres \
            --host prod.store.com \
            --port 5432 \
            --db tutorial \
            --schema public \
            --proxy-host localhost \
            --proxy-port 5432 \
            --proxy-db tutorial \
            --proxy-schema public \
            --proxy-user postgres \
            --proxy-password postgres