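# NOTE(review): the lines below look like GitHub web UI text captured along with
# the workflow, not workflow content; kept as comments so the file still parses.
# Skip to content
#
# adrianisk - Gable Publish Contracts & Assets #30
#
# adrianisk - Gable Publish Contracts & Assets
#
# adrianisk - Gable Publish Contracts & Assets #30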

name: Gable Publish Contracts & Assets
run-name: ${{ github.actor }} - Gable Publish Contracts & Assets
# Run on every push to the 'main' or 'pyspark_cicd' branch
on:
  push:
    branches:
      - 'main'
      - 'pyspark_cicd'
permissions:
  # Required to checkout the contracts from the repo
  contents: read
jobs:
  # Validates and then publishes the contract files under ./contracts.
  # Only starts once the register-data-assets job has finished successfully.
  validate-publish-contracts:
    runs-on: ubuntu-latest
    name: 'Validate & Publish Contracts'
    needs: [register-data-assets]
    steps:
      - name: Check out repository code
        uses: actions/checkout@v4
      # By default, the Gable CLI will exit with a non-zero exit code if no
      # contracts are found when calling the validate or publish command. For
      # this tutorial, there won't be any contracts in the repo to start, so we
      # need this extra step to check if we should skip validation
      - name: Check for contract files
        shell: bash
        run: |
          # Test the command directly so a failing `ls` (no matching files)
          # selects the skip branch instead of failing the step.
          if ls ./contracts/*.yaml; then
            echo "Found contract files, publishing contracts..."
          else
            echo "No contract files found, skipping contract validation..."
            # Exported to later steps, which gate on env.SKIP_VALIDATION
            echo SKIP_VALIDATION=true >> "$GITHUB_ENV"
          fi
      - name: Validate Contracts
        if: ${{ env.SKIP_VALIDATION != 'true' }}
        uses: gabledata/cicd/github-actions/validate-contracts@latest
        with:
          # Provide API key and endpoint secrets
          gable-api-key: ${{secrets.GABLE_API_KEY_UNSTABLE}}
          gable-api-endpoint: ${{secrets.GABLE_API_ENDPOINT_UNSTABLE}}
          # List of paths to contract files that should be validated, with
          # support for glob syntax. Can either be specified as a space
          # separated list ('contract1.yml contract2.yml'), or a multiline
          # string
          contract-paths: |
            ./contracts/*.yaml
      - name: Publish Contracts
        if: ${{ env.SKIP_VALIDATION != 'true' }}
        uses: gabledata/cicd/github-actions/publish-contracts@latest
        with:
          # Provide API key and endpoint secrets
          gable-api-key: ${{secrets.GABLE_API_KEY_UNSTABLE}}
          gable-api-endpoint: ${{secrets.GABLE_API_ENDPOINT_UNSTABLE}}
          # List of paths to contract files that should be published, with
          # support for glob syntax. Can either be specified as a space
          # separated list ('contract1.yml contract2.yml'), or a multiline
          # string
          contract-paths: |
            ./contracts/*.yaml
  # Registers PySpark, Protobuf, Avro and Postgres data assets. A Postgres 14
  # service container is started for the job and exposed on port 5432.
  register-data-assets:
    runs-on: ubuntu-latest
    name: 'Register Data Assets'
    services:
      postgres:
        image: postgres:14
        env:
          POSTGRES_USER: postgres
          POSTGRES_PASSWORD: postgres
          POSTGRES_DB: tutorial
        ports:
          - '5432:5432'
        # Health check so the container is reported healthy only once
        # pg_isready succeeds
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
    steps:
      - name: Check out repository code
        uses: actions/checkout@v4
      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          # Quoted so the version is always read as a string
          python-version: '3.10.11'
      - name: Install Poetry
        run: curl -sSL https://install.python-poetry.org | python3 -
        shell: bash
      - name: Install dependencies
        shell: bash
        run: |
          # Install root level dependencies
          poetry install
          # Create a virtual environment for the PySpark project, install dependencies
          python3 -m venv "pyspark/.venv"
          pyspark/.venv/bin/pip install -r pyspark/requirements.txt
          ls -la pyspark/.venv/bin
      # NOTE(review): presumably these migrations target the postgres service
      # container defined above - confirm against the alembic configuration
      - name: Run migrations from db_migrations directory
        working-directory: ./db_migrations
        run: poetry run alembic upgrade head
        shell: bash
      - name: Register PySpark Data Assets
        uses: gabledata/cicd/github-actions/register-data-assets@python_path
        with:
          # Provide API key and endpoint secrets
          gable-api-key: ${{secrets.GABLE_API_KEY_UNSTABLE}}
          gable-api-endpoint: ${{secrets.GABLE_API_ENDPOINT_UNSTABLE}}
          # Python interpreter inside the virtual environment created by the
          # 'Install dependencies' step
          python-path: pyspark/.venv/bin/python
          # Options describing the PySpark project to register, given as a
          # multiline string.
          # NOTE(review): the backslash-escaped quotes around the entrypoint
          # are kept literally by the block scalar; presumably the action
          # needs them in this form - confirm before changing
          data-asset-options: |
            --source-type pyspark \
            --project-root pyspark \
            --csv-schema-file pyspark/schemas.csv \
            --spark-job-entrypoint \'job.py --final_output_table pnw_bookings_30_days\'
      - name: Register Protobuf Data Assets
        uses: gabledata/cicd/github-actions/register-data-assets@latest
        with:
          # Provide API key and endpoint secrets
          gable-api-key: ${{secrets.GABLE_API_KEY_UNSTABLE}}
          gable-api-endpoint: ${{secrets.GABLE_API_ENDPOINT_UNSTABLE}}
          # List of paths to Protobuf files that should be checked with support
          # for glob syntax. Can either be specified as a space separated list
          # ('event1.proto event2.proto'), or a multiline string
          data-asset-options: |
            --source-type protobuf \
            --files ./event_schemas/*.proto
      - name: Register Avro Data Assets
        uses: gabledata/cicd/github-actions/register-data-assets@latest
        with:
          # Provide API key and endpoint secrets
          gable-api-key: ${{secrets.GABLE_API_KEY_UNSTABLE}}
          gable-api-endpoint: ${{secrets.GABLE_API_ENDPOINT_UNSTABLE}}
          # List of paths to Avro files that should be checked with support for
          # glob syntax. Can either be specified as a space separated list
          # ('event1.avsc event2.avsc'), or a multiline string
          data-asset-options: |
            --source-type avro \
            --files ./event_schemas/*.avsc
      - name: Register Postgres Data Assets
        uses: gabledata/cicd/github-actions/register-data-assets@latest
        with:
          # Provide API key and endpoint secrets
          gable-api-key: ${{secrets.GABLE_API_KEY_UNSTABLE}}
          gable-api-endpoint: ${{secrets.GABLE_API_ENDPOINT_UNSTABLE}}
          # Connection options for the Postgres database to register, given as
          # a multiline string.
          # NOTE(review): the proxy-* values match the postgres service
          # container defined in this job (port 5432, user/password
          # 'postgres'); presumably the schema is read from that container
          # while assets are registered under --host - confirm
          data-asset-options: |
            --source-type postgres \
            --host prod.store.com \
            --port 5432 \
            --db tutorial \
            --schema public \
            --proxy-host 0.0.0.0 \
            --proxy-port 5432 \
            --proxy-user postgres \
            --proxy-password postgres