
Prepare Utils codebase for Spark (Iceberg) Support #42

Workflow file for this run

name: pr_tests_spark
on:
  pull_request:
concurrency: dbt_integration_tests
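# The concurrency group above serializes workflow runs, so parallel PRs cannot
# write to the same shared integration-test schemas at the same time.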
env:
  DBT_PROFILES_DIR: ./ci
  SPARK_MASTER_HOST: localhost
  SPARK_USER: spark
  SPARK_SCHEMA: default
  AWS_REGION: eu-west-1
  AWS_DEFAULT_REGION: eu-west-1
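# DBT_PROFILES_DIR points dbt at a profiles.yml checked into the repo rather
# than ~/.dbt; given the working-directory below, it should resolve to
# integration_tests/ci/profiles.yml. A rough, hedged sketch of its spark target
# (profile name, method, host and port are assumptions inferred from the env
# vars above and the Thrift Server started below; the real file may differ):
#
#   integration_tests:
#     target: spark
#     outputs:
#       spark:
#         type: spark
#         method: thrift
#         host: "{{ env_var('SPARK_MASTER_HOST') }}"
#         port: 10000
#         user: "{{ env_var('SPARK_USER') }}"
#         schema: "{{ env_var('SPARK_SCHEMA') }}"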
jobs:
  pr_tests_spark:
    name: pr_tests_spark
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: ./integration_tests
    strategy:
      matrix:
        dbt_version:
          - 1.*
        warehouse:
          - spark
    steps:
      - name: Check out
        uses: actions/checkout@v3
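      # ${DBT_VERSION%.*} strips the shortest trailing ".<anything>" and tr
      # swaps any remaining dots for underscores, so the matrix value "1.*"
      # yields the schema suffix "1" (and e.g. "1.5.0" would yield "1_5").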
      - name: Set SCHEMA_SUFFIX env
        run: >-
          echo "SCHEMA_SUFFIX=$(echo ${DBT_VERSION%.*} | tr . _)" >> $GITHUB_ENV
        env:
          DBT_VERSION: '${{ matrix.dbt_version }}'
      - name: Set DEFAULT_TARGET env
        run: |
          echo "DEFAULT_TARGET=${{ matrix.warehouse }}" >> $GITHUB_ENV
      - name: Python setup
        uses: actions/setup-python@v4
        with:
          python-version: 3.8.x
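      # pip's -I (--ignore-installed) forces the pinned dbt-spark version to
      # win over anything preinstalled on the runner; the PyHive extra pulls in
      # the Thrift client that the spark target's thrift connection method uses.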
      - name: Install dependencies
        run: |
          pip install --upgrade pip wheel setuptools
          pip install -Iv "dbt-spark[PyHive]==${{ matrix.dbt_version }}" --upgrade
          pip install boto3 awscli
          dbt deps
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: eu-west-1
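      # AWS credentials and region are presumably needed by the Iceberg setup
      # (for example an S3-backed warehouse or catalog configured inside the
      # Spark images); the region is pinned to match the env block above.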
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v1
      - name: Install Docker Compose
        run: |
          sudo curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
          sudo chmod +x /usr/local/bin/docker-compose
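      # 1.29.2 is the last standalone Compose v1 release, installed as a single
      # binary; hence the hyphenated `docker-compose` invocations below rather
      # than the newer `docker compose` plugin syntax.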
      - name: Build and start Spark cluster
        working-directory: .github/workflows/spark_deployment
        run: |
          docker-compose build
          docker-compose up -d
          echo "Waiting for Spark services to start..."
          sleep 90
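      # The fixed sleep is a blunt readiness check. A hedged alternative
      # sketch, assuming the Thrift Server publishes port 10000 on the host,
      # would be to poll until the port accepts connections:
      #
      #   for i in $(seq 1 30); do
      #     nc -z localhost 10000 && break
      #     sleep 5
      #   done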
      - name: Check running containers
        working-directory: .github/workflows/spark_deployment
        run: docker ps
      - name: Check Docker network
        working-directory: .github/workflows/spark_deployment
        run: |
          docker network ls
          # docker network inspect spark-network
      - name: Print Docker logs
        working-directory: .github/workflows/spark_deployment
        run: |
          echo "Docker logs for spark-master:"
          docker-compose logs --tail=1000 spark-master
          echo "Docker logs for spark-worker:"
          docker-compose logs --tail=1000 spark-worker
          echo "Docker logs for thrift-server:"
          docker-compose logs --tail=1000 thrift-server
      - name: Verify Spark configuration
        working-directory: .github/workflows/spark_deployment
        run: |
          echo "Verifying Spark configuration..."
          docker-compose exec -T spark-master bash -c "cat /spark/conf/spark-defaults.conf"
      - name: Wait for Thrift Server
        run: |
          echo "Waiting for Thrift Server to be fully operational..."
          sleep 60
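      # Another fixed wait: the Thrift Server can take a while to come up fully
      # after the containers start, so the workflow pads startup with sleep
      # rather than polling (see the sketch above for a polling alternative).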
      - name: Check ThriftServer Process
        working-directory: .github/workflows/spark_deployment
        run: docker-compose exec -T thrift-server bash -c "ps aux | grep ThriftServer"
      - name: Check Latest ThriftServer Log
        working-directory: .github/workflows/spark_deployment
        run: docker-compose exec -T thrift-server bash -c "tail -n 50 /spark/logs/\$(ls -t /spark/logs/ | grep thriftserver | head -n1)"
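      # `ls -t` sorts newest-first, so grep + head -n1 picks the most recent
      # thriftserver log file; the escaped \$ defers the command substitution
      # to the shell inside the container instead of the runner's shell.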
      - name: Test ThriftServer connection with Beeline
        working-directory: .github/workflows/spark_deployment
        run: |
          docker-compose exec -T thrift-server bash -c '/spark/bin/beeline -u "jdbc:hive2://localhost:10000" -e "SHOW DATABASES;"'
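      # Beeline smoke test: if "SHOW DATABASES;" succeeds over
      # jdbc:hive2://localhost:10000, the endpoint dbt-spark's thrift method
      # will use is accepting connections (assuming the container publishes
      # port 10000 to the runner, as SPARK_MASTER_HOST=localhost implies).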
- name: "Pre-test: Drop ci schemas"
run: |
dbt run-operation post_ci_cleanup --target spark
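      # post_ci_cleanup appears to be a run-operation macro defined in this
      # project or its dbt packages; it drops leftover CI schemas so each run
      # starts from a clean slate (and tidies up again after the tests).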
      - name: Run tests
        run: ./.scripts/integration_tests.sh -d spark
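      # integration_tests.sh (resolved relative to ./integration_tests) drives
      # the actual dbt build; -d appears to select the warehouse target,
      # matching the matrix value exported as DEFAULT_TARGET above.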
- name: "Post-test: Drop ci schemas"
run: |
dbt run-operation post_ci_cleanup --target spark