Prepare Utils codebase for Spark (Iceberg) Support #36
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow identity as shown in the GitHub Actions UI.
name: pr_tests_spark

# Trigger: pull_request with no filters, i.e. GitHub's default activity types
# for that event.
on:
  pull_request:
# Workflow-level environment: exported to every step of every job.
env:
  # Directory dbt searches for profiles.yml. The path is relative, so it is
  # resolved against the working directory of the step that invokes dbt.
  DBT_PROFILES_DIR: ./ci
  # Spark connection settings.
  # NOTE(review): presumably read by the dbt profile under ./ci - confirm.
  SPARK_MASTER_HOST: localhost
  SPARK_USER: spark
  SPARK_SCHEMA: default
  # Both variable names are set to the same region.
  AWS_REGION: eu-west-1
  AWS_DEFAULT_REGION: eu-west-1
jobs:
  # Single job: starts a Spark cluster with docker-compose on the runner,
  # checks that the Thrift server answers, then runs the dbt integration tests
  # against the `spark` target.
  pr_tests_spark:
    name: pr_tests_spark
    runs-on: ubuntu-latest
    defaults:
      run:
        # Applies to every `run` step that does not set its own
        # working-directory.
        working-directory: ./integration_tests
    strategy:
      matrix:
        # Quoted so the version specifier is always read as a string.
        dbt_version:
          - "1.*"
        warehouse:
          - spark
    steps:
      - name: Check out
        uses: actions/checkout@v3

      - name: Log in to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      # `${DBT_VERSION%.*}` removes the shortest trailing ".<anything>" from
      # the matrix version; `tr` then turns remaining dots into underscores.
      # The result is exported to later steps as SCHEMA_SUFFIX.
      - name: Set SCHEMA_SUFFIX env
        run: >-
          echo "SCHEMA_SUFFIX=$(echo ${DBT_VERSION%.*} | tr . _)" >> $GITHUB_ENV
        env:
          DBT_VERSION: '${{ matrix.dbt_version }}'

      # Exports the matrix warehouse name to later steps as DEFAULT_TARGET.
      - name: Set DEFAULT_TARGET env
        run: |
          echo "DEFAULT_TARGET=${{ matrix.warehouse }}" >> $GITHUB_ENV

      - name: Python setup
        uses: actions/setup-python@v4
        with:
          python-version: "3.8.x"

      # Installs dbt-spark (with the PyHive extra) at the matrix version plus
      # the AWS tooling, then resolves the dbt package dependencies.
      - name: Install dependencies
        run: |
          pip install --upgrade pip wheel setuptools
          pip install -Iv "dbt-spark[PyHive]==${{ matrix.dbt_version }}" --upgrade
          pip install boto3 awscli
          dbt deps

      # NOTE(review): presumably required by the Spark deployment for AWS-backed
      # storage/catalog access - confirm against spark_deployment.
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: eu-west-1

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v1

      # Downloads the standalone docker-compose 1.29.2 binary matching the
      # runner's OS and architecture and makes it executable.
      - name: Install Docker Compose
        run: |
          sudo curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
          sudo chmod +x /usr/local/bin/docker-compose

      # Starts the services in the background, then pauses for a fixed 90 s to
      # give them time to come up (no readiness probe is performed here).
      - name: Build and start Spark cluster
        working-directory: .github/workflows/spark_deployment
        run: |
          # docker-compose build
          docker-compose up -d
          echo "Waiting for Spark services to start..."
          sleep 90

      # --- Diagnostics: output only, useful when a later step fails ---
      - name: Check running containers
        working-directory: .github/workflows/spark_deployment
        run: docker ps

      - name: Check Docker network
        working-directory: .github/workflows/spark_deployment
        run: |
          docker network ls
          # docker network inspect spark-network

      - name: Print Docker logs
        working-directory: .github/workflows/spark_deployment
        run: |
          echo "Docker logs for spark-master:"
          docker-compose logs --tail=1000 spark-master
          echo "Docker logs for spark-worker:"
          docker-compose logs --tail=1000 spark-worker
          echo "Docker logs for thrift-server:"
          docker-compose logs --tail=1000 thrift-server

      - name: Verify Spark configuration
        working-directory: .github/workflows/spark_deployment
        run: |
          echo "Verifying Spark configuration..."
          docker-compose exec -T spark-master bash -c "cat /spark/conf/spark-defaults.conf"

      # Additional fixed 60 s pause before probing the Thrift server.
      - name: Wait for Thrift Server
        run: |
          echo "Waiting for Thrift Server to be fully operational..."
          sleep 60

      # The bracketed pattern `[T]hriftServer` stops grep (and the wrapping
      # `bash -c`) from matching their own command lines, so this step fails
      # when no ThriftServer process is running instead of always passing.
      - name: Check ThriftServer Process
        working-directory: .github/workflows/spark_deployment
        run: |
          docker-compose exec -T thrift-server bash -c "ps aux | grep '[T]hriftServer'"

      # `\$` defers the command substitution to the shell inside the container,
      # which picks the most recently modified thriftserver log file.
      - name: Check Latest ThriftServer Log
        working-directory: .github/workflows/spark_deployment
        run: docker-compose exec -T thrift-server bash -c "tail -n 50 /spark/logs/\$(ls -t /spark/logs/ | grep thriftserver | head -n1)"

      # End-to-end probe: a JDBC query through Beeline on port 10000.
      - name: Test ThriftServer connection with Beeline
        working-directory: .github/workflows/spark_deployment
        run: |
          docker-compose exec -T thrift-server bash -c '/spark/bin/beeline -u "jdbc:hive2://localhost:10000" -e "SHOW DATABASES;"'

      # Runs the cleanup operation before the tests as well as after them.
      - name: "Pre-test: Drop ci schemas"
        run: |
          dbt run-operation post_ci_cleanup --target spark

      - name: Run tests
        run: ./.scripts/integration_tests.sh -d spark

      # always(): run the cleanup even when an earlier step (e.g. the tests)
      # failed, so CI schemas are not left behind by a red build.
      - name: "Post-test: Drop ci schemas"
        if: always()
        run: |
          dbt run-operation post_ci_cleanup --target spark