diff --git a/.github/workflows/pr_tests.yml b/.github/workflows/pr_tests.yml
index a63cd3db..5ca10006 100644
--- a/.github/workflows/pr_tests.yml
+++ b/.github/workflows/pr_tests.yml
@@ -61,7 +61,7 @@ jobs:
     strategy:
       matrix:
         dbt_version: ["1.*"]
-        warehouse: ["postgres", "bigquery", "snowflake", "databricks", "redshift"] # TODO: Add RS self-hosted runner
+        warehouse: ["postgres", "bigquery", "snowflake", "databricks", "redshift","spark"] # TODO: Add RS self-hosted runner
     services:
       postgres:
         image: postgres:latest
@@ -82,7 +82,18 @@ jobs:
     steps:
       - name: Check out
         uses: actions/checkout@v3
-
+      - name: Configure Docker credentials
+        uses: docker/login-action@v2
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v1
+        with:
+          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          aws-region: eu-west-1
+
       # Remove '*' and replace '.' with '_' in DBT_VERSION & set as SCHEMA_SUFFIX.
       # SCHEMA_SUFFIX allows us to run multiple versions of dbt in parallel without overwriting the output tables
       - name: Set SCHEMA_SUFFIX env
@@ -118,10 +129,24 @@ jobs:
       - name: Install spark dependencies
         run: |
           pip install --upgrade pip wheel setuptools
-          pip install -Iv "dbt-${{ matrix.warehouse }}[ODBC]"==${{ matrix.dbt_version }} --upgrade
+          pip install -Iv "dbt-${{ matrix.warehouse }}[PyHive]"==${{ matrix.dbt_version }} --upgrade
           dbt deps
         if: ${{matrix.warehouse == 'spark'}}
+
+      - name: Install Docker Compose
+        run: |
+          sudo curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
+          sudo chmod +x /usr/local/bin/docker-compose
+
+      - name: Build and start Spark cluster
+        working-directory: .github/workflows/spark_deployment
+        run: |
+          docker-compose up -d
+          echo "Waiting for Spark services to start..."
+          sleep 90
+        if: ${{matrix.warehouse == 'spark'}}
+
 
       - name: "Pre-test: Drop ci schemas"
         run: |
           dbt run-operation post_ci_cleanup --target ${{ matrix.warehouse }}
diff --git a/.github/workflows/pr_tests_spark.yml b/.github/workflows/pr_tests_spark.yml
index 9f1cf133..5ab037eb 100644
--- a/.github/workflows/pr_tests_spark.yml
+++ b/.github/workflows/pr_tests_spark.yml
@@ -2,6 +2,7 @@ name: pr_tests_spark
 
 on:
   pull_request:
+    - 'pause'
 
 concurrency: dbt_integration_tests
 
@@ -78,50 +79,6 @@ jobs:
           echo "Waiting for Spark services to start..."
           sleep 90
 
-      - name: Check running containers
-        working-directory: .github/workflows/spark_deployment
-        run: docker ps
-
-      - name: Check Docker network
-        working-directory: .github/workflows/spark_deployment
-        run: |
-          docker network ls
-          # docker network inspect spark-network
-
-      - name: Print Docker logs
-        working-directory: .github/workflows/spark_deployment
-        run: |
-          echo "Docker logs for spark-master:"
-          docker-compose logs --tail=1000 spark-master
-          echo "Docker logs for spark-worker:"
-          docker-compose logs --tail=1000 spark-worker
-          echo "Docker logs for thrift-server:"
-          docker-compose logs --tail=1000 thrift-server
-
-      - name: Verify Spark configuration
-        working-directory: .github/workflows/spark_deployment
-        run: |
-          echo "Verifying Spark configuration..."
-          docker-compose exec -T spark-master bash -c "cat /spark/conf/spark-defaults.conf"
-
-      - name: Wait for Thrift Server
-        run: |
-          echo "Waiting for Thrift Server to be fully operational..."
-          sleep 60
-
-      - name: Check ThriftServer Process
-        working-directory: .github/workflows/spark_deployment
-        run: docker-compose exec -T thrift-server bash -c "ps aux | grep ThriftServer"
-
-      - name: Check Latest ThriftServer Log
-        working-directory: .github/workflows/spark_deployment
-        run: docker-compose exec -T thrift-server bash -c "tail -n 50 /spark/logs/\$(ls -t /spark/logs/ | grep thriftserver | head -n1)"
-
-      - name: Test ThriftServer connection with Beeline
-        working-directory: .github/workflows/spark_deployment
-        run: |
-          docker-compose exec -T thrift-server bash -c '/spark/bin/beeline -u "jdbc:hive2://localhost:10000" -e "SHOW DATABASES;"'
-
       - name: "Pre-test: Drop ci schemas"
         run: |
           dbt run-operation post_ci_cleanup --target spark