diff --git a/.github/workflows/pr_tests_spark.yml b/.github/workflows/pr_tests_spark.yml
index 02909c2f..974a05b3 100644
--- a/.github/workflows/pr_tests_spark.yml
+++ b/.github/workflows/pr_tests_spark.yml
@@ -1,189 +1,187 @@
 name: pr_tests_spark
 on:
-  pull_request:
-    branches:
-      - main
-  push:
-    branches:
-      - feature/**
-      - dev
-      - staging
-      - template-spark-tests
-      - spark_prep
+  pull_request:
+    branches:
+      - main
+  push:
+    branches:
+      - feature/**
+      - dev
+      - staging
+      - template-spark-tests
+      - spark_prep
 concurrency: dbt_integration_tests
 env:
-  DBT_PROFILES_DIR: ./ci
-  SPARK_MASTER_HOST: spark-master
-  SPARK_USER: spark
-  SPARK_SCHEMA: default
+  DBT_PROFILES_DIR: ./ci
+  SPARK_MASTER_HOST: spark-master
+  SPARK_USER: spark
+  SPARK_SCHEMA: default
+
 jobs:
-  pr_tests:
-    name: pr_tests
-    runs-on: ubuntu-latest
-    defaults:
-      run:
-        working-directory: ./integration_tests
-    strategy:
-      matrix:
-        dbt_version:
-          - 1.*
-        warehouse:
-          - spark
-    steps:
-      - name: Check out
-        uses: actions/checkout@v3
-
-      - name: Set SCHEMA_SUFFIX env
-        run: >-
-          echo "SCHEMA_SUFFIX=$(echo ${DBT_VERSION%.*} | tr . _)" >> $GITHUB_ENV
-        env:
-          DBT_VERSION: '${{ matrix.dbt_version }}'
-
-      - name: Set DEFAULT_TARGET env
-        run: |
-          echo "DEFAULT_TARGET=${{ matrix.warehouse }}" >> $GITHUB_ENV
-
-      - name: Python setup
-        uses: actions/setup-python@v4
-        with:
-          python-version: 3.8.x
-
-      - name: Pip cache
-        uses: actions/cache@v3
-        with:
-          path: ~/.cache/pip
-          key: >-
-            ${{ runner.os }}-pip-${{ matrix.dbt_version }}-${{ matrix.warehouse }}
-          restore-keys: >
-            ${{ runner.os }}-pip-${{ matrix.dbt_version }}-${{ matrix.warehouse }}
-
-      - name: Install spark dependencies
-        run: |
-          pip install --upgrade pip wheel setuptools
-          pip install -Iv "dbt-spark[PyHive]"==${{ matrix.dbt_version }} --upgrade
-          dbt deps
-
-      - name: Set up Docker and Docker Compose
-        uses: docker/setup-buildx-action@v1
-
-      - name: Check Docker and Docker Compose versions
-        run: |
-          docker --version
-          docker-compose --version
-
-      - name: Debug environment
-        run: |
-          pwd
-          ls -la
-          env
-
-      - name: Create Dockerfile
-        run: |
-          cat << EOF > Dockerfile
-          FROM openjdk:11-jdk-slim
-
-          ENV SPARK_VERSION=3.5.1
-          ENV HADOOP_VERSION=3.3.4
-          ENV SPARK_HOME=/spark
-
-          RUN apt-get update && apt-get install -y curl wget procps rsync ssh iputils-ping net-tools
-
-          RUN wget --tries=5 --retry-connrefused --waitretry=1 --timeout=20 https://downloads.apache.org/spark/spark-\${SPARK_VERSION}/spark-\${SPARK_VERSION}-bin-hadoop3.tgz && \
-              tar -xvzf spark-\${SPARK_VERSION}-bin-hadoop3.tgz && \
-              mv spark-\${SPARK_VERSION}-bin-hadoop3 \${SPARK_HOME} && \
-              rm spark-\${SPARK_VERSION}-bin-hadoop3.tgz
-
-          ENV PATH=\$PATH:\${SPARK_HOME}/bin:\${SPARK_HOME}/sbin
-
-          WORKDIR \${SPARK_HOME}
-
-          CMD ["bash"]
-          EOF
-
-      - name: Create docker-compose.yml
-        run: |
-          cat << EOF > docker-compose.yml
-          version: '3'
-
-          services:
-            spark-master:
-              build: .
-              command: |
-                bash -c "
-                /spark/sbin/start-master.sh &&
-                /spark/sbin/start-thriftserver.sh --master spark://spark-master:7077 &&
-                tail -f /spark/logs/*"
-              ports:
-                - "8080:8080"
-                - "7077:7077"
-                - "10000:10000"
-              environment:
-                - SPARK_MODE=master
-                - SPARK_MASTER_HOST=spark-master
-                - SPARK_MASTER_PORT=7077
-                - SPARK_MASTER_WEBUI_PORT=8080
-
-            spark-worker:
-              build: .
-              command: bin/spark-class org.apache.spark.deploy.worker.Worker spark://spark-master:7077
-              depends_on:
-                - spark-master
-              environment:
-                - SPARK_MODE=worker
-                - SPARK_WORKER_CORES=2
-                - SPARK_WORKER_MEMORY=2g
-                - SPARK_WORKER_PORT=8081
-                - SPARK_WORKER_WEBUI_PORT=8081
-                - SPARK_MASTER=spark://spark-master:7077
-          EOF
-
-      - name: Check file permissions
-        run: |
-          ls -l Dockerfile
-          ls -l docker-compose.yml
-
-      - name: Validate Docker Compose file
-        run: docker-compose config
-
-      - name: Build and start Spark cluster
-        run: |
-          docker-compose build --no-cache
-          docker-compose up -d
-
-      - name: Wait for services to start
-        run: |
-          echo "Waiting for Spark services to start..."
-          sleep 60
-
-      - name: Check Spark cluster status
-        run: |
-          docker-compose exec -T spark-master bash -c "jps && ps aux | grep spark && netstat -tuln"
-          docker-compose exec -T spark-worker bash -c "jps && ps aux | grep spark && netstat -tuln"
-
-      - name: Run Spark test job
-        run: >
-          docker-compose exec -T spark-master bin/spark-submit
-          --master spark://spark-master:7077 --class
-          org.apache.spark.examples.SparkPi
-          examples/jars/spark-examples_2.12-3.5.1.jar 10
-
-      - name: Wait for Thrift Server
-        run: |
-          echo "Waiting for Thrift Server to be fully operational..."
-          sleep 30
-
-      - name: 'Pre-test: Drop ci schemas'
-        run: |
-          dbt run-operation post_ci_cleanup --target spark
-
-      - name: Run tests
-        run: ./.scripts/integration_tests.sh -d spark
-
-      - name: 'Post-test: Drop ci schemas'
-        run: |
-          dbt run-operation post_ci_cleanup --target spark
-
-      - name: Cleanup Spark cluster
-        if: always()
-        run: |
-          docker-compose down
\ No newline at end of file
+  pr_tests:
+    name: pr_tests
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: ./integration_tests
+    strategy:
+      matrix:
+        dbt_version:
+          - 1.*
+        warehouse:
+          - spark
+    steps:
+      - name: Check out
+        uses: actions/checkout@v3
+
+      - name: Set SCHEMA_SUFFIX env
+        run: >-
+          echo "SCHEMA_SUFFIX=$(echo ${DBT_VERSION%.*} | tr . _)" >> $GITHUB_ENV
+        env:
+          DBT_VERSION: '${{ matrix.dbt_version }}'
+
+      - name: Set DEFAULT_TARGET env
+        run: |
+          echo "DEFAULT_TARGET=${{ matrix.warehouse }}" >> $GITHUB_ENV
+
+      - name: Python setup
+        uses: actions/setup-python@v4
+        with:
+          python-version: 3.8.x
+
+      - name: Pip cache
+        uses: actions/cache@v3
+        with:
+          path: ~/.cache/pip
+          key: >-
+            ${{ runner.os }}-pip-${{ matrix.dbt_version }}-${{ matrix.warehouse }}
+          restore-keys: >-
+            ${{ runner.os }}-pip-${{ matrix.dbt_version }}-${{ matrix.warehouse }}
+
+      - name: Install spark dependencies
+        run: |
+          pip install --upgrade pip wheel setuptools
+          pip install -Iv "dbt-spark[PyHive]==${{ matrix.dbt_version }}" --upgrade
+          dbt deps
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v1
+
+      - name: Install Docker Compose
+        run: |
+          sudo curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
+          sudo chmod +x /usr/local/bin/docker-compose
+
+      - name: Check Docker and Docker Compose versions
+        run: |
+          docker --version
+          docker-compose --version
+
+      - name: Create Dockerfile
+        run: |
+          cat << EOF > Dockerfile
+          FROM openjdk:11-jdk-slim
+
+          ENV SPARK_VERSION=3.5.1
+          ENV HADOOP_VERSION=3.3.4
+          ENV SPARK_HOME=/spark
+
+          RUN apt-get update && apt-get install -y curl wget procps rsync ssh iputils-ping net-tools
+
+          RUN wget --tries=5 --retry-connrefused --waitretry=1 --timeout=20 https://downloads.apache.org/spark/spark-\${SPARK_VERSION}/spark-\${SPARK_VERSION}-bin-hadoop3.tgz && \
+              tar -xvzf spark-\${SPARK_VERSION}-bin-hadoop3.tgz && \
+              mv spark-\${SPARK_VERSION}-bin-hadoop3 \${SPARK_HOME} && \
+              rm spark-\${SPARK_VERSION}-bin-hadoop3.tgz
+
+          ENV PATH=\$PATH:\${SPARK_HOME}/bin:\${SPARK_HOME}/sbin
+
+          WORKDIR \${SPARK_HOME}
+
+          CMD ["bash"]
+          EOF
+
+      - name: Create docker-compose.yml
+        run: |
+          cat << EOF > docker-compose.yml
+          version: '3'
+
+          services:
+            spark-master:
+              build: .
+              command: |
+                bash -c "
+                /spark/sbin/start-master.sh &&
+                /spark/sbin/start-thriftserver.sh --master spark://spark-master:7077 &&
+                tail -f /spark/logs/*"
+              ports:
+                - "8080:8080"
+                - "7077:7077"
+                - "10000:10000"
+              environment:
+                - SPARK_MODE=master
+                - SPARK_MASTER_HOST=spark-master
+                - SPARK_MASTER_PORT=7077
+                - SPARK_MASTER_WEBUI_PORT=8080
+
+            spark-worker:
+              build: .
+              command: bin/spark-class org.apache.spark.deploy.worker.Worker spark://spark-master:7077
+              depends_on:
+                - spark-master
+              environment:
+                - SPARK_MODE=worker
+                - SPARK_WORKER_CORES=2
+                - SPARK_WORKER_MEMORY=2g
+                - SPARK_WORKER_PORT=8081
+                - SPARK_WORKER_WEBUI_PORT=8081
+                - SPARK_MASTER=spark://spark-master:7077
+          EOF
+
+      - name: Debug Docker Compose file
+        run: cat docker-compose.yml
+
+      - name: Build and start Spark cluster
+        run: |
+          docker-compose build --no-cache
+          docker-compose up -d
+
+      - name: Check running containers
+        run: docker ps
+
+      - name: Wait for services to start
+        run: |
+          echo "Waiting for Spark services to start..."
+          sleep 60
+
+      - name: Check Spark cluster status
+        run: |
+          docker-compose exec -T spark-master bash -c "jps && ps aux | grep spark && netstat -tuln"
+          docker-compose exec -T spark-worker bash -c "jps && ps aux | grep spark && netstat -tuln"
+
+      - name: Run Spark test job
+        run: >
+          docker-compose exec -T spark-master bin/spark-submit
+          --master spark://spark-master:7077 --class
+          org.apache.spark.examples.SparkPi
+          examples/jars/spark-examples_2.12-3.5.1.jar 10
+
+      - name: Wait for Thrift Server
+        run: |
+          echo "Waiting for Thrift Server to be fully operational..."
+          sleep 30
+
+      - name: 'Pre-test: Drop ci schemas'
+        run: |
+          dbt run-operation post_ci_cleanup --target spark
+
+      - name: Run tests
+        run: ./.scripts/integration_tests.sh -d spark
+
+      - name: 'Post-test: Drop ci schemas'
+        run: |
+          dbt run-operation post_ci_cleanup --target spark
+
+      - name: Cleanup Spark cluster
+        if: always()
+        run: |
+          docker-compose down
\ No newline at end of file