Skip to content

Commit

Permalink
We can only hope
Browse files: browse the repository at this point in the history
  • Loading branch information
ilias1111 committed Aug 7, 2024
1 parent 3a82d7e commit 59b1eb6
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 39 deletions.
46 changes: 7 additions & 39 deletions .github/workflows/pr_tests_spark.yml
Original file line number | Diff line number | Diff line change
Expand Up @@ -10,6 +10,7 @@ env:
SPARK_SCHEMA: default
AWS_REGION: eu-west-1
AWS_DEFAULT_REGION: eu-west-1
DOCKER_PLATFORM: linux/amd64

jobs:
pr_tests_spark:
Expand Down Expand Up @@ -63,65 +64,32 @@ jobs:
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: eu-west-1

- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1

- name: Install Docker Compose
run: |
sudo curl -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
sudo chmod +x /usr/local/bin/docker-compose
- name: Build and start Spark cluster
- name: Start Spark cluster
working-directory: .github/workflows/spark_deployment
run: |
# docker-compose build
docker-compose up -d
echo "Waiting for Spark services to start..."
sleep 90
sleep 120
- name: Check running containers
working-directory: .github/workflows/spark_deployment
run: docker ps

- name: Check Docker network
working-directory: .github/workflows/spark_deployment
run: |
docker network ls
# docker network inspect spark-network
- name: Print Docker logs
if: failure()
working-directory: .github/workflows/spark_deployment
run: |
echo "Docker logs for spark-master:"
docker-compose logs --tail=1000 spark-master
docker-compose logs spark-master
echo "Docker logs for spark-worker:"
docker-compose logs --tail=1000 spark-worker
docker-compose logs spark-worker
echo "Docker logs for thrift-server:"
docker-compose logs --tail=1000 thrift-server
- name: Verify Spark configuration
working-directory: .github/workflows/spark_deployment
run: |
echo "Verifying Spark configuration..."
docker-compose exec -T spark-master bash -c "cat /spark/conf/spark-defaults.conf"
- name: Wait for Thrift Server
run: |
echo "Waiting for Thrift Server to be fully operational..."
sleep 60
- name: Check ThriftServer Process
working-directory: .github/workflows/spark_deployment
run: docker-compose exec -T thrift-server bash -c "ps aux | grep ThriftServer"

- name: Check Latest ThriftServer Log
working-directory: .github/workflows/spark_deployment
run: docker-compose exec -T thrift-server bash -c "tail -n 50 /spark/logs/\$(ls -t /spark/logs/ | grep thriftserver | head -n1)"

- name: Test ThriftServer connection with Beeline
working-directory: .github/workflows/spark_deployment
run: |
docker-compose exec -T thrift-server bash -c '/spark/bin/beeline -u "jdbc:hive2://localhost:10000" -e "SHOW DATABASES;"'
docker-compose logs thrift-server
- name: "Pre-test: Drop ci schemas"
run: |
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/spark_deployment/docker-compose.yml
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,6 +5,7 @@ networks:
services:
spark-master:
image: snowplow/spark-s3-iceberg:v2
platform: ${DOCKER_PLATFORM:-linux/amd64}
command: ["/bin/bash", "-c", "/opt/spark/sbin/start-master.sh -h spark-master --properties-file /opt/spark/conf/spark-defaults.conf && tail -f /opt/spark/logs/spark--org.apache.spark.deploy.master.Master-1-*.out"]
hostname: spark-master
ports:
Expand All @@ -26,6 +27,7 @@ services:

spark-worker:
image: snowplow/spark-s3-iceberg:v2
platform: ${DOCKER_PLATFORM:-linux/amd64}
command: ["/bin/bash", "-c", "sleep 10 && /opt/spark/sbin/start-worker.sh spark://spark-master:7077 --properties-file /opt/spark/conf/spark-defaults.conf && tail -f /opt/spark/logs/spark--org.apache.spark.deploy.worker.Worker-*.out"]
depends_on:
- spark-master
Expand All @@ -46,6 +48,7 @@ services:

thrift-server:
image: snowplow/spark-s3-iceberg:v2
platform: ${DOCKER_PLATFORM:-linux/amd64}
command: ["/bin/bash", "-c", "sleep 30 && /opt/spark/sbin/start-thriftserver.sh --master spark://spark-master:7077 --driver-memory 2g --executor-memory 3g --hiveconf hive.server2.thrift.port=10000 --hiveconf hive.server2.thrift.bind.host=0.0.0.0 --conf spark.sql.hive.thriftServer.async=true --conf spark.sql.hive.thriftServer.workerQueue.size=2000 --conf spark.sql.hive.thriftServer.maxWorkerThreads=100 --conf spark.sql.hive.thriftServer.minWorkerThreads=50 && tail -f /opt/spark/logs/spark--org.apache.spark.sql.hive.thriftserver.HiveThriftServer2-*.out"]
ports:
- '10000:10000'
Expand Down

0 comments on commit 59b1eb6

Please sign in to comment.