-
Notifications
You must be signed in to change notification settings - Fork 8
135 lines (112 loc) · 4.33 KB
/
pr_tests_spark.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# Pull-request CI: starts a Spark cluster with Docker Compose on the runner,
# checks that the Thrift server is reachable, then runs the dbt integration
# tests against it.
name: pr_tests_spark

on:
  pull_request:

env:
  DBT_PROFILES_DIR: ./ci
  SPARK_MASTER_HOST: localhost
  SPARK_USER: spark
  SPARK_SCHEMA: default
  AWS_REGION: eu-west-1
  AWS_DEFAULT_REGION: eu-west-1

jobs:
  pr_tests_spark:
    name: pr_tests_spark
    runs-on: ubuntu-latest

    defaults:
      run:
        # Steps run from here unless they set their own working-directory.
        working-directory: ./integration_tests

    strategy:
      matrix:
        # Quoted so the version spec is always read as a plain string.
        dbt_version:
          - '1.*'
        warehouse:
          - spark

    steps:
      - name: Check out
        uses: actions/checkout@v3

      - name: Log in to Docker Hub
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      # Drops everything from the last dot of DBT_VERSION onwards and turns
      # any remaining dots into underscores (e.g. "1.*" -> "1").
      - name: Set SCHEMA_SUFFIX env
        run: >-
          echo "SCHEMA_SUFFIX=$(echo "${DBT_VERSION%.*}" | tr . _)" >> "$GITHUB_ENV"
        env:
          DBT_VERSION: '${{ matrix.dbt_version }}'

      - name: Set DEFAULT_TARGET env
        run: |
          echo "DEFAULT_TARGET=${{ matrix.warehouse }}" >> "$GITHUB_ENV"

      - name: Python setup
        uses: actions/setup-python@v4
        with:
          python-version: '3.8.x'

      - name: Install dependencies
        run: |
          pip install --upgrade pip wheel setuptools
          pip install -Iv "dbt-spark[PyHive]==${{ matrix.dbt_version }}" --upgrade
          pip install boto3 awscli
          dbt deps

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: eu-west-1

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v1

      # --fail makes curl exit non-zero on an HTTP error instead of saving
      # the error page as the docker-compose binary.
      - name: Install Docker Compose
        run: |
          sudo curl --fail -L "https://github.com/docker/compose/releases/download/1.29.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
          sudo chmod +x /usr/local/bin/docker-compose

      # Fixed wait: nothing in this step polls the services for readiness.
      - name: Build and start Spark cluster
        working-directory: .github/workflows/spark_deployment
        run: |
          # docker-compose build
          docker-compose up -d
          echo "Waiting for Spark services to start..."
          sleep 90

      - name: Check running containers
        working-directory: .github/workflows/spark_deployment
        run: docker ps

      - name: Check Docker network
        working-directory: .github/workflows/spark_deployment
        run: |
          docker network ls
          # docker network inspect spark-network

      - name: Print Docker logs
        working-directory: .github/workflows/spark_deployment
        run: |
          echo "Docker logs for spark-master:"
          docker-compose logs --tail=1000 spark-master
          echo "Docker logs for spark-worker:"
          docker-compose logs --tail=1000 spark-worker
          echo "Docker logs for thrift-server:"
          docker-compose logs --tail=1000 thrift-server

      - name: Verify Spark configuration
        working-directory: .github/workflows/spark_deployment
        run: |
          echo "Verifying Spark configuration..."
          docker-compose exec -T spark-master bash -c "cat /spark/conf/spark-defaults.conf"

      # Second fixed wait, on top of the 90 seconds after `docker-compose up`.
      - name: Wait for Thrift Server
        run: |
          echo "Waiting for Thrift Server to be fully operational..."
          sleep 60

      # Diagnostic output only: the grep pattern also appears in this
      # command's own process line, so a match here does not prove the
      # server is up. The Beeline step below is the actual connection test.
      - name: Check ThriftServer Process
        working-directory: .github/workflows/spark_deployment
        run: docker-compose exec -T thrift-server bash -c "ps aux | grep ThriftServer"

      # Tails the most recently modified file in /spark/logs whose name
      # contains "thriftserver". The \$ defers the $(...) substitution to the
      # shell inside the container.
      - name: Check Latest ThriftServer Log
        working-directory: .github/workflows/spark_deployment
        run: |
          docker-compose exec -T thrift-server bash -c "tail -n 50 /spark/logs/\$(ls -t /spark/logs/ | grep thriftserver | head -n1)"

      - name: Test ThriftServer connection with Beeline
        working-directory: .github/workflows/spark_deployment
        run: |
          docker-compose exec -T thrift-server bash -c '/spark/bin/beeline -u "jdbc:hive2://localhost:10000" -e "SHOW DATABASES;"'

      - name: 'Pre-test: Drop ci schemas'
        run: |
          dbt run-operation post_ci_cleanup --target spark

      - name: Run tests
        run: ./.scripts/integration_tests.sh -d spark

      # always() keeps the cleanup running when the test step fails, so CI
      # schemas are not left behind by a red build.
      - name: 'Post-test: Drop ci schemas'
        if: always()
        run: |
          dbt run-operation post_ci_cleanup --target spark