#
# This is an e2e test to deploy PostHog on Google Cloud Platform using Helm.
#
# TODO:
# - run k8s spec test
#
name: e2e - Google Cloud Platform (install)
on:
workflow_dispatch:
  # Please do not add 'pull_request' here: without the proper
  # GitHub settings it could let 3rd party users run commands
  # against our cloud account for testing
push:
branches:
- main
env:
  # Opt in to the gke-gcloud-auth-plugin; kubectl's built-in gcp auth
  # provider is removed in k8s v1.26, so this env var is only needed
  # until k8s v1.25
# https://cloud.google.com/blog/products/containers-kubernetes/kubectl-auth-changes-in-gke
USE_GKE_GCLOUD_AUTH_PLUGIN: True
jobs:
gcp-install:
runs-on: ubuntu-20.04
if: github.repository == 'PostHog/charts-clickhouse'
#
# These permissions are needed to interact with GitHub's OIDC Token endpoint.
# We use OpenID Connect (OIDC) to allow this GitHub Action to access and manage
# GCP resources without needing to store the GCP credentials as long-lived GitHub secrets.
#
# see: https://docs.github.com/en/actions/deployment/security-hardening-your-deployments/configuring-openid-connect-in-google-cloud-platform
#
permissions:
id-token: write
contents: write
steps:
- name: Checkout
uses: actions/checkout@v3
- name: Authenticate to Google Cloud
uses: google-github-actions/auth@v0
with:
workload_identity_provider: 'projects/494532703488/locations/global/workloadIdentityPools/github/providers/github'
service_account: '[email protected]'
access_token_lifetime: '7200s'
- name: Set up Google Cloud SDK
uses: google-github-actions/setup-gcloud@v0
with:
install_components: 'gke-gcloud-auth-plugin'
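      # 'gke-gcloud-auth-plugin' is the component kubectl delegates to for GKE
      # credentials when USE_GKE_GCLOUD_AUTH_PLUGIN is set (see env above).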
- name: Install doctl to manage 'posthog.cc' DNS
uses: digitalocean/action-doctl@v2
with:
token: ${{ secrets.DIGITALOCEAN_ACCESS_TOKEN }}
- name: Declare variables that we can share across steps
id: vars
run: |
TEST_NAME="helm-test-e2e-gcp-$(date '+%F')-$(git rev-parse --short HEAD)"
echo "k8s_cluster_name=${TEST_NAME}" >> $GITHUB_OUTPUT
echo "dns_record=${TEST_NAME}" >> $GITHUB_OUTPUT
echo "fqdn_record=${TEST_NAME}.posthog.cc" >> $GITHUB_OUTPUT
- name: Deploy a new k8s cluster
id: k8s_cluster_creation
run: |
gcloud container clusters create \
${{ steps.vars.outputs.k8s_cluster_name }} \
--project ${{ secrets.GCP_PROJECT_ID }} \
--region us-central1 \
--cluster-version 1.24 \
--labels="provisioned_by=github_action" \
--machine-type e2-standard-2 \
--num-nodes 2
          # note: --num-nodes is applied per zone, so e.g. --num-nodes=4
          # across two zones would create 8 nodes in total.
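          # With --region us-central1 GKE defaults to three zones, so the
          # command above should end up with 6 worker nodes in total
          # (assuming the default zone selection).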
- name: Create new GCP global static IP address
id: static_ip_creation
# note: we need to create the IP address first in order
# to get the load balancer successfully provisioned
run: |
gcloud compute addresses create \
--project ${{ secrets.GCP_PROJECT_ID }} \
--global \
${{ steps.vars.outputs.dns_record }}
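      # The address is consumed by name: 'ingress.gcp.ip_name' below presumably
      # ends up as the 'kubernetes.io/ingress.global-static-ip-name' annotation
      # on the Ingress, which is what binds the load balancer to this reserved IP.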
- name: Create the DNS record
id: dns_creation
run: |
# Get the global static IP address
global_static_ip=$(gcloud compute addresses list --project ${{ secrets.GCP_PROJECT_ID }} --global --filter=name:${{ steps.vars.outputs.dns_record }} --format="value(ADDRESS)")
# Create the DNS record
doctl compute domain records create \
posthog.cc \
--record-type A \
--record-ttl 60 \
--record-name "${{ steps.vars.outputs.dns_record }}" \
--record-data "$global_static_ip"
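          # To verify propagation one could run (illustrative, not executed here):
          #   dig +short "${{ steps.vars.outputs.fqdn_record }}"
          # which should print "$global_static_ip" once the 60s TTL record is live.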
- name: Install PostHog using the Helm chart
id: helm_install
run: |
helm upgrade --install \
-f ci/values/google_cloud_platform.yaml \
--set "ingress.hostname=${{ steps.vars.outputs.fqdn_record }}" \
--set "ingress.gcp.ip_name=${{ steps.vars.outputs.dns_record }}" \
--timeout 30m \
--create-namespace \
--namespace posthog \
posthog ./charts/posthog \
--wait-for-jobs \
--wait
#
# Wait for all k8s resources to be ready.
#
# Despite the --wait flag used in the command above
# there is no guarantee that all the resources will be deployed
# when the command returns.
      #
# Why can't we directly use the 'action-k8s-await-workloads' step below?
# Because it's not working for this use case
#
# ref: https://github.com/jupyterhub/action-k8s-await-workloads/issues/38
#
- name: Workaround - wait for all the PostHog resources in k8s to be ready
timeout-minutes: 15
run: ./ci/wait_for_all_the_posthog_resources_to_be_ready.sh
- name: Workaround - wait for the GCP load balancer to be ready
timeout-minutes: 15
run: |
echo "Waiting for the GCP Load Balancer to be ready..."
load_balancer_external_ip=""
while [ -z "$load_balancer_external_ip" ];
do
echo " sleeping 10 seconds" && sleep 10
load_balancer_external_ip=$(kubectl get ingress -n posthog posthog -o jsonpath="{.status.loadBalancer.ingress[0].ip}")
done
echo "The GCP Load Balancer is now ready!"
- name: Wait until all the resources are fully deployed in k8s
uses: jupyterhub/action-k8s-await-workloads@main
with:
namespace: "posthog"
timeout: 300
max-restarts: 10
- name: Wait for the Google-managed TLS certificate to be issued and deployed
id: tls_certificate_creation
timeout-minutes: 60
run: |
echo "Wait for the Google-managed TLS certificate to be issued and deployed..."
certificate_status=""
while [ "$certificate_status" != "Active" ];
do
echo " sleeping 10 seconds" && sleep 10
certificate_status=$(kubectl get managedcertificate -n posthog posthog-gke-cert -o jsonpath="{.status.certificateStatus}")
done
echo "The certificate has been issued and it has been deployed!"
- name: Setup PostHog for the ingestion test
run: ./ci/setup_ingestion_test.sh
- name: Set PostHog endpoints to use for the ingestion test
run: |
echo "POSTHOG_API_ENDPOINT=https://${{ steps.vars.outputs.fqdn_record }}" | tee -a "$GITHUB_ENV"
echo "POSTHOG_EVENT_ENDPOINT=https://${{ steps.vars.outputs.fqdn_record }}" | tee -a "$GITHUB_ENV"
#
      # TODO: the GCE Ingress is not picking up the health check from the readiness
      # probe definition and uses '/' instead. We need to fix this before we can
      # enable the k6 ingestion test.
# See WIP at https://github.com/PostHog/charts-clickhouse/pull/209
#
# - name: Run ingestion test using k6
# uses: k6io/[email protected]
# with:
# filename: ci/k6/ingestion-test.js
- name: Emit namespace report
uses: jupyterhub/action-k8s-namespace-report@v1
if: always()
with:
namespace: posthog
#
      # Due to what I consider a bad design choice from Google, standalone NEGs are
      # not deleted when a GKE cluster is deleted, so we need to do the cleanup
      # ourselves.
      #
      # An additional problem is that NEGs can't be deleted while they are in use,
      # and there's also no easy way to get the NEGs associated with a specific
      # GKE cluster using the 'gcloud' CLI. So we fetch the NEG info via 'kubectl'
      # in this step, delete the cluster in the next one and then, using the
      # outputs from this step, finally delete the NEGs.
#
- name: Fetch the associated NEG (Network Endpoint Groups)
id: fetch_neg
if: ${{ always() }}
shell: bash
run: |
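          #
          # The 'cloud.google.com/neg-status' annotation is a JSON blob shaped roughly
          # like this (hypothetical values):
          #   {
          #     "network_endpoint_groups": {"8000": "k8s1-abc123-posthog-posthog-events-8000-deadbeef"},
          #     "zones": ["us-central1-a", "us-central1-b", "us-central1-c"]
          #   }
          # jq below picks out the NEG name for port 8000 and the comma-joined zones.
          #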
#
# posthog-events
#
          POSTHOG_EVENTS_NEG_INFO=$(kubectl get svc -n posthog posthog-events -o json | jq --raw-output '.["metadata"]["annotations"]["cloud.google.com/neg-status"]')
          POSTHOG_EVENTS_NEG_NAME=$(echo "$POSTHOG_EVENTS_NEG_INFO" | jq --raw-output '.["network_endpoint_groups"]["8000"]')
          POSTHOG_EVENTS_NEG_ZONES=$(echo "$POSTHOG_EVENTS_NEG_INFO" | jq --raw-output '.["zones"] | join(",")')
          # Export as step outputs so the cleanup steps below can reference them
          echo "posthog_events_neg_name=${POSTHOG_EVENTS_NEG_NAME}" >> "$GITHUB_OUTPUT"
          echo "posthog_events_neg_zones=${POSTHOG_EVENTS_NEG_ZONES}" >> "$GITHUB_OUTPUT"
          #
          # posthog-web
          #
          POSTHOG_WEB_NEG_INFO=$(kubectl get svc -n posthog posthog-web -o json | jq --raw-output '.["metadata"]["annotations"]["cloud.google.com/neg-status"]')
          POSTHOG_WEB_NEG_NAME=$(echo "$POSTHOG_WEB_NEG_INFO" | jq --raw-output '.["network_endpoint_groups"]["8000"]')
          POSTHOG_WEB_NEG_ZONES=$(echo "$POSTHOG_WEB_NEG_INFO" | jq --raw-output '.["zones"] | join(",")')
          echo "posthog_web_neg_name=${POSTHOG_WEB_NEG_NAME}" >> "$GITHUB_OUTPUT"
          echo "posthog_web_neg_zones=${POSTHOG_WEB_NEG_ZONES}" >> "$GITHUB_OUTPUT"
- name: Delete the k8s cluster and all the associated resources
if: ${{ always() && steps.k8s_cluster_creation.outcome == 'success' }}
run: |
gcloud container clusters delete \
--project ${{ secrets.GCP_PROJECT_ID }} \
--region us-central1 \
--quiet \
${{ steps.vars.outputs.k8s_cluster_name }}
- name: Delete the associated NEG (Network Endpoint Groups)
if: ${{ always() && steps.fetch_neg.outcome == 'success' }}
shell: bash
run: |
delete_neg() {
local NEG_NAME="$1"
local NEG_ZONES
IFS=',' read -r -a NEG_ZONES <<< "$2"
for NEG_ZONE in "${NEG_ZONES[@]}"
do
gcloud compute network-endpoint-groups delete \
--project ${{ secrets.GCP_PROJECT_ID }} \
--zone "$NEG_ZONE" \
--quiet \
"$NEG_NAME"
done
}
delete_neg "${{ steps.vars.outputs.posthog_events_neg_name }}" "${{ steps.vars.outputs.posthog_events_neg_zones }}"
delete_neg "${{ steps.vars.outputs.posthog_web_neg_name }}" "${{ steps.vars.outputs.posthog_web_neg_zones }}"
- name: Delete the global static IP address
if: ${{ always() && steps.static_ip_creation.outcome == 'success' }}
run: |
gcloud compute addresses delete \
--project ${{ secrets.GCP_PROJECT_ID }} \
--global \
--quiet \
${{ steps.vars.outputs.dns_record }}
- name: Delete the DNS record
if: ${{ always() && steps.dns_creation.outcome == 'success' }}
run: |
          DNS_RECORD_ID=$(doctl compute domain records list posthog.cc --no-header --format ID,Name | grep "${{ steps.vars.outputs.dns_record }}" | awk '{print $1}')
doctl compute domain records delete \
posthog.cc \
--force \
"$DNS_RECORD_ID"
- name: Delete the Google-managed TLS certificate
if: ${{ always() }}
run: |
TLS_CERTIFICATE_NAME=$(gcloud compute ssl-certificates list --project ${{ secrets.GCP_PROJECT_ID }} --global --filter=${{ steps.vars.outputs.dns_record }} --format="value(NAME)")
if [ -n "$TLS_CERTIFICATE_NAME" ];
then
gcloud compute ssl-certificates delete \
--project ${{ secrets.GCP_PROJECT_ID }} \
--quiet \
"$TLS_CERTIFICATE_NAME"
fi