fix(batch-exports): Use prefix in snowflake internal stage (#25847) #102619
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This workflow runs all of our backend django tests. | |
# | |
# If these tests get too slow, look at increasing concurrency and re-timing the tests by manually dispatching | |
# .github/workflows/ci-backend-update-test-timing.yml action | |
name: Backend CI | |
on: | |
push: | |
branches: | |
- master | |
workflow_dispatch: | |
inputs: | |
clickhouseServerVersion: | |
description: ClickHouse server version. Leave blank for default | |
type: string | |
pull_request: | |
concurrency: | |
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} | |
# This is so that the workflow run isn't canceled when a snapshot update is pushed within it by posthog-bot | |
# We do however cancel from container-images-ci.yml if a commit is pushed by someone OTHER than posthog-bot | |
cancel-in-progress: false | |
env: | |
SECRET_KEY: '6b01eee4f945ca25045b5aab440b953461faf08693a9abbf1166dc7c6b9772da' # unsafe - for testing only | |
DATABASE_URL: 'postgres://posthog:posthog@localhost:5432/posthog' | |
REDIS_URL: 'redis://localhost' | |
CLICKHOUSE_HOST: 'localhost' | |
CLICKHOUSE_SECURE: 'False' | |
CLICKHOUSE_VERIFY: 'False' | |
TEST: 1 | |
CLICKHOUSE_SERVER_IMAGE_VERSION: ${{ github.event.inputs.clickhouseServerVersion || '' }} | |
OBJECT_STORAGE_ENABLED: 'True' | |
OBJECT_STORAGE_ENDPOINT: 'http://localhost:19000' | |
OBJECT_STORAGE_ACCESS_KEY_ID: 'object_storage_root_user' | |
OBJECT_STORAGE_SECRET_ACCESS_KEY: 'object_storage_root_password' | |
# tests would intermittently fail in GH actions | |
# with exit code 134 _after passing_ all tests | |
# this appears to fix it | |
# absolute wild tbh https://stackoverflow.com/a/75503402 | |
DISPLAY: ':99.0' | |
jobs: | |
# Job to decide if we should run backend ci | |
# See https://github.com/dorny/paths-filter#conditional-execution for more details | |
changes: | |
runs-on: ubuntu-latest | |
timeout-minutes: 5 | |
name: Determine need to run backend checks | |
# Set job outputs to values from filter step | |
outputs: | |
backend: ${{ steps.filter.outputs.backend }} | |
steps: | |
# For pull requests it's not necessary to checkout the code, but we | |
# also want this to run on master so we need to checkout | |
- uses: actions/checkout@v3 | |
- uses: dorny/paths-filter@v2 | |
id: filter | |
with: | |
filters: | | |
backend: | |
# Avoid running backend tests for irrelevant changes | |
# NOTE: we are at risk of missing a dependency here. We could make | |
# the dependencies more clear if we separated the backend/frontend | |
# code completely | |
# really we should ignore ee/frontend/** but dorny doesn't support that | |
# - '!ee/frontend/**' | |
# including the negated rule appears to work | |
# but makes it always match because the checked file always isn't `ee/frontend/**` 🙈 | |
- 'ee/**/*' | |
- 'hogvm/**/*' | |
- 'posthog/**/*' | |
- 'bin/*.py' | |
- requirements.txt | |
- requirements-dev.txt | |
- mypy.ini | |
- pytest.ini | |
- frontend/src/queries/schema.json # Used for generating schema.py | |
- plugin-transpiler/src # Used for transpiling plugins | |
# Make sure we run if someone is explicitly change the workflow | |
- .github/workflows/ci-backend.yml | |
- .github/actions/run-backend-tests/action.yml | |
# We use docker compose for tests, make sure we rerun on | |
# changes to docker-compose.dev.yml e.g. dependency | |
# version changes | |
- docker-compose.dev.yml | |
- frontend/public/email/* | |
# These scripts are used in the CI | |
- bin/check_temporal_up | |
- bin/check_kafka_clickhouse_up | |
backend-code-quality: | |
needs: changes | |
timeout-minutes: 30 | |
name: Python code quality checks | |
runs-on: ubuntu-latest | |
steps: | |
# If this run wasn't initiated by the bot (meaning: snapshot update) and we've determined | |
# there are backend changes, cancel previous runs | |
- uses: n1hility/cancel-previous-runs@v3 | |
if: github.actor != 'posthog-bot' && needs.changes.outputs.backend == 'true' | |
with: | |
token: ${{ secrets.GITHUB_TOKEN }} | |
- uses: actions/checkout@v3 | |
with: | |
fetch-depth: 1 | |
- name: Set up Python | |
uses: actions/setup-python@v5 | |
with: | |
python-version: 3.11.9 | |
cache: 'pip' | |
cache-dependency-path: '**/requirements*.txt' | |
token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }} | |
# uv is a fast pip alternative: https://github.com/astral-sh/uv/ | |
- run: pip install uv | |
- name: Install SAML (python3-saml) dependencies | |
run: | | |
sudo apt-get update | |
sudo apt-get install libxml2-dev libxmlsec1 libxmlsec1-dev libxmlsec1-openssl | |
- name: Install Python dependencies | |
run: | | |
uv pip install --system -r requirements.txt -r requirements-dev.txt | |
- name: Check for syntax errors, import sort, and code style violations | |
run: | | |
ruff check . | |
- name: Check formatting | |
run: | | |
ruff format --check --diff . | |
- name: Add Problem Matcher | |
run: echo "::add-matcher::.github/mypy-problem-matcher.json" | |
- name: Check static typing | |
run: | | |
mypy -p posthog | mypy-baseline filter | |
- name: Check if "schema.py" is up to date | |
run: | | |
npm run schema:build:python && git diff --exit-code | |
check-migrations: | |
needs: changes | |
if: needs.changes.outputs.backend == 'true' | |
timeout-minutes: 10 | |
name: Validate Django and CH migrations | |
runs-on: ubuntu-latest | |
steps: | |
- uses: actions/checkout@v3 | |
- name: Stop/Start stack with Docker Compose | |
run: | | |
docker compose -f docker-compose.dev.yml down | |
docker compose -f docker-compose.dev.yml up -d | |
- name: Set up Python | |
uses: actions/setup-python@v5 | |
with: | |
python-version: 3.11.9 | |
cache: 'pip' | |
cache-dependency-path: '**/requirements*.txt' | |
token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }} | |
# uv is a fast pip alternative: https://github.com/astral-sh/uv/ | |
- run: pip install uv | |
- name: Install SAML (python3-saml) dependencies | |
run: | | |
sudo apt-get update | |
sudo apt-get install libxml2-dev libxmlsec1-dev libxmlsec1-openssl | |
# First running migrations from master, to simulate the real-world scenario | |
- name: Checkout master | |
uses: actions/checkout@v3 | |
with: | |
ref: master | |
- name: Install python dependencies for master | |
run: | | |
uv pip install --system -r requirements.txt -r requirements-dev.txt | |
- name: Run migrations up to master | |
run: | | |
python manage.py migrate | |
# Now we can consider this PR's migrations | |
- name: Checkout this PR | |
uses: actions/checkout@v3 | |
- name: Install python dependencies for this PR | |
run: | | |
uv pip install --system -r requirements.txt -r requirements-dev.txt | |
- name: Run migrations for this PR | |
run: | | |
python manage.py migrate | |
- name: Check migrations | |
run: | | |
python manage.py makemigrations --check --dry-run | |
git fetch origin master | |
# `git diff --name-only` returns a list of files that were changed - added OR deleted OR modified | |
# With `--name-status` we get the same, but including a column for status, respectively: A, D, M | |
# In this check we exclusively care about files that were | |
# added (A) in posthog/migrations/. We also want to ignore | |
# initial migrations (0001_*) as these are guaranteed to be | |
# run on initial setup where there is no data. | |
git diff --name-status origin/master..HEAD | grep "A\sposthog/migrations/" | awk '{print $2}' | grep -v migrations/0001_ | python manage.py test_migrations_are_safe | |
- name: Check CH migrations | |
run: | | |
# Same as above, except now for CH looking at files that were added in posthog/clickhouse/migrations/ | |
git diff --name-status origin/master..HEAD | grep "A\sposthog/clickhouse/migrations/" | awk '{print $2}' | python manage.py test_ch_migrations_are_safe | |
django: | |
needs: changes | |
# increase for tmate testing | |
timeout-minutes: 30 | |
name: Django tests – ${{ matrix.segment }} (persons-on-events ${{ matrix.person-on-events && 'on' || 'off' }}), Py ${{ matrix.python-version }}, ${{ matrix.clickhouse-server-image }} (${{matrix.group}}/${{ matrix.concurrency }}) | |
runs-on: ubuntu-latest | |
strategy: | |
fail-fast: false | |
matrix: | |
python-version: ['3.11.9'] | |
clickhouse-server-image: ['clickhouse/clickhouse-server:23.12.6.19'] | |
segment: ['Core'] | |
person-on-events: [false, true] | |
# :NOTE: Keep concurrency and groups in sync | |
concurrency: [10] | |
group: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] | |
include: | |
- segment: 'Temporal' | |
person-on-events: false | |
clickhouse-server-image: 'clickhouse/clickhouse-server:23.12.6.19' | |
python-version: '3.11.9' | |
concurrency: 1 | |
group: 1 | |
steps: | |
# The first step is the only one that should run if `needs.changes.outputs.backend == 'false'`. | |
# All the other ones should rely on `needs.changes.outputs.backend` directly or indirectly, so that they're | |
# effectively skipped if backend code is unchanged. See https://github.com/PostHog/posthog/pull/15174. | |
- uses: actions/checkout@v3 | |
with: | |
fetch-depth: 1 | |
repository: ${{ github.event.pull_request.head.repo.full_name }} | |
ref: ${{ github.event.pull_request.head.ref }} | |
# Use PostHog Bot token when not on forks to enable proper snapshot updating | |
token: ${{ github.event.pull_request.head.repo.full_name == github.repository && secrets.POSTHOG_BOT_GITHUB_TOKEN || github.token }} | |
- uses: ./.github/actions/run-backend-tests | |
if: needs.changes.outputs.backend == 'true' | |
with: | |
segment: ${{ matrix.segment }} | |
person-on-events: ${{ matrix.person-on-events }} | |
python-version: ${{ matrix.python-version }} | |
clickhouse-server-image: ${{ matrix.clickhouse-server-image }} | |
concurrency: ${{ matrix.concurrency }} | |
group: ${{ matrix.group }} | |
token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }} | |
- uses: EndBug/add-and-commit@v9 | |
# Skip on forks | |
# Also skip for persons-on-events runs, as we want to ignore snapshots diverging there | |
if: ${{ github.event.pull_request.head.repo.full_name == 'PostHog/posthog' && needs.changes.outputs.backend == 'true' && !matrix.person-on-events }} | |
with: | |
add: '["ee", "./**/*.ambr", "posthog/queries/", "posthog/migrations", "posthog/tasks", "posthog/hogql/"]' | |
message: 'Update query snapshots' | |
pull: --rebase --autostash # Make sure we're up-to-date with other segments' updates | |
default_author: github_actions | |
github_token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }} | |
- name: Check if any snapshot changes were left uncomitted | |
id: changed-files | |
if: ${{ github.event.pull_request.head.repo.full_name == 'PostHog/posthog' && needs.changes.outputs.backend == 'true' && !matrix.person-on-events }} | |
run: | | |
if [[ -z $(git status -s | grep -v ".test_durations" | tr -d "\n") ]] | |
then | |
echo 'files_found=false' >> $GITHUB_OUTPUT | |
else | |
echo 'diff=$(git status --porcelain)' >> $GITHUB_OUTPUT | |
echo 'files_found=true' >> $GITHUB_OUTPUT | |
fi | |
- name: Fail CI if some snapshots have been updated but not committed | |
if: steps.changed-files.outputs.files_found == 'true' && steps.add-and-commit.outcome == 'success' | |
run: | | |
echo "${{ steps.changed-files.outputs.diff }}" | |
exit 1 | |
- name: Archive email renders | |
uses: actions/upload-artifact@v3 | |
if: needs.changes.outputs.backend == 'true' && matrix.segment == 'Core' && matrix.person-on-events == false | |
with: | |
name: email_renders | |
path: posthog/tasks/test/__emails__ | |
retention-days: 5 | |
async-migrations: | |
name: Async migrations tests - ${{ matrix.clickhouse-server-image }} | |
needs: changes | |
strategy: | |
fail-fast: false | |
matrix: | |
clickhouse-server-image: ['clickhouse/clickhouse-server:23.12.6.19'] | |
if: needs.changes.outputs.backend == 'true' | |
runs-on: ubuntu-latest | |
steps: | |
- name: 'Checkout repo' | |
uses: actions/checkout@v3 | |
with: | |
fetch-depth: 1 | |
- name: Start stack with Docker Compose | |
run: | | |
export CLICKHOUSE_SERVER_IMAGE_VERSION=${{ matrix.clickhouse-server-image }} | |
docker compose -f docker-compose.dev.yml down | |
docker compose -f docker-compose.dev.yml up -d | |
- name: Set up Python | |
uses: actions/setup-python@v5 | |
with: | |
python-version: 3.11.9 | |
cache: 'pip' | |
cache-dependency-path: '**/requirements*.txt' | |
token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }} | |
# uv is a fast pip alternative: https://github.com/astral-sh/uv/ | |
- run: pip install uv | |
- name: Install SAML (python3-saml) dependencies | |
run: | | |
sudo apt-get update | |
sudo apt-get install libxml2-dev libxmlsec1-dev libxmlsec1-openssl | |
- name: Install python dependencies | |
shell: bash | |
run: | | |
uv pip install --system -r requirements.txt -r requirements-dev.txt | |
- name: Add kafka host to /etc/hosts for kafka connectivity | |
run: sudo echo "127.0.0.1 kafka" | sudo tee -a /etc/hosts | |
- name: Set up needed files | |
run: | | |
mkdir -p frontend/dist | |
touch frontend/dist/index.html | |
touch frontend/dist/layout.html | |
touch frontend/dist/exporter.html | |
- name: Wait for Clickhouse & Kafka | |
run: bin/check_kafka_clickhouse_up | |
- name: Run async migrations tests | |
run: | | |
pytest -m "async_migrations" | |
calculate-running-time: | |
name: Calculate running time | |
needs: [django, async-migrations] | |
runs-on: ubuntu-latest | |
if: # Run on pull requests to PostHog/posthog + on PostHog/posthog outside of PRs - but never on forks | |
needs.changes.outputs.backend == 'true' && | |
( | |
github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name || github.repository | |
) == 'PostHog/posthog' | |
steps: | |
- name: Calculate running time | |
run: | | |
echo "${{ secrets.GITHUB_TOKEN }}" | gh auth login --with-token | |
run_id=${GITHUB_RUN_ID} | |
repo=${GITHUB_REPOSITORY} | |
run_info=$(gh api repos/${repo}/actions/runs/${run_id}) | |
echo run_info: ${run_info} | |
# name is the name of the workflow file | |
# run_started_at is the start time of the workflow | |
# we want to get the number of seconds between the start time and now | |
name=$(echo ${run_info} | jq -r '.name') | |
run_url=$(echo ${run_info} | jq -r '.url') | |
run_started_at=$(echo ${run_info} | jq -r '.run_started_at') | |
run_attempt=$(echo ${run_info} | jq -r '.run_attempt') | |
start_seconds=$(date -d "${run_started_at}" +%s) | |
now_seconds=$(date +%s) | |
duration=$((now_seconds-start_seconds)) | |
echo running_time_duration_seconds=${duration} >> $GITHUB_ENV | |
echo running_time_run_url=${run_url} >> $GITHUB_ENV | |
echo running_time_run_attempt=${run_attempt} >> $GITHUB_ENV | |
echo running_time_run_id=${run_id} >> $GITHUB_ENV | |
echo running_time_run_started_at=${run_started_at} >> $GITHUB_ENV | |
- name: Capture running time to PostHog | |
uses: PostHog/[email protected] | |
with: | |
posthog-token: ${{secrets.POSTHOG_API_TOKEN}} | |
event: 'posthog-ci-running-time' | |
properties: '{"duration_seconds": ${{ env.running_time_duration_seconds }}, "run_url": "${{ env.running_time_run_url }}", "run_attempt": "${{ env.running_time_run_attempt }}", "run_id": "${{ env.running_time_run_id }}", "run_started_at": "${{ env.running_time_run_started_at }}"}' |