feat(data-warehouse): data warehouse table breakdowns #169
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This workflow runs all of our backend django tests. | |
# | |
# If these tests get too slow, look at increasing concurrency and re-timing the tests by manually dispatching | |
# .github/workflows/ci-backend-update-test-timing.yml action | |
name: Backend CI (depot) | |
on: | |
push: | |
branches: | |
- master | |
pull_request: | |
workflow_dispatch: | |
inputs: | |
clickhouseServerVersion: | |
description: ClickHouse server version. Leave blank for default | |
type: string | |
concurrency: | |
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} | |
# This is so that the workflow run isn't canceled when a snapshot update is pushed within it by posthog-bot | |
# We do however cancel from container-images-ci.yml if a commit is pushed by someone OTHER than posthog-bot | |
cancel-in-progress: false | |
env: | |
SECRET_KEY: '6b01eee4f945ca25045b5aab440b953461faf08693a9abbf1166dc7c6b9772da' # unsafe - for testing only | |
DATABASE_URL: 'postgres://posthog:posthog@localhost:5432/posthog' | |
REDIS_URL: 'redis://localhost' | |
CLICKHOUSE_HOST: 'localhost' | |
CLICKHOUSE_SECURE: 'False' | |
CLICKHOUSE_VERIFY: 'False' | |
TEST: 1 | |
CLICKHOUSE_SERVER_IMAGE_VERSION: ${{ github.event.inputs.clickhouseServerVersion || '' }} | |
OBJECT_STORAGE_ENABLED: 'True' | |
OBJECT_STORAGE_ENDPOINT: 'http://localhost:19000' | |
OBJECT_STORAGE_ACCESS_KEY_ID: 'object_storage_root_user' | |
OBJECT_STORAGE_SECRET_ACCESS_KEY: 'object_storage_root_password' | |
jobs: | |
# Job to decide if we should run backend ci | |
# See https://github.com/dorny/paths-filter#conditional-execution for more details | |
changes: | |
runs-on: depot-ubuntu-latest | |
timeout-minutes: 5 | |
if: github.repository == 'PostHog/posthog' | |
name: Determine need to run backend checks | |
# Set job outputs to values from filter step | |
outputs: | |
backend: ${{ steps.filter.outputs.backend }} | |
steps: | |
# For pull requests it's not necessary to checkout the code, but we | |
# also want this to run on master so we need to checkout | |
- uses: actions/checkout@v3 | |
- uses: dorny/paths-filter@v2 | |
id: filter | |
with: | |
filters: | | |
backend: | |
# Avoid running backend tests for irrelevant changes | |
# NOTE: we are at risk of missing a dependency here. We could make | |
# the dependencies more clear if we separated the backend/frontend | |
# code completely | |
# really we should ignore ee/frontend/** but dorny doesn't support that | |
# - '!ee/frontend/**' | |
# including the negated rule appears to work | |
# but makes it always match because the checked file always isn't `ee/frontend/**` 🙈 | |
- 'ee/**/*' | |
- 'posthog/**/*' | |
- 'bin/*.py' | |
- requirements.txt | |
- requirements-dev.txt | |
- mypy.ini | |
- pytest.ini | |
- frontend/src/queries/schema.json # Used for generating schema.py | |
- plugin-transpiler/src # Used for transpiling plugins | |
# Make sure we run if someone is explicitly change the workflow | |
- .github/workflows/ci-backend.yml | |
- .github/actions/run-backend-tests/action.yml | |
# We use docker compose for tests, make sure we rerun on | |
# changes to docker-compose.dev.yml e.g. dependency | |
# version changes | |
- docker-compose.dev.yml | |
- frontend/public/email/* | |
# These scripts are used in the CI | |
- bin/check_temporal_up | |
- bin/check_kafka_clickhouse_up | |
backend-code-quality: | |
needs: changes | |
timeout-minutes: 30 | |
name: Python code quality checks | |
runs-on: depot-ubuntu-latest | |
steps: | |
# If this run wasn't initiated by the bot (meaning: snapshot update) and we've determined | |
# there are backend changes, cancel previous runs | |
- uses: n1hility/cancel-previous-runs@v3 | |
if: github.actor != 'posthog-bot' && needs.changes.outputs.backend == 'true' | |
with: | |
token: ${{ secrets.GITHUB_TOKEN }} | |
- uses: actions/checkout@v3 | |
with: | |
fetch-depth: 1 | |
- name: Set up Python | |
uses: actions/setup-python@v5 | |
with: | |
python-version: 3.10.10 | |
cache: 'pip' | |
cache-dependency-path: '**/requirements*.txt' | |
token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }} | |
- uses: syphar/restore-virtualenv@v1 | |
id: cache-backend-tests | |
with: | |
custom_cache_key_element: v2- | |
- name: Install SAML (python3-saml) dependencies | |
run: | | |
sudo apt-get update | |
sudo apt-get install libxml2-dev libxmlsec1 libxmlsec1-dev libxmlsec1-openssl | |
- name: Install Python dependencies | |
if: steps.cache-backend-tests.outputs.cache-hit != 'true' | |
run: | | |
python -m pip install -r requirements.txt -r requirements-dev.txt | |
- name: Check for syntax errors, import sort, and code style violations | |
run: | | |
ruff check . | |
- name: Check formatting | |
run: | | |
ruff format --exclude posthog/hogql/grammar --check --diff . | |
- name: Add Problem Matcher | |
run: echo "::add-matcher::.github/mypy-problem-matcher.json" | |
- name: Check static typing | |
run: | | |
mypy -p posthog | mypy-baseline filter | |
- name: Check if "schema.py" is up to date | |
run: | | |
npm run schema:build:python && git diff --exit-code | |
- name: Check if ANTLR definitions are up to date | |
run: | | |
cd .. | |
sudo apt-get install default-jre | |
mkdir antlr | |
cd antlr | |
curl -o antlr.jar https://www.antlr.org/download/antlr-$ANTLR_VERSION-complete.jar | |
export PWD=`pwd` | |
echo '#!/bin/bash' > antlr | |
echo "java -jar $PWD/antlr.jar \$*" >> antlr | |
chmod +x antlr | |
export CLASSPATH=".:$PWD/antlr.jar:$CLASSPATH" | |
export PATH="$PWD:$PATH" | |
cd ../posthog | |
antlr | grep "Version" | |
npm run grammar:build && git diff --exit-code | |
env: | |
# Installing a version of ANTLR compatible with what's in Homebrew as of October 2023 (version 4.13), | |
# as apt-get is quite out of date. The same version must be set in hogql_parser/pyproject.toml | |
ANTLR_VERSION: '4.13.1' | |
check-migrations: | |
needs: changes | |
if: needs.changes.outputs.backend == 'true' | |
timeout-minutes: 10 | |
name: Validate Django migrations | |
runs-on: depot-ubuntu-latest | |
steps: | |
- uses: actions/checkout@v3 | |
- name: Stop/Start stack with Docker Compose | |
run: | | |
docker compose -f docker-compose.dev.yml down | |
docker compose -f docker-compose.dev.yml up -d | |
- name: Set up Python | |
uses: actions/setup-python@v5 | |
with: | |
python-version: 3.10.10 | |
cache: 'pip' | |
cache-dependency-path: '**/requirements*.txt' | |
token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }} | |
- uses: syphar/restore-virtualenv@v1 | |
id: cache-backend-tests | |
with: | |
custom_cache_key_element: v1- | |
- name: Install SAML (python3-saml) dependencies | |
run: | | |
sudo apt-get update | |
sudo apt-get install libxml2-dev libxmlsec1-dev libxmlsec1-openssl | |
- name: Install python dependencies | |
if: steps.cache-backend-tests.outputs.cache-hit != 'true' | |
run: | | |
python -m pip install -r requirements.txt -r requirements-dev.txt | |
- uses: actions/checkout@v3 | |
with: | |
ref: master | |
- name: Run migrations up to master | |
run: | | |
# We need to ensure we have requirements for the master branch | |
# now also, so we can run migrations up to master. | |
python -m pip install -r requirements.txt -r requirements-dev.txt | |
python manage.py migrate | |
- uses: actions/checkout@v3 | |
- name: Check migrations | |
run: | | |
python manage.py makemigrations --check --dry-run | |
git fetch origin master | |
# `git diff --name-only` returns a list of files that were changed - added OR deleted OR modified | |
# With `--name-status` we get the same, but including a column for status, respectively: A, D, M | |
# In this check we exclusively care about files that were | |
# added (A) in posthog/migrations/. We also want to ignore | |
# initial migrations (0001_*) as these are guaranteed to be | |
# run on initial setup where there is no data. | |
git diff --name-status origin/master..HEAD | grep "A\sposthog/migrations/" | awk '{print $2}' | grep -v migrations/0001_ | python manage.py test_migrations_are_safe | |
django: | |
needs: changes | |
timeout-minutes: 30 | |
name: Django tests – ${{ matrix.segment }} (persons-on-events ${{ matrix.person-on-events && 'on' || 'off' }}), Py ${{ matrix.python-version }}, ${{ matrix.clickhouse-server-image }} (${{matrix.group}}/${{ matrix.concurrency }}) (depot) | |
runs-on: depot-ubuntu-latest | |
strategy: | |
fail-fast: false | |
matrix: | |
python-version: ['3.10.10'] | |
clickhouse-server-image: ['clickhouse/clickhouse-server:23.11.2.11-alpine'] | |
segment: ['FOSS', 'EE'] | |
person-on-events: [false, true] | |
# :NOTE: Keep concurrency and groups in sync | |
concurrency: [5] | |
group: [1, 2, 3, 4, 5] | |
steps: | |
# The first step is the only one that should run if `needs.changes.outputs.backend == 'false'`. | |
# All the other ones should rely on `needs.changes.outputs.backend` directly or indirectly, so that they're | |
# effectively skipped if backend code is unchanged. See https://github.com/PostHog/posthog/pull/15174. | |
- uses: actions/checkout@v3 | |
with: | |
fetch-depth: 1 | |
repository: ${{ github.event.pull_request.head.repo.full_name }} | |
ref: ${{ github.event.pull_request.head.ref }} | |
# Use PostHog Bot token when not on forks to enable proper snapshot updating | |
token: ${{ github.event.pull_request.head.repo.full_name == github.repository && secrets.POSTHOG_BOT_GITHUB_TOKEN || github.token }} | |
- uses: ./.github/actions/run-backend-tests | |
if: needs.changes.outputs.backend == 'true' | |
with: | |
segment: ${{ matrix.segment }} | |
person-on-events: ${{ matrix.person-on-events }} | |
python-version: ${{ matrix.python-version }} | |
clickhouse-server-image: ${{ matrix.clickhouse-server-image }} | |
concurrency: ${{ matrix.concurrency }} | |
group: ${{ matrix.group }} | |
token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }} | |
- uses: EndBug/add-and-commit@v9 | |
# Skip on forks | |
# Also skip for persons-on-events runs, as we want to ignore snapshots diverging there | |
if: ${{ github.repository == 'PostHog/posthog' && needs.changes.outputs.backend == 'true' && !matrix.person-on-events }} | |
with: | |
add: '["ee", "./**/*.ambr", "posthog/queries/", "posthog/migrations", "posthog/tasks", "posthog/hogql/"]' | |
message: 'Update query snapshots' | |
pull: --rebase --autostash # Make sure we're up-to-date with other segments' updates | |
default_author: github_actions | |
github_token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }} | |
- name: Check if any snapshot changes were left uncomitted | |
id: changed-files | |
if: ${{ github.repository == 'PostHog/posthog' && needs.changes.outputs.backend == 'true' && !matrix.person-on-events }} | |
run: | | |
if [[ -z $(git status -s | grep -v ".test_durations" | tr -d "\n") ]] | |
then | |
echo 'files_found=false' >> $GITHUB_OUTPUT | |
else | |
echo 'diff=$(git status --porcelain)' >> $GITHUB_OUTPUT | |
echo 'files_found=true' >> $GITHUB_OUTPUT | |
fi | |
- name: Fail CI if some snapshots have been updated but not committed | |
if: steps.changed-files.outputs.files_found == 'true' && steps.add-and-commit.outcome == 'success' | |
run: | | |
echo "${{ steps.changed-files.outputs.diff }}" | |
exit 1 | |
- name: Archive email renders | |
uses: actions/upload-artifact@v3 | |
if: needs.changes.outputs.backend == 'true' && matrix.segment == 'FOSS' && matrix.person-on-events == false | |
with: | |
name: email_renders | |
path: posthog/tasks/test/__emails__ | |
retention-days: 5 | |
async-migrations: | |
name: Async migrations tests - ${{ matrix.clickhouse-server-image }} | |
needs: changes | |
strategy: | |
fail-fast: false | |
matrix: | |
clickhouse-server-image: ['clickhouse/clickhouse-server:23.11.2.11-alpine'] | |
if: needs.changes.outputs.backend == 'true' | |
runs-on: depot-ubuntu-latest | |
steps: | |
- name: 'Checkout repo' | |
uses: actions/checkout@v3 | |
with: | |
fetch-depth: 1 | |
- name: Start stack with Docker Compose | |
run: | | |
export CLICKHOUSE_SERVER_IMAGE_VERSION=${{ matrix.clickhouse-server-image }} | |
docker compose -f docker-compose.dev.yml down | |
docker compose -f docker-compose.dev.yml up -d | |
- name: Set up Python | |
uses: actions/setup-python@v5 | |
with: | |
python-version: 3.10.10 | |
cache: 'pip' | |
cache-dependency-path: '**/requirements*.txt' | |
token: ${{ secrets.POSTHOG_BOT_GITHUB_TOKEN }} | |
- uses: syphar/restore-virtualenv@v1 | |
id: cache-backend-tests | |
with: | |
custom_cache_key_element: v2- | |
- name: Install SAML (python3-saml) dependencies | |
run: | | |
sudo apt-get update | |
sudo apt-get install libxml2-dev libxmlsec1-dev libxmlsec1-openssl | |
- name: Install python dependencies | |
if: steps.cache-backend-tests.outputs.cache-hit != 'true' | |
shell: bash | |
run: | | |
python -m pip install -r requirements.txt -r requirements-dev.txt | |
- name: Add kafka host to /etc/hosts for kafka connectivity | |
run: sudo echo "127.0.0.1 kafka" | sudo tee -a /etc/hosts | |
- name: Set up needed files | |
run: | | |
mkdir -p frontend/dist | |
touch frontend/dist/index.html | |
touch frontend/dist/layout.html | |
touch frontend/dist/exporter.html | |
- name: Wait for Clickhouse & Kafka | |
run: bin/check_kafka_clickhouse_up | |
- name: Run async migrations tests | |
run: | | |
pytest -m "async_migrations" | |
calculate-running-time: | |
name: Calculate running time | |
needs: [django, async-migrations] | |
runs-on: ubuntu-latest | |
if: needs.changes.outputs.backend == 'true' | |
steps: | |
- name: Calculate running time | |
run: | | |
echo "${{ secrets.GITHUB_TOKEN }}" | gh auth login --with-token | |
run_id=${GITHUB_RUN_ID} | |
repo=${GITHUB_REPOSITORY} | |
run_info=$(gh api repos/${repo}/actions/runs/${run_id}) | |
echo run_info: ${run_info} | |
# name is the name of the workflow file | |
# run_started_at is the start time of the workflow | |
# we want to get the number of seconds between the start time and now | |
name=$(echo ${run_info} | jq -r '.name') | |
run_url=$(echo ${run_info} | jq -r '.url') | |
run_started_at=$(echo ${run_info} | jq -r '.run_started_at') | |
run_attempt=$(echo ${run_info} | jq -r '.run_attempt') | |
start_seconds=$(date -d "${run_started_at}" +%s) | |
now_seconds=$(date +%s) | |
duration=$((now_seconds-start_seconds)) | |
echo running_time_duration_seconds=${duration} >> $GITHUB_ENV | |
echo running_time_run_url=${run_url} >> $GITHUB_ENV | |
echo running_time_run_attempt=${run_attempt} >> $GITHUB_ENV | |
echo running_time_run_id=${run_id} >> $GITHUB_ENV | |
echo running_time_run_started_at=${run_started_at} >> $GITHUB_ENV | |
- name: Capture running time to PostHog | |
uses: PostHog/[email protected] | |
with: | |
posthog-token: ${{secrets.POSTHOG_API_TOKEN}} | |
event: 'posthog-ci-running-time' | |
properties: '{"duration_seconds": ${{ env.running_time_duration_seconds }}, "run_url": "${{ env.running_time_run_url }}", "run_attempt": "${{ env.running_time_run_attempt }}", "run_id": "${{ env.running_time_run_id }}", "run_started_at": "${{ env.running_time_run_started_at }}"}' |