Skip to content

Commit

Permalink
Merge branch 'master' into feature/retention-cumulative
Browse files Browse the repository at this point in the history
# Conflicts:
#	frontend/__snapshots__/scenes-app-insights--retention-breakdown-edit--dark--webkit.png
#	frontend/__snapshots__/scenes-app-insights--retention-breakdown-edit--dark.png
#	frontend/__snapshots__/scenes-app-insights--retention-breakdown-edit--light--webkit.png
#	frontend/__snapshots__/scenes-app-insights--retention-breakdown-edit--light.png
#	frontend/__snapshots__/scenes-app-insights--retention-edit--dark--webkit.png
#	frontend/__snapshots__/scenes-app-insights--retention-edit--light--webkit.png
#	frontend/__snapshots__/scenes-app-insights--retention-edit--light.png
  • Loading branch information
webjunkie committed Aug 9, 2024
2 parents aeab2fa + fe376e8 commit aadb03a
Show file tree
Hide file tree
Showing 360 changed files with 2,327 additions and 2,077 deletions.
35 changes: 35 additions & 0 deletions .github/workflows/replay-capture.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
name: Vector Replay Capture Tests

on:
workflow_dispatch:
pull_request:
paths:
- vector/**
- .github/workflows/replay-capture.yml

workflow_call:

jobs:
vector-test:
name: Vector test
runs-on: ubuntu-20.04
env:
QUOTA_LIMITED_TEAMS_PATH: vector/replay-capture/tests/quota_limited_teams.csv
OVERFLOW_SESSIONS_PATH: vector/replay-capture/tests/overflow_sessions.csv
KAFKA_BOOSTRAP_SERVERS: dummy:9092
KAFKA_EVENTS_TOPIC: session_recording_snapshot_item_events
KAFKA_OVERFLOW_TOPIC: session_recording_snapshot_item_overflow
steps:
- name: Checkout
uses: actions/checkout@v4

- name: Install Vector
run: |
wget https://github.com/vectordotdev/vector/releases/download/v0.40.0/vector-0.40.0-x86_64-unknown-linux-gnu.tar.gz
tar xzvf vector-0.40.0-x86_64-unknown-linux-gnu.tar.gz ./vector-x86_64-unknown-linux-gnu/bin/vector
sudo mv ./vector-x86_64-unknown-linux-gnu/bin/vector /usr/bin/vector
- name: Run vector tests
run: |
yq -i e 'explode(.)' vector/replay-capture/vector.yaml
vector test vector/replay-capture/*.yaml
104 changes: 104 additions & 0 deletions .github/workflows/vector-docker-build-deploy.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
name: Build and deploy replay capture container images

on:
workflow_dispatch:
push:
paths:
- 'vector/**'
- '.github/workflows/vector-docker-build-deploy.yml'
branches:
- 'master'

jobs:
build:
name: Build and publish container image
runs-on: depot-ubuntu-22.04-4
permissions:
id-token: write # allow issuing OIDC tokens for this workflow run
contents: read # allow reading the repo contents
packages: write # allow push to ghcr.io

outputs:
digest: ${{ steps.docker_build.outputs.digest }}

defaults:
run:
working-directory: vector/

steps:
- name: Check Out Repo
# Checkout project code
# Use sparse checkout to only select files in vector directory
# Turning off cone mode ensures that files in the project root are not included during checkout
uses: actions/checkout@v4
with:
sparse-checkout: 'vector/'
sparse-checkout-cone-mode: false

- name: Login to ghcr.io
uses: docker/login-action@v2
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
logout: false

- name: Set up QEMU
uses: docker/setup-qemu-action@v3

- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: ghcr.io/posthog/posthog/replay-capture
tags: |
type=ref,event=pr
type=ref,event=branch
type=semver,pattern={{version}}
type=semver,pattern={{major}}.{{minor}}
type=sha
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@v2

- name: Build and push image
id: docker_build
uses: docker/build-push-action@v5
with:
context: ./vector/replay-capture/
file: ./vector/replay-capture/Dockerfile
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
platforms: linux/arm64

deploy:
runs-on: ubuntu-latest
needs: build
steps:
- name: get deployer token
id: deployer
uses: getsentry/action-github-app-token@v3
with:
app_id: ${{ secrets.DEPLOYER_APP_ID }}
private_key: ${{ secrets.DEPLOYER_APP_PRIVATE_KEY }}

- name: Trigger livestream deployment
uses: peter-evans/repository-dispatch@v3
with:
token: ${{ steps.deployer.outputs.token }}
repository: PostHog/charts
event-type: commit_state_update
client-payload: |
{
"values": {
"image": {
"sha": "${{ needs.build.outputs.digest }}"
}
},
"release": "replay-capture-vector",
"commit": ${{ toJson(github.event.head_commit) }},
"repository": ${{ toJson(github.repository) }},
"labels": []
}
4 changes: 2 additions & 2 deletions cypress/e2e/dashboard.cy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ describe('Dashboard', () => {
it('Dashboards loaded', () => {
cy.get('h1').should('contain', 'Dashboards')
// Breadcrumbs work
cy.get('[data-attr=breadcrumb-organization]').should('contain', 'Hogflix')
cy.get('[data-attr=breadcrumb-organization]').should('contain', 'H') // "H" as the lettermark of "Hogflix"
cy.get('[data-attr=breadcrumb-project]').should('contain', 'Hogflix Demo App')
cy.get('[data-attr=breadcrumb-Dashboards]').should('have.text', 'Dashboards')
})
Expand Down Expand Up @@ -233,7 +233,7 @@ describe('Dashboard', () => {

cy.get('.InsightCard').its('length').should('be.gte', 2)
// Breadcrumbs work
cy.get('[data-attr=breadcrumb-organization]').should('contain', 'Hogflix')
cy.get('[data-attr=breadcrumb-organization]').should('contain', 'H') // "H" as the lettermark of "Hogflix"
cy.get('[data-attr=breadcrumb-project]').should('contain', 'Hogflix Demo App')
cy.get('[data-attr=breadcrumb-Dashboards]').should('have.text', 'Dashboards')
cy.get('[data-attr^="breadcrumb-Dashboard:"]').should('have.text', TEST_DASHBOARD_NAME + 'UnnamedCancelSave')
Expand Down
2 changes: 1 addition & 1 deletion cypress/e2e/insights.cy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ describe('Insights', () => {
it('Saving an insight sets breadcrumbs', () => {
createInsight('insight name')

cy.get('[data-attr=breadcrumb-organization]').should('contain', 'Hogflix')
cy.get('[data-attr=breadcrumb-organization]').should('contain', 'H') // "H" as the lettermark of "Hogflix"
cy.get('[data-attr=breadcrumb-project]').should('contain', 'Hogflix Demo App')
cy.get('[data-attr=breadcrumb-SavedInsights]').should('have.text', 'Product analytics')
cy.get('[data-attr^="breadcrumb-Insight:"]').should('have.text', 'insight name')
Expand Down
22 changes: 21 additions & 1 deletion docker-compose.base.yml
Original file line number Diff line number Diff line change
Expand Up @@ -105,14 +105,34 @@ services:
restart: on-failure

capture:
image: ghcr.io/posthog/capture:main
image: ghcr.io/posthog/posthog/capture:master
restart: on-failure
environment:
ADDRESS: '0.0.0.0:3000'
KAFKA_TOPIC: 'events_plugin_ingestion'
KAFKA_HOSTS: 'kafka:9092'
REDIS_URL: 'redis://redis:6379/'

replay-capture:
image: ghcr.io/posthog/posthog/replay-capture:master
build:
context: vector/replay-capture
restart: on-failure
entrypoint: ['sh', '-c']
command:
- |
set -x
# seed empty required data files
mkdir -p /etc/vector/data
echo "token" > /etc/vector/data/quota_limited_teams.csv
echo "session_id" > /etc/vector/data/overflow_sessions.csv
exec vector -v --watch-config
environment:
KAFKA_EVENTS_TOPIC: session_recording_snapshot_item_events
KAFKA_OVERFLOW_TOPIC: session_recording_snapshot_item_overflow
KAFKA_BOOSTRAP_SERVERS: 'kafka:9092'
REDIS_URL: 'redis://redis:6379/'

plugins:
command: ./bin/plugin-server --no-restart-loop
restart: on-failure
Expand Down
11 changes: 11 additions & 0 deletions docker-compose.dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,17 @@ services:
- redis
- kafka

# Optional capture
replay-capture:
extends:
file: docker-compose.base.yml
service: replay-capture
ports:
- 3001:8000
depends_on:
- redis
- kafka

livestream:
extends:
file: docker-compose.base.yml
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
SELECT "posthog_organization"."id",
"posthog_organization"."name",
"posthog_organization"."slug",
"posthog_organization"."logo_media_id",
"posthog_organization"."created_at",
"posthog_organization"."updated_at",
"posthog_organization"."plugins_access_level",
Expand Down Expand Up @@ -119,6 +120,7 @@
SELECT "posthog_organization"."id",
"posthog_organization"."name",
"posthog_organization"."slug",
"posthog_organization"."logo_media_id",
"posthog_organization"."created_at",
"posthog_organization"."updated_at",
"posthog_organization"."plugins_access_level",
Expand Down Expand Up @@ -220,6 +222,7 @@
SELECT "posthog_organization"."id",
"posthog_organization"."name",
"posthog_organization"."slug",
"posthog_organization"."logo_media_id",
"posthog_organization"."created_at",
"posthog_organization"."updated_at",
"posthog_organization"."plugins_access_level",
Expand All @@ -245,6 +248,7 @@
SELECT "posthog_organization"."id",
"posthog_organization"."name",
"posthog_organization"."slug",
"posthog_organization"."logo_media_id",
"posthog_organization"."created_at",
"posthog_organization"."updated_at",
"posthog_organization"."plugins_access_level",
Expand Down
42 changes: 42 additions & 0 deletions ee/clickhouse/queries/experiments/funnel_experiment_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

from numpy.random import default_rng
from rest_framework.exceptions import ValidationError
import scipy.stats as stats
from sentry_sdk import capture_exception

from ee.clickhouse.queries.experiments import (
CONTROL_VARIANT_KEY,
Expand Down Expand Up @@ -111,6 +113,8 @@ def get_results(self, validate: bool = True):
}

significance_code, loss = self.are_results_significant(control_variant, test_variants, probabilities)

credible_intervals = calculate_credible_intervals([control_variant, *test_variants])
except ValidationError:
if validate:
raise
Expand All @@ -124,6 +128,7 @@ def get_results(self, validate: bool = True):
"significance_code": significance_code,
"expected_loss": loss,
"variants": [asdict(variant) for variant in [control_variant, *test_variants]],
"credible_intervals": credible_intervals,
}

def get_variants(self, funnel_results):
Expand Down Expand Up @@ -320,6 +325,43 @@ def calculate_probability_of_winning_for_each(variants: list[Variant]) -> list[P
return [max(0, 1 - total_test_probabilities), *probabilities[1:]]


def calculate_credible_intervals(variants, lower_bound=0.025, upper_bound=0.975):
    """
    Compute Bayesian credible intervals for each variant's conversion rate.

    Each variant's rate is modeled as a Beta posterior over its
    success/failure counts; with the default bounds this yields the 95%
    credible interval. Returns a mapping of variant key -> (lower, upper).
    On invalid counts or any computation error, the problem is reported to
    Sentry and an empty dict is returned (best-effort, never raises).
    """
    results = {}

    for v in variants:
        try:
            # Negative counts can't come from valid funnel data — report and bail out.
            if v.success_count < 0 or v.failure_count < 0:
                capture_exception(
                    Exception("Invalid variant success/failure count"),
                    {
                        "variant": v.key,
                        "success_count": v.success_count,
                        "failure_count": v.failure_count,
                    },
                )
                return {}

            # Beta posterior with Laplace smoothing: +1 on both parameters so a
            # zero success or failure count doesn't degenerate the distribution.
            low, high = stats.beta.ppf(
                [lower_bound, upper_bound],
                v.success_count + 1,
                v.failure_count + 1,
            )
            results[v.key] = (low, high)
        except Exception as e:
            capture_exception(
                Exception(f"Error calculating credible interval for variant {v.key}"),
                {"error": str(e)},
            )
            return {}

    return results


def validate_event_variants(funnel_results, variants):
errors = {
ExperimentNoResultsErrorKeys.NO_EVENTS: True,
Expand Down
Loading

0 comments on commit aadb03a

Please sign in to comment.