Skip to content

Commit

Permalink
Merge branch 'master' into feature/retention-cumulative
Browse files Browse the repository at this point in the history
# Conflicts:
#	frontend/__snapshots__/scenes-app-insights--retention-breakdown-edit--dark--webkit.png
#	frontend/__snapshots__/scenes-app-insights--retention-breakdown-edit--dark.png
#	frontend/__snapshots__/scenes-app-insights--retention-breakdown-edit--light--webkit.png
#	frontend/__snapshots__/scenes-app-insights--retention-breakdown-edit--light.png
#	frontend/__snapshots__/scenes-app-insights--retention-edit--dark--webkit.png
#	frontend/__snapshots__/scenes-app-insights--retention-edit--light--webkit.png
#	frontend/__snapshots__/scenes-app-insights--retention-edit--light.png
  • Loading branch information
webjunkie committed Aug 9, 2024
2 parents aeab2fa + fe376e8 commit aadb03a
Show file tree
Hide file tree
Showing 360 changed files with 2,327 additions and 2,077 deletions.
35 changes: 35 additions & 0 deletions .github/workflows/replay-capture.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
name: Vector Replay Capture Tests

on:
workflow_dispatch:
pull_request:
paths:
- vector/**
- .github/workflows/replay-capture.yml

workflow_call:

jobs:
vector-test:
name: Vector test
runs-on: ubuntu-20.04
env:
QUOTA_LIMITED_TEAMS_PATH: vector/replay-capture/tests/quota_limited_teams.csv
OVERFLOW_SESSIONS_PATH: vector/replay-capture/tests/overflow_sessions.csv
KAFKA_BOOSTRAP_SERVERS: dummy:9092
KAFKA_EVENTS_TOPIC: session_recording_snapshot_item_events
KAFKA_OVERFLOW_TOPIC: session_recording_snapshot_item_overflow
steps:
- name: Checkout
uses: actions/checkout@v4

- name: Install Vector
run: |
wget https://github.com/vectordotdev/vector/releases/download/v0.40.0/vector-0.40.0-x86_64-unknown-linux-gnu.tar.gz
tar xzvf vector-0.40.0-x86_64-unknown-linux-gnu.tar.gz ./vector-x86_64-unknown-linux-gnu/bin/vector
sudo mv ./vector-x86_64-unknown-linux-gnu/bin/vector /usr/bin/vector
- name: Run vector tests
run: |
yq -i e 'explode(.)' vector/replay-capture/vector.yaml
vector test vector/replay-capture/*.yaml
104 changes: 104 additions & 0 deletions .github/workflows/vector-docker-build-deploy.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
name: Build and deploy replay capture container images

on:
workflow_dispatch:
push:
paths:
- 'vector/**'
- '.github/workflows/vector-docker-build-deploy.yml'
branches:
- 'master'

jobs:
build:
name: Build and publish container image
runs-on: depot-ubuntu-22.04-4
permissions:
id-token: write # allow issuing OIDC tokens for this workflow run
contents: read # allow reading the repo contents
packages: write # allow push to ghcr.io

outputs:
digest: ${{ steps.docker_build.outputs.digest }}

defaults:
run:
working-directory: vector/

steps:
- name: Check Out Repo
# Checkout project code
# Use sparse checkout to only select files in vector directory
# Turning off cone mode ensures that files in the project root are not included during checkout
uses: actions/checkout@v4
with:
sparse-checkout: 'vector/'
sparse-checkout-cone-mode: false

- name: Login to ghcr.io
uses: docker/login-action@v2
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
logout: false

- name: Set up QEMU
uses: docker/setup-qemu-action@v3

- name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
images: ghcr.io/posthog/posthog/replay-capture
tags: |
type=ref,event=pr
type=ref,event=branch
type=semver,pattern={{version}}
type=semver,pattern={{major}}.{{minor}}
type=sha
- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@v2

- name: Build and push image
id: docker_build
uses: docker/build-push-action@v5
with:
context: ./vector/replay-capture/
file: ./vector/replay-capture/Dockerfile
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
platforms: linux/arm64

deploy:
runs-on: ubuntu-latest
needs: build
steps:
- name: get deployer token
id: deployer
uses: getsentry/action-github-app-token@v3
with:
app_id: ${{ secrets.DEPLOYER_APP_ID }}
private_key: ${{ secrets.DEPLOYER_APP_PRIVATE_KEY }}

- name: Trigger livestream deployment
uses: peter-evans/repository-dispatch@v3
with:
token: ${{ steps.deployer.outputs.token }}
repository: PostHog/charts
event-type: commit_state_update
client-payload: |
{
"values": {
"image": {
"sha": "${{ needs.build.outputs.digest }}"
}
},
"release": "replay-capture-vector",
"commit": ${{ toJson(github.event.head_commit) }},
"repository": ${{ toJson(github.repository) }},
"labels": []
}
4 changes: 2 additions & 2 deletions cypress/e2e/dashboard.cy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ describe('Dashboard', () => {
it('Dashboards loaded', () => {
cy.get('h1').should('contain', 'Dashboards')
// Breadcrumbs work
cy.get('[data-attr=breadcrumb-organization]').should('contain', 'Hogflix')
cy.get('[data-attr=breadcrumb-organization]').should('contain', 'H') // "H" as the lettermark of "Hogflix"
cy.get('[data-attr=breadcrumb-project]').should('contain', 'Hogflix Demo App')
cy.get('[data-attr=breadcrumb-Dashboards]').should('have.text', 'Dashboards')
})
Expand Down Expand Up @@ -233,7 +233,7 @@ describe('Dashboard', () => {

cy.get('.InsightCard').its('length').should('be.gte', 2)
// Breadcrumbs work
cy.get('[data-attr=breadcrumb-organization]').should('contain', 'Hogflix')
cy.get('[data-attr=breadcrumb-organization]').should('contain', 'H') // "H" as the lettermark of "Hogflix"
cy.get('[data-attr=breadcrumb-project]').should('contain', 'Hogflix Demo App')
cy.get('[data-attr=breadcrumb-Dashboards]').should('have.text', 'Dashboards')
cy.get('[data-attr^="breadcrumb-Dashboard:"]').should('have.text', TEST_DASHBOARD_NAME + 'UnnamedCancelSave')
Expand Down
2 changes: 1 addition & 1 deletion cypress/e2e/insights.cy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ describe('Insights', () => {
it('Saving an insight sets breadcrumbs', () => {
createInsight('insight name')

cy.get('[data-attr=breadcrumb-organization]').should('contain', 'Hogflix')
cy.get('[data-attr=breadcrumb-organization]').should('contain', 'H') // "H" as the lettermark of "Hogflix"
cy.get('[data-attr=breadcrumb-project]').should('contain', 'Hogflix Demo App')
cy.get('[data-attr=breadcrumb-SavedInsights]').should('have.text', 'Product analytics')
cy.get('[data-attr^="breadcrumb-Insight:"]').should('have.text', 'insight name')
Expand Down
22 changes: 21 additions & 1 deletion docker-compose.base.yml
Original file line number Diff line number Diff line change
Expand Up @@ -105,14 +105,34 @@ services:
restart: on-failure

capture:
image: ghcr.io/posthog/capture:main
image: ghcr.io/posthog/posthog/capture:master
restart: on-failure
environment:
ADDRESS: '0.0.0.0:3000'
KAFKA_TOPIC: 'events_plugin_ingestion'
KAFKA_HOSTS: 'kafka:9092'
REDIS_URL: 'redis://redis:6379/'

replay-capture:
image: ghcr.io/posthog/posthog/replay-capture:master
build:
context: vector/replay-capture
restart: on-failure
entrypoint: ['sh', '-c']
command:
- |
set -x
# seed empty required data files
mkdir -p /etc/vector/data
echo "token" > /etc/vector/data/quota_limited_teams.csv
echo "session_id" > /etc/vector/data/overflow_sessions.csv
exec vector -v --watch-config
environment:
KAFKA_EVENTS_TOPIC: session_recording_snapshot_item_events
KAFKA_OVERFLOW_TOPIC: session_recording_snapshot_item_overflow
KAFKA_BOOSTRAP_SERVERS: 'kafka:9092'
REDIS_URL: 'redis://redis:6379/'

plugins:
command: ./bin/plugin-server --no-restart-loop
restart: on-failure
Expand Down
11 changes: 11 additions & 0 deletions docker-compose.dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,17 @@ services:
- redis
- kafka

# Optional capture
replay-capture:
extends:
file: docker-compose.base.yml
service: replay-capture
ports:
- 3001:8000
depends_on:
- redis
- kafka

livestream:
extends:
file: docker-compose.base.yml
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
SELECT "posthog_organization"."id",
"posthog_organization"."name",
"posthog_organization"."slug",
"posthog_organization"."logo_media_id",
"posthog_organization"."created_at",
"posthog_organization"."updated_at",
"posthog_organization"."plugins_access_level",
Expand Down Expand Up @@ -119,6 +120,7 @@
SELECT "posthog_organization"."id",
"posthog_organization"."name",
"posthog_organization"."slug",
"posthog_organization"."logo_media_id",
"posthog_organization"."created_at",
"posthog_organization"."updated_at",
"posthog_organization"."plugins_access_level",
Expand Down Expand Up @@ -220,6 +222,7 @@
SELECT "posthog_organization"."id",
"posthog_organization"."name",
"posthog_organization"."slug",
"posthog_organization"."logo_media_id",
"posthog_organization"."created_at",
"posthog_organization"."updated_at",
"posthog_organization"."plugins_access_level",
Expand All @@ -245,6 +248,7 @@
SELECT "posthog_organization"."id",
"posthog_organization"."name",
"posthog_organization"."slug",
"posthog_organization"."logo_media_id",
"posthog_organization"."created_at",
"posthog_organization"."updated_at",
"posthog_organization"."plugins_access_level",
Expand Down
42 changes: 42 additions & 0 deletions ee/clickhouse/queries/experiments/funnel_experiment_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

from numpy.random import default_rng
from rest_framework.exceptions import ValidationError
import scipy.stats as stats
from sentry_sdk import capture_exception

from ee.clickhouse.queries.experiments import (
CONTROL_VARIANT_KEY,
Expand Down Expand Up @@ -111,6 +113,8 @@ def get_results(self, validate: bool = True):
}

significance_code, loss = self.are_results_significant(control_variant, test_variants, probabilities)

credible_intervals = calculate_credible_intervals([control_variant, *test_variants])
except ValidationError:
if validate:
raise
Expand All @@ -124,6 +128,7 @@ def get_results(self, validate: bool = True):
"significance_code": significance_code,
"expected_loss": loss,
"variants": [asdict(variant) for variant in [control_variant, *test_variants]],
"credible_intervals": credible_intervals,
}

def get_variants(self, funnel_results):
Expand Down Expand Up @@ -320,6 +325,43 @@ def calculate_probability_of_winning_for_each(variants: list[Variant]) -> list[P
return [max(0, 1 - total_test_probabilities), *probabilities[1:]]


def calculate_credible_intervals(variants, lower_bound=0.025, upper_bound=0.975):
    """
    Compute Bayesian credible intervals for each variant's conversion rate.

    Each variant's rate is modeled as a Beta posterior over its
    success/failure counts; with the default bounds this yields the 95%
    credible interval. Returns a mapping of variant key -> (lower, upper).
    On invalid counts or any computation error, the problem is reported to
    Sentry and an empty dict is returned (best-effort, never raises).
    """
    results = {}

    for v in variants:
        try:
            # Negative counts can't come from valid funnel data — report and bail out.
            if v.success_count < 0 or v.failure_count < 0:
                capture_exception(
                    Exception("Invalid variant success/failure count"),
                    {
                        "variant": v.key,
                        "success_count": v.success_count,
                        "failure_count": v.failure_count,
                    },
                )
                return {}

            # Beta posterior with Laplace smoothing: +1 on both parameters so a
            # zero success or failure count doesn't degenerate the distribution.
            low, high = stats.beta.ppf(
                [lower_bound, upper_bound],
                v.success_count + 1,
                v.failure_count + 1,
            )
            results[v.key] = (low, high)
        except Exception as e:
            capture_exception(
                Exception(f"Error calculating credible interval for variant {v.key}"),
                {"error": str(e)},
            )
            return {}

    return results


def validate_event_variants(funnel_results, variants):
errors = {
ExperimentNoResultsErrorKeys.NO_EVENTS: True,
Expand Down
Loading

0 comments on commit aadb03a

Please sign in to comment.