diff --git a/bin/mprocs.yaml b/bin/mprocs.yaml
index cb3761250d035..f57efc845d834 100644
--- a/bin/mprocs.yaml
+++ b/bin/mprocs.yaml
@@ -19,8 +19,8 @@ procs:
     shell: 'bin/check_kafka_clickhouse_up && bin/check_temporal_up && python manage.py start_temporal_worker'
 
   docker-compose:
-    shell: 'docker compose -f docker-compose.dev.yml up'
-    stop:
-      send-keys: ['']
+    # docker-compose makes sure the stack is up, and then follows its logs - but doesn't tear down on exit for speed
+    shell: 'docker compose -f docker-compose.dev.yml up -d && docker compose -f docker-compose.dev.yml logs --tail=0 -f'
 
 mouse_scroll_speed: 1
+scrollback: 10000
diff --git a/bin/start b/bin/start
index ceaddede2140f..7ec881e57b81c 100755
--- a/bin/start
+++ b/bin/start
@@ -9,4 +9,14 @@ export HOG_HOOK_URL=${HOG_HOOK_URL:-http://localhost:3300/hoghook}
 
 [ ! -f ./share/GeoLite2-City.mmdb ] && ( curl -L "https://mmdbcdn.posthog.net/" --http1.1 | brotli --decompress --output=./share/GeoLite2-City.mmdb )
 
+if ! command -v mprocs &> /dev/null; then
+    if command -v brew &> /dev/null; then
+        echo "🔁 Installing mprocs via Homebrew..."
+        brew install mprocs
+    else
+        echo "👉 To run bin/start, install mprocs: https://github.com/pvolok/mprocs#installation"
+        exit 1
+    fi
+fi
+
 exec mprocs --config bin/mprocs.yaml
diff --git a/cypress/e2e/experiments.cy.ts b/cypress/e2e/experiments.cy.ts
index 34898f3146536..e0e8339920bee 100644
--- a/cypress/e2e/experiments.cy.ts
+++ b/cypress/e2e/experiments.cy.ts
@@ -1,5 +1,3 @@
-import { setupFeatureFlags } from '../support/decide'
-
 describe('Experiments', () => {
     let randomNum
     let experimentName
@@ -47,10 +45,6 @@ describe('Experiments', () => {
     })
 
     const createExperimentInNewUi = (): void => {
-        setupFeatureFlags({
-            'new-experiments-ui': true,
-        })
-
         cy.visit('/experiments')
 
         // Name, flag key, description
diff --git a/cypress/e2e/insights-reload-query.ts b/cypress/e2e/insights-reload-query.ts
index 2f944f8993da7..55685ae309fb8 100644
--- a/cypress/e2e/insights-reload-query.ts
+++ b/cypress/e2e/insights-reload-query.ts
@@ -1,5 +1,3 @@
-import JSONCrush from 'jsoncrush'
-
 describe('ReloadInsight component', () => {
     beforeEach(() => {
         // Clear local storage before each test to ensure a clean state
@@ -21,8 +19,7 @@ describe('ReloadInsight component', () => {
         const draftQuery = window.localStorage.getItem(`draft-query-${currentTeamId}`)
         expect(draftQuery).to.not.be.null
 
-        const draftQueryObjUncrushed = JSONCrush.uncrush(draftQuery)
-        const draftQueryObj = JSON.parse(draftQueryObjUncrushed)
+        const draftQueryObj = JSON.parse(draftQuery)
 
         expect(draftQueryObj).to.have.property('query')
diff --git a/ee/hogai/eval/conftest.py b/ee/hogai/eval/conftest.py
index d0bc75348eeac..1a88ebffa2e33 100644
--- a/ee/hogai/eval/conftest.py
+++ b/ee/hogai/eval/conftest.py
@@ -1,28 +1,107 @@
+import functools
+from collections.abc import Generator
+from pathlib import Path
+
 import pytest
+from django.conf import settings
+from django.test import override_settings
+from langchain_core.runnables import RunnableConfig
+
+from ee.models import Conversation
+from posthog.demo.matrix.manager import MatrixManager
+from posthog.models import Organization, Project, Team, User
+from posthog.tasks.demo_create_data import HedgeboxMatrix
+from posthog.test.base import BaseTest
+
+
+# Flaky is a handy tool, but it always runs setup fixtures for retries.
+# This decorator will just retry without re-running setup.
+def retry_test_only(max_retries=3):
+    def decorator(func):
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            last_error: Exception | None = None
+            for attempt in range(max_retries):
+                try:
+                    return func(*args, **kwargs)
+                except Exception as e:
+                    last_error = e
+                    print(f"\nRetrying test (attempt {attempt + 1}/{max_retries})...")  # noqa
+            if last_error:
+                raise last_error
+
+        return wrapper
+
+    return decorator
+
+
+# Apply decorators to all tests in the package.
+def pytest_collection_modifyitems(items):
+    current_dir = Path(__file__).parent
+    for item in items:
+        if Path(item.fspath).is_relative_to(current_dir):
+            item.add_marker(
+                pytest.mark.skipif(not settings.IN_EVAL_TESTING, reason="Only runs for the assistant evaluation")
+            )
+            # Apply our custom retry decorator to the test function
+            item.obj = retry_test_only(max_retries=3)(item.obj)
+
+
+@pytest.fixture(scope="package")
+def team(django_db_blocker) -> Generator[Team, None, None]:
+    with django_db_blocker.unblock():
+        organization = Organization.objects.create(name=BaseTest.CONFIG_ORGANIZATION_NAME)
+        project = Project.objects.create(id=Team.objects.increment_id_sequence(), organization=organization)
+        team = Team.objects.create(
+            id=project.id,
+            project=project,
+            organization=organization,
+            test_account_filters=[
+                {
+                    "key": "email",
+                    "value": "@posthog.com",
+                    "operator": "not_icontains",
+                    "type": "person",
+                }
+            ],
+            has_completed_onboarding_for={"product_analytics": True},
+        )
+        yield team
+        organization.delete()
 
-from posthog.test.base import run_clickhouse_statement_in_parallel
 
+@pytest.fixture(scope="package")
+def user(team, django_db_blocker) -> Generator[User, None, None]:
+    with django_db_blocker.unblock():
+        user = User.objects.create_and_join(team.organization, "eval@posthog.com", "password1234")
+        yield user
+        user.delete()
 
-@pytest.fixture(scope="module", autouse=True)
-def setup_kafka_tables(django_db_setup):
-    from posthog.clickhouse.client import sync_execute
-    from posthog.clickhouse.schema import (
-        CREATE_KAFKA_TABLE_QUERIES,
-        build_query,
-    )
-    from posthog.settings import CLICKHOUSE_CLUSTER, CLICKHOUSE_DATABASE
 
-    kafka_queries = list(map(build_query, CREATE_KAFKA_TABLE_QUERIES))
-    run_clickhouse_statement_in_parallel(kafka_queries)
+@pytest.mark.django_db(transaction=True)
+@pytest.fixture
+def runnable_config(team, user) -> Generator[RunnableConfig, None, None]:
+    conversation = Conversation.objects.create(team=team, user=user)
+    yield {
+        "configurable": {
+            "thread_id": conversation.id,
+        }
+    }
+    conversation.delete()
 
-    yield
 
-    kafka_tables = sync_execute(
-        f"""
-        SELECT name
-        FROM system.tables
-        WHERE database = '{CLICKHOUSE_DATABASE}' AND name LIKE 'kafka_%'
-        """,
-    )
-    kafka_truncate_queries = [f"DROP TABLE {table[0]} ON CLUSTER '{CLICKHOUSE_CLUSTER}'" for table in kafka_tables]
-    run_clickhouse_statement_in_parallel(kafka_truncate_queries)
+@pytest.fixture(scope="package", autouse=True)
+def setup_test_data(django_db_setup, team, user, django_db_blocker):
+    with django_db_blocker.unblock():
+        matrix = HedgeboxMatrix(
+            seed="b1ef3c66-5f43-488a-98be-6b46d92fbcef",  # this seed generates all events
+            days_past=120,
+            days_future=30,
+            n_clusters=500,
+            group_type_index_offset=0,
+        )
+        matrix_manager = MatrixManager(matrix, print_steps=True)
+        with override_settings(TEST=False):
+            # Simulation saving should occur in non-test mode, so that Kafka isn't mocked. Normally in tests we don't
+            # want to ingest via Kafka, but simulation saving is specifically designed to use that route for speed
+            matrix_manager.run_on_team(team, user)
diff --git a/ee/hogai/eval/tests/test_eval_funnel_generator.py b/ee/hogai/eval/tests/test_eval_funnel_generator.py
index 4d7876ca6f73c..5f0f29243296a 100644
--- a/ee/hogai/eval/tests/test_eval_funnel_generator.py
+++ b/ee/hogai/eval/tests/test_eval_funnel_generator.py
@@ -1,40 +1,46 @@
+from collections.abc import Callable
 from typing import cast
 
+import pytest
 from langgraph.graph.state import CompiledStateGraph
 
 from ee.hogai.assistant import AssistantGraph
-from ee.hogai.eval.utils import EvalBaseTest
 from ee.hogai.utils.types import AssistantNodeName, AssistantState
 from posthog.schema import AssistantFunnelsQuery, HumanMessage, VisualizationMessage
 
 
-class TestEvalFunnelGenerator(EvalBaseTest):
-    def _call_node(self, query: str, plan: str) -> AssistantFunnelsQuery:
-        graph: CompiledStateGraph = (
-            AssistantGraph(self.team)
-            .add_edge(AssistantNodeName.START, AssistantNodeName.FUNNEL_GENERATOR)
-            .add_funnel_generator(AssistantNodeName.END)
-            .compile()
-        )
+@pytest.fixture
+def call_node(team, runnable_config) -> Callable[[str, str], AssistantFunnelsQuery]:
+    graph: CompiledStateGraph = (
+        AssistantGraph(team)
+        .add_edge(AssistantNodeName.START, AssistantNodeName.FUNNEL_GENERATOR)
+        .add_funnel_generator(AssistantNodeName.END)
+        .compile()
+    )
+
+    def callable(query: str, plan: str) -> AssistantFunnelsQuery:
         state = graph.invoke(
             AssistantState(messages=[HumanMessage(content=query)], plan=plan),
-            self._get_config(),
+            runnable_config,
         )
         return cast(VisualizationMessage, AssistantState.model_validate(state).messages[-1]).answer
 
-    def test_node_replaces_equals_with_contains(self):
-        query = "what is the conversion rate from a page view to sign up for users with name John?"
-        plan = """Sequence:
-        1. $pageview
-        - property filter 1
-            - person
-            - name
-            - equals
-            - John
-        2. signed_up
-        """
-        actual_output = self._call_node(query, plan).model_dump_json(exclude_none=True)
-        assert "exact" not in actual_output
-        assert "icontains" in actual_output
-        assert "John" not in actual_output
-        assert "john" in actual_output
+    return callable
+
+
+def test_node_replaces_equals_with_contains(call_node):
+    query = "what is the conversion rate from a page view to sign up for users with name John?"
+    plan = """Sequence:
+    1. $pageview
+    - property filter 1
+        - person
+        - name
+        - equals
+        - John
+    2. 
signed_up + """ + actual_output = call_node(query, plan).model_dump_json(exclude_none=True) + assert "exact" not in actual_output + assert "icontains" in actual_output + assert "John" not in actual_output + assert "john" in actual_output diff --git a/ee/hogai/eval/tests/test_eval_funnel_planner.py b/ee/hogai/eval/tests/test_eval_funnel_planner.py index 9adbd75e77c6c..c8bc25bc0b5dc 100644 --- a/ee/hogai/eval/tests/test_eval_funnel_planner.py +++ b/ee/hogai/eval/tests/test_eval_funnel_planner.py @@ -1,208 +1,224 @@ +from collections.abc import Callable + +import pytest from deepeval import assert_test from deepeval.metrics import GEval from deepeval.test_case import LLMTestCase, LLMTestCaseParams +from langchain_core.runnables.config import RunnableConfig from langgraph.graph.state import CompiledStateGraph from ee.hogai.assistant import AssistantGraph -from ee.hogai.eval.utils import EvalBaseTest from ee.hogai.utils.types import AssistantNodeName, AssistantState from posthog.schema import HumanMessage -class TestEvalFunnelPlanner(EvalBaseTest): - def _get_plan_correctness_metric(self): - return GEval( - name="Funnel Plan Correctness", - criteria="You will be given expected and actual generated plans to provide a taxonomy to answer a user's question with a funnel insight. Compare the plans to determine whether the taxonomy of the actual plan matches the expected plan. Do not apply general knowledge about funnel insights.", - evaluation_steps=[ - "A plan must define at least two series in the sequence, but it is not required to define any filters, exclusion steps, or a breakdown.", - "Compare events, properties, math types, and property values of 'expected output' and 'actual output'. Do not penalize if the actual output does not include a timeframe.", - "Check if the combination of events, properties, and property values in 'actual output' can answer the user's question according to the 'expected output'.", - # The criteria for aggregations must be more specific because there isn't a way to bypass them. - "Check if the math types in 'actual output' match those in 'expected output.' If the aggregation type is specified by a property, user, or group in 'expected output', the same property, user, or group must be used in 'actual output'.", - "If 'expected output' contains exclusion steps, check if 'actual output' contains those, and heavily penalize if the exclusion steps are not present or different.", - "If 'expected output' contains a breakdown, check if 'actual output' contains a similar breakdown, and heavily penalize if the breakdown is not present or different. Plans may only have one breakdown.", - # We don't want to see in the output unnecessary property filters. The assistant tries to use them all the time. - "Heavily penalize if the 'actual output' contains any excessive output not present in the 'expected output'. For example, the `is set` operator in filters should not be used unless the user explicitly asks for it.", - ], - evaluation_params=[ - LLMTestCaseParams.INPUT, - LLMTestCaseParams.EXPECTED_OUTPUT, - LLMTestCaseParams.ACTUAL_OUTPUT, - ], - threshold=0.7, - ) +@pytest.fixture(scope="module") +def metric(): + return GEval( + name="Funnel Plan Correctness", + criteria="You will be given expected and actual generated plans to provide a taxonomy to answer a user's question with a funnel insight. Compare the plans to determine whether the taxonomy of the actual plan matches the expected plan. 
Do not apply general knowledge about funnel insights.", + evaluation_steps=[ + "A plan must define at least two series in the sequence, but it is not required to define any filters, exclusion steps, or a breakdown.", + "Compare events, properties, math types, and property values of 'expected output' and 'actual output'. Do not penalize if the actual output does not include a timeframe.", + "Check if the combination of events, properties, and property values in 'actual output' can answer the user's question according to the 'expected output'.", + # The criteria for aggregations must be more specific because there isn't a way to bypass them. + "Check if the math types in 'actual output' match those in 'expected output.' If the aggregation type is specified by a property, user, or group in 'expected output', the same property, user, or group must be used in 'actual output'.", + "If 'expected output' contains exclusion steps, check if 'actual output' contains those, and heavily penalize if the exclusion steps are not present or different.", + "If 'expected output' contains a breakdown, check if 'actual output' contains a similar breakdown, and heavily penalize if the breakdown is not present or different. Plans may only have one breakdown.", + # We don't want to see in the output unnecessary property filters. The assistant tries to use them all the time. + "Heavily penalize if the 'actual output' contains any excessive output not present in the 'expected output'. For example, the `is set` operator in filters should not be used unless the user explicitly asks for it.", + ], + evaluation_params=[ + LLMTestCaseParams.INPUT, + LLMTestCaseParams.EXPECTED_OUTPUT, + LLMTestCaseParams.ACTUAL_OUTPUT, + ], + threshold=0.7, + ) - def _call_node(self, query): - graph: CompiledStateGraph = ( - AssistantGraph(self.team) - .add_edge(AssistantNodeName.START, AssistantNodeName.FUNNEL_PLANNER) - .add_funnel_planner(AssistantNodeName.END) - .compile() - ) + +@pytest.fixture +def call_node(team, runnable_config: RunnableConfig) -> Callable[[str], str]: + graph: CompiledStateGraph = ( + AssistantGraph(team) + .add_edge(AssistantNodeName.START, AssistantNodeName.FUNNEL_PLANNER) + .add_funnel_planner(AssistantNodeName.END) + .compile() + ) + + def callable(query: str) -> str: state = graph.invoke( AssistantState(messages=[HumanMessage(content=query)]), - self._get_config(), + runnable_config, ) return AssistantState.model_validate(state).plan or "" - def test_basic_funnel(self): - query = "what was the conversion from a page view to sign up?" - test_case = LLMTestCase( - input=query, - expected_output=""" - Sequence: - 1. $pageview - 2. signed_up - """, - actual_output=self._call_node(query), - ) - assert_test(test_case, [self._get_plan_correctness_metric()]) - - def test_outputs_at_least_two_events(self): - """ - Ambigious query. The funnel must return at least two events. - """ - query = "how many users paid a bill?" - test_case = LLMTestCase( - input=query, - expected_output=""" - Sequence: - 1. any event - 2. upgrade_plan - """, - actual_output=self._call_node(query), - ) - assert_test(test_case, [self._get_plan_correctness_metric()]) - - def test_no_excessive_property_filters(self): - query = "Show the user conversion from a sign up to a file download" - test_case = LLMTestCase( - input=query, - expected_output=""" - Sequence: - 1. signed_up - 2. 
downloaded_file - """, - actual_output=self._call_node(query), - ) - assert_test(test_case, [self._get_plan_correctness_metric()]) + return callable - def test_basic_filtering(self): - query = ( - "What was the conversion from uploading a file to downloading it from Chrome and Safari in the last 30d?" - ) - test_case = LLMTestCase( - input=query, - expected_output=""" - Sequence: - 1. uploaded_file - - property filter 1: - - entity: event - - property name: $browser - - property type: String - - operator: equals - - property value: Chrome - - property filter 2: - - entity: event - - property name: $browser - - property type: String - - operator: equals - - property value: Safari - 2. downloaded_file - - property filter 1: - - entity: event - - property name: $browser - - property type: String - - operator: equals - - property value: Chrome - - property filter 2: - - entity: event - - property name: $browser - - property type: String - - operator: equals - - property value: Safari - """, - actual_output=self._call_node(query), - ) - assert_test(test_case, [self._get_plan_correctness_metric()]) - - def test_exclusion_steps(self): - query = "What was the conversion from uploading a file to downloading it in the last 30d excluding users that deleted a file?" - test_case = LLMTestCase( - input=query, - expected_output=""" - Sequence: - 1. uploaded_file - 2. downloaded_file - - Exclusions: - - deleted_file - - start index: 0 - - end index: 1 - """, - actual_output=self._call_node(query), - ) - assert_test(test_case, [self._get_plan_correctness_metric()]) - - def test_breakdown(self): - query = "Show a conversion from uploading a file to downloading it segmented by a browser" - test_case = LLMTestCase( - input=query, - expected_output=""" - Sequence: - 1. uploaded_file - 2. downloaded_file - - Breakdown by: - - entity: event - - property name: $browser - """, - actual_output=self._call_node(query), - ) - assert_test(test_case, [self._get_plan_correctness_metric()]) - - def test_needle_in_a_haystack(self): - query = "What was the conversion from a sign up to a paying customer on the personal-pro plan?" - test_case = LLMTestCase( - input=query, - expected_output=""" - Sequence: - 1. signed_up - 2. paid_bill - - property filter 1: - - entity: event - - property name: plan - - property type: String - - operator: equals - - property value: personal/pro - """, - actual_output=self._call_node(query), - ) - assert_test(test_case, [self._get_plan_correctness_metric()]) - - def test_planner_outputs_multiple_series_from_a_single_series_question(self): - query = "What's our sign-up funnel?" - test_case = LLMTestCase( - input=query, - expected_output=""" - Sequence: - 1. $pageview - 2. signed_up - """, - actual_output=self._call_node(query), - ) - assert_test(test_case, [self._get_plan_correctness_metric()]) - - def test_funnel_does_not_include_timeframe(self): - query = "what was the conversion from a page view to sign up for event time before 2024-01-01?" - test_case = LLMTestCase( - input=query, - expected_output=""" - Sequence: - 1. $pageview - 2. signed_up - """, - actual_output=self._call_node(query), - ) - assert_test(test_case, [self._get_plan_correctness_metric()]) + +def test_basic_funnel(metric, call_node): + query = "what was the conversion from a page view to sign up?" + test_case = LLMTestCase( + input=query, + expected_output=""" + Sequence: + 1. $pageview + 2. 
signed_up
+    """
+    actual_output = call_node(query, plan).model_dump_json(exclude_none=True)
+    assert "exact" not in actual_output
+    assert "icontains" in actual_output
+    assert "John" not in actual_output
+    assert "john" in actual_output
+
+
+def test_outputs_at_least_two_events(metric, call_node):
+    """
+    Ambiguous query. The funnel must return at least two events.
+    """
+    query = "how many users paid a bill?"
+    test_case = LLMTestCase(
+        input=query,
+        expected_output="""
+        Sequence:
+        1. any event
+        2. upgrade_plan
+        """,
+        actual_output=call_node(query),
+    )
+    assert_test(test_case, [metric])
+
+
+def test_no_excessive_property_filters(metric, call_node):
+    query = "Show the user conversion from a sign up to a file download"
+    test_case = LLMTestCase(
+        input=query,
+        expected_output="""
+        Sequence:
+        1. signed_up
+        2. downloaded_file
+        """,
+        actual_output=call_node(query),
+    )
+    assert_test(test_case, [metric])
+
+
+def test_basic_filtering(metric, call_node):
+    query = "What was the conversion from uploading a file to downloading it from Chrome and Safari in the last 30d?"
+    test_case = LLMTestCase(
+        input=query,
+        expected_output="""
+        Sequence:
+        1. uploaded_file
+            - property filter 1:
+                - entity: event
+                - property name: $browser
+                - property type: String
+                - operator: equals
+                - property value: Chrome
+            - property filter 2:
+                - entity: event
+                - property name: $browser
+                - property type: String
+                - operator: equals
+                - property value: Safari
+        2. downloaded_file
+            - property filter 1:
+                - entity: event
+                - property name: $browser
+                - property type: String
+                - operator: equals
+                - property value: Chrome
+            - property filter 2:
+                - entity: event
+                - property name: $browser
+                - property type: String
+                - operator: equals
+                - property value: Safari
+        """,
+        actual_output=call_node(query),
+    )
+    assert_test(test_case, [metric])
+
+
+def test_exclusion_steps(metric, call_node):
+    query = "What was the conversion from uploading a file to downloading it in the last 30d excluding users that deleted a file?"
+    test_case = LLMTestCase(
+        input=query,
+        expected_output="""
+        Sequence:
+        1. uploaded_file
+        2. downloaded_file
+
+        Exclusions:
+        - deleted_file
+            - start index: 0
+            - end index: 1
+        """,
+        actual_output=call_node(query),
+    )
+    assert_test(test_case, [metric])
+
+
+def test_breakdown(metric, call_node):
+    query = "Show a conversion from uploading a file to downloading it segmented by a browser"
+    test_case = LLMTestCase(
+        input=query,
+        expected_output="""
+        Sequence:
+        1. uploaded_file
+        2. downloaded_file
+
+        Breakdown by:
+        - entity: event
+        - property name: $browser
+        """,
+        actual_output=call_node(query),
+    )
+    assert_test(test_case, [metric])
+
+
+def test_needle_in_a_haystack(metric, call_node):
+    query = "What was the conversion from a sign up to a paying customer on the personal-pro plan?"
+    test_case = LLMTestCase(
+        input=query,
+        expected_output="""
+        Sequence:
+        1. signed_up
+        2. paid_bill
+            - property filter 1:
+                - entity: event
+                - property name: plan
+                - property type: String
+                - operator: equals
+                - property value: personal/pro
+        """,
+        actual_output=call_node(query),
+    )
+    assert_test(test_case, [metric])
+
+
+def test_planner_outputs_multiple_series_from_a_single_series_question(metric, call_node):
+    query = "What's our sign-up funnel?"
+    test_case = LLMTestCase(
+        input=query,
+        expected_output="""
+        Sequence:
+        1. $pageview
+        2. signed_up
+        """,
+        actual_output=call_node(query),
+    )
+    assert_test(test_case, [metric])
+
+
+def test_funnel_does_not_include_timeframe(metric, call_node):
+    query = "what was the conversion from a page view to sign up for event time before 2024-01-01?"
+ test_case = LLMTestCase( + input=query, + expected_output=""" + Sequence: + 1. $pageview + 2. signed_up + """, + actual_output=call_node(query), + ) + assert_test(test_case, [metric]) diff --git a/ee/hogai/eval/tests/test_eval_router.py b/ee/hogai/eval/tests/test_eval_router.py index c1307e9d40f00..84e5c4c809972 100644 --- a/ee/hogai/eval/tests/test_eval_router.py +++ b/ee/hogai/eval/tests/test_eval_router.py @@ -1,69 +1,80 @@ +from collections.abc import Callable from typing import cast +import pytest from langgraph.graph.state import CompiledStateGraph from ee.hogai.assistant import AssistantGraph -from ee.hogai.eval.utils import EvalBaseTest from ee.hogai.utils.types import AssistantNodeName, AssistantState from posthog.schema import HumanMessage, RouterMessage -class TestEvalRouter(EvalBaseTest): - def _call_node(self, query: str | list): - graph: CompiledStateGraph = ( - AssistantGraph(self.team) - .add_start() - .add_router(path_map={"trends": AssistantNodeName.END, "funnel": AssistantNodeName.END}) - .compile() - ) +@pytest.fixture +def call_node(team, runnable_config) -> Callable[[str | list], str]: + graph: CompiledStateGraph = ( + AssistantGraph(team) + .add_start() + .add_router(path_map={"trends": AssistantNodeName.END, "funnel": AssistantNodeName.END}) + .compile() + ) + + def callable(query: str | list) -> str: messages = [HumanMessage(content=query)] if isinstance(query, str) else query state = graph.invoke( AssistantState(messages=messages), - self._get_config(), + runnable_config, ) return cast(RouterMessage, AssistantState.model_validate(state).messages[-1]).content - def test_outputs_basic_trends_insight(self): - query = "Show the $pageview trend" - res = self._call_node(query) - self.assertEqual(res, "trends") - - def test_outputs_basic_funnel_insight(self): - query = "What is the conversion rate of users who uploaded a file to users who paid for a plan?" - res = self._call_node(query) - self.assertEqual(res, "funnel") - - def test_converts_trends_to_funnel(self): - conversation = [ - HumanMessage(content="Show trends of $pageview and $identify"), - RouterMessage(content="trends"), - HumanMessage(content="Convert this insight to a funnel"), - ] - res = self._call_node(conversation[:1]) - self.assertEqual(res, "trends") - res = self._call_node(conversation) - self.assertEqual(res, "funnel") - - def test_converts_funnel_to_trends(self): - conversation = [ - HumanMessage(content="What is the conversion from a page view to a sign up?"), - RouterMessage(content="funnel"), - HumanMessage(content="Convert this insight to a trends"), - ] - res = self._call_node(conversation[:1]) - self.assertEqual(res, "funnel") - res = self._call_node(conversation) - self.assertEqual(res, "trends") - - def test_outputs_single_trends_insight(self): - """ - Must display a trends insight because it's not possible to build a funnel with a single series. - """ - query = "how many users upgraded their plan to personal pro?" - res = self._call_node(query) - self.assertEqual(res, "trends") - - def test_classifies_funnel_with_single_series(self): - query = "What's our sign-up funnel?" - res = self._call_node(query) - self.assertEqual(res, "funnel") + return callable + + +def test_outputs_basic_trends_insight(call_node): + query = "Show the $pageview trend" + res = call_node(query) + assert res == "trends" + + +def test_outputs_basic_funnel_insight(call_node): + query = "What is the conversion rate of users who uploaded a file to users who paid for a plan?" 
+ res = call_node(query) + assert res == "funnel" + + +def test_converts_trends_to_funnel(call_node): + conversation = [ + HumanMessage(content="Show trends of $pageview and $identify"), + RouterMessage(content="trends"), + HumanMessage(content="Convert this insight to a funnel"), + ] + res = call_node(conversation[:1]) + assert res == "trends" + res = call_node(conversation) + assert res == "funnel" + + +def test_converts_funnel_to_trends(call_node): + conversation = [ + HumanMessage(content="What is the conversion from a page view to a sign up?"), + RouterMessage(content="funnel"), + HumanMessage(content="Convert this insight to a trends"), + ] + res = call_node(conversation[:1]) + assert res == "funnel" + res = call_node(conversation) + assert res == "trends" + + +def test_outputs_single_trends_insight(call_node): + """ + Must display a trends insight because it's not possible to build a funnel with a single series. + """ + query = "how many users upgraded their plan to personal pro?" + res = call_node(query) + assert res == "trends" + + +def test_classifies_funnel_with_single_series(call_node): + query = "What's our sign-up funnel?" + res = call_node(query) + assert res == "funnel" diff --git a/ee/hogai/eval/tests/test_eval_trends_generator.py b/ee/hogai/eval/tests/test_eval_trends_generator.py index 496bbf0100b51..c8491957c868f 100644 --- a/ee/hogai/eval/tests/test_eval_trends_generator.py +++ b/ee/hogai/eval/tests/test_eval_trends_generator.py @@ -1,58 +1,65 @@ +from collections.abc import Callable from typing import cast +import pytest from langgraph.graph.state import CompiledStateGraph from ee.hogai.assistant import AssistantGraph -from ee.hogai.eval.utils import EvalBaseTest from ee.hogai.utils.types import AssistantNodeName, AssistantState from posthog.schema import AssistantTrendsQuery, HumanMessage, VisualizationMessage -class TestEvalTrendsGenerator(EvalBaseTest): - def _call_node(self, query: str, plan: str) -> AssistantTrendsQuery: - graph: CompiledStateGraph = ( - AssistantGraph(self.team) - .add_edge(AssistantNodeName.START, AssistantNodeName.TRENDS_GENERATOR) - .add_trends_generator(AssistantNodeName.END) - .compile() - ) +@pytest.fixture +def call_node(team, runnable_config) -> Callable[[str, str], AssistantTrendsQuery]: + graph: CompiledStateGraph = ( + AssistantGraph(team) + .add_edge(AssistantNodeName.START, AssistantNodeName.TRENDS_GENERATOR) + .add_trends_generator(AssistantNodeName.END) + .compile() + ) + + def callable(query: str, plan: str) -> AssistantTrendsQuery: state = graph.invoke( AssistantState(messages=[HumanMessage(content=query)], plan=plan), - self._get_config(), + runnable_config, ) return cast(VisualizationMessage, AssistantState.model_validate(state).messages[-1]).answer - def test_node_replaces_equals_with_contains(self): - query = "what is pageview trend for users with name John?" - plan = """Events: - - $pageview - - math operation: total count - - property filter 1 - - person - - name - - equals - - John - """ - actual_output = self._call_node(query, plan).model_dump_json(exclude_none=True) - assert "exact" not in actual_output - assert "icontains" in actual_output - assert "John" not in actual_output - assert "john" in actual_output - - def test_node_leans_towards_line_graph(self): - query = "How often do users download files?" 
- # We ideally want to consider both total count of downloads per period, as well as how often a median user downloads - plan = """Events: - - downloaded_file - - math operation: total count - - downloaded_file - - math operation: median count per user - """ - actual_output = self._call_node(query, plan) - assert actual_output.trendsFilter.display == "ActionsLineGraph" - assert actual_output.series[0].kind == "EventsNode" - assert actual_output.series[0].event == "downloaded_file" - assert actual_output.series[0].math == "total" - assert actual_output.series[1].kind == "EventsNode" - assert actual_output.series[1].event == "downloaded_file" - assert actual_output.series[1].math == "median_count_per_actor" + return callable + + +def test_node_replaces_equals_with_contains(call_node): + query = "what is pageview trend for users with name John?" + plan = """Events: + - $pageview + - math operation: total count + - property filter 1 + - person + - name + - equals + - John + """ + actual_output = call_node(query, plan).model_dump_json(exclude_none=True) + assert "exact" not in actual_output + assert "icontains" in actual_output + assert "John" not in actual_output + assert "john" in actual_output + + +def test_node_leans_towards_line_graph(call_node): + query = "How often do users download files?" + # We ideally want to consider both total count of downloads per period, as well as how often a median user downloads + plan = """Events: + - downloaded_file + - math operation: total count + - downloaded_file + - math operation: median count per user + """ + actual_output = call_node(query, plan) + assert actual_output.trendsFilter.display == "ActionsLineGraph" + assert actual_output.series[0].kind == "EventsNode" + assert actual_output.series[0].event == "downloaded_file" + assert actual_output.series[0].math == "total" + assert actual_output.series[1].kind == "EventsNode" + assert actual_output.series[1].event == "downloaded_file" + assert actual_output.series[1].math == "median_count_per_actor" diff --git a/ee/hogai/eval/tests/test_eval_trends_planner.py b/ee/hogai/eval/tests/test_eval_trends_planner.py index d4fbff456a91c..4d4ea4c41dfbf 100644 --- a/ee/hogai/eval/tests/test_eval_trends_planner.py +++ b/ee/hogai/eval/tests/test_eval_trends_planner.py @@ -1,179 +1,196 @@ +from collections.abc import Callable + +import pytest from deepeval import assert_test from deepeval.metrics import GEval from deepeval.test_case import LLMTestCase, LLMTestCaseParams +from langchain_core.runnables.config import RunnableConfig from langgraph.graph.state import CompiledStateGraph from ee.hogai.assistant import AssistantGraph -from ee.hogai.eval.utils import EvalBaseTest from ee.hogai.utils.types import AssistantNodeName, AssistantState from posthog.schema import HumanMessage -class TestEvalTrendsPlanner(EvalBaseTest): - def _get_plan_correctness_metric(self): - return GEval( - name="Trends Plan Correctness", - criteria="You will be given expected and actual generated plans to provide a taxonomy to answer a user's question with a trends insight. Compare the plans to determine whether the taxonomy of the actual plan matches the expected plan. Do not apply general knowledge about trends insights.", - evaluation_steps=[ - "A plan must define at least one event and a math type, but it is not required to define any filters, breakdowns, or formulas.", - "Compare events, properties, math types, and property values of 'expected output' and 'actual output'. 
Do not penalize if the actual output does not include a timeframe.", - "Check if the combination of events, properties, and property values in 'actual output' can answer the user's question according to the 'expected output'.", - # The criteria for aggregations must be more specific because there isn't a way to bypass them. - "Check if the math types in 'actual output' match those in 'expected output'. Math types sometimes are interchangeable, so use your judgement. If the aggregation type is specified by a property, user, or group in 'expected output', the same property, user, or group must be used in 'actual output'.", - "If 'expected output' contains a breakdown, check if 'actual output' contains a similar breakdown, and heavily penalize if the breakdown is not present or different.", - "If 'expected output' contains a formula, check if 'actual output' contains a similar formula, and heavily penalize if the formula is not present or different.", - # We don't want to see in the output unnecessary property filters. The assistant tries to use them all the time. - "Heavily penalize if the 'actual output' contains any excessive output not present in the 'expected output'. For example, the `is set` operator in filters should not be used unless the user explicitly asks for it.", - ], - evaluation_params=[ - LLMTestCaseParams.INPUT, - LLMTestCaseParams.EXPECTED_OUTPUT, - LLMTestCaseParams.ACTUAL_OUTPUT, - ], - threshold=0.7, - ) +@pytest.fixture(scope="module") +def metric(): + return GEval( + name="Trends Plan Correctness", + criteria="You will be given expected and actual generated plans to provide a taxonomy to answer a user's question with a trends insight. Compare the plans to determine whether the taxonomy of the actual plan matches the expected plan. Do not apply general knowledge about trends insights.", + evaluation_steps=[ + "A plan must define at least one event and a math type, but it is not required to define any filters, breakdowns, or formulas.", + "Compare events, properties, math types, and property values of 'expected output' and 'actual output'. Do not penalize if the actual output does not include a timeframe.", + "Check if the combination of events, properties, and property values in 'actual output' can answer the user's question according to the 'expected output'.", + # The criteria for aggregations must be more specific because there isn't a way to bypass them. + "Check if the math types in 'actual output' match those in 'expected output'. Math types sometimes are interchangeable, so use your judgement. If the aggregation type is specified by a property, user, or group in 'expected output', the same property, user, or group must be used in 'actual output'.", + "If 'expected output' contains a breakdown, check if 'actual output' contains a similar breakdown, and heavily penalize if the breakdown is not present or different.", + "If 'expected output' contains a formula, check if 'actual output' contains a similar formula, and heavily penalize if the formula is not present or different.", + # We don't want to see in the output unnecessary property filters. The assistant tries to use them all the time. + "Heavily penalize if the 'actual output' contains any excessive output not present in the 'expected output'. 
For example, the `is set` operator in filters should not be used unless the user explicitly asks for it.", + ], + evaluation_params=[ + LLMTestCaseParams.INPUT, + LLMTestCaseParams.EXPECTED_OUTPUT, + LLMTestCaseParams.ACTUAL_OUTPUT, + ], + threshold=0.7, + ) - def _call_node(self, query): - graph: CompiledStateGraph = ( - AssistantGraph(self.team) - .add_edge(AssistantNodeName.START, AssistantNodeName.TRENDS_PLANNER) - .add_trends_planner(AssistantNodeName.END) - .compile() - ) + +@pytest.fixture +def call_node(team, runnable_config: RunnableConfig) -> Callable[[str], str]: + graph: CompiledStateGraph = ( + AssistantGraph(team) + .add_edge(AssistantNodeName.START, AssistantNodeName.TRENDS_PLANNER) + .add_trends_planner(AssistantNodeName.END) + .compile() + ) + + def callable(query: str) -> str: state = graph.invoke( AssistantState(messages=[HumanMessage(content=query)]), - self._get_config(), + runnable_config, ) return AssistantState.model_validate(state).plan or "" - def test_no_excessive_property_filters(self): - query = "Show the $pageview trend" - test_case = LLMTestCase( - input=query, - expected_output=""" - Events: - - $pageview - - math operation: total count - """, - actual_output=self._call_node(query), - ) - assert_test(test_case, [self._get_plan_correctness_metric()]) - - def test_no_excessive_property_filters_for_a_defined_math_type(self): - query = "What is the MAU?" - test_case = LLMTestCase( - input=query, - expected_output=""" - Events: - - $pageview - - math operation: unique users - """, - actual_output=self._call_node(query), - ) - assert_test(test_case, [self._get_plan_correctness_metric()]) - - def test_basic_filtering(self): - query = "can you compare how many Chrome vs Safari users uploaded a file in the last 30d?" - test_case = LLMTestCase( - input=query, - expected_output=""" - Events: - - uploaded_file - - math operation: total count - - property filter 1: - - entity: event - - property name: $browser - - property type: String - - operator: equals - - property value: Chrome - - property filter 2: - - entity: event - - property name: $browser - - property type: String - - operator: equals - - property value: Safari - - Breakdown by: - - breakdown 1: + return callable + + +def test_no_excessive_property_filters(metric, call_node): + query = "Show the $pageview trend" + test_case = LLMTestCase( + input=query, + expected_output=""" + Events: + - $pageview + - math operation: total count + """, + actual_output=call_node(query), + ) + assert_test(test_case, [metric]) + + +def test_no_excessive_property_filters_for_a_defined_math_type(metric, call_node): + query = "What is the MAU?" + test_case = LLMTestCase( + input=query, + expected_output=""" + Events: + - $pageview + - math operation: unique users + """, + actual_output=call_node(query), + ) + assert_test(test_case, [metric]) + + +def test_basic_filtering(metric, call_node): + query = "can you compare how many Chrome vs Safari users uploaded a file in the last 30d?" 
+ test_case = LLMTestCase( + input=query, + expected_output=""" + Events: + - uploaded_file + - math operation: total count + - property filter 1: - entity: event - property name: $browser - """, - actual_output=self._call_node(query), - ) - assert_test(test_case, [self._get_plan_correctness_metric()]) - - def test_formula_mode(self): - query = "i want to see a ratio of identify divided by page views" - test_case = LLMTestCase( - input=query, - expected_output=""" - Events: - - $identify - - math operation: total count - - $pageview - - math operation: total count - - Formula: - `A/B`, where `A` is the total count of `$identify` and `B` is the total count of `$pageview` - """, - actual_output=self._call_node(query), - ) - assert_test(test_case, [self._get_plan_correctness_metric()]) - - def test_math_type_by_a_property(self): - query = "what is the average session duration?" - test_case = LLMTestCase( - input=query, - expected_output=""" - Events: - - All Events - - math operation: average by `$session_duration` - """, - actual_output=self._call_node(query), - ) - assert_test(test_case, [self._get_plan_correctness_metric()]) - - def test_math_type_by_a_user(self): - query = "What is the median page view count for a user?" - test_case = LLMTestCase( - input=query, - expected_output=""" - Events: - - $pageview - - math operation: median by users - """, - actual_output=self._call_node(query), - ) - assert_test(test_case, [self._get_plan_correctness_metric()]) - - def test_needle_in_a_haystack(self): - query = "How frequently do people pay for a personal-pro plan?" - test_case = LLMTestCase( - input=query, - expected_output=""" - Events: - - paid_bill - - math operation: total count - - property filter 1: - - entity: event - - property name: plan - - property type: String - - operator: contains - - property value: personal/pro - """, - actual_output=self._call_node(query), - ) - assert_test(test_case, [self._get_plan_correctness_metric()]) - - def test_funnel_does_not_include_timeframe(self): - query = "what is the pageview trend for event time before 2024-01-01?" - test_case = LLMTestCase( - input=query, - expected_output=""" - Events: - - $pageview - - math operation: total count - """, - actual_output=self._call_node(query), - ) - assert_test(test_case, [self._get_plan_correctness_metric()]) + - property type: String + - operator: equals + - property value: Chrome + - property filter 2: + - entity: event + - property name: $browser + - property type: String + - operator: equals + - property value: Safari + + Breakdown by: + - breakdown 1: + - entity: event + - property name: $browser + """, + actual_output=call_node(query), + ) + assert_test(test_case, [metric]) + + +def test_formula_mode(metric, call_node): + query = "i want to see a ratio of identify divided by page views" + test_case = LLMTestCase( + input=query, + expected_output=""" + Events: + - $identify + - math operation: total count + - $pageview + - math operation: total count + + Formula: + `A/B`, where `A` is the total count of `$identify` and `B` is the total count of `$pageview` + """, + actual_output=call_node(query), + ) + assert_test(test_case, [metric]) + + +def test_math_type_by_a_property(metric, call_node): + query = "what is the average session duration?" 
+ test_case = LLMTestCase( + input=query, + expected_output=""" + Events: + - All Events + - math operation: average by `$session_duration` + """, + actual_output=call_node(query), + ) + assert_test(test_case, [metric]) + + +def test_math_type_by_a_user(metric, call_node): + query = "What is the median page view count for a user?" + test_case = LLMTestCase( + input=query, + expected_output=""" + Events: + - $pageview + - math operation: median by users + """, + actual_output=call_node(query), + ) + assert_test(test_case, [metric]) + + +def test_needle_in_a_haystack(metric, call_node): + query = "How frequently do people pay for a personal-pro plan?" + test_case = LLMTestCase( + input=query, + expected_output=""" + Events: + - paid_bill + - math operation: total count + - property filter 1: + - entity: event + - property name: plan + - property type: String + - operator: contains + - property value: personal/pro + """, + actual_output=call_node(query), + ) + assert_test(test_case, [metric]) + + +def test_trends_does_not_include_timeframe(metric, call_node): + query = "what is the pageview trend for event time before 2024-01-01?" + test_case = LLMTestCase( + input=query, + expected_output=""" + Events: + - $pageview + - math operation: total count + """, + actual_output=call_node(query), + ) + assert_test(test_case, [metric]) diff --git a/ee/hogai/eval/utils.py b/ee/hogai/eval/utils.py deleted file mode 100644 index 6e03c4cfafa9f..0000000000000 --- a/ee/hogai/eval/utils.py +++ /dev/null @@ -1,40 +0,0 @@ -import os - -import pytest -from django.test import override_settings -from flaky import flaky -from langchain_core.runnables import RunnableConfig - -from ee.models.assistant import Conversation -from posthog.demo.matrix.manager import MatrixManager -from posthog.tasks.demo_create_data import HedgeboxMatrix -from posthog.test.base import NonAtomicBaseTest - - -@pytest.mark.skipif(os.environ.get("DEEPEVAL") != "YES", reason="Only runs for the assistant evaluation") -@flaky(max_runs=3, min_passes=1) -class EvalBaseTest(NonAtomicBaseTest): - def _get_config(self) -> RunnableConfig: - conversation = Conversation.objects.create(team=self.team, user=self.user) - return { - "configurable": { - "thread_id": conversation.id, - } - } - - @classmethod - def setUpTestData(cls): - super().setUpTestData() - matrix = HedgeboxMatrix( - seed="b1ef3c66-5f43-488a-98be-6b46d92fbcef", # this seed generates all events - days_past=120, - days_future=30, - n_clusters=500, - group_type_index_offset=0, - ) - matrix_manager = MatrixManager(matrix, print_steps=True) - existing_user = cls.team.organization.members.first() - with override_settings(TEST=False): - # Simulation saving should occur in non-test mode, so that Kafka isn't mocked. 
Normally in tests we don't - # want to ingest via Kafka, but simulation saving is specifically designed to use that route for speed - matrix_manager.run_on_team(cls.team, existing_user) diff --git a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown-edit--dark.png b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown-edit--dark.png index c97834c6d7fb1..4b405550412a0 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown-edit--dark.png and b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown-edit--dark.png differ diff --git a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-edit--dark.png b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-edit--dark.png index 9f3435ec919df..248d1f34b318a 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-edit--dark.png and b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-edit--dark.png differ diff --git a/frontend/src/lib/components/CommandBar/SearchResultPreview.tsx b/frontend/src/lib/components/CommandBar/SearchResultPreview.tsx index af4e70df0a08a..7fe2a6313bfdd 100644 --- a/frontend/src/lib/components/CommandBar/SearchResultPreview.tsx +++ b/frontend/src/lib/components/CommandBar/SearchResultPreview.tsx @@ -40,18 +40,10 @@ export const SearchResultPreview = (): JSX.Element | null => { onClick={() => { openResult(activeResultIndex) }} - tooltip={ - <> - Open - - } aria-label="Open search result" > - Open + Open -
- Open -
diff --git a/frontend/src/lib/constants.tsx b/frontend/src/lib/constants.tsx index ae93860525488..4ba7745915b41 100644 --- a/frontend/src/lib/constants.tsx +++ b/frontend/src/lib/constants.tsx @@ -181,7 +181,6 @@ export const FEATURE_FLAGS = { SQL_EDITOR: 'sql-editor', // owner: @EDsCODE #team-data-warehouse SESSION_REPLAY_DOCTOR: 'session-replay-doctor', // owner: #team-replay SAVED_NOT_PINNED: 'saved-not-pinned', // owner: #team-replay - NEW_EXPERIMENTS_UI: 'new-experiments-ui', // owner: @jurajmajerik #team-feature-success AUDIT_LOGS_ACCESS: 'audit-logs-access', // owner: #team-growth SUBSCRIBE_FROM_PAYGATE: 'subscribe-from-paygate', // owner: #team-growth HEATMAPS_UI: 'heatmaps-ui', // owner: @benjackwhite diff --git a/frontend/src/lib/lemon-ui/LemonButton/More.tsx b/frontend/src/lib/lemon-ui/LemonButton/More.tsx index 6eff15a377572..d4e61ade7cbf9 100644 --- a/frontend/src/lib/lemon-ui/LemonButton/More.tsx +++ b/frontend/src/lib/lemon-ui/LemonButton/More.tsx @@ -2,12 +2,14 @@ import { IconEllipsis } from '@posthog/icons' import { PopoverProps } from '../Popover/Popover' import { LemonButtonWithDropdown } from '.' -import { LemonButtonProps } from './LemonButton' +import { LemonButtonDropdown, LemonButtonProps } from './LemonButton' -export type MoreProps = Partial> & LemonButtonProps +export type MoreProps = Partial> & + LemonButtonProps & { dropdown?: Partial } export function More({ overlay, + dropdown, 'data-attr': dataAttr, placement = 'bottom-end', ...buttonProps @@ -17,11 +19,14 @@ export function More({ aria-label="more" data-attr={dataAttr ?? 'more-button'} icon={} - dropdown={{ - placement: placement, - actionable: true, - overlay, - }} + dropdown={ + { + placement: placement, + actionable: true, + ...dropdown, + overlay, + } as LemonButtonDropdown + } size="small" {...buttonProps} disabled={!overlay} diff --git a/frontend/src/lib/lemon-ui/LemonInput/LemonInput.scss b/frontend/src/lib/lemon-ui/LemonInput/LemonInput.scss index 166c7ff50528a..fa2ddaea8d2fd 100644 --- a/frontend/src/lib/lemon-ui/LemonInput/LemonInput.scss +++ b/frontend/src/lib/lemon-ui/LemonInput/LemonInput.scss @@ -98,11 +98,6 @@ // NOTE Design: Search inputs are given a specific small width max-width: 240px; border-radius: 0; - - // Add a focus ring to the element (not the input) when focused - &:has(:focus-visible) { - outline: -webkit-focus-ring-color auto 1px; - } } &.LemonInput--type-number { diff --git a/frontend/src/queries/schema.json b/frontend/src/queries/schema.json index 1d2a4d94012aa..005fb78c94497 100644 --- a/frontend/src/queries/schema.json +++ b/frontend/src/queries/schema.json @@ -2132,6 +2132,9 @@ "significant": { "type": "boolean" }, + "stats_version": { + "type": "integer" + }, "timezone": { "type": "string" }, @@ -4511,6 +4514,9 @@ "significant": { "type": "boolean" }, + "stats_version": { + "type": "integer" + }, "variants": { "items": { "$ref": "#/definitions/ExperimentVariantFunnelsBaseStats" @@ -5991,6 +5997,9 @@ }, "response": { "$ref": "#/definitions/ExperimentFunnelsQueryResponse" + }, + "stats_version": { + "type": "integer" } }, "required": ["funnels_query", "kind"], @@ -6041,6 +6050,9 @@ "significant": { "type": "boolean" }, + "stats_version": { + "type": "integer" + }, "variants": { "items": { "$ref": "#/definitions/ExperimentVariantFunnelsBaseStats" @@ -9787,6 +9799,9 @@ "significant": { "type": "boolean" }, + "stats_version": { + "type": "integer" + }, "variants": { "items": { "$ref": "#/definitions/ExperimentVariantFunnelsBaseStats" @@ -10418,6 +10433,9 @@ "significant": { 
"type": "boolean" }, + "stats_version": { + "type": "integer" + }, "variants": { "items": { "$ref": "#/definitions/ExperimentVariantFunnelsBaseStats" diff --git a/frontend/src/queries/schema.ts b/frontend/src/queries/schema.ts index 5360ae06d99f4..9e4252c989b38 100644 --- a/frontend/src/queries/schema.ts +++ b/frontend/src/queries/schema.ts @@ -2003,6 +2003,7 @@ export interface ExperimentFunnelsQueryResponse { significance_code: ExperimentSignificanceCode expected_loss: number credible_intervals: Record + stats_version?: integer } export type CachedExperimentFunnelsQueryResponse = CachedQueryResponse @@ -2012,6 +2013,7 @@ export interface ExperimentFunnelsQuery extends DataNode { diff --git a/frontend/src/scenes/experiments/ExperimentView/CumulativeExposuresChart.tsx b/frontend/src/scenes/experiments/ExperimentView/CumulativeExposuresChart.tsx index 7f4378a7ec5ef..6efdd992f5988 100644 --- a/frontend/src/scenes/experiments/ExperimentView/CumulativeExposuresChart.tsx +++ b/frontend/src/scenes/experiments/ExperimentView/CumulativeExposuresChart.tsx @@ -10,11 +10,11 @@ import { BaseMathType, ChartDisplayType, InsightType, PropertyFilterType, Proper import { experimentLogic } from '../experimentLogic' export function CumulativeExposuresChart(): JSX.Element { - const { experiment, experimentResults, getMetricType } = useValues(experimentLogic) + const { experiment, metricResults, getMetricType } = useValues(experimentLogic) const metricIdx = 0 const metricType = getMetricType(metricIdx) - + const result = metricResults?.[metricIdx] const variants = experiment.parameters?.feature_flag_variants?.map((variant) => variant.key) || [] if (experiment.holdout) { variants.push(`holdout-${experiment.holdout.id}`) @@ -25,7 +25,7 @@ export function CumulativeExposuresChart(): JSX.Element { if (metricType === InsightType.TRENDS) { query = { kind: NodeKind.InsightVizNode, - source: (experimentResults as CachedExperimentTrendsQueryResponse)?.exposure_query || { + source: (result as CachedExperimentTrendsQueryResponse)?.exposure_query || { kind: NodeKind.TrendsQuery, series: [], interval: 'day', diff --git a/frontend/src/scenes/experiments/ExperimentView/DataCollection.tsx b/frontend/src/scenes/experiments/ExperimentView/DataCollection.tsx index 2463e1dd791a5..b22eb57b35d4b 100644 --- a/frontend/src/scenes/experiments/ExperimentView/DataCollection.tsx +++ b/frontend/src/scenes/experiments/ExperimentView/DataCollection.tsx @@ -32,7 +32,7 @@ export function DataCollection(): JSX.Element { const experimentProgressPercent = metricType === InsightType.FUNNELS - ? (funnelResultsPersonsTotal / recommendedSampleSize) * 100 + ? 
(funnelResultsPersonsTotal(0) / recommendedSampleSize) * 100 : (actualRunningTime / recommendedRunningTime) * 100 const hasHighRunningTime = recommendedRunningTime > 62 @@ -109,7 +109,7 @@ export function DataCollection(): JSX.Element { Saw  - {humanFriendlyNumber(funnelResultsPersonsTotal)} of{' '} + {humanFriendlyNumber(funnelResultsPersonsTotal(0))} of{' '} {humanFriendlyNumber(recommendedSampleSize)}{' '} {' '} {formatUnitByQuantity(recommendedSampleSize, 'participant')} diff --git a/frontend/src/scenes/experiments/ExperimentView/DistributionTable.tsx b/frontend/src/scenes/experiments/ExperimentView/DistributionTable.tsx index b3c2962d95c55..caee718efb726 100644 --- a/frontend/src/scenes/experiments/ExperimentView/DistributionTable.tsx +++ b/frontend/src/scenes/experiments/ExperimentView/DistributionTable.tsx @@ -137,9 +137,11 @@ export function DistributionModal({ experimentId }: { experimentId: Experiment[' export function DistributionTable(): JSX.Element { const { openDistributionModal } = useActions(experimentLogic) - const { experimentId, experiment, experimentResults } = useValues(experimentLogic) + const { experimentId, experiment, metricResults } = useValues(experimentLogic) const { reportExperimentReleaseConditionsViewed } = useActions(experimentLogic) + const result = metricResults?.[0] + const onSelectElement = (variant: string): void => { LemonDialog.open({ title: 'Select a domain', @@ -166,7 +168,7 @@ export function DistributionTable(): JSX.Element { key: 'key', title: 'Variant', render: function Key(_, item): JSX.Element { - if (!experimentResults || !experimentResults.insight) { + if (!result || !result.insight) { return {item.key} } return diff --git a/frontend/src/scenes/experiments/ExperimentView/ExperimentView.tsx b/frontend/src/scenes/experiments/ExperimentView/ExperimentView.tsx index 4f2c109275189..c5dc5eb43cb4b 100644 --- a/frontend/src/scenes/experiments/ExperimentView/ExperimentView.tsx +++ b/frontend/src/scenes/experiments/ExperimentView/ExperimentView.tsx @@ -26,9 +26,9 @@ import { Results } from './Results' import { SecondaryMetricsTable } from './SecondaryMetricsTable' const ResultsTab = (): JSX.Element => { - const { experiment, experimentResults, featureFlags } = useValues(experimentLogic) - - const hasResultsInsight = experimentResults && experimentResults.insight + const { experiment, metricResults, featureFlags } = useValues(experimentLogic) + const result = metricResults?.[0] + const hasResultsInsight = result && result.insight return (
@@ -70,12 +70,12 @@ const VariantsTab = (): JSX.Element => { } export function ExperimentView(): JSX.Element { - const { experimentLoading, experimentResultsLoading, experimentId, experimentResults, tabKey, featureFlags } = + const { experimentLoading, metricResultsLoading, experimentId, metricResults, tabKey, featureFlags } = useValues(experimentLogic) const { setTabKey } = useActions(experimentLogic) - - const hasResultsInsight = experimentResults && experimentResults.insight + const result = metricResults?.[0] + const hasResultsInsight = result && result.insight return ( <> @@ -86,7 +86,7 @@ export function ExperimentView(): JSX.Element { ) : ( <> - {experimentResultsLoading ? ( + {metricResultsLoading ? ( ) : ( <> diff --git a/frontend/src/scenes/experiments/ExperimentView/Overview.tsx b/frontend/src/scenes/experiments/ExperimentView/Overview.tsx index 2095309364143..c8c44c8ea5c04 100644 --- a/frontend/src/scenes/experiments/ExperimentView/Overview.tsx +++ b/frontend/src/scenes/experiments/ExperimentView/Overview.tsx @@ -1,17 +1,29 @@ import { useValues } from 'kea' +import { CachedExperimentFunnelsQueryResponse, CachedExperimentTrendsQueryResponse } from '~/queries/schema' + import { experimentLogic } from '../experimentLogic' import { VariantTag } from './components' export function Overview(): JSX.Element { - const { experimentId, experimentResults, getIndexForVariant, getHighestProbabilityVariant, areResultsSignificant } = + const { experimentId, metricResults, getIndexForVariant, getHighestProbabilityVariant, areResultsSignificant } = useValues(experimentLogic) + const result = metricResults?.[0] + if (!result) { + return <> + } + function WinningVariantText(): JSX.Element { - const highestProbabilityVariant = getHighestProbabilityVariant(experimentResults) - const index = getIndexForVariant(experimentResults, highestProbabilityVariant || '') - if (highestProbabilityVariant && index !== null && experimentResults) { - const { probability } = experimentResults + const highestProbabilityVariant = getHighestProbabilityVariant( + result as CachedExperimentFunnelsQueryResponse | CachedExperimentTrendsQueryResponse + ) + const index = getIndexForVariant( + result as CachedExperimentFunnelsQueryResponse | CachedExperimentTrendsQueryResponse, + highestProbabilityVariant || '' + ) + if (highestProbabilityVariant && index !== null && result) { + const { probability } = result return (
@@ -32,7 +44,9 @@ export function Overview(): JSX.Element { return (
Your results are  - {`${areResultsSignificant ? 'significant' : 'not significant'}`}. + + {`${areResultsSignificant(0) ? 'significant' : 'not significant'}`}. +
) } diff --git a/frontend/src/scenes/experiments/ExperimentView/Results.tsx b/frontend/src/scenes/experiments/ExperimentView/Results.tsx index c4e7a4b05ed62..61574de1b3966 100644 --- a/frontend/src/scenes/experiments/ExperimentView/Results.tsx +++ b/frontend/src/scenes/experiments/ExperimentView/Results.tsx @@ -5,13 +5,17 @@ import { ResultsHeader, ResultsQuery } from './components' import { SummaryTable } from './SummaryTable' export function Results(): JSX.Element { - const { experimentResults } = useValues(experimentLogic) + const { metricResults } = useValues(experimentLogic) + const result = metricResults?.[0] + if (!result) { + return <> + } return (
- +
) } diff --git a/frontend/src/scenes/experiments/ExperimentView/SecondaryMetricsTable.tsx b/frontend/src/scenes/experiments/ExperimentView/SecondaryMetricsTable.tsx index 8369038f00cbb..7f5bcbabfa3a1 100644 --- a/frontend/src/scenes/experiments/ExperimentView/SecondaryMetricsTable.tsx +++ b/frontend/src/scenes/experiments/ExperimentView/SecondaryMetricsTable.tsx @@ -21,7 +21,7 @@ export function SecondaryMetricsTable({ experimentId }: { experimentId: Experime const [modalMetricIdx, setModalMetricIdx] = useState(null) const { - experimentResults, + metricResults, secondaryMetricResultsLoading, experiment, getSecondaryMetricType, @@ -65,7 +65,7 @@ export function SecondaryMetricsTable({ experimentId }: { experimentId: Experime { title:
Variant
, render: function Key(_, item: TabularSecondaryMetricResults): JSX.Element { - if (!experimentResults || !experimentResults.insight) { + if (!metricResults?.[0] || !metricResults?.[0].insight) { return {item.variant} } return ( diff --git a/frontend/src/scenes/experiments/ExperimentView/SummaryTable.tsx b/frontend/src/scenes/experiments/ExperimentView/SummaryTable.tsx index 6150d4e7b7826..adb1bc9d69616 100644 --- a/frontend/src/scenes/experiments/ExperimentView/SummaryTable.tsx +++ b/frontend/src/scenes/experiments/ExperimentView/SummaryTable.tsx @@ -10,7 +10,6 @@ import { urls } from 'scenes/urls' import { FilterLogicalOperator, - FunnelExperimentVariant, InsightType, PropertyFilterType, PropertyOperator, @@ -27,7 +26,7 @@ export function SummaryTable(): JSX.Element { const { experimentId, experiment, - experimentResults, + metricResults, tabularExperimentResults, getMetricType, exposureCountDataForVariant, @@ -38,14 +37,14 @@ export function SummaryTable(): JSX.Element { credibleIntervalForVariant, } = useValues(experimentLogic) const metricType = getMetricType(0) - - if (!experimentResults) { + const result = metricResults?.[0] + if (!result) { return <> } - const winningVariant = getHighestProbabilityVariant(experimentResults) + const winningVariant = getHighestProbabilityVariant(result) - const columns: LemonTableColumns = [ + const columns: LemonTableColumns = [ { key: 'variants', title: 'Variant', @@ -64,14 +63,14 @@ export function SummaryTable(): JSX.Element { key: 'counts', title: (
- {experimentResults.insight?.[0] && 'action' in experimentResults.insight[0] && ( - + {result.insight?.[0] && 'action' in result.insight[0] && ( + )} {experimentMathAggregationForTrends() ? 'metric' : 'count'}
), render: function Key(_, variant): JSX.Element { - const count = countDataForVariant(experimentResults, variant.key) + const count = countDataForVariant(result, variant.key) if (!count) { return <>— } @@ -83,7 +82,7 @@ export function SummaryTable(): JSX.Element { key: 'exposure', title: 'Exposure', render: function Key(_, variant): JSX.Element { - const exposure = exposureCountDataForVariant(experimentResults, variant.key) + const exposure = exposureCountDataForVariant(result, variant.key) if (!exposure) { return <>— } @@ -120,7 +119,7 @@ export function SummaryTable(): JSX.Element { return Baseline } - const controlVariant = (experimentResults.variants as TrendExperimentVariant[]).find( + const controlVariant = (result.variants as TrendExperimentVariant[]).find( ({ key }) => key === 'control' ) as TrendExperimentVariant @@ -161,7 +160,7 @@ export function SummaryTable(): JSX.Element { return Baseline } - const credibleInterval = credibleIntervalForVariant(experimentResults || null, variant.key, metricType) + const credibleInterval = credibleIntervalForVariant(result || null, variant.key, metricType) if (!credibleInterval) { return <>— } @@ -181,7 +180,7 @@ export function SummaryTable(): JSX.Element { key: 'conversionRate', title: 'Conversion rate', render: function Key(_, item): JSX.Element { - const conversionRate = conversionRateForVariant(experimentResults, item.key) + const conversionRate = conversionRateForVariant(result, item.key) if (!conversionRate) { return <>— } @@ -204,8 +203,8 @@ export function SummaryTable(): JSX.Element { return Baseline } - const controlConversionRate = conversionRateForVariant(experimentResults, 'control') - const variantConversionRate = conversionRateForVariant(experimentResults, item.key) + const controlConversionRate = conversionRateForVariant(result, 'control') + const variantConversionRate = conversionRateForVariant(result, item.key) if (!controlConversionRate || !variantConversionRate) { return <>— @@ -235,7 +234,7 @@ export function SummaryTable(): JSX.Element { return Baseline } - const credibleInterval = credibleIntervalForVariant(experimentResults || null, item.key, metricType) + const credibleInterval = credibleIntervalForVariant(result || null, item.key, metricType) if (!credibleInterval) { return <>— } @@ -254,15 +253,13 @@ export function SummaryTable(): JSX.Element { key: 'winProbability', title: 'Win probability', sorter: (a, b) => { - const aPercentage = (experimentResults?.probability?.[a.key] || 0) * 100 - const bPercentage = (experimentResults?.probability?.[b.key] || 0) * 100 + const aPercentage = (result?.probability?.[a.key] || 0) * 100 + const bPercentage = (result?.probability?.[b.key] || 0) * 100 return aPercentage - bPercentage }, render: function Key(_, item): JSX.Element { const variantKey = item.key - const percentage = - experimentResults?.probability?.[variantKey] != undefined && - experimentResults.probability?.[variantKey] * 100 + const percentage = result?.probability?.[variantKey] != undefined && result.probability?.[variantKey] * 100 const isWinning = variantKey === winningVariant return ( @@ -351,7 +348,7 @@ export function SummaryTable(): JSX.Element { return (
- +
) } diff --git a/frontend/src/scenes/experiments/ExperimentView/components.tsx b/frontend/src/scenes/experiments/ExperimentView/components.tsx index 9e5bfb7c2c4b0..87885a3761b79 100644 --- a/frontend/src/scenes/experiments/ExperimentView/components.tsx +++ b/frontend/src/scenes/experiments/ExperimentView/components.tsx @@ -62,7 +62,11 @@ export function VariantTag({ muted?: boolean fontSize?: number }): JSX.Element { - const { experiment, experimentResults, getIndexForVariant } = useValues(experimentLogic({ experimentId })) + const { experiment, getIndexForVariant, metricResults } = useValues(experimentLogic({ experimentId })) + + if (!metricResults) { + return <> + } if (experiment.holdout && variantKey === `holdout-${experiment.holdout_id}`) { return ( @@ -71,7 +75,7 @@ export function VariantTag({ className="w-2 h-2 rounded-full mr-0.5" // eslint-disable-next-line react/forbid-dom-props style={{ - backgroundColor: getExperimentInsightColour(getIndexForVariant(experimentResults, variantKey)), + backgroundColor: getExperimentInsightColour(getIndexForVariant(metricResults[0], variantKey)), }} /> {experiment.holdout.name} @@ -85,7 +89,7 @@ export function VariantTag({ className="w-2 h-2 rounded-full mr-0.5" // eslint-disable-next-line react/forbid-dom-props style={{ - backgroundColor: getExperimentInsightColour(getIndexForVariant(experimentResults, variantKey)), + backgroundColor: getExperimentInsightColour(getIndexForVariant(metricResults[0], variantKey)), }} /> + {result.label} @@ -205,8 +209,15 @@ export function ResultsQuery({ ) } -export function ExploreButton({ icon = }: { icon?: JSX.Element }): JSX.Element { - const { experimentResults, experiment, featureFlags } = useValues(experimentLogic) +export function ExploreButton({ + icon = , + metricIndex = 0, +}: { + icon?: JSX.Element + metricIndex?: number +}): JSX.Element { + const { metricResults, experiment, featureFlags } = useValues(experimentLogic) + const result = metricResults?.[metricIndex] // keep in sync with https://github.com/PostHog/posthog/blob/master/ee/clickhouse/queries/experiments/funnel_experiment_result.py#L71 // :TRICKY: In the case of no results, we still want users to explore the query, so they can debug further. @@ -223,7 +234,7 @@ export function ExploreButton({ icon = }: { icon?: JSX.Element let query: InsightVizNode if (featureFlags[FEATURE_FLAGS.EXPERIMENTS_HOGQL]) { - const newQueryResults = experimentResults as unknown as + const newQueryResults = result as unknown as | CachedExperimentTrendsQueryResponse | CachedExperimentFunnelsQueryResponse @@ -237,7 +248,7 @@ export function ExploreButton({ icon = }: { icon?: JSX.Element source: source as InsightQueryNode, } } else { - const oldQueryResults = experimentResults as ExperimentResults['result'] + const oldQueryResults = result as unknown as ExperimentResults['result'] if (!oldQueryResults?.filters) { return <> @@ -272,7 +283,9 @@ export function ExploreButton({ icon = }: { icon?: JSX.Element } export function ResultsHeader(): JSX.Element { - const { experimentResults } = useValues(experimentLogic) + const { metricResults } = useValues(experimentLogic) + + const result = metricResults?.[0] return (
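A pattern worth calling out across DistributionTable, Overview, Results, SummaryTable, VariantTag and ResultsHeader above: every consumer now derives the primary metric's result from the `metricResults` array and bails out early when it is missing. A minimal sketch of that guard, with simplified types rather than the real components:

```ts
type MetricResult = { insight?: unknown; significant?: boolean } | null

// Hypothetical helper illustrating the guard used throughout these components.
function firstMetricResult(metricResults: MetricResult[] | null): MetricResult {
    return metricResults?.[0] ?? null
}

function renderSummary(metricResults: MetricResult[] | null): string {
    const result = firstMetricResult(metricResults)
    if (!result || !result.insight) {
        return '' // render nothing, mirroring the `return <></>` early exits in the diff
    }
    return result.significant ? 'Significant result available' : 'Result available, not significant'
}
```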
@@ -284,16 +297,17 @@ export function ResultsHeader(): JSX.Element {
-
{experimentResults && }
+
{result && }
) } -export function NoResultsEmptyState(): JSX.Element { +export function NoResultsEmptyState({ metricIndex = 0 }: { metricIndex?: number }): JSX.Element { type ErrorCode = 'no-events' | 'no-flag-info' | 'no-control-variant' | 'no-test-variant' - const { experimentResultsLoading, experimentResultCalculationError } = useValues(experimentLogic) + const { metricResultsLoading, primaryMetricsResultErrors } = useValues(experimentLogic) + const metricError = primaryMetricsResultErrors?.[metricIndex] function ChecklistItem({ errorCode, value }: { errorCode: ErrorCode; value: boolean }): JSX.Element { const failureText = { @@ -327,28 +341,25 @@ export function NoResultsEmptyState(): JSX.Element { ) } - if (experimentResultsLoading) { + if (metricResultsLoading) { return <> } // Validation errors return 400 and are rendered as a checklist - if (experimentResultCalculationError?.statusCode === 400) { - let parsedDetail: Record - try { - parsedDetail = JSON.parse(experimentResultCalculationError.detail) - } catch (error) { + if (metricError?.statusCode === 400) { + if (!metricError.hasDiagnostics) { return (
Experiment results could not be calculated
-
{experimentResultCalculationError.detail}
+
{metricError.detail}
) } const checklistItems = [] - for (const [errorCode, value] of Object.entries(parsedDetail)) { + for (const [errorCode, value] of Object.entries(metricError.detail as Record)) { checklistItems.push() } @@ -377,14 +388,14 @@ export function NoResultsEmptyState(): JSX.Element { ) } - if (experimentResultCalculationError?.statusCode === 504) { + if (metricError?.statusCode === 504) { return (

Experiment results timed out

- {!!experimentResultCalculationError && ( + {!!metricError && (
This may occur when the experiment has a large amount of data or is particularly complex. We are actively working on fixing this. In the meantime, please try refreshing @@ -404,11 +415,7 @@ export function NoResultsEmptyState(): JSX.Element {

Experiment results could not be calculated

- {!!experimentResultCalculationError && ( -
- {experimentResultCalculationError.detail} -
- )} + {!!metricError &&
{metricError.detail}
}
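The empty state now branches on a per-metric error object (`primaryMetricsResultErrors[metricIndex]`) instead of trying to `JSON.parse` the error detail on the client. A sketch of that contract and the branching; the field names (`statusCode`, `hasDiagnostics`, `detail`) come from the diff, but the exact shape of the diagnostics map is assumed from how the checklist is rendered:

```ts
// Assumed error shape, based on the fields referenced in NoResultsEmptyState above.
interface MetricResultError {
    statusCode: number
    hasDiagnostics: boolean
    // When hasDiagnostics is true, detail is assumed to be a map of error codes to booleans;
    // otherwise it is a human-readable message.
    detail: string | Record<string, boolean>
}

function describeMetricError(error: MetricResultError | undefined): string[] {
    if (!error) {
        return []
    }
    if (error.statusCode === 504) {
        return ['Experiment results timed out']
    }
    if (error.statusCode === 400 && error.hasDiagnostics) {
        // One line per diagnostic, as the component does with its checklist items.
        return Object.entries(error.detail as Record<string, boolean>).map(
            ([errorCode, value]) => `${errorCode}: ${value}`
        )
    }
    return [`Experiment results could not be calculated: ${error.detail}`]
}
```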
@@ -464,7 +471,7 @@ export function PageHeaderCustom(): JSX.Element { launchExperiment, endExperiment, archiveExperiment, - loadExperimentResults, + loadMetricResults, loadSecondaryMetricResults, createExposureCohort, openShipVariantModal, @@ -505,7 +512,7 @@ export function PageHeaderCustom(): JSX.Element { {exposureCohortId ? 'View' : 'Create'} exposure cohort loadExperimentResults(true)} + onClick={() => loadMetricResults(true)} fullWidth data-attr="refresh-experiment" > @@ -588,7 +595,7 @@ export function PageHeaderCustom(): JSX.Element {
)} {featureFlags[FEATURE_FLAGS.EXPERIMENT_MAKE_DECISION] && - areResultsSignificant && + areResultsSignificant(0) && !isSingleVariantShipped && ( <> @@ -615,7 +622,7 @@ export function ShipVariantModal({ experimentId }: { experimentId: Experiment['i const { aggregationLabel } = useValues(groupsModel) const [selectedVariantKey, setSelectedVariantKey] = useState() - useEffect(() => setSelectedVariantKey(sortedWinProbabilities[0]?.key), [sortedWinProbabilities]) + useEffect(() => setSelectedVariantKey(sortedWinProbabilities(0)[0]?.key), [sortedWinProbabilities(0)]) const aggregationTargetName = experiment.filters.aggregation_group_type_index != null @@ -656,12 +663,12 @@ export function ShipVariantModal({ experimentId }: { experimentId: Experiment['i data-attr="metrics-selector" value={selectedVariantKey} onChange={(variantKey) => setSelectedVariantKey(variantKey)} - options={sortedWinProbabilities.map(({ key }) => ({ + options={sortedWinProbabilities(0).map(({ key }) => ({ value: key, label: (
- {key === sortedWinProbabilities[0]?.key && ( + {key === sortedWinProbabilities(0)[0]?.key && ( Winning @@ -693,9 +700,9 @@ export function ActionBanner(): JSX.Element { const { experiment, getMetricType, - experimentResults, + metricResults, experimentLoading, - experimentResultsLoading, + metricResultsLoading, isExperimentRunning, areResultsSignificant, isExperimentStopped, @@ -706,6 +713,7 @@ export function ActionBanner(): JSX.Element { featureFlags, } = useValues(experimentLogic) + const result = metricResults?.[0] const { archiveExperiment } = useActions(experimentLogic) const { aggregationLabel } = useValues(groupsModel) @@ -720,7 +728,7 @@ export function ActionBanner(): JSX.Element { const recommendedRunningTime = experiment?.parameters?.recommended_running_time || 1 const recommendedSampleSize = experiment?.parameters?.recommended_sample_size || 100 - if (!experiment || experimentLoading || experimentResultsLoading) { + if (!experiment || experimentLoading || metricResultsLoading) { return <> } @@ -766,12 +774,12 @@ export function ActionBanner(): JSX.Element { } // Running, results present, not significant - if (isExperimentRunning && experimentResults && !isExperimentStopped && !areResultsSignificant) { + if (isExperimentRunning && result && !isExperimentStopped && !areResultsSignificant(0)) { // Results insignificant, but a large enough sample/running time has been achieved // Further collection unlikely to change the result -> recommmend cutting the losses if ( metricType === InsightType.FUNNELS && - funnelResultsPersonsTotal > Math.max(recommendedSampleSize, 500) && + funnelResultsPersonsTotal(0) > Math.max(recommendedSampleSize, 500) && dayjs().diff(experiment.start_date, 'day') > 2 // at least 2 days running ) { return ( @@ -800,9 +808,9 @@ export function ActionBanner(): JSX.Element { } // Running, results significant - if (isExperimentRunning && !isExperimentStopped && areResultsSignificant && experimentResults) { - const { probability } = experimentResults - const winningVariant = getHighestProbabilityVariant(experimentResults) + if (isExperimentRunning && !isExperimentStopped && areResultsSignificant(0) && result) { + const { probability } = result + const winningVariant = getHighestProbabilityVariant(result) if (!winningVariant) { return <> } @@ -812,7 +820,7 @@ export function ActionBanner(): JSX.Element { // Win probability only slightly over 0.9 and the recommended sample/time just met -> proceed with caution if ( metricType === InsightType.FUNNELS && - funnelResultsPersonsTotal < recommendedSampleSize + 50 && + funnelResultsPersonsTotal(0) < recommendedSampleSize + 50 && winProbability < 0.93 ) { return ( @@ -848,7 +856,7 @@ export function ActionBanner(): JSX.Element { } // Stopped, results significant - if (isExperimentStopped && areResultsSignificant) { + if (isExperimentStopped && areResultsSignificant(0)) { return ( You have stopped this experiment, and it is no longer collecting data. With significant results in hand, @@ -866,7 +874,7 @@ export function ActionBanner(): JSX.Element { } // Stopped, results not significant - if (isExperimentStopped && experimentResults && !areResultsSignificant) { + if (isExperimentStopped && result && !areResultsSignificant(0)) { return ( You have stopped this experiment, and it is no longer collecting data. 
Because your results are not diff --git a/frontend/src/scenes/experiments/experimentLogic.tsx b/frontend/src/scenes/experiments/experimentLogic.tsx index 698c4182dc84d..42f6b2cd37652 100644 --- a/frontend/src/scenes/experiments/experimentLogic.tsx +++ b/frontend/src/scenes/experiments/experimentLogic.tsx @@ -1,4 +1,3 @@ -import { IconInfo } from '@posthog/icons' import { actions, connect, kea, key, listeners, path, props, reducers, selectors } from 'kea' import { forms } from 'kea-forms' import { loaders } from 'kea-loaders' @@ -8,11 +7,9 @@ import { EXPERIMENT_DEFAULT_DURATION, FunnelLayout } from 'lib/constants' import { FEATURE_FLAGS } from 'lib/constants' import { dayjs } from 'lib/dayjs' import { lemonToast } from 'lib/lemon-ui/LemonToast/LemonToast' -import { Tooltip } from 'lib/lemon-ui/Tooltip' import { featureFlagLogic } from 'lib/logic/featureFlagLogic' import { hasFormErrors, toParams } from 'lib/utils' import { eventUsageLogic } from 'lib/utils/eventUsageLogic' -import { ReactElement } from 'react' import { validateFeatureFlagKey } from 'scenes/feature-flags/featureFlagLogic' import { funnelDataLogic } from 'scenes/funnels/funnelDataLogic' import { insightDataLogic } from 'scenes/insights/insightDataLogic' @@ -31,6 +28,7 @@ import { CachedExperimentFunnelsQueryResponse, CachedExperimentTrendsQueryResponse, ExperimentFunnelsQuery, + ExperimentSignificanceCode, ExperimentTrendsQuery, NodeKind, } from '~/queries/schema' @@ -55,7 +53,6 @@ import { ProductKey, PropertyMathType, SecondaryMetricResults, - SignificanceCode, TrendExperimentVariant, TrendResult, TrendsFilterType, @@ -177,7 +174,6 @@ export const experimentLogic = kea([ setExperimentType: (type?: string) => ({ type }), removeExperimentGroup: (idx: number) => ({ idx }), setEditExperiment: (editing: boolean) => ({ editing }), - setExperimentResultCalculationError: (error: ExperimentResultCalculationError) => ({ error }), setFlagImplementationWarning: (warning: boolean) => ({ warning }), setExposureAndSampleSize: (exposure: number, sampleSize: number) => ({ exposure, sampleSize }), updateExperimentGoal: (filters: Partial) => ({ filters }), @@ -420,12 +416,6 @@ export const experimentLogic = kea([ setEditExperiment: (_, { editing }) => editing, }, ], - experimentResultCalculationError: [ - null as ExperimentResultCalculationError | null, - { - setExperimentResultCalculationError: (_, { error }) => error, - }, - ], flagImplementationWarning: [ false as boolean, { @@ -597,10 +587,7 @@ export const experimentLogic = kea([ experiment && actions.reportExperimentViewed(experiment) if (experiment?.start_date) { - actions.loadExperimentResults() - if (values.featureFlags[FEATURE_FLAGS.EXPERIMENTS_MULTIPLE_METRICS]) { - actions.loadMetricResults() - } + actions.loadMetricResults() actions.loadSecondaryMetricResults() } }, @@ -618,7 +605,7 @@ export const experimentLogic = kea([ actions.updateExperiment({ end_date: endDate.toISOString() }) const duration = endDate.diff(values.experiment?.start_date, 'second') values.experiment && - actions.reportExperimentCompleted(values.experiment, endDate, duration, values.areResultsSignificant) + actions.reportExperimentCompleted(values.experiment, endDate, duration, values.areResultsSignificant(0)) }, archiveExperiment: async () => { actions.updateExperiment({ archived: true }) @@ -683,13 +670,11 @@ export const experimentLogic = kea([ resetRunningExperiment: async () => { actions.updateExperiment({ start_date: null, end_date: null, archived: false }) values.experiment && 
actions.reportExperimentReset(values.experiment) - - actions.loadExperimentResultsSuccess(null) actions.loadSecondaryMetricResultsSuccess([]) }, updateExperimentSuccess: async ({ experiment }) => { actions.updateExperiments(experiment) - actions.loadExperimentResults() + actions.loadMetricResults() actions.loadSecondaryMetricResults() }, setExperiment: async ({ experiment }) => { @@ -838,69 +823,6 @@ export const experimentLogic = kea([ return response }, }, - experimentResults: [ - null as - | ExperimentResults['result'] - | CachedExperimentTrendsQueryResponse - | CachedExperimentFunnelsQueryResponse - | null, - { - loadExperimentResults: async ( - refresh?: boolean - ): Promise< - | ExperimentResults['result'] - | CachedExperimentTrendsQueryResponse - | CachedExperimentFunnelsQueryResponse - | null - > => { - try { - // :FLAG: CLEAN UP AFTER MIGRATION - if (values.featureFlags[FEATURE_FLAGS.EXPERIMENTS_HOGQL]) { - // Queries are shareable, so we need to set the experiment_id for the backend to correctly associate the query with the experiment - const queryWithExperimentId = { - ...values.experiment.metrics[0], - experiment_id: values.experimentId, - } - if ( - queryWithExperimentId.kind === NodeKind.ExperimentTrendsQuery && - values.featureFlags[FEATURE_FLAGS.EXPERIMENT_STATS_V2] - ) { - queryWithExperimentId.stats_version = 2 - } - - const response = await performQuery(queryWithExperimentId, undefined, refresh) - - return { - ...response, - fakeInsightId: Math.random().toString(36).substring(2, 15), - } as unknown as CachedExperimentTrendsQueryResponse | CachedExperimentFunnelsQueryResponse - } - - const refreshParam = refresh ? '?refresh=true' : '' - const response: ExperimentResults = await api.get( - `api/projects/${values.currentProjectId}/experiments/${values.experimentId}/results${refreshParam}` - ) - return { - ...response.result, - fakeInsightId: Math.random().toString(36).substring(2, 15), - last_refresh: response.last_refresh, - } - } catch (error: any) { - let errorDetail = error.detail - // :HANDLE FLAG: CLEAN UP AFTER MIGRATION - if (values.featureFlags[FEATURE_FLAGS.EXPERIMENTS_HOGQL]) { - const errorDetailMatch = error.detail.match(/\{.*\}/) - errorDetail = errorDetailMatch ? errorDetailMatch[0] : error.detail - } - actions.setExperimentResultCalculationError({ detail: errorDetail, statusCode: error.status }) - if (error.status === 504) { - actions.reportExperimentResultsLoadingTimeout(values.experimentId) - } - return null - } - }, - }, - ], metricResults: [ null as (CachedExperimentTrendsQueryResponse | CachedExperimentFunnelsQueryResponse | null)[] | null, { @@ -1187,83 +1109,40 @@ export const experimentLogic = kea([ }, ], areResultsSignificant: [ - (s) => [s.experimentResults], - (experimentResults): boolean => { - return experimentResults?.significant || false - }, - ], - // TODO: remove with the old UI - significanceBannerDetails: [ - (s) => [s.experimentResults], - (experimentResults): string | ReactElement => { - if (experimentResults?.significance_code === SignificanceCode.HighLoss) { - return ( - <> - This is because the expected loss in conversion is greater than 1% - Current value is {((experimentResults?.expected_loss || 0) * 100)?.toFixed(2)}% - } - > - - - . - - ) - } - - if (experimentResults?.significance_code === SignificanceCode.HighPValue) { - return ( - <> - This is because the p value is greater than 0.05 - Current value is {experimentResults?.p_value?.toFixed(3) || 1}.} - > - - - . 
- - ) - } - - if (experimentResults?.significance_code === SignificanceCode.LowWinProbability) { - return 'This is because the win probability of all test variants combined is less than 90%.' - } - - if (experimentResults?.significance_code === SignificanceCode.NotEnoughExposure) { - return 'This is because we need at least 100 people per variant to declare significance.' - } - - return '' - }, + (s) => [s.metricResults], + (metricResults: (CachedExperimentFunnelsQueryResponse | CachedExperimentTrendsQueryResponse | null)[]) => + (metricIndex: number = 0): boolean => { + return metricResults?.[metricIndex]?.significant || false + }, ], significanceDetails: [ - (s) => [s.experimentResults], - (experimentResults): string => { - if (experimentResults?.significance_code === SignificanceCode.HighLoss) { - return `This is because the expected loss in conversion is greater than 1% (current value is ${( - (experimentResults?.expected_loss || 0) * 100 - )?.toFixed(2)}%).` - } + (s) => [s.metricResults], + (metricResults: (CachedExperimentFunnelsQueryResponse | CachedExperimentTrendsQueryResponse | null)[]) => + (metricIndex: number = 0): string => { + const results = metricResults?.[metricIndex] + + if (results?.significance_code === ExperimentSignificanceCode.HighLoss) { + return `This is because the expected loss in conversion is greater than 1% (current value is ${( + (results as CachedExperimentFunnelsQueryResponse)?.expected_loss || 0 + )?.toFixed(2)}%).` + } - if (experimentResults?.significance_code === SignificanceCode.HighPValue) { - return `This is because the p value is greater than 0.05 (current value is ${ - experimentResults?.p_value?.toFixed(3) || 1 - }).` - } + if (results?.significance_code === ExperimentSignificanceCode.HighPValue) { + return `This is because the p value is greater than 0.05 (current value is ${ + (results as CachedExperimentTrendsQueryResponse)?.p_value?.toFixed(3) || 1 + }).` + } - if (experimentResults?.significance_code === SignificanceCode.LowWinProbability) { - return 'This is because the win probability of all test variants combined is less than 90%.' - } + if (results?.significance_code === ExperimentSignificanceCode.LowWinProbability) { + return 'This is because the win probability of all test variants combined is less than 90%.' + } - if (experimentResults?.significance_code === SignificanceCode.NotEnoughExposure) { - return 'This is because we need at least 100 people per variant to declare significance.' - } + if (results?.significance_code === ExperimentSignificanceCode.NotEnoughExposure) { + return 'This is because we need at least 100 people per variant to declare significance.' 
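These selectors are the core of the refactor: instead of exposing a single precomputed value derived from `experimentResults`, each selector now returns a function of a metric index, which is why callers elsewhere in the diff write `areResultsSignificant(0)`, `funnelResultsPersonsTotal(0)`, and so on. A stripped-down sketch of the pattern outside of Kea:

```ts
type MetricResult = { significant?: boolean }

// Selector-style factory: derived once from state, then invoked per metric index.
// Simplified sketch; the real logic builds this inside a Kea selector.
const makeAreResultsSignificant =
    (metricResults: (MetricResult | null)[] | null) =>
    (metricIndex: number = 0): boolean =>
        metricResults?.[metricIndex]?.significant || false

const areResultsSignificant = makeAreResultsSignificant([{ significant: true }, null])
console.log(areResultsSignificant(0)) // true
console.log(areResultsSignificant(1)) // false
```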
+ } - return '' - }, + return '' + }, ], recommendedSampleSize: [ (s) => [s.conversionMetrics, s.minimumSampleSizePerVariant, s.variants], @@ -1353,17 +1232,17 @@ export const experimentLogic = kea([ () => [], () => ( - experimentResults: + metricResult: | Partial | CachedExperimentFunnelsQueryResponse | CachedExperimentTrendsQueryResponse | null, variantKey: string ): number | null => { - if (!experimentResults || !experimentResults.insight) { + if (!metricResult || !metricResult.insight) { return null } - const variantResults = (experimentResults.insight as FunnelStep[][]).find( + const variantResults = (metricResult.insight as FunnelStep[][]).find( (variantFunnel: FunnelStep[]) => { const breakdownValue = variantFunnel[0]?.breakdown_value return Array.isArray(breakdownValue) && breakdownValue[0] === variantKey @@ -1380,7 +1259,7 @@ export const experimentLogic = kea([ () => [], () => ( - experimentResults: + metricResult: | Partial | CachedSecondaryMetricExperimentFunnelsQueryResponse | CachedSecondaryMetricExperimentTrendsQueryResponse @@ -1388,13 +1267,13 @@ export const experimentLogic = kea([ variantKey: string, metricType: InsightType ): [number, number] | null => { - const credibleInterval = experimentResults?.credible_intervals?.[variantKey] + const credibleInterval = metricResult?.credible_intervals?.[variantKey] if (!credibleInterval) { return null } if (metricType === InsightType.FUNNELS) { - const controlVariant = (experimentResults.variants as FunnelExperimentVariant[]).find( + const controlVariant = (metricResult.variants as FunnelExperimentVariant[]).find( ({ key }) => key === 'control' ) as FunnelExperimentVariant const controlConversionRate = @@ -1411,7 +1290,7 @@ export const experimentLogic = kea([ return [lowerBound, upperBound] } - const controlVariant = (experimentResults.variants as TrendExperimentVariant[]).find( + const controlVariant = (metricResult.variants as TrendExperimentVariant[]).find( ({ key }) => key === 'control' ) as TrendExperimentVariant @@ -1428,7 +1307,7 @@ export const experimentLogic = kea([ (s) => [s.getMetricType], (getMetricType) => ( - experimentResults: + metricResult: | Partial | CachedExperimentTrendsQueryResponse | CachedExperimentFunnelsQueryResponse @@ -1437,14 +1316,14 @@ export const experimentLogic = kea([ ): number | null => { // Ensures we get the right index from results, so the UI can // display the right colour for the variant - if (!experimentResults || !experimentResults.insight) { + if (!metricResult || !metricResult.insight) { return null } let index = -1 if (getMetricType(0) === InsightType.FUNNELS) { // Funnel Insight is displayed in order of decreasing count - index = (Array.isArray(experimentResults.insight) ? [...experimentResults.insight] : []) + index = (Array.isArray(metricResult.insight) ? 
[...metricResult.insight] : []) .sort((a, b) => { const aCount = (a && Array.isArray(a) && a[0]?.count) || 0 const bCount = (b && Array.isArray(b) && b[0]?.count) || 0 @@ -1458,7 +1337,7 @@ export const experimentLogic = kea([ return Array.isArray(breakdownValue) && breakdownValue[0] === variant }) } else { - index = (experimentResults.insight as TrendResult[]).findIndex( + index = (metricResult.insight as TrendResult[]).findIndex( (variantTrend: TrendResult) => variantTrend.breakdown_value === variant ) } @@ -1474,7 +1353,7 @@ export const experimentLogic = kea([ (s) => [s.experimentMathAggregationForTrends], (experimentMathAggregationForTrends) => ( - experimentResults: + metricResult: | Partial | CachedExperimentTrendsQueryResponse | CachedExperimentFunnelsQueryResponse @@ -1483,10 +1362,10 @@ export const experimentLogic = kea([ type: 'primary' | 'secondary' = 'primary' ): number | null => { const usingMathAggregationType = type === 'primary' ? experimentMathAggregationForTrends() : false - if (!experimentResults || !experimentResults.insight) { + if (!metricResult || !metricResult.insight) { return null } - const variantResults = (experimentResults.insight as TrendResult[]).find( + const variantResults = (metricResult.insight as TrendResult[]).find( (variantTrend: TrendResult) => variantTrend.breakdown_value === variant ) if (!variantResults) { @@ -1524,17 +1403,17 @@ export const experimentLogic = kea([ () => [], () => ( - experimentResults: + metricResult: | Partial | CachedExperimentTrendsQueryResponse | CachedExperimentFunnelsQueryResponse | null, variant: string ): number | null => { - if (!experimentResults || !experimentResults.variants) { + if (!metricResult || !metricResult.variants) { return null } - const variantResults = (experimentResults.variants as TrendExperimentVariant[]).find( + const variantResults = (metricResult.variants as TrendExperimentVariant[]).find( (variantTrend: TrendExperimentVariant) => variantTrend.key === variant ) if (!variantResults || !variantResults.absolute_exposure) { @@ -1564,58 +1443,50 @@ export const experimentLogic = kea([ } }, ], - sortedExperimentResultVariants: [ - (s) => [s.experimentResults, s.experiment], - (experimentResults, experiment): string[] => { - if (experimentResults) { - const sortedResults = Object.keys(experimentResults.probability).sort( - (a, b) => experimentResults.probability[b] - experimentResults.probability[a] - ) - - experiment?.parameters?.feature_flag_variants?.forEach((variant) => { - if (!sortedResults.includes(variant.key)) { - sortedResults.push(variant.key) - } - }) - return sortedResults - } - return [] - }, - ], tabularExperimentResults: [ - (s) => [s.experiment, s.experimentResults, s.getMetricType], - (experiment, experimentResults, getMetricType): any => { - const tabularResults = [] - const metricType = getMetricType(0) - - if (experimentResults) { - for (const variantObj of experimentResults.variants) { - if (metricType === InsightType.FUNNELS) { - const { key, success_count, failure_count } = variantObj as FunnelExperimentVariant - tabularResults.push({ key, success_count, failure_count }) - } else if (metricType === InsightType.TRENDS) { - const { key, count, exposure, absolute_exposure } = variantObj as TrendExperimentVariant - tabularResults.push({ key, count, exposure, absolute_exposure }) + (s) => [s.experiment, s.metricResults, s.getMetricType], + ( + experiment, + metricResults: ( + | CachedExperimentFunnelsQueryResponse + | CachedExperimentTrendsQueryResponse + | null + )[], + getMetricType + 
) => + (metricIndex: number = 0): any[] => { + const tabularResults = [] + const metricType = getMetricType(metricIndex) + const result = metricResults?.[metricIndex] + + if (result) { + for (const variantObj of result.variants) { + if (metricType === InsightType.FUNNELS) { + const { key, success_count, failure_count } = variantObj as FunnelExperimentVariant + tabularResults.push({ key, success_count, failure_count }) + } else if (metricType === InsightType.TRENDS) { + const { key, count, exposure, absolute_exposure } = variantObj as TrendExperimentVariant + tabularResults.push({ key, count, exposure, absolute_exposure }) + } } } - } - if (experiment.feature_flag?.filters.multivariate?.variants) { - for (const { key } of experiment.feature_flag.filters.multivariate.variants) { - if (tabularResults.find((variantObj) => variantObj.key === key)) { - continue - } + if (experiment.feature_flag?.filters.multivariate?.variants) { + for (const { key } of experiment.feature_flag.filters.multivariate.variants) { + if (tabularResults.find((variantObj) => variantObj.key === key)) { + continue + } - if (metricType === InsightType.FUNNELS) { - tabularResults.push({ key, success_count: null, failure_count: null }) - } else if (metricType === InsightType.TRENDS) { - tabularResults.push({ key, count: null, exposure: null, absolute_exposure: null }) + if (metricType === InsightType.FUNNELS) { + tabularResults.push({ key, success_count: null, failure_count: null }) + } else if (metricType === InsightType.TRENDS) { + tabularResults.push({ key, count: null, exposure: null, absolute_exposure: null }) + } } } - } - return tabularResults - }, + return tabularResults + }, ], tabularSecondaryMetricResults: [ (s) => [s.experiment, s.secondaryMetricResults, s.conversionRateForVariant, s.countDataForVariant], @@ -1655,39 +1526,56 @@ export const experimentLogic = kea([ }, ], sortedWinProbabilities: [ - (s) => [s.experimentResults, s.conversionRateForVariant], + (s) => [s.metricResults, s.conversionRateForVariant], ( - experimentResults, - conversionRateForVariant - ): { key: string; winProbability: number; conversionRate: number | null }[] => { - if (!experimentResults) { - return [] - } + metricResults: ( + | CachedExperimentFunnelsQueryResponse + | CachedExperimentTrendsQueryResponse + | null + )[], + conversionRateForVariant + ) => + (metricIndex: number = 0) => { + const result = metricResults?.[metricIndex] + + if (!result || !result.probability) { + return [] + } - return Object.keys(experimentResults.probability) - .map((key) => ({ - key, - winProbability: experimentResults.probability[key], - conversionRate: conversionRateForVariant(experimentResults, key), - })) - .sort((a, b) => b.winProbability - a.winProbability) - }, + return Object.keys(result.probability) + .map((key) => ({ + key, + winProbability: result.probability[key], + conversionRate: conversionRateForVariant(result, key), + })) + .sort((a, b) => b.winProbability - a.winProbability) + }, ], funnelResultsPersonsTotal: [ - (s) => [s.experimentResults, s.getMetricType], - (experimentResults, getMetricType): number => { - if (getMetricType(0) !== InsightType.FUNNELS || !experimentResults?.insight) { - return 0 - } + (s) => [s.metricResults, s.getMetricType], + ( + metricResults: ( + | CachedExperimentFunnelsQueryResponse + | CachedExperimentTrendsQueryResponse + | null + )[], + getMetricType + ) => + (metricIndex: number = 0): number => { + const result = metricResults?.[metricIndex] - let sum = 0 - experimentResults.insight.forEach((variantResult) => 
{ - if (variantResult[0]?.count) { - sum += variantResult[0].count + if (getMetricType(metricIndex) !== InsightType.FUNNELS || !result?.insight) { + return 0 } - }) - return sum - }, + + let sum = 0 + result.insight.forEach((variantResult) => { + if (variantResult[0]?.count) { + sum += variantResult[0].count + } + }) + return sum + }, ], actualRunningTime: [ (s) => [s.experiment], diff --git a/frontend/src/scenes/insights/insightDataLogic.tsx b/frontend/src/scenes/insights/insightDataLogic.tsx index 1de631587cf35..b5036267488b0 100644 --- a/frontend/src/scenes/insights/insightDataLogic.tsx +++ b/frontend/src/scenes/insights/insightDataLogic.tsx @@ -219,6 +219,7 @@ export const insightDataLogic = kea([ if (isQueryTooLarge(query)) { localStorage.removeItem(`draft-query-${values.currentTeamId}`) } + localStorage.setItem( `draft-query-${values.currentTeamId}`, crushDraftQueryForLocalStorage(query, Date.now()) diff --git a/frontend/src/scenes/insights/utils.tsx b/frontend/src/scenes/insights/utils.tsx index 5a1b4d56ec7d9..04e55519ab05b 100644 --- a/frontend/src/scenes/insights/utils.tsx +++ b/frontend/src/scenes/insights/utils.tsx @@ -1,4 +1,3 @@ -import JSONCrush from 'jsoncrush' import api from 'lib/api' import { dayjs } from 'lib/dayjs' import { CORE_FILTER_DEFINITIONS_BY_GROUP } from 'lib/taxonomy' @@ -445,40 +444,27 @@ export function isQueryTooLarge(query: Node>): boolean { export function parseDraftQueryFromLocalStorage( query: string ): { query: Node>; timestamp: number } | null { - // First try to uncrush the query if it's a JSONCrush query else fall back to parsing it as a JSON try { - const uncrushedQuery = JSONCrush.uncrush(query) - return JSON.parse(uncrushedQuery) + return JSON.parse(query) } catch (e) { - console.error('Error parsing uncrushed query', e) - try { - return JSON.parse(query) - } catch (e) { - console.error('Error parsing query', e) - return null - } + console.error('Error parsing query', e) + return null } } export function crushDraftQueryForLocalStorage(query: Node>, timestamp: number): string { - return JSONCrush.crush(JSON.stringify({ query, timestamp })) + return JSON.stringify({ query, timestamp }) } export function parseDraftQueryFromURL(query: string): Node> | null { try { - const uncrushedQuery = JSONCrush.uncrush(query) - return JSON.parse(uncrushedQuery) + return JSON.parse(query) } catch (e) { - console.error('Error parsing uncrushed query', e) - try { - return JSON.parse(query) - } catch (e) { - console.error('Error parsing query', e) - return null - } + console.error('Error parsing query', e) + return null } } export function crushDraftQueryForURL(query: Node>): string { - return JSONCrush.crush(JSON.stringify(query)) + return JSON.stringify(query) } diff --git a/frontend/src/scenes/notebooks/Nodes/NotebookNodeExperiment.tsx b/frontend/src/scenes/notebooks/Nodes/NotebookNodeExperiment.tsx index e7bc3a324202c..afde3f1836415 100644 --- a/frontend/src/scenes/notebooks/Nodes/NotebookNodeExperiment.tsx +++ b/frontend/src/scenes/notebooks/Nodes/NotebookNodeExperiment.tsx @@ -18,7 +18,7 @@ import { INTEGER_REGEX_MATCH_GROUPS } from './utils' const Component = ({ attributes }: NotebookNodeProps): JSX.Element => { const { id } = attributes - const { experiment, experimentLoading, experimentMissing, isExperimentRunning, experimentResults } = useValues( + const { experiment, experimentLoading, experimentMissing, isExperimentRunning, metricResults } = useValues( experimentLogic({ experimentId: id }) ) const { loadExperiment } = useActions(experimentLogic({ 
experimentId: id })) @@ -41,6 +41,10 @@ const Component = ({ attributes }: NotebookNodeProps } + if (!metricResults) { + return <> + } + return (
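Circling back to the `insights/utils.tsx` change a little above: with jsoncrush removed, draft queries are persisted and parsed as plain JSON, and the parse helpers simply return `null` on malformed input. A small round-trip sketch of how the new helpers behave (illustrative usage, not the exported functions themselves; the localStorage key here is hypothetical):

```ts
// Mirrors the simplified helpers above: plain JSON.stringify / JSON.parse, no compression.
function crushDraft(query: object, timestamp: number): string {
    return JSON.stringify({ query, timestamp })
}

function parseDraft(raw: string): { query: object; timestamp: number } | null {
    try {
        return JSON.parse(raw)
    } catch (e) {
        console.error('Error parsing query', e)
        return null
    }
}

const stored = crushDraft({ kind: 'TrendsQuery' }, Date.now())
localStorage.setItem('draft-query-2', stored) // hypothetical key; the real one is `draft-query-${currentTeamId}`
console.log(parseDraft(localStorage.getItem('draft-query-2') ?? '')) // { query: {...}, timestamp: ... }
console.log(parseDraft('not json')) // null
```

One side effect to keep in mind: drafts saved in the old crushed format will presumably fail `JSON.parse` and be treated as absent rather than migrated.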
@@ -78,7 +82,7 @@ const Component = ({ attributes }: NotebookNodeProps
- +
)} diff --git a/frontend/src/scenes/pipeline/hogfunctions/HogFunctionTest.tsx b/frontend/src/scenes/pipeline/hogfunctions/HogFunctionTest.tsx index 1861b06f369ed..787d46245c4cb 100644 --- a/frontend/src/scenes/pipeline/hogfunctions/HogFunctionTest.tsx +++ b/frontend/src/scenes/pipeline/hogfunctions/HogFunctionTest.tsx @@ -1,9 +1,19 @@ import { TZLabel } from '@posthog/apps-common' import { IconInfo, IconX } from '@posthog/icons' -import { LemonButton, LemonLabel, LemonSwitch, LemonTable, LemonTag, Tooltip } from '@posthog/lemon-ui' +import { + LemonButton, + LemonDivider, + LemonLabel, + LemonSwitch, + LemonTable, + LemonTag, + Spinner, + Tooltip, +} from '@posthog/lemon-ui' import clsx from 'clsx' import { useActions, useValues } from 'kea' import { Form } from 'kea-forms' +import { More } from 'lib/lemon-ui/LemonButton/More' import { LemonField } from 'lib/lemon-ui/LemonField' import { CodeEditorResizeable } from 'lib/monaco/CodeEditorResizable' @@ -62,11 +72,25 @@ export function HogFunctionTestPlaceholder({ } export function HogFunctionTest(props: HogFunctionTestLogicProps): JSX.Element { - const { isTestInvocationSubmitting, testResult, expanded, sampleGlobalsLoading, sampleGlobalsError, type } = - useValues(hogFunctionTestLogic(props)) - const { submitTestInvocation, setTestResult, toggleExpanded, loadSampleGlobals } = useActions( - hogFunctionTestLogic(props) - ) + const { + isTestInvocationSubmitting, + testResult, + expanded, + sampleGlobalsLoading, + sampleGlobalsError, + type, + savedGlobals, + testInvocation, + } = useValues(hogFunctionTestLogic(props)) + const { + submitTestInvocation, + setTestResult, + toggleExpanded, + loadSampleGlobals, + deleteSavedGlobals, + setSampleGlobals, + saveGlobals, + } = useActions(hogFunctionTestLogic(props)) return (
@@ -75,7 +99,10 @@ export function HogFunctionTest(props: HogFunctionTestLogicProps): JSX.Element { >
-

Testing

+

+ Testing + {sampleGlobalsLoading ? : null} +

{!expanded && (type === 'email' ? (

Click here to test the provider with a sample e-mail

@@ -87,7 +114,7 @@ export function HogFunctionTest(props: HogFunctionTestLogicProps): JSX.Element {
{!expanded ? ( - toggleExpanded()}> + toggleExpanded()}> Start testing ) : ( @@ -97,46 +124,100 @@ export function HogFunctionTest(props: HogFunctionTestLogicProps): JSX.Element { type="primary" onClick={() => setTestResult(null)} loading={isTestInvocationSubmitting} + data-attr="clear-hog-test-result" > Clear test result ) : ( <> - - Refresh globals - - - {({ value, onChange }) => ( - - When selected, async functions such as `fetch` will not - actually be called but instead will be mocked out with - the fetch content logged instead - - } + + + {({ value, onChange }) => ( + onChange(!v)} + checked={!value} + data-attr="toggle-hog-test-mocking" + className="px-2 py-1" + label={ + + When disabled, async functions such as + `fetch` will not be called. Instead they + will be mocked out and logged. + + } + > + + Make real HTTP requests + + + + } + /> + )} + + + + Fetch new event + + + {savedGlobals.map(({ name, globals }, index) => ( +
+ setSampleGlobals(globals)} + fullWidth + className="flex-1" + > + {name} + + } + onClick={() => deleteSavedGlobals(index)} + tooltip="Delete saved test data" + /> +
+ ))} + {testInvocation.globals && ( + { + const name = prompt('Name this test data') + if (name) { + saveGlobals(name, JSON.parse(testInvocation.globals)) + } + }} + disabledReason={(() => { + try { + JSON.parse(testInvocation.globals) + } catch (e) { + return 'Invalid globals JSON' + } + return undefined + })()} > - - Mock out HTTP requests - - - - } - /> - )} -
+ Save test data + + )} + + } + /> @@ -145,7 +226,12 @@ export function HogFunctionTest(props: HogFunctionTestLogicProps): JSX.Element { )} - } onClick={() => toggleExpanded()} tooltip="Hide testing" /> + } + onClick={() => toggleExpanded()} + tooltip="Hide testing" + /> )}
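The new "Save test data" flow above keeps named copies of the invocation globals, validating them with `JSON.parse` before saving; on the logic side (shown further below in `hogFunctionTestLogic`) they live in a persisted, team-prefixed Kea reducer. A standalone sketch of the same idea backed directly by localStorage, with the key format assumed:

```ts
interface SavedGlobals {
    name: string
    globals: Record<string, unknown>
}

// Illustrative store only; the real implementation is a Kea reducer with
// { persist: true, prefix: `${getCurrentTeamId()}__` }.
class SavedGlobalsStore {
    constructor(private teamId: number) {}

    private get key(): string {
        return `${this.teamId}__hog_function_saved_globals` // assumed key shape
    }

    load(): SavedGlobals[] {
        try {
            return JSON.parse(localStorage.getItem(this.key) ?? '[]')
        } catch {
            return []
        }
    }

    save(name: string, globalsJson: string): void {
        // Validate the JSON first, mirroring the disabledReason check in the component;
        // an invalid payload throws before anything is written.
        const globals = JSON.parse(globalsJson)
        localStorage.setItem(this.key, JSON.stringify([...this.load(), { name, globals }]))
    }

    delete(index: number): void {
        localStorage.setItem(this.key, JSON.stringify(this.load().filter((_, i) => i !== index)))
    }
}
```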
diff --git a/frontend/src/scenes/pipeline/hogfunctions/hogFunctionConfigurationLogic.tsx b/frontend/src/scenes/pipeline/hogfunctions/hogFunctionConfigurationLogic.tsx index d6f7c98884a7c..d38b39ce21c59 100644 --- a/frontend/src/scenes/pipeline/hogfunctions/hogFunctionConfigurationLogic.tsx +++ b/frontend/src/scenes/pipeline/hogfunctions/hogFunctionConfigurationLogic.tsx @@ -1,6 +1,6 @@ import { lemonToast } from '@posthog/lemon-ui' import equal from 'fast-deep-equal' -import { actions, afterMount, connect, kea, key, listeners, path, props, reducers, selectors } from 'kea' +import { actions, afterMount, connect, isBreakpoint, kea, key, listeners, path, props, reducers, selectors } from 'kea' import { forms } from 'kea-forms' import { loaders } from 'kea-loaders' import { beforeUnload, router } from 'kea-router' @@ -231,8 +231,15 @@ export const hogFunctionConfigurationLogic = kea ({ configuration }), persistForUnload: true, setSampleGlobalsError: (error) => ({ error }), + setSampleGlobals: (sampleGlobals: HogFunctionInvocationGlobals | null) => ({ sampleGlobals }), }), reducers(({ props }) => ({ + sampleGlobals: [ + null as HogFunctionInvocationGlobals | null, + { + setSampleGlobals: (_, { sampleGlobals }) => sampleGlobals, + }, + ], showSource: [ // Show source by default for blank templates when creating a new function !!(!props.id && props.templateId?.startsWith('template-blank-')), @@ -440,7 +447,9 @@ export const hogFunctionConfigurationLogic = kea([ ], actions: [ hogFunctionConfigurationLogic({ id: props.id }), - ['touchConfigurationField', 'loadSampleGlobalsSuccess', 'loadSampleGlobals'], + ['touchConfigurationField', 'loadSampleGlobalsSuccess', 'loadSampleGlobals', 'setSampleGlobals'], ], })), actions({ setTestResult: (result: HogFunctionTestInvocationResult | null) => ({ result }), toggleExpanded: (expanded?: boolean) => ({ expanded }), + saveGlobals: (name: string, globals: HogFunctionInvocationGlobals) => ({ name, globals }), + deleteSavedGlobals: (index: number) => ({ index }), }), reducers({ expanded: [ false as boolean, { - toggleExpanded: (_, { expanded }) => (expanded === undefined ? !_ : expanded), + toggleExpanded: (state, { expanded }) => (expanded === undefined ? 
!state : expanded), }, ], @@ -66,11 +69,23 @@ export const hogFunctionTestLogic = kea([ setTestResult: (_, { result }) => result, }, ], + + savedGlobals: [ + [] as { name: string; globals: HogFunctionInvocationGlobals }[], + { persist: true, prefix: `${getCurrentTeamId()}__` }, + { + saveGlobals: (state, { name, globals }) => [...state, { name, globals }], + deleteSavedGlobals: (state, { index }) => state.filter((_, i) => i !== index), + }, + ], }), listeners(({ values, actions }) => ({ loadSampleGlobalsSuccess: () => { actions.setTestInvocationValue('globals', JSON.stringify(values.sampleGlobals, null, 2)) }, + setSampleGlobals: ({ sampleGlobals }) => { + actions.setTestInvocationValue('globals', JSON.stringify(sampleGlobals, null, 2)) + }, })), forms(({ props, actions, values }) => ({ testInvocation: { diff --git a/frontend/src/scenes/session-recordings/player/PlayerMeta.tsx b/frontend/src/scenes/session-recordings/player/PlayerMeta.tsx index 5c811872d4134..6e4e69a038f79 100644 --- a/frontend/src/scenes/session-recordings/player/PlayerMeta.tsx +++ b/frontend/src/scenes/session-recordings/player/PlayerMeta.tsx @@ -7,7 +7,7 @@ import { CopyToClipboardInline } from 'lib/components/CopyToClipboard' import { useResizeBreakpoints } from 'lib/hooks/useResizeObserver' import { LemonSkeleton } from 'lib/lemon-ui/LemonSkeleton' import { Tooltip } from 'lib/lemon-ui/Tooltip' -import { percentage } from 'lib/utils' +import { isObject, percentage } from 'lib/utils' import { DraggableToNotebook } from 'scenes/notebooks/AddToNotebook/DraggableToNotebook' import { IconWindow } from 'scenes/session-recordings/player/icons' import { PlayerMetaLinks } from 'scenes/session-recordings/player/PlayerMetaLinks' @@ -20,6 +20,12 @@ import { Logo } from '~/toolbar/assets/Logo' import { sessionRecordingPlayerLogic, SessionRecordingPlayerMode } from './sessionRecordingPlayerLogic' function URLOrScreen({ lastUrl }: { lastUrl: string | undefined }): JSX.Element | null { + if (isObject(lastUrl) && 'href' in lastUrl) { + // regression protection, we saw a user whose site was sometimes sending the string-ified location object + // this is a best-effort attempt to show the href in that case + lastUrl = lastUrl['href'] as string | undefined + } + if (!lastUrl) { return null } diff --git a/frontend/src/scenes/urls.ts b/frontend/src/scenes/urls.ts index 293481f598e3b..477064de6497e 100644 --- a/frontend/src/scenes/urls.ts +++ b/frontend/src/scenes/urls.ts @@ -1,4 +1,3 @@ -import JSONCrush from 'jsoncrush' import { combineUrl } from 'kea-router' import { AlertType } from 'lib/components/Alerts/types' import { getCurrentTeamId } from 'lib/utils/getAppContext' @@ -75,8 +74,7 @@ export const urls = { insightNew: (type?: InsightType, dashboardId?: DashboardType['id'] | null, query?: Node): string => combineUrl('/insights/new', dashboardId ? { dashboard: dashboardId } : {}, { ...(type ? { insight: type } : {}), - // have to use JSONCrush directly rather than the util to avoid circular dep - ...(query ? { q: typeof query === 'string' ? query : JSONCrush.crush(JSON.stringify(query)) } : {}), + ...(query ? { q: typeof query === 'string' ? 
query : JSON.stringify(query) } : {}), }).url, insightNewHogQL: (query: string, filters?: HogQLFilters): string => combineUrl( diff --git a/jest.config.ts b/jest.config.ts index 39ab8232a5592..53e5df5943413 100644 --- a/jest.config.ts +++ b/jest.config.ts @@ -8,7 +8,7 @@ process.env.TZ = process.env.TZ || 'UTC' * https://jestjs.io/docs/en/configuration.html */ -const esmModules = ['query-selector-shadow-dom', 'react-syntax-highlighter', '@react-hook', '@medv', 'monaco-editor', 'jsoncrush'] +const esmModules = ['query-selector-shadow-dom', 'react-syntax-highlighter', '@react-hook', '@medv', 'monaco-editor'] const eeFolderExists = fs.existsSync('ee/frontend/exports.ts') function rootDirectories() { const rootDirectories = ['/frontend/src'] diff --git a/package.json b/package.json index 7d55acb0ca23e..58ce1231fde79 100644 --- a/package.json +++ b/package.json @@ -140,7 +140,6 @@ "hls.js": "^1.5.15", "husky": "^7.0.4", "image-blob-reduce": "^4.1.0", - "jsoncrush": "^1.1.8", "kea": "^3.1.5", "kea-forms": "^3.2.0", "kea-loaders": "^3.0.0", @@ -162,7 +161,7 @@ "pmtiles": "^2.11.0", "postcss": "^8.4.31", "postcss-preset-env": "^9.3.0", - "posthog-js": "1.202.4", + "posthog-js": "1.202.5", "posthog-js-lite": "3.0.0", "prettier": "^2.8.8", "prop-types": "^15.7.2", diff --git a/plugin-server/src/cdp/cdp-api.ts b/plugin-server/src/cdp/cdp-api.ts index ed4e60976b1e9..a5c3f84e4f276 100644 --- a/plugin-server/src/cdp/cdp-api.ts +++ b/plugin-server/src/cdp/cdp-api.ts @@ -136,6 +136,7 @@ export class CdpApi { id: team.id, name: team.name, url: `${this.hub.SITE_URL ?? 'http://localhost:8000'}/project/${team.id}`, + ...globals.project, }, }, compoundConfiguration, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 902f297bc841d..aeaf7828a60e2 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -241,9 +241,6 @@ dependencies: image-blob-reduce: specifier: ^4.1.0 version: 4.1.0 - jsoncrush: - specifier: ^1.1.8 - version: 1.1.8 kea: specifier: ^3.1.5 version: 3.1.5(react@18.2.0) @@ -308,8 +305,8 @@ dependencies: specifier: ^9.3.0 version: 9.3.0(postcss@8.4.31) posthog-js: - specifier: 1.202.4 - version: 1.202.4 + specifier: 1.202.5 + version: 1.202.5 posthog-js-lite: specifier: 3.0.0 version: 3.0.0 @@ -15415,10 +15412,6 @@ packages: resolution: {integrity: sha512-gfFQZrcTc8CnKXp6Y4/CBT3fTc0OVuDofpre4aEeEpSBPV5X5v4+Vmx+8snU7RLPrNHPKSgLxGo9YuQzz20o+w==} dev: true - /jsoncrush@1.1.8: - resolution: {integrity: sha512-lvIMGzMUA0fjuqwNcxlTNRq2bibPZ9auqT/LyGdlR5hvydJtA/BasSgkx4qclqTKVeTidrJvsS/oVjlTCPQ4Nw==} - dev: false - /jsonfile@6.1.0: resolution: {integrity: sha512-5dgndWOriYSm5cnYaJNhalLNDKOqFwyDB/rr1E9ZsGciGvKPs8R2xYGCacuf3z6K1YKDz182fd+fY3cn3pMqXQ==} dependencies: @@ -17909,8 +17902,8 @@ packages: resolution: {integrity: sha512-dyajjnfzZD1tht4N7p7iwf7nBnR1MjVaVu+MKr+7gBgA39bn28wizCIJZztZPtHy4PY0YwtSGgwfBCuG/hnHgA==} dev: false - /posthog-js@1.202.4: - resolution: {integrity: sha512-YLu6f1ibAkiopGivGQnLBaCKegT+0GHP3DfP72z3KVby2UXLBB7dj+GIa54zfjburE7ehasysRzeK5nW39QOfA==} + /posthog-js@1.202.5: + resolution: {integrity: sha512-ZkUPkdO35Di2croJByHvBVp/sACq5KJEIaYRchUrr5lRK/57B/s6d5Y5PV374pbWpKhJdXRkUXd3ZUKJ8bOmfw==} dependencies: core-js: 3.39.0 fflate: 0.4.8 diff --git a/posthog/api/hog_function.py b/posthog/api/hog_function.py index 3f50d710acc96..9f9d54e7db1af 100644 --- a/posthog/api/hog_function.py +++ b/posthog/api/hog_function.py @@ -360,13 +360,13 @@ def invocations(self, request: Request, *args, **kwargs): # Remove the team from the config configuration.pop("team") - globals = 
serializer.validated_data["globals"] + hog_globals = serializer.validated_data["globals"] mock_async_functions = serializer.validated_data["mock_async_functions"] res = create_hog_invocation_test( team_id=hog_function.team_id, hog_function_id=hog_function.id, - globals=globals, + globals=hog_globals, configuration=configuration, mock_async_functions=mock_async_functions, ) diff --git a/posthog/conftest.py b/posthog/conftest.py index c27dbec43955e..e9804d25eff42 100644 --- a/posthog/conftest.py +++ b/posthog/conftest.py @@ -14,6 +14,7 @@ def create_clickhouse_tables(num_tables: int): CREATE_DATA_QUERIES, CREATE_DICTIONARY_QUERIES, CREATE_DISTRIBUTED_TABLE_QUERIES, + CREATE_KAFKA_TABLE_QUERIES, CREATE_MERGETREE_TABLE_QUERIES, CREATE_MV_TABLE_QUERIES, CREATE_VIEW_QUERIES, @@ -28,10 +29,18 @@ def create_clickhouse_tables(num_tables: int): + len(CREATE_DICTIONARY_QUERIES) ) + # Evaluation tests use Kafka for faster data ingestion. + if settings.IN_EVAL_TESTING: + total_tables += len(CREATE_KAFKA_TABLE_QUERIES) + # Check if all the tables have already been created. Views, materialized views, and dictionaries also count if num_tables == total_tables: return + if settings.IN_EVAL_TESTING: + kafka_table_queries = list(map(build_query, CREATE_KAFKA_TABLE_QUERIES)) + run_clickhouse_statement_in_parallel(kafka_table_queries) + table_queries = list(map(build_query, CREATE_MERGETREE_TABLE_QUERIES + CREATE_DISTRIBUTED_TABLE_QUERIES)) run_clickhouse_statement_in_parallel(table_queries) @@ -62,7 +71,7 @@ def reset_clickhouse_tables(): from posthog.models.channel_type.sql import TRUNCATE_CHANNEL_DEFINITION_TABLE_SQL from posthog.models.cohort.sql import TRUNCATE_COHORTPEOPLE_TABLE_SQL from posthog.models.error_tracking.sql import TRUNCATE_ERROR_TRACKING_ISSUE_FINGERPRINT_OVERRIDES_TABLE_SQL - from posthog.models.event.sql import TRUNCATE_EVENTS_TABLE_SQL, TRUNCATE_EVENTS_RECENT_TABLE_SQL + from posthog.models.event.sql import TRUNCATE_EVENTS_RECENT_TABLE_SQL, TRUNCATE_EVENTS_TABLE_SQL from posthog.models.group.sql import TRUNCATE_GROUPS_TABLE_SQL from posthog.models.performance.sql import TRUNCATE_PERFORMANCE_EVENTS_TABLE_SQL from posthog.models.person.sql import ( @@ -100,6 +109,18 @@ def reset_clickhouse_tables(): TRUNCATE_HEATMAPS_TABLE_SQL(), ] + # Drop created Kafka tables because some tests don't expect it. + if settings.IN_EVAL_TESTING: + kafka_tables = sync_execute( + f""" + SELECT name + FROM system.tables + WHERE database = '{settings.CLICKHOUSE_DATABASE}' AND name LIKE 'kafka_%' + """, + ) + # Using `ON CLUSTER` takes x20 more time to drop the tables: https://github.com/ClickHouse/ClickHouse/issues/15473. 
+ TABLES_TO_CREATE_DROP += [f"DROP TABLE {table[0]}" for table in kafka_tables] + run_clickhouse_statement_in_parallel(TABLES_TO_CREATE_DROP) from posthog.clickhouse.schema import ( diff --git a/posthog/hogql_queries/experiments/experiment_funnels_query_runner.py b/posthog/hogql_queries/experiments/experiment_funnels_query_runner.py index d9d874d7b6e6b..363ee06bc78be 100644 --- a/posthog/hogql_queries/experiments/experiment_funnels_query_runner.py +++ b/posthog/hogql_queries/experiments/experiment_funnels_query_runner.py @@ -7,6 +7,11 @@ calculate_credible_intervals, calculate_probabilities, ) +from posthog.hogql_queries.experiments.funnels_statistics_v2 import ( + are_results_significant_v2, + calculate_credible_intervals_v2, + calculate_probabilities_v2, +) from posthog.hogql_queries.query_runner import QueryRunner from posthog.models.experiment import Experiment from ..insights.funnels.funnels_query_runner import FunnelsQueryRunner @@ -45,6 +50,8 @@ def __init__(self, *args, **kwargs): if self.experiment.holdout: self.variants.append(f"holdout-{self.experiment.holdout.id}") + self.stats_version = self.query.stats_version or 1 + self.prepared_funnels_query = self._prepare_funnel_query() self.funnels_query_runner = FunnelsQueryRunner( query=self.prepared_funnels_query, team=self.team, timings=self.timings, limit_context=self.limit_context @@ -63,9 +70,14 @@ def calculate(self) -> ExperimentFunnelsQueryResponse: # Statistical analysis control_variant, test_variants = self._get_variants_with_base_stats(funnels_result) - probabilities = calculate_probabilities(control_variant, test_variants) - significance_code, loss = are_results_significant(control_variant, test_variants, probabilities) - credible_intervals = calculate_credible_intervals([control_variant, *test_variants]) + if self.stats_version == 2: + probabilities = calculate_probabilities_v2(control_variant, test_variants) + significance_code, loss = are_results_significant_v2(control_variant, test_variants, probabilities) + credible_intervals = calculate_credible_intervals_v2([control_variant, *test_variants]) + else: + probabilities = calculate_probabilities(control_variant, test_variants) + significance_code, loss = are_results_significant(control_variant, test_variants, probabilities) + credible_intervals = calculate_credible_intervals([control_variant, *test_variants]) except Exception as e: raise ValueError(f"Error calculating experiment funnel results: {str(e)}") from e @@ -80,6 +92,7 @@ def calculate(self) -> ExperimentFunnelsQueryResponse: }, significant=significance_code == ExperimentSignificanceCode.SIGNIFICANT, significance_code=significance_code, + stats_version=self.stats_version, expected_loss=loss, credible_intervals=credible_intervals, ) diff --git a/posthog/hogql_queries/experiments/funnels_statistics_v2.py b/posthog/hogql_queries/experiments/funnels_statistics_v2.py new file mode 100644 index 0000000000000..02f18d2f70740 --- /dev/null +++ b/posthog/hogql_queries/experiments/funnels_statistics_v2.py @@ -0,0 +1,216 @@ +import numpy as np +from scipy import stats +from posthog.schema import ExperimentVariantFunnelsBaseStats, ExperimentSignificanceCode +from posthog.hogql_queries.experiments import ( + FF_DISTRIBUTION_THRESHOLD, + MIN_PROBABILITY_FOR_SIGNIFICANCE, + EXPECTED_LOSS_SIGNIFICANCE_LEVEL, +) +from scipy.stats import betabinom + +ALPHA_PRIOR = 1 +BETA_PRIOR = 1 +SAMPLE_SIZE = 10000 + + +def calculate_probabilities_v2( + control: ExperimentVariantFunnelsBaseStats, variants: 
list[ExperimentVariantFunnelsBaseStats] +) -> list[float]: + """ + Calculate the win probabilities for each variant in an experiment using Bayesian analysis + for funnel conversion rates. + + This function computes the probability that each variant is the best (i.e., has the highest + conversion rate) compared to all other variants, including the control. It uses samples + drawn from the posterior Beta distributions of each variant's conversion rate. + + Parameters: + ----------- + control : ExperimentVariantFunnelsBaseStats + Statistics for the control group, including success and failure counts + variants : list[ExperimentVariantFunnelsBaseStats] + List of statistics for test variants to compare against the control + + Returns: + -------- + list[float] + A list of probabilities where: + - The first element is the probability that the control variant is the best + - Subsequent elements are the probabilities that each test variant is the best + + Notes: + ------ + - Uses a Bayesian approach with Beta distributions as the posterior + - Uses Beta(1,1) as the prior, which is uniform over [0,1] + - Draws 10,000 samples from each variant's posterior distribution + """ + all_variants = [control, *variants] + + # Use Beta distribution for conversion rates + samples: list[np.ndarray] = [] + for variant in all_variants: + # Add prior to both successes and failures for Bayesian prior + alpha = ALPHA_PRIOR + variant.success_count + beta = BETA_PRIOR + variant.failure_count + # Generate samples from Beta distribution + variant_samples = np.random.beta(alpha, beta, SAMPLE_SIZE) + samples.append(variant_samples) + + samples_array = np.array(samples) + # Calculate probability of each variant being the best + probabilities = [] + for i in range(len(all_variants)): + probability = (samples_array[i] == np.max(samples_array, axis=0)).mean() + probabilities.append(float(probability)) + + return probabilities + + +def calculate_expected_loss_v2( + target_variant: ExperimentVariantFunnelsBaseStats, variants: list[ExperimentVariantFunnelsBaseStats] +) -> float: + """ + Calculates expected loss in conversion rate using Beta-Binomial conjugate prior. + + This implementation uses a Bayesian approach with Beta-Binomial model + to estimate the expected loss when choosing the target variant over others. 
+ + Parameters: + ----------- + target_variant : ExperimentVariantFunnelsBaseStats + The variant being evaluated for loss + variants : list[ExperimentVariantFunnelsBaseStats] + List of other variants to compare against + + Returns: + -------- + float + Expected loss in conversion rate if choosing the target variant + """ + # Calculate posterior parameters for target variant + target_alpha = int(ALPHA_PRIOR + target_variant.success_count) + target_beta = int(BETA_PRIOR + target_variant.failure_count) + target_n = int(target_variant.success_count + target_variant.failure_count) + + # Get samples from target variant's Beta-Binomial + target_samples = betabinom.rvs(target_n, target_alpha, target_beta, size=SAMPLE_SIZE) / target_n + + # Get samples from each comparison variant + variant_samples = [] + for variant in variants: + n = int(variant.success_count + variant.failure_count) + alpha = int(ALPHA_PRIOR + variant.success_count) + beta = int(BETA_PRIOR + variant.failure_count) + samples = betabinom.rvs(n, alpha, beta, size=SAMPLE_SIZE) / n + variant_samples.append(samples) + + # Calculate loss + variant_max = np.maximum.reduce(variant_samples) + losses = np.maximum(0, variant_max - target_samples) + expected_loss = float(np.mean(losses)) + + return expected_loss + + +def are_results_significant_v2( + control: ExperimentVariantFunnelsBaseStats, + variants: list[ExperimentVariantFunnelsBaseStats], + probabilities: list[float], +) -> tuple[ExperimentSignificanceCode, float]: + """ + Determine if the experiment results are statistically significant using Bayesian analysis + for funnel conversion rates. + + This function evaluates whether there is strong evidence that any variant is better + than the others by considering both winning probabilities and expected loss. It checks + if the sample size is sufficient and evaluates the risk of choosing the winning variant. + + Parameters: + ----------- + control : ExperimentVariantFunnelsBaseStats + Statistics for the control group, including success and failure counts + variants : list[ExperimentVariantFunnelsBaseStats] + List of statistics for test variants to compare against the control + probabilities : list[float] + List of probabilities from calculate_probabilities_v2 + + Returns: + -------- + tuple[ExperimentSignificanceCode, float] + A tuple containing: + - Significance code indicating the result (significant, not enough exposure, high loss, etc.) 
+ - Expected loss value for significant results, 1.0 for non-significant results + + Notes: + ------ + - Requires minimum exposure threshold per variant for reliable results + - Uses probability threshold from MIN_PROBABILITY_FOR_SIGNIFICANCE + - Calculates expected loss for the best-performing variant + - Returns HIGH_LOSS if expected loss exceeds significance threshold + - Returns NOT_ENOUGH_EXPOSURE if sample size requirements not met + """ + # Check minimum exposure + if control.success_count + control.failure_count < FF_DISTRIBUTION_THRESHOLD or any( + v.success_count + v.failure_count < FF_DISTRIBUTION_THRESHOLD for v in variants + ): + return ExperimentSignificanceCode.NOT_ENOUGH_EXPOSURE, 1.0 + + # Check if any variant has high enough probability + max_probability = max(probabilities) + if max_probability >= MIN_PROBABILITY_FOR_SIGNIFICANCE: + # Find best performing variant + all_variants = [control, *variants] + conversion_rates = [v.success_count / (v.success_count + v.failure_count) for v in all_variants] + best_idx = np.argmax(conversion_rates) + best_variant = all_variants[best_idx] + other_variants = all_variants[:best_idx] + all_variants[best_idx + 1 :] + expected_loss = calculate_expected_loss_v2(best_variant, other_variants) + + if expected_loss >= EXPECTED_LOSS_SIGNIFICANCE_LEVEL: + return ExperimentSignificanceCode.HIGH_LOSS, expected_loss + + return ExperimentSignificanceCode.SIGNIFICANT, expected_loss + + return ExperimentSignificanceCode.LOW_WIN_PROBABILITY, 1.0 + + +def calculate_credible_intervals_v2(variants: list[ExperimentVariantFunnelsBaseStats]) -> dict[str, list[float]]: + """ + Calculate Bayesian credible intervals for conversion rates of each variant. + + This function computes the 95% credible intervals for the true conversion rate + of each variant, representing the range where we believe the true rate lies + with 95% probability. 
+ + Parameters: + ----------- + variants : list[ExperimentVariantFunnelsBaseStats] + List of all variants including control, containing success and failure counts + + Returns: + -------- + dict[str, list[float]] + Dictionary mapping variant keys to [lower, upper] credible intervals, where: + - lower is the 2.5th percentile of the posterior distribution + - upper is the 97.5th percentile of the posterior distribution + + Notes: + ------ + - Uses Beta distribution as the posterior + - Uses Beta(1,1) as the prior, which is uniform over [0,1] + - Returns 95% credible intervals + - Intervals become narrower with larger sample sizes + """ + intervals = {} + + for variant in variants: + # Add 1 to both successes and failures for Bayesian prior + alpha = ALPHA_PRIOR + variant.success_count + beta = BETA_PRIOR + variant.failure_count + + # Calculate 95% credible interval + lower, upper = stats.beta.ppf([0.025, 0.975], alpha, beta) + + intervals[variant.key] = [float(lower), float(upper)] + + return intervals diff --git a/posthog/hogql_queries/experiments/test/test_experiment_funnels_query_runner.py b/posthog/hogql_queries/experiments/test/test_experiment_funnels_query_runner.py index 3f3127fcb1b55..3dc11499b1393 100644 --- a/posthog/hogql_queries/experiments/test/test_experiment_funnels_query_runner.py +++ b/posthog/hogql_queries/experiments/test/test_experiment_funnels_query_runner.py @@ -113,6 +113,8 @@ def test_query_runner(self): ) result = query_runner.calculate() + self.assertEqual(result.stats_version, 1) + self.assertEqual(len(result.variants), 2) control_variant = next(variant for variant in result.variants if variant.key == "control") @@ -123,11 +125,88 @@ def test_query_runner(self): self.assertEqual(test_variant.success_count, 8) self.assertEqual(test_variant.failure_count, 2) - self.assertIn("control", result.probability) - self.assertIn("test", result.probability) + self.assertAlmostEqual(result.probability["control"], 0.2, delta=0.1) + self.assertAlmostEqual(result.probability["test"], 0.8, delta=0.1) - self.assertIn("control", result.credible_intervals) - self.assertIn("test", result.credible_intervals) + self.assertEqual(result.significant, False) + self.assertEqual(result.significance_code, ExperimentSignificanceCode.NOT_ENOUGH_EXPOSURE) + self.assertEqual(result.expected_loss, 1.0) + + self.assertAlmostEqual(result.credible_intervals["control"][0], 0.3, delta=0.1) + self.assertAlmostEqual(result.credible_intervals["control"][1], 0.8, delta=0.1) + self.assertAlmostEqual(result.credible_intervals["test"][0], 0.5, delta=0.1) + self.assertAlmostEqual(result.credible_intervals["test"][1], 0.9, delta=0.1) + + @freeze_time("2020-01-01T12:00:00Z") + def test_query_runner_v2(self): + feature_flag = self.create_feature_flag() + experiment = self.create_experiment(feature_flag=feature_flag) + + feature_flag_property = f"$feature/{feature_flag.key}" + + funnels_query = FunnelsQuery( + series=[EventsNode(event="$pageview"), EventsNode(event="purchase")], + dateRange={"date_from": "2020-01-01", "date_to": "2020-01-14"}, + ) + experiment_query = ExperimentFunnelsQuery( + experiment_id=experiment.id, + kind="ExperimentFunnelsQuery", + funnels_query=funnels_query, + stats_version=2, + ) + + experiment.metrics = [{"type": "primary", "query": experiment_query.model_dump()}] + experiment.save() + + for variant, purchase_count in [("control", 6), ("test", 8)]: + for i in range(10): + _create_person(distinct_ids=[f"user_{variant}_{i}"], team_id=self.team.pk) + _create_event( + team=self.team, + 
event="$pageview", + distinct_id=f"user_{variant}_{i}", + timestamp="2020-01-02T12:00:00Z", + properties={feature_flag_property: variant}, + ) + if i < purchase_count: + _create_event( + team=self.team, + event="purchase", + distinct_id=f"user_{variant}_{i}", + timestamp="2020-01-02T12:01:00Z", + properties={feature_flag_property: variant}, + ) + + flush_persons_and_events() + + query_runner = ExperimentFunnelsQueryRunner( + query=ExperimentFunnelsQuery(**experiment.metrics[0]["query"]), team=self.team + ) + result = query_runner.calculate() + + self.assertEqual(result.stats_version, 2) + + self.assertEqual(len(result.variants), 2) + + control_variant = next(variant for variant in result.variants if variant.key == "control") + test_variant = next(variant for variant in result.variants if variant.key == "test") + + self.assertEqual(control_variant.success_count, 6) + self.assertEqual(control_variant.failure_count, 4) + self.assertEqual(test_variant.success_count, 8) + self.assertEqual(test_variant.failure_count, 2) + + self.assertAlmostEqual(result.probability["control"], 0.2, delta=0.1) + self.assertAlmostEqual(result.probability["test"], 0.8, delta=0.1) + + self.assertEqual(result.significant, False) + self.assertEqual(result.significance_code, ExperimentSignificanceCode.NOT_ENOUGH_EXPOSURE) + self.assertEqual(result.expected_loss, 1.0) + + self.assertAlmostEqual(result.credible_intervals["control"][0], 0.3, delta=0.1) + self.assertAlmostEqual(result.credible_intervals["control"][1], 0.8, delta=0.1) + self.assertAlmostEqual(result.credible_intervals["test"][0], 0.5, delta=0.1) + self.assertAlmostEqual(result.credible_intervals["test"][1], 0.9, delta=0.1) @flaky(max_runs=10, min_passes=1) @freeze_time("2020-01-01T12:00:00Z") diff --git a/posthog/hogql_queries/experiments/test/test_funnels_statistics.py b/posthog/hogql_queries/experiments/test/test_funnels_statistics.py new file mode 100644 index 0000000000000..2206ff92b9305 --- /dev/null +++ b/posthog/hogql_queries/experiments/test/test_funnels_statistics.py @@ -0,0 +1,243 @@ +from posthog.hogql_queries.experiments import MIN_PROBABILITY_FOR_SIGNIFICANCE +from posthog.schema import ExperimentVariantFunnelsBaseStats, ExperimentSignificanceCode +from posthog.hogql_queries.experiments.funnels_statistics_v2 import ( + calculate_probabilities_v2, + are_results_significant_v2, + calculate_credible_intervals_v2, +) +from posthog.hogql_queries.experiments.funnels_statistics import ( + calculate_probabilities, + are_results_significant, + calculate_credible_intervals, +) +from posthog.test.base import APIBaseTest + + +def create_variant( + key: str, + success_count: int, + failure_count: int, +) -> ExperimentVariantFunnelsBaseStats: + return ExperimentVariantFunnelsBaseStats( + key=key, + success_count=success_count, + failure_count=failure_count, + ) + + +class TestExperimentFunnelStatistics(APIBaseTest): + def run_test_for_both_implementations(self, test_fn): + """Run the same test for both implementations""" + self.stats_version = 1 + # Run for original implementation + test_fn( + stats_version=1, + calculate_probabilities=calculate_probabilities, + are_results_significant=are_results_significant, + calculate_credible_intervals=calculate_credible_intervals, + ) + self.stats_version = 2 + # Run for v2 implementation + test_fn( + stats_version=2, + calculate_probabilities=calculate_probabilities_v2, + are_results_significant=are_results_significant_v2, + calculate_credible_intervals=calculate_credible_intervals_v2, + ) + + def 
test_small_sample_two_variants_not_significant(self): + """Test with small sample size, two variants, no clear winner""" + + def run_test(stats_version, calculate_probabilities, are_results_significant, calculate_credible_intervals): + control = create_variant("control", success_count=10, failure_count=90) + test = create_variant("test", success_count=15, failure_count=85) + + probabilities = calculate_probabilities(control, [test]) + significance, p_value = are_results_significant(control, [test], probabilities) + intervals = calculate_credible_intervals([control, test]) + + self.assertEqual(len(probabilities), 2) + if stats_version == 2: + self.assertAlmostEqual(probabilities[0], 0.15, delta=0.1) + self.assertAlmostEqual(probabilities[1], 0.85, delta=0.1) + self.assertEqual(significance, ExperimentSignificanceCode.LOW_WIN_PROBABILITY) + self.assertEqual(p_value, 1) + + # Check credible intervals + self.assertAlmostEqual(intervals["control"][0], 0.05, delta=0.05) + self.assertAlmostEqual(intervals["control"][1], 0.20, delta=0.05) + self.assertAlmostEqual(intervals["test"][0], 0.08, delta=0.05) + self.assertAlmostEqual(intervals["test"][1], 0.25, delta=0.05) + else: + # Original implementation behavior + self.assertTrue(0.1 < probabilities[0] < 0.5) + self.assertTrue(0.5 < probabilities[1] < 0.9) + self.assertEqual(significance, ExperimentSignificanceCode.LOW_WIN_PROBABILITY) + self.assertEqual(p_value, 1) + + # Original implementation intervals + self.assertAlmostEqual(intervals["control"][0], 0.05, delta=0.05) + self.assertAlmostEqual(intervals["control"][1], 0.20, delta=0.05) + self.assertAlmostEqual(intervals["test"][0], 0.08, delta=0.05) + self.assertAlmostEqual(intervals["test"][1], 0.25, delta=0.05) + + self.run_test_for_both_implementations(run_test) + + def test_large_sample_two_variants_significant(self): + """Test with large sample size, two variants, clear winner""" + + def run_test(stats_version, calculate_probabilities, are_results_significant, calculate_credible_intervals): + control = create_variant("control", success_count=1000, failure_count=9000) + test = create_variant("test", success_count=1500, failure_count=8500) + + probabilities = calculate_probabilities(control, [test]) + significance, p_value = are_results_significant(control, [test], probabilities) + intervals = calculate_credible_intervals([control, test]) + + self.assertEqual(len(probabilities), 2) + if stats_version == 2: + self.assertAlmostEqual(probabilities[1], 1.0, delta=0.05) + self.assertAlmostEqual(probabilities[0], 0.0, delta=0.05) + self.assertEqual(significance, ExperimentSignificanceCode.SIGNIFICANT) + self.assertEqual(p_value, 0) + + # Check credible intervals + self.assertAlmostEqual(intervals["control"][0], 0.095, delta=0.01) + self.assertAlmostEqual(intervals["control"][1], 0.105, delta=0.01) + self.assertAlmostEqual(intervals["test"][0], 0.145, delta=0.01) + self.assertAlmostEqual(intervals["test"][1], 0.155, delta=0.01) + else: + # Original implementation behavior + self.assertTrue(probabilities[1] > 0.5) # Test variant winning + self.assertTrue(probabilities[0] < 0.5) # Control variant losing + self.assertEqual(significance, ExperimentSignificanceCode.SIGNIFICANT) + self.assertLess(p_value, 0.05) + + # Original implementation intervals + self.assertAlmostEqual(intervals["control"][0], 0.095, delta=0.01) + self.assertAlmostEqual(intervals["control"][1], 0.105, delta=0.01) + self.assertAlmostEqual(intervals["test"][0], 0.145, delta=0.01) + self.assertAlmostEqual(intervals["test"][1], 0.155, 
delta=0.01) + + self.run_test_for_both_implementations(run_test) + + def test_many_variants_not_significant(self): + """Test with multiple variants, no clear winner""" + + def run_test(stats_version, calculate_probabilities, are_results_significant, calculate_credible_intervals): + control = create_variant("control", success_count=100, failure_count=900) + test_a = create_variant("test_a", success_count=98, failure_count=902) + test_b = create_variant("test_b", success_count=102, failure_count=898) + test_c = create_variant("test_c", success_count=101, failure_count=899) + + probabilities = calculate_probabilities(control, [test_a, test_b, test_c]) + significance, p_value = are_results_significant(control, [test_a, test_b, test_c], probabilities) + intervals = calculate_credible_intervals([control, test_a, test_b, test_c]) + + self.assertEqual(len(probabilities), 4) + if stats_version == 2: + self.assertTrue(all(p < MIN_PROBABILITY_FOR_SIGNIFICANCE for p in probabilities)) + self.assertEqual(significance, ExperimentSignificanceCode.LOW_WIN_PROBABILITY) + self.assertEqual(p_value, 1) + + # Check credible intervals overlap + # Check credible intervals for control and all test variants + self.assertAlmostEqual(intervals["control"][0], 0.09, delta=0.02) + self.assertAlmostEqual(intervals["control"][1], 0.12, delta=0.02) + self.assertAlmostEqual(intervals["test_a"][0], 0.09, delta=0.02) + self.assertAlmostEqual(intervals["test_a"][1], 0.12, delta=0.02) + self.assertAlmostEqual(intervals["test_b"][0], 0.09, delta=0.02) + self.assertAlmostEqual(intervals["test_b"][1], 0.12, delta=0.02) + self.assertAlmostEqual(intervals["test_c"][0], 0.09, delta=0.02) + self.assertAlmostEqual(intervals["test_c"][1], 0.12, delta=0.02) + else: + # Original implementation behavior + self.assertTrue(all(0.1 < p < 0.9 for p in probabilities)) + self.assertEqual(significance, ExperimentSignificanceCode.LOW_WIN_PROBABILITY) + self.assertEqual(p_value, 1) + + # Check credible intervals overlap + # Check credible intervals for control and all test variants + self.assertAlmostEqual(intervals["control"][0], 0.09, delta=0.02) + self.assertAlmostEqual(intervals["control"][1], 0.12, delta=0.02) + self.assertAlmostEqual(intervals["test_a"][0], 0.09, delta=0.02) + self.assertAlmostEqual(intervals["test_a"][1], 0.12, delta=0.02) + self.assertAlmostEqual(intervals["test_b"][0], 0.09, delta=0.02) + self.assertAlmostEqual(intervals["test_b"][1], 0.12, delta=0.02) + self.assertAlmostEqual(intervals["test_c"][0], 0.09, delta=0.02) + self.assertAlmostEqual(intervals["test_c"][1], 0.12, delta=0.02) + + self.run_test_for_both_implementations(run_test) + + def test_insufficient_sample_size(self): + """Test with sample size below threshold""" + + def run_test(stats_version, calculate_probabilities, are_results_significant, calculate_credible_intervals): + control = create_variant("control", success_count=5, failure_count=45) + test = create_variant("test", success_count=8, failure_count=42) + + probabilities = calculate_probabilities(control, [test]) + significance, p_value = are_results_significant(control, [test], probabilities) + intervals = calculate_credible_intervals([control, test]) + + self.assertEqual(len(probabilities), 2) + if stats_version == 2: + self.assertEqual(significance, ExperimentSignificanceCode.NOT_ENOUGH_EXPOSURE) + self.assertEqual(p_value, 1.0) + + # Check wide credible intervals due to small sample + self.assertTrue(intervals["control"][1] - intervals["control"][0] > 0.15) + self.assertTrue(intervals["test"][1] - 
intervals["test"][0] > 0.15)
+            else:
+                # Original implementation behavior
+                self.assertEqual(significance, ExperimentSignificanceCode.NOT_ENOUGH_EXPOSURE)
+                self.assertEqual(p_value, 1.0)
+
+                # Check wide credible intervals
+                self.assertTrue(intervals["control"][1] - intervals["control"][0] > 0.15)
+                self.assertTrue(intervals["test"][1] - intervals["test"][0] > 0.15)
+
+        self.run_test_for_both_implementations(run_test)
+
+    def test_expected_loss_minimal_difference(self):
+        """Test expected loss when variants have very similar performance"""
+
+        def run_test(stats_version, calculate_probabilities, are_results_significant, calculate_credible_intervals):
+            control = create_variant("control", success_count=1000, failure_count=9000)  # 10% conversion
+            test = create_variant("test", success_count=1050, failure_count=8800)  # ~10.7% conversion
+
+            probabilities = calculate_probabilities(control, [test])
+            significance, expected_loss = are_results_significant(control, [test], probabilities)
+
+            if stats_version == 2:
+                self.assertEqual(significance, ExperimentSignificanceCode.SIGNIFICANT)
+                # Expected loss should still be relatively small
+                self.assertLess(expected_loss, 0.03)  # Less than 3% expected loss
+                self.assertGreater(expected_loss, 0)
+            else:
+                # Original implementation behavior
+                self.assertEqual(significance, ExperimentSignificanceCode.SIGNIFICANT)
+                self.assertLess(expected_loss, 0.03)  # Less than 3% expected loss
+                self.assertGreater(expected_loss, 0)
+
+        self.run_test_for_both_implementations(run_test)
+
+    def test_expected_loss_test_variant_clear_winner(self):
+        """Test expected loss when one variant is clearly better"""
+
+        def run_test(stats_version, calculate_probabilities, are_results_significant, calculate_credible_intervals):
+            control = create_variant("control", success_count=1000, failure_count=9000)  # 10% conversion
+            test = create_variant("test", success_count=2000, failure_count=8000)  # 20% conversion
+
+            probabilities = calculate_probabilities(control, [test])
+            significance, expected_loss = are_results_significant(control, [test], probabilities)
+
+            if stats_version == 2:
+                self.assertEqual(significance, ExperimentSignificanceCode.SIGNIFICANT)
+                self.assertEqual(expected_loss, 0.0)
+            else:
+                # Original implementation behavior
+                self.assertEqual(significance, ExperimentSignificanceCode.SIGNIFICANT)
+                self.assertEqual(expected_loss, 0.0)
+
+        self.run_test_for_both_implementations(run_test)
diff --git a/posthog/schema.py b/posthog/schema.py
index 564dcc321fa60..a46596350f23f 100644
--- a/posthog/schema.py
+++ b/posthog/schema.py
@@ -6292,6 +6292,7 @@ class QueryResponseAlternative15(BaseModel):
     probability: dict[str, float]
     significance_code: ExperimentSignificanceCode
     significant: bool
+    stats_version: Optional[int] = None
     variants: list[ExperimentVariantFunnelsBaseStats]
@@ -6324,6 +6325,7 @@ class QueryResponseAlternative26(BaseModel):
     probability: dict[str, float]
     significance_code: ExperimentSignificanceCode
     significant: bool
+    stats_version: Optional[int] = None
     variants: list[ExperimentVariantFunnelsBaseStats]
@@ -6461,6 +6463,7 @@ class CachedExperimentFunnelsQueryResponse(BaseModel):
     )
     significance_code: ExperimentSignificanceCode
     significant: bool
+    stats_version: Optional[int] = None
     timezone: str
     variants: list[ExperimentVariantFunnelsBaseStats]
@@ -6477,6 +6480,7 @@ class Response9(BaseModel):
     probability: dict[str, float]
     significance_code: ExperimentSignificanceCode
     significant: bool
+    stats_version: Optional[int] = None
     variants: 
list[ExperimentVariantFunnelsBaseStats] @@ -6508,6 +6512,7 @@ class ExperimentFunnelsQueryResponse(BaseModel): probability: dict[str, float] significance_code: ExperimentSignificanceCode significant: bool + stats_version: Optional[int] = None variants: list[ExperimentVariantFunnelsBaseStats] @@ -6640,6 +6645,7 @@ class ExperimentFunnelsQuery(BaseModel): ) name: Optional[str] = None response: Optional[ExperimentFunnelsQueryResponse] = None + stats_version: Optional[int] = None class FunnelCorrelationQuery(BaseModel): diff --git a/posthog/settings/__init__.py b/posthog/settings/__init__.py index c6067fd19c1f7..3e7ebc0b7c984 100644 --- a/posthog/settings/__init__.py +++ b/posthog/settings/__init__.py @@ -108,6 +108,7 @@ PROM_PUSHGATEWAY_ADDRESS: str | None = os.getenv("PROM_PUSHGATEWAY_ADDRESS", None) IN_UNIT_TESTING: bool = get_from_env("IN_UNIT_TESTING", False, type_cast=str_to_bool) +IN_EVAL_TESTING: bool = get_from_env("DEEPEVAL", False, type_cast=str_to_bool) HOGQL_INCREASED_MAX_EXECUTION_TIME: int = get_from_env("HOGQL_INCREASED_MAX_EXECUTION_TIME", 600, type_cast=int) diff --git a/posthog/temporal/data_imports/external_data_job.py b/posthog/temporal/data_imports/external_data_job.py index 62a1e1bc834ed..59508a2ee6f25 100644 --- a/posthog/temporal/data_imports/external_data_job.py +++ b/posthog/temporal/data_imports/external_data_job.py @@ -3,12 +3,17 @@ import datetime as dt import json import re +import threading +import time from django.conf import settings from django.db import close_old_connections import posthoganalytics +import psutil from temporalio import activity, exceptions, workflow from temporalio.common import RetryPolicy +from temporalio.exceptions import WorkflowAlreadyStartedError + from posthog.constants import DATA_WAREHOUSE_TASK_QUEUE_V2 @@ -144,20 +149,22 @@ def trigger_pipeline_v2(inputs: ExternalDataWorkflowInputs): logger.debug("Triggering V2 pipeline") temporal = sync_connect() - - asyncio.run( - temporal.start_workflow( - workflow="external-data-job", - arg=dataclasses.asdict(inputs), - id=f"{inputs.external_data_schema_id}-V2", - task_queue=str(DATA_WAREHOUSE_TASK_QUEUE_V2), - retry_policy=RetryPolicy( - maximum_interval=dt.timedelta(seconds=60), - maximum_attempts=1, - non_retryable_error_types=["NondeterminismError"], - ), + try: + asyncio.run( + temporal.start_workflow( + workflow="external-data-job", + arg=dataclasses.asdict(inputs), + id=f"{inputs.external_data_schema_id}-V2", + task_queue=str(DATA_WAREHOUSE_TASK_QUEUE_V2), + retry_policy=RetryPolicy( + maximum_interval=dt.timedelta(seconds=60), + maximum_attempts=1, + non_retryable_error_types=["NondeterminismError"], + ), + ) ) - ) + except WorkflowAlreadyStartedError: + pass logger.debug("V2 pipeline triggered") @@ -173,6 +180,22 @@ def create_source_templates(inputs: CreateSourceTemplateInputs) -> None: create_warehouse_templates_for_source(team_id=inputs.team_id, run_id=inputs.run_id) +def log_memory_usage(): + process = psutil.Process() + logger = bind_temporal_worker_logger_sync(team_id=0) + + while True: + memory_info = process.memory_info() + logger.info(f"Memory Usage: RSS = {memory_info.rss / (1024 * 1024):.2f} MB") + + time.sleep(10) # Log every 10 seconds + + +if settings.TEMPORAL_TASK_QUEUE == DATA_WAREHOUSE_TASK_QUEUE_V2: + thread = threading.Thread(target=log_memory_usage, daemon=True) + thread.start() + + # TODO: update retry policies @workflow.defn(name="external-data-job") class ExternalDataJobWorkflow(PostHogWorkflow): diff --git 
a/posthog/temporal/data_imports/pipelines/sql_database/helpers.py b/posthog/temporal/data_imports/pipelines/sql_database/helpers.py index 0400a60b32fd5..9bf72a26f3c1e 100644 --- a/posthog/temporal/data_imports/pipelines/sql_database/helpers.py +++ b/posthog/temporal/data_imports/pipelines/sql_database/helpers.py @@ -24,7 +24,7 @@ def __init__( self, engine: Engine, table: Table, - chunk_size: int = 1000, + chunk_size: int = DEFAULT_CHUNK_SIZE, incremental: Optional[dlt.sources.incremental[Any]] = None, connect_args: Optional[list[str]] = None, db_incremental_field_last_value: Optional[Any] = None, diff --git a/posthog/temporal/data_imports/pipelines/sql_database_v2/__init__.py b/posthog/temporal/data_imports/pipelines/sql_database_v2/__init__.py index a3fc1c6b2838b..0c49b04ba1d1c 100644 --- a/posthog/temporal/data_imports/pipelines/sql_database_v2/__init__.py +++ b/posthog/temporal/data_imports/pipelines/sql_database_v2/__init__.py @@ -15,6 +15,7 @@ from dlt.sources.credentials import ConnectionStringCredentials from posthog.settings.utils import get_from_env +from posthog.temporal.data_imports.pipelines.sql_database_v2.settings import DEFAULT_CHUNK_SIZE from posthog.temporal.data_imports.pipelines.sql_database_v2._json import BigQueryJSON from posthog.utils import str_to_bool from posthog.warehouse.models import ExternalDataSource @@ -252,7 +253,7 @@ def sql_database( schema: Optional[str] = dlt.config.value, metadata: Optional[MetaData] = None, table_names: Optional[list[str]] = dlt.config.value, - chunk_size: int = 50000, + chunk_size: int = DEFAULT_CHUNK_SIZE, backend: TableBackend = "pyarrow", detect_precision_hints: Optional[bool] = False, reflection_level: Optional[ReflectionLevel] = "full", @@ -365,7 +366,7 @@ def sql_table( schema: Optional[str] = dlt.config.value, metadata: Optional[MetaData] = None, incremental: Optional[dlt.sources.incremental[Any]] = None, - chunk_size: int = 50000, + chunk_size: int = DEFAULT_CHUNK_SIZE, backend: TableBackend = "sqlalchemy", detect_precision_hints: Optional[bool] = None, reflection_level: Optional[ReflectionLevel] = "full", diff --git a/posthog/temporal/data_imports/pipelines/sql_database_v2/helpers.py b/posthog/temporal/data_imports/pipelines/sql_database_v2/helpers.py index acd64c97aae99..74a79650caa15 100644 --- a/posthog/temporal/data_imports/pipelines/sql_database_v2/helpers.py +++ b/posthog/temporal/data_imports/pipelines/sql_database_v2/helpers.py @@ -18,6 +18,8 @@ from dlt.sources.credentials import ConnectionStringCredentials +from posthog.temporal.data_imports.pipelines.sql_database_v2.settings import DEFAULT_CHUNK_SIZE + from .arrow_helpers import row_tuples_to_arrow from .schema_types import ( default_table_adapter, @@ -44,7 +46,7 @@ def __init__( backend: TableBackend, table: Table, columns: TTableSchemaColumns, - chunk_size: int = 1000, + chunk_size: int = DEFAULT_CHUNK_SIZE, incremental: Optional[dlt.sources.incremental[Any]] = None, db_incremental_field_last_value: Optional[Any] = None, query_adapter_callback: Optional[TQueryAdapter] = None, @@ -302,7 +304,7 @@ class SqlTableResourceConfiguration(BaseConfiguration): table: Optional[str] = None schema: Optional[str] = None incremental: Optional[dlt.sources.incremental] = None # type: ignore[type-arg] - chunk_size: int = 50000 + chunk_size: int = DEFAULT_CHUNK_SIZE backend: TableBackend = "sqlalchemy" detect_precision_hints: Optional[bool] = None defer_table_reflect: Optional[bool] = False diff --git a/posthog/temporal/data_imports/pipelines/sql_database_v2/settings.py 
b/posthog/temporal/data_imports/pipelines/sql_database_v2/settings.py new file mode 100644 index 0000000000000..d730961c096e8 --- /dev/null +++ b/posthog/temporal/data_imports/pipelines/sql_database_v2/settings.py @@ -0,0 +1 @@ +DEFAULT_CHUNK_SIZE = 10_000 diff --git a/requirements.in b/requirements.in index faefd16d9294d..8c0b98d16587b 100644 --- a/requirements.in +++ b/requirements.in @@ -16,6 +16,7 @@ clickhouse-driver==0.2.7 clickhouse-pool==0.5.3 conditional-cache==1.2 cryptography==39.0.2 +deltalake==0.22.3 dj-database-url==0.5.0 Django~=4.2.15 django-axes==5.9.0 @@ -34,7 +35,6 @@ djangorestframework==3.15.1 djangorestframework-csv==2.1.1 djangorestframework-dataclasses==1.2.0 dlt==1.3.0 -dlt[deltalake]==1.3.0 dnspython==2.2.1 drf-exceptions-hog==0.4.0 drf-extensions==0.7.0 diff --git a/requirements.txt b/requirements.txt index 639c98066ccd4..e4a1521f8dd7a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -138,8 +138,8 @@ defusedxml==0.6.0 # via # python3-openid # social-auth-core -deltalake==0.19.1 - # via dlt +deltalake==0.22.3 + # via -r requirements.in distro==1.9.0 # via openai dj-database-url==0.5.0 @@ -273,8 +273,6 @@ googleapis-common-protos==1.60.0 # via # google-api-core # grpcio-status -greenlet==3.1.1 - # via sqlalchemy grpcio==1.63.2 # via # -r requirements.in @@ -505,7 +503,6 @@ pyarrow==17.0.0 # via # -r requirements.in # deltalake - # dlt # sqlalchemy-bigquery pyasn1==0.5.0 # via
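To make the new stats_version=2 path concrete, here is a minimal usage sketch of the three helpers added in posthog/hogql_queries/experiments/funnels_statistics_v2.py above. The variant counts are made-up toy numbers; whether the result counts as significant depends on the FF_DISTRIBUTION_THRESHOLD, MIN_PROBABILITY_FOR_SIGNIFICANCE and EXPECTED_LOSS_SIGNIFICANCE_LEVEL constants imported from posthog.hogql_queries.experiments.

from posthog.schema import ExperimentVariantFunnelsBaseStats
from posthog.hogql_queries.experiments.funnels_statistics_v2 import (
    calculate_probabilities_v2,
    are_results_significant_v2,
    calculate_credible_intervals_v2,
)

# Toy funnel counts: 10% vs. 15% conversion over 10,000 exposed users per variant
control = ExperimentVariantFunnelsBaseStats(key="control", success_count=1000, failure_count=9000)
test = ExperimentVariantFunnelsBaseStats(key="test", success_count=1500, failure_count=8500)

# Posterior win probabilities, sampled from Beta(1, 1) priors updated with the observed counts
probabilities = calculate_probabilities_v2(control, [test])

# Exposure check, then win-probability threshold, then expected loss of picking the leader
significance_code, expected_loss = are_results_significant_v2(control, [test], probabilities)

# 95% credible intervals on each variant's conversion rate, keyed by variant key
credible_intervals = calculate_credible_intervals_v2([control, test])

The v2 helpers keep the same signatures as the v1 functions in funnels_statistics.py, which is what lets ExperimentFunnelsQueryRunner above switch on stats_version without changing anything for its callers.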