diff --git a/.github/workflows/rust-docker-build.yml b/.github/workflows/rust-docker-build.yml index 960dbd62d6015..acdcf0b342714 100644 --- a/.github/workflows/rust-docker-build.yml +++ b/.github/workflows/rust-docker-build.yml @@ -107,8 +107,6 @@ jobs: tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} platforms: linux/arm64,linux/amd64 - cache-from: type=gha - cache-to: type=gha,mode=max build-args: BIN=${{ matrix.image }} - name: Container image digest diff --git a/cypress/e2e/experiments.cy.ts b/cypress/e2e/experiments.cy.ts index 5f432e1e41dc1..9e661be34591e 100644 --- a/cypress/e2e/experiments.cy.ts +++ b/cypress/e2e/experiments.cy.ts @@ -42,25 +42,6 @@ describe('Experiments', () => { .type('test-variant-2') .should('have.value', 'test-variant-2') - // Continue to step 2 - cy.get('[data-attr="continue-experiment-creation"]').click() - - // Goal type selection is visible - cy.get('[data-attr="experiment-goal-type-select"]') - .should('be.visible') - .within(() => { - cy.contains('Conversion funnel').should('be.visible') - cy.contains('Trend').should('be.visible') - }) - - // Goal input is visible - cy.get('[data-attr="experiment-goal-input"]') - .should('be.visible') - .within(() => { - cy.get('li.ActionFilterRow').should('exist') - cy.get('button').contains('Add funnel step').should('exist') - }) - // Save experiment cy.get('[data-attr="save-experiment"]').first().click() }) @@ -98,10 +79,19 @@ describe('Experiments', () => { .type('test-variant-2') .should('have.value', 'test-variant-2') - // Continue creation - cy.get('[data-attr="continue-experiment-creation"]').first().click() // Save experiment cy.get('[data-attr="save-experiment"]').first().click() + + // Set the experiment goal once the experiment is drafted + cy.get('[data-attr="add-experiment-goal"]').click() + + // Wait for the goal modal to open and click the confirmation button + cy.get('.LemonModal__layout').should('be.visible') + cy.contains('Change experiment goal').should('be.visible') + cy.get('.LemonModal__footer').contains('button', 'Save').should('have.attr', 'aria-disabled', 'true') + cy.get('.LemonModal__content').contains('button', 'Add funnel step').click() + cy.get('.LemonModal__footer').contains('button', 'Save').should('not.have.attr', 'aria-disabled', 'true') + cy.get('.LemonModal__footer').contains('button', 'Save').click() } it('create, launch and stop experiment with new ui', () => { diff --git a/cypress/fixtures/api/decide.js b/cypress/fixtures/api/decide.js index 51c24fe885117..102f1211152c1 100644 --- a/cypress/fixtures/api/decide.js +++ b/cypress/fixtures/api/decide.js @@ -5,7 +5,6 @@ export function decideResponse(featureFlags) { }, toolbarParams: { toolbarVersion: 'toolbar', - jsURL: 'http://localhost:8234/', }, isAuthenticated: true, supportedCompression: ['gzip', 'gzip-js', 'lz64'], diff --git a/ee/clickhouse/views/experiments.py b/ee/clickhouse/views/experiments.py index b40f43f7fd1ad..dc4a3170b93e8 100644 --- a/ee/clickhouse/views/experiments.py +++ b/ee/clickhouse/views/experiments.py @@ -283,8 +283,10 @@ def validate_parameters(self, value): return value def create(self, validated_data: dict, *args: Any, **kwargs: Any) -> Experiment: - if not validated_data.get("filters"): - raise ValidationError("Filters are required to create an Experiment") + is_draft = "start_date" not in validated_data or validated_data["start_date"] is None + + if not validated_data.get("filters") and not is_draft: + raise ValidationError("Filters are required when creating a launched 
experiment") saved_metrics_data = validated_data.pop("saved_metrics_ids", []) @@ -299,8 +301,6 @@ def create(self, validated_data: dict, *args: Any, **kwargs: Any) -> Experiment: feature_flag_key = validated_data.pop("get_feature_flag_key") - is_draft = "start_date" not in validated_data or validated_data["start_date"] is None - properties = validated_data["filters"].get("properties", []) if properties: @@ -369,6 +369,14 @@ def create(self, validated_data: dict, *args: Any, **kwargs: Any) -> Experiment: return experiment def update(self, instance: Experiment, validated_data: dict, *args: Any, **kwargs: Any) -> Experiment: + if ( + not instance.filters.get("events") + and not instance.filters.get("actions") + and validated_data.get("start_date") + and not validated_data.get("filters") + ): + raise ValidationError("Filters are required when launching an experiment") + update_saved_metrics = "saved_metrics_ids" in validated_data saved_metrics_data = validated_data.pop("saved_metrics_ids", []) or [] diff --git a/ee/clickhouse/views/test/test_clickhouse_experiments.py b/ee/clickhouse/views/test/test_clickhouse_experiments.py index b82ac89727754..676193f58513e 100644 --- a/ee/clickhouse/views/test/test_clickhouse_experiments.py +++ b/ee/clickhouse/views/test/test_clickhouse_experiments.py @@ -681,30 +681,13 @@ def test_invalid_create(self): "end_date": None, "feature_flag_key": ff_key, "parameters": {}, - "filters": {}, # also invalid + "filters": {}, }, ) self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) self.assertEqual(response.json()["detail"], "This field may not be null.") - ff_key = "a-b-tests" - response = self.client.post( - f"/api/projects/{self.team.id}/experiments/", - { - "name": "None", - "description": "", - "start_date": None, - "end_date": None, - "feature_flag_key": ff_key, - "parameters": {}, - "filters": {}, # still invalid - }, - ) - - self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - self.assertEqual(response.json()["detail"], "Filters are required to create an Experiment") - def test_invalid_update(self): # Draft experiment ff_key = "a-b-tests" @@ -808,7 +791,12 @@ def test_draft_experiment_doesnt_have_FF_active_even_after_updates(self): # Now update response = self.client.patch( f"/api/projects/{self.team.id}/experiments/{id}", - {"description": "Bazinga", "filters": {}}, + { + "description": "Bazinga", + "filters": { + "events": [{"id": "$pageview"}], + }, + }, ) self.assertEqual(response.status_code, status.HTTP_200_OK) @@ -839,7 +827,7 @@ def test_launching_draft_experiment_activates_FF(self): "end_date": None, "feature_flag_key": ff_key, "parameters": {}, - "filters": {"events": []}, + "filters": {"events": [{"id": "$pageview"}]}, }, ) @@ -1732,6 +1720,148 @@ def test_create_experiment_updates_feature_flag_cache(self): }, ) + def test_create_draft_experiment_with_filters(self) -> None: + ff_key = "a-b-tests" + response = self.client.post( + f"/api/projects/{self.team.id}/experiments/", + { + "name": "Test Experiment", + "description": "", + "start_date": None, + "end_date": None, + "feature_flag_key": ff_key, + "parameters": None, + "filters": { + "events": [ + {"order": 0, "id": "$pageview"}, + {"order": 1, "id": "$pageleave"}, + ], + "properties": [], + }, + }, + ) + + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + self.assertEqual(response.json()["name"], "Test Experiment") + self.assertEqual(response.json()["feature_flag_key"], ff_key) + + def test_create_launched_experiment_with_filters(self) -> None: + 
ff_key = "a-b-tests" + response = self.client.post( + f"/api/projects/{self.team.id}/experiments/", + { + "name": "Test Experiment", + "description": "", + "start_date": "2021-12-01T10:23", + "end_date": None, + "feature_flag_key": ff_key, + "parameters": None, + "filters": { + "events": [ + {"order": 0, "id": "$pageview"}, + {"order": 1, "id": "$pageleave"}, + ], + "properties": [], + }, + }, + ) + + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + self.assertEqual(response.json()["name"], "Test Experiment") + self.assertEqual(response.json()["feature_flag_key"], ff_key) + + def test_create_draft_experiment_without_filters(self) -> None: + ff_key = "a-b-tests" + response = self.client.post( + f"/api/projects/{self.team.id}/experiments/", + { + "name": "Test Experiment", + "description": "", + "start_date": None, + "end_date": None, + "feature_flag_key": ff_key, + "parameters": None, + "filters": {}, + }, + ) + + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + self.assertEqual(response.json()["name"], "Test Experiment") + self.assertEqual(response.json()["feature_flag_key"], ff_key) + + def test_create_launched_experiment_without_filters(self) -> None: + ff_key = "a-b-tests" + response = self.client.post( + f"/api/projects/{self.team.id}/experiments/", + { + "name": "Test Experiment", + "description": "", + "start_date": "2021-12-01T10:23", + "end_date": None, + "feature_flag_key": ff_key, + "parameters": None, + "filters": {}, + }, + ) + + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertEqual(response.json()["detail"], "Filters are required when creating a launched experiment") + + def test_launch_draft_experiment_without_filters(self) -> None: + ff_key = "a-b-tests" + response = self.client.post( + f"/api/projects/{self.team.id}/experiments/", + { + "name": "Test Experiment", + "description": "", + "start_date": None, + "end_date": None, + "feature_flag_key": ff_key, + "parameters": None, + "filters": {}, + }, + ) + + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + draft_exp = response.json() + + response = self.client.patch( + f"/api/projects/{self.team.id}/experiments/{draft_exp['id']}", + { + "name": "Test Experiment", + "description": "", + "start_date": "2021-12-01T10:23", + "end_date": None, + "feature_flag_key": ff_key, + "parameters": None, + "filters": {}, + }, + ) + + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + self.assertEqual(response.json()["detail"], "Filters are required when launching an experiment") + + response = self.client.patch( + f"/api/projects/{self.team.id}/experiments/{draft_exp['id']}", + { + "name": "Test Experiment", + "description": "", + "start_date": "2021-12-01T10:23", + "end_date": None, + "feature_flag_key": ff_key, + "parameters": None, + "filters": { + "events": [ + {"order": 0, "id": "$pageview"}, + {"order": 1, "id": "$pageleave"}, + ], + "properties": [], + }, + }, + ) + + self.assertEqual(response.status_code, status.HTTP_200_OK) + class TestExperimentAuxiliaryEndpoints(ClickhouseTestMixin, APILicensedTest): def _generate_experiment(self, start_date="2024-01-01T10:23", extra_parameters=None): diff --git a/ee/surveys/summaries/summarize_surveys.py b/ee/surveys/summaries/summarize_surveys.py new file mode 100644 index 0000000000000..1e4b088484f55 --- /dev/null +++ b/ee/surveys/summaries/summarize_surveys.py @@ -0,0 +1,135 @@ +import json + +import openai + +from datetime import datetime +from typing import Optional, cast + +from 
posthog.hogql import ast +from posthog.hogql.parser import parse_select +from posthog.hogql_queries.insights.paginators import HogQLHasMorePaginator +from posthog.schema import HogQLQueryResponse +from posthog.utils import get_instance_region + +from prometheus_client import Histogram + +from posthog.api.activity_log import ServerTimingsGathered +from posthog.models import Team, User + +import structlog + +logger = structlog.get_logger(__name__) + +TOKENS_IN_PROMPT_HISTOGRAM = Histogram( + "posthog_survey_summary_tokens_in_prompt_histogram", + "histogram of the number of tokens in the prompt used to generate a survey summary", + buckets=[ + 0, + 10, + 50, + 100, + 500, + 1000, + 2000, + 3000, + 4000, + 5000, + 6000, + 7000, + 8000, + 10000, + 20000, + 30000, + 40000, + 50000, + 100000, + 128000, + float("inf"), + ], +) + + +def prepare_data(query_response: HogQLQueryResponse) -> list[str]: + response_values = [] + properties_list: list[dict] = [json.loads(x[1]) for x in query_response.results] + for props in properties_list: + response_values.extend([value for key, value in props.items() if key.startswith("$survey_response") and value]) + return response_values + + +def summarize_survey_responses( + survey_id: str, question_index: Optional[int], survey_start: datetime, survey_end: datetime, team: Team, user: User +): + timer = ServerTimingsGathered() + + with timer("prepare_query"): + paginator = HogQLHasMorePaginator(limit=100, offset=0) + q = parse_select( + """ + SELECT distinct_id, properties + FROM events + WHERE event == 'survey sent' + AND properties.$survey_id = {survey_id} + -- e.g. `$survey_response` or `$survey_response_2` + AND trim(JSONExtractString(properties, {survey_response_property})) != '' + AND timestamp >= {start_date} + AND timestamp <= {end_date} + """, + { + "survey_id": ast.Constant(value=survey_id), + "survey_response_property": ast.Constant( + value=f"$survey_response_{question_index}" if question_index else "$survey_response" + ), + "start_date": ast.Constant(value=survey_start), + "end_date": ast.Constant(value=survey_end), + }, + ) + + with timer("run_query"): + query_response = paginator.execute_hogql_query( + team=team, + query_type="survey_response_list_query", + query=cast(ast.SelectQuery, q), + ) + + with timer("llm_api_prep"): + instance_region = get_instance_region() or "HOBBY" + prepared_data = prepare_data(query_response) + + with timer("openai_completion"): + result = openai.chat.completions.create( + model="gpt-4o-mini", # allows 128k tokens + temperature=0.7, + messages=[ + { + "role": "system", + "content": """ + You are a product manager's assistant. You summarise survey responses from users for the product manager. + You don't do any other tasks. + """, + }, + { + "role": "user", + "content": f"""the survey responses are {prepared_data}.""", + }, + { + "role": "user", + "content": """ + generate a one or two paragraph summary of the survey response. + only summarize, the goal is to identify real user pain points and needs +use bullet points to identify the themes, and highlights of quotes to bring them to life +we're trying to identify what to work on + use as concise and simple language as is possible. + generate no text other than the summary. + the aim is to let people see themes in the responses received. 
return the text in github flavoured markdown format""", + }, + ], + user=f"{instance_region}/{user.pk}", + ) + + usage = result.usage.prompt_tokens if result.usage else None + if usage: + TOKENS_IN_PROMPT_HISTOGRAM.observe(usage) + + content: str = result.choices[0].message.content or "" + return {"content": content, "timings": timer.get_all_timings()} diff --git a/frontend/__snapshots__/scenes-app-experiments--complete-funnel-experiment--dark.png b/frontend/__snapshots__/scenes-app-experiments--complete-funnel-experiment--dark.png index 30d04ac4dcce1..574e2b03d57cb 100644 Binary files a/frontend/__snapshots__/scenes-app-experiments--complete-funnel-experiment--dark.png and b/frontend/__snapshots__/scenes-app-experiments--complete-funnel-experiment--dark.png differ diff --git a/frontend/__snapshots__/scenes-app-experiments--complete-funnel-experiment--light.png b/frontend/__snapshots__/scenes-app-experiments--complete-funnel-experiment--light.png index 7f1fb1ff1cf1d..3bedb60084fea 100644 Binary files a/frontend/__snapshots__/scenes-app-experiments--complete-funnel-experiment--light.png and b/frontend/__snapshots__/scenes-app-experiments--complete-funnel-experiment--light.png differ diff --git a/frontend/__snapshots__/scenes-app-experiments--running-trend-experiment--dark.png b/frontend/__snapshots__/scenes-app-experiments--running-trend-experiment--dark.png index 63381344731b3..610ec0666ad31 100644 Binary files a/frontend/__snapshots__/scenes-app-experiments--running-trend-experiment--dark.png and b/frontend/__snapshots__/scenes-app-experiments--running-trend-experiment--dark.png differ diff --git a/frontend/__snapshots__/scenes-app-experiments--running-trend-experiment--light.png b/frontend/__snapshots__/scenes-app-experiments--running-trend-experiment--light.png index 4c45d979edb38..5261fb6c96120 100644 Binary files a/frontend/__snapshots__/scenes-app-experiments--running-trend-experiment--light.png and b/frontend/__snapshots__/scenes-app-experiments--running-trend-experiment--light.png differ diff --git a/frontend/__snapshots__/scenes-app-experiments--running-trend-experiment-many-variants--dark.png b/frontend/__snapshots__/scenes-app-experiments--running-trend-experiment-many-variants--dark.png index 61d46bdc53b8f..b38b4908dda88 100644 Binary files a/frontend/__snapshots__/scenes-app-experiments--running-trend-experiment-many-variants--dark.png and b/frontend/__snapshots__/scenes-app-experiments--running-trend-experiment-many-variants--dark.png differ diff --git a/frontend/__snapshots__/scenes-app-experiments--running-trend-experiment-many-variants--light.png b/frontend/__snapshots__/scenes-app-experiments--running-trend-experiment-many-variants--light.png index c0d84c532e486..b8c9d02a5be0d 100644 Binary files a/frontend/__snapshots__/scenes-app-experiments--running-trend-experiment-many-variants--light.png and b/frontend/__snapshots__/scenes-app-experiments--running-trend-experiment-many-variants--light.png differ diff --git a/frontend/__snapshots__/scenes-app-sidepanels--side-panel-support-no-email--dark.png b/frontend/__snapshots__/scenes-app-sidepanels--side-panel-support-no-email--dark.png index 19af04e29656e..1f6f791e5e7a2 100644 Binary files a/frontend/__snapshots__/scenes-app-sidepanels--side-panel-support-no-email--dark.png and b/frontend/__snapshots__/scenes-app-sidepanels--side-panel-support-no-email--dark.png differ diff --git a/frontend/__snapshots__/scenes-app-sidepanels--side-panel-support-no-email--light.png 
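# Editor's sketch (not part of the diff): what prepare_data() in the summarize_surveys.py hunk
# above does to the query rows, shown on hand-written input. Each row is
# (distinct_id, properties_json) as selected by the HogQL query; only non-empty
# "$survey_response*" properties survive.
import json

rows = [
    ("user-1", json.dumps({"$survey_id": "abc", "$survey_response": "Love the product"})),
    ("user-2", json.dumps({"$survey_id": "abc", "$survey_response_2": "Docs could be clearer", "other": "x"})),
]

response_values: list[str] = []
for _distinct_id, props_json in rows:
    props = json.loads(props_json)
    response_values.extend(value for key, value in props.items() if key.startswith("$survey_response") and value)

print(response_values)  # ['Love the product', 'Docs could be clearer']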
b/frontend/__snapshots__/scenes-app-sidepanels--side-panel-support-no-email--light.png index 3b910e9bc5a3c..cf41e5aa74972 100644 Binary files a/frontend/__snapshots__/scenes-app-sidepanels--side-panel-support-no-email--light.png and b/frontend/__snapshots__/scenes-app-sidepanels--side-panel-support-no-email--light.png differ diff --git a/frontend/src/layout/navigation-3000/sidepanel/SidePanel.tsx b/frontend/src/layout/navigation-3000/sidepanel/SidePanel.tsx index 972ca1515de48..34c18f4fc6ff2 100644 --- a/frontend/src/layout/navigation-3000/sidepanel/SidePanel.tsx +++ b/frontend/src/layout/navigation-3000/sidepanel/SidePanel.tsx @@ -1,6 +1,6 @@ import './SidePanel.scss' -import { IconEllipsis, IconFeatures, IconFlag, IconGear, IconInfo, IconNotebook, IconSupport } from '@posthog/icons' +import { IconEllipsis, IconFeatures, IconGear, IconInfo, IconNotebook, IconSupport } from '@posthog/icons' import { LemonButton, LemonMenu, LemonMenuItems, LemonModal } from '@posthog/lemon-ui' import clsx from 'clsx' import { useActions, useValues } from 'kea' @@ -20,7 +20,6 @@ import { SidePanelActivation, SidePanelActivationIcon } from './panels/activatio import { SidePanelActivity, SidePanelActivityIcon } from './panels/activity/SidePanelActivity' import { SidePanelDiscussion, SidePanelDiscussionIcon } from './panels/discussion/SidePanelDiscussion' import { SidePanelDocs } from './panels/SidePanelDocs' -import { SidePanelExperimentFeatureFlag } from './panels/SidePanelExperimentFeatureFlag' import { SidePanelFeaturePreviews } from './panels/SidePanelFeaturePreviews' import { SidePanelSettings } from './panels/SidePanelSettings' import { SidePanelStatus, SidePanelStatusIcon } from './panels/SidePanelStatus' @@ -88,11 +87,6 @@ export const SIDE_PANEL_TABS: Record< Content: SidePanelStatus, noModalSupport: true, }, - [SidePanelTab.ExperimentFeatureFlag]: { - label: 'Release conditions', - Icon: IconFlag, - Content: SidePanelExperimentFeatureFlag, - }, } const DEFAULT_WIDTH = 512 diff --git a/frontend/src/layout/navigation-3000/sidepanel/panels/SidePanelExperimentFeatureFlag.tsx b/frontend/src/layout/navigation-3000/sidepanel/panels/SidePanelExperimentFeatureFlag.tsx deleted file mode 100644 index 5002f2bd78929..0000000000000 --- a/frontend/src/layout/navigation-3000/sidepanel/panels/SidePanelExperimentFeatureFlag.tsx +++ /dev/null @@ -1,163 +0,0 @@ -import { IconBalance } from '@posthog/icons' -import { LemonBanner, LemonButton, LemonDivider, LemonInput, LemonTable, Link, Spinner } from '@posthog/lemon-ui' -import { useActions, useValues } from 'kea' -import { router } from 'kea-router' -import { useEffect, useMemo } from 'react' -import { experimentLogic } from 'scenes/experiments/experimentLogic' -import { featureFlagLogic, FeatureFlagLogicProps } from 'scenes/feature-flags/featureFlagLogic' -import { FeatureFlagReleaseConditions } from 'scenes/feature-flags/FeatureFlagReleaseConditions' -import { urls } from 'scenes/urls' - -import { sidePanelStateLogic } from '../sidePanelStateLogic' - -export const SidePanelExperimentFeatureFlag = (): JSX.Element => { - const { closeSidePanel } = useActions(sidePanelStateLogic) - const { currentLocation } = useValues(router) - - useEffect(() => { - // Side panel state is persisted in local storage, so we need to check if we're on the experiment page, - // otherwise close the side panel - const isExperimentPath = /^\/project\/[0-9]+\/experiments\/[0-9]+/.test(currentLocation.pathname) - if (!isExperimentPath) { - closeSidePanel() - } - }, [currentLocation, 
closeSidePanel]) - - // Retrieve experiment ID from URL - const experimentId = useMemo(() => { - const match = currentLocation.pathname.match(/\/experiments\/(\d+)/) - return match ? parseInt(match[1]) : null - }, [currentLocation.pathname]) - - const { experiment } = useValues(experimentLogic({ experimentId: experimentId ?? 'new' })) - - const _featureFlagLogic = featureFlagLogic({ id: experiment.feature_flag?.id ?? null } as FeatureFlagLogicProps) - const { featureFlag, areVariantRolloutsValid, variantRolloutSum, featureFlagLoading, nonEmptyVariants } = - useValues(_featureFlagLogic) - const { setFeatureFlagFilters, saveSidebarExperimentFeatureFlag, distributeVariantsEqually } = - useActions(_featureFlagLogic) - - const variants = featureFlag?.filters?.multivariate?.variants || [] - - const handleRolloutPercentageChange = (index: number, value: number | undefined): void => { - if (!featureFlag?.filters?.multivariate || !value) { - return - } - - const updatedVariants = featureFlag.filters.multivariate.variants.map((variant, i) => - i === index ? { ...variant, rollout_percentage: value } : variant - ) - - const updatedFilters = { - ...featureFlag.filters, - multivariate: { ...featureFlag.filters.multivariate, variants: updatedVariants }, - } - - setFeatureFlagFilters(updatedFilters, null) - } - - if (featureFlagLoading || !featureFlag.id) { - return ( -
- -
- ) - } - - return ( -
- -
-
- Adjusting variant distribution or user targeting may impact the validity of your results. Adjust - only if you're aware of how changes will affect your experiment. -
-
- For full feature flag settings, go to{' '} - - {experiment.feature_flag?.key} - {' '} - . -
-
-
-
-

Experiment variants

- {value}, - width: '50%', - }, - { - title: ( -
- Rollout Percentage - - - -
- ), - dataIndex: 'rollout_percentage', - key: 'rollout_percentage', - render: (_, record, index) => ( - { - if (changedValue !== null) { - const valueInt = - changedValue !== undefined ? parseInt(changedValue.toString()) : 0 - if (!isNaN(valueInt)) { - handleRolloutPercentageChange(index, changedValue) - } - } - }} - min={0} - max={100} - suffix={%} - /> - ), - }, - ]} - /> - {variants.length > 0 && !areVariantRolloutsValid && ( -

- Percentage rollouts for variants must sum to 100 (currently {variantRolloutSum} - ). -

- )} -
- - - -
- { - saveSidebarExperimentFeatureFlag(featureFlag) - }} - > - Save - -
-
- ) -} diff --git a/frontend/src/layout/navigation-3000/sidepanel/panels/SidePanelSupport.tsx b/frontend/src/layout/navigation-3000/sidepanel/panels/SidePanelSupport.tsx index f1f6a5a1e8a7b..58e8f37d7faf0 100644 --- a/frontend/src/layout/navigation-3000/sidepanel/panels/SidePanelSupport.tsx +++ b/frontend/src/layout/navigation-3000/sidepanel/panels/SidePanelSupport.tsx @@ -4,7 +4,6 @@ import { IconDatabase, IconDecisionTree, IconFeatures, - IconFlask, IconHelmet, IconMap, IconMessage, @@ -59,11 +58,6 @@ const PRODUCTS = [ slug: 'feature-flags', icon: , }, - { - name: 'Experiments', - slug: 'experiments', - icon: , - }, { name: 'Surveys', slug: 'surveys', diff --git a/frontend/src/layout/navigation-3000/sidepanel/sidePanelLogic.tsx b/frontend/src/layout/navigation-3000/sidepanel/sidePanelLogic.tsx index 2a4add974a1d9..029b34b6cbf4a 100644 --- a/frontend/src/layout/navigation-3000/sidepanel/sidePanelLogic.tsx +++ b/frontend/src/layout/navigation-3000/sidepanel/sidePanelLogic.tsx @@ -1,5 +1,4 @@ import { connect, kea, path, selectors } from 'kea' -import { router } from 'kea-router' import { FEATURE_FLAGS } from 'lib/constants' import { featureFlagLogic } from 'lib/logic/featureFlagLogic' import { preflightLogic } from 'scenes/PreflightCheck/preflightLogic' @@ -40,8 +39,6 @@ export const sidePanelLogic = kea([ ['status'], userLogic, ['hasAvailableFeature'], - router, - ['currentLocation'], ], actions: [sidePanelStateLogic, ['closeSidePanel', 'openSidePanel']], }), @@ -52,7 +49,6 @@ export const sidePanelLogic = kea([ (isCloudOrDev, isReady, hasCompletedAllTasks, featureflags) => { const tabs: SidePanelTab[] = [] - tabs.push(SidePanelTab.ExperimentFeatureFlag) tabs.push(SidePanelTab.Notebooks) tabs.push(SidePanelTab.Docs) if (isCloudOrDev) { @@ -78,24 +74,8 @@ export const sidePanelLogic = kea([ ], visibleTabs: [ - (s) => [ - s.enabledTabs, - s.selectedTab, - s.sidePanelOpen, - s.unreadCount, - s.status, - s.hasAvailableFeature, - s.currentLocation, - ], - ( - enabledTabs, - selectedTab, - sidePanelOpen, - unreadCount, - status, - hasAvailableFeature, - currentLocation - ): SidePanelTab[] => { + (s) => [s.enabledTabs, s.selectedTab, s.sidePanelOpen, s.unreadCount, s.status, s.hasAvailableFeature], + (enabledTabs, selectedTab, sidePanelOpen, unreadCount, status, hasAvailableFeature): SidePanelTab[] => { return enabledTabs.filter((tab) => { if (tab === selectedTab && sidePanelOpen) { return true @@ -118,10 +98,6 @@ export const sidePanelLogic = kea([ return false } - if (tab === SidePanelTab.ExperimentFeatureFlag) { - return /^\/project\/[0-9]+\/experiments\/[0-9]+/.test(currentLocation.pathname) - } - return true }) }, diff --git a/frontend/src/lib/api.ts b/frontend/src/lib/api.ts index 0d0ddd38d5e7c..d91ab7592dea2 100644 --- a/frontend/src/lib/api.ts +++ b/frontend/src/lib/api.ts @@ -2144,6 +2144,13 @@ const api = { async getResponsesCount(): Promise<{ [key: string]: number }> { return await new ApiRequest().surveysResponsesCount().get() }, + async summarize_responses(surveyId: Survey['id'], questionIndex: number | undefined): Promise { + let apiRequest = new ApiRequest().survey(surveyId).withAction('summarize_responses') + if (questionIndex !== undefined) { + apiRequest = apiRequest.withQueryString('questionIndex=' + questionIndex) + } + return await apiRequest.create() + }, }, dataWarehouseTables: { diff --git a/frontend/src/lib/components/Hogfetti/Hogfetti.tsx b/frontend/src/lib/components/Hogfetti/Hogfetti.tsx index be24513044011..a071eb7c13d54 100644 --- 
a/frontend/src/lib/components/Hogfetti/Hogfetti.tsx +++ b/frontend/src/lib/components/Hogfetti/Hogfetti.tsx @@ -106,7 +106,7 @@ export const useHogfetti = (options: HogfettiOptions = {}): HogfettiHook => { const trigger = useCallback((): void => { const centerX = Math.random() * dimensions.width - const centerY = Math.random() * dimensions.height + const centerY = Math.random() * dimensions.height * 0.5 const newParticles = Array.from({ length: count }, () => createParticle(centerX, centerY)) setParticleSets((prev) => [...prev, newParticles]) diff --git a/frontend/src/lib/constants.tsx b/frontend/src/lib/constants.tsx index 29873afc7d87e..35ab2bcddc1bf 100644 --- a/frontend/src/lib/constants.tsx +++ b/frontend/src/lib/constants.tsx @@ -228,6 +228,7 @@ export const FEATURE_FLAGS = { DEAD_CLICKS_AUTOCAPTURE: 'dead-clicks-autocapture', // owner: @pauldambra #team-replay ONBOARDING_PRODUCT_MULTISELECT: 'onboarding-product-multiselect', // owner: @danielbachhuber #team-experiments EDIT_DWH_SOURCE_CONFIG: 'edit_dwh_source_config', // owner: @Gilbert09 #team-data-warehouse + AI_SURVEY_RESPONSE_SUMMARY: 'ai-survey-response-summary', // owner: @pauldambra } as const export type FeatureFlagKey = (typeof FEATURE_FLAGS)[keyof typeof FEATURE_FLAGS] diff --git a/frontend/src/queries/schema.json b/frontend/src/queries/schema.json index f9534dac0ec48..70330efd9efad 100644 --- a/frontend/src/queries/schema.json +++ b/frontend/src/queries/schema.json @@ -806,6 +806,9 @@ }, "BreakdownKeyType": { "anyOf": [ + { + "type": "integer" + }, { "type": "string" }, @@ -814,7 +817,17 @@ }, { "items": { - "type": ["string", "number"] + "anyOf": [ + { + "type": "integer" + }, + { + "type": "string" + }, + { + "type": "number" + } + ] }, "type": "array" }, diff --git a/frontend/src/scenes/billing/UnsubscribeSurveyModal.tsx b/frontend/src/scenes/billing/UnsubscribeSurveyModal.tsx index ea2912c20e42f..79d2447260270 100644 --- a/frontend/src/scenes/billing/UnsubscribeSurveyModal.tsx +++ b/frontend/src/scenes/billing/UnsubscribeSurveyModal.tsx @@ -110,7 +110,7 @@ export const UnsubscribeSurveyModal = ({ { resetUnsubscribeModalStep() reportSurveyDismissed(surveyID) diff --git a/frontend/src/scenes/debug/hog/HogRepl.tsx b/frontend/src/scenes/debug/hog/HogRepl.tsx index dc374c9e38ab3..72538355b0c21 100644 --- a/frontend/src/scenes/debug/hog/HogRepl.tsx +++ b/frontend/src/scenes/debug/hog/HogRepl.tsx @@ -1,14 +1,73 @@ -import { LemonButton } from '@posthog/lemon-ui' +import { printHogStringOutput } from '@posthog/hogvm' +import { LemonButton, LemonTable, LemonTabs } from '@posthog/lemon-ui' import { useActions, useValues } from 'kea' +import { JSONViewer } from 'lib/components/JSONViewer' import { CodeEditorInline } from 'lib/monaco/CodeEditorInline' +import React, { useState } from 'react' import { SceneExport } from 'scenes/sceneTypes' +import { renderHogQLX } from '~/queries/nodes/HogQLX/render' + import { hogReplLogic, ReplChunk as ReplChunkType } from './hogReplLogic' +export interface ReplResultsTableProps { + response: { + results: any[][] + columns: string[] + } +} + +export function ReplResultsTable({ response }: ReplResultsTableProps): JSX.Element { + const [activeTab, setActiveTab] = useState<'table' | 'json'>('table') + return ( +
+ ({ dataIndex: index, title: col }))} + dataSource={response.results} + /> + ), + }, + { + key: 'json', + label: 'JSON', + content: , + }, + { + key: 'raw', + label: 'Raw', + content:
{printHogStringOutput(response)}
, + }, + ]} + /> +
+ ) +} + +function printRichHogOutput(arg: any): JSX.Element | string { + if (typeof arg === 'object' && arg !== null) { + if ('__hx_tag' in arg) { + return renderHogQLX(arg) + } + if ('results' in arg && 'columns' in arg && Array.isArray(arg.results) && Array.isArray(arg.columns)) { + return + } + } + return printHogStringOutput(arg) +} + interface ReplChunkProps { chunk: ReplChunkType editFromHere: () => void } + export function ReplChunk({ chunk: { code, result, print, error, status }, editFromHere, @@ -40,7 +99,7 @@ export function ReplChunk({ )} - {print ? ( + {print && Array.isArray(print) ? (
# -
{print}
+
+ {print.map((line, index) => ( +
+ {line.map((arg, argIndex) => ( + + {printRichHogOutput(arg)} + {argIndex < line.length - 1 ? ' ' : ''} + + ))} +
+ ))} +
) : null} - {status === 'success' && ( + {status === 'success' && result !== undefined && (
{'<'} -
{String(result)}
+
{printRichHogOutput(result)}
)} {status === 'error' && ( diff --git a/frontend/src/scenes/debug/hog/hogReplLogic.ts b/frontend/src/scenes/debug/hog/hogReplLogic.ts index 182c90c64cead..ae3b09eb19531 100644 --- a/frontend/src/scenes/debug/hog/hogReplLogic.ts +++ b/frontend/src/scenes/debug/hog/hogReplLogic.ts @@ -1,4 +1,4 @@ -import { newHogCallable, newHogClosure, printHogStringOutput, VMState } from '@posthog/hogvm' +import { newHogCallable, newHogClosure, VMState } from '@posthog/hogvm' import { actions, kea, listeners, path, reducers, selectors } from 'kea' import { actionToUrl, urlToAction } from 'kea-router' import api from 'lib/api' @@ -10,7 +10,7 @@ import type { hogReplLogicType } from './hogReplLogicType' export interface ReplChunk { code: string result?: string - print?: string + print?: any[][] error?: string bytecode?: any[] locals?: any[] @@ -23,7 +23,7 @@ export const hogReplLogic = kea([ actions({ runCode: (code: string) => ({ code }), setResult: (index: number, result?: string, error?: string) => ({ index, result, error }), - print: (index: number, line?: string) => ({ index, line }), + print: (index: number, line: any[]) => ({ index, line }), setBytecode: (index: number, bytecode: any[], locals: any[]) => ({ index, bytecode, locals }), setVMState: (index: number, state: any) => ({ index, state }), setCurrentCode: (code: string) => ({ code }), @@ -46,7 +46,7 @@ export const hogReplLogic = kea([ state.map((chunk, i) => (i === index ? { ...chunk, bytecode, locals } : chunk)), print: (state, { index, line }) => state.map((chunk, i) => - i === index ? { ...chunk, print: (chunk.print ? chunk.print + '\n' : '') + line } : chunk + i === index ? { ...chunk, print: [...(chunk.print ?? []), line] } : chunk ), setVMState: (state, { index, state: vmState }) => state.map((chunk, i) => (i === index ? { ...chunk, state: vmState } : chunk)), @@ -148,7 +148,7 @@ export const hogReplLogic = kea([ repl: true, functions: { print: (...args: any[]) => { - actions.print(index, args.map((arg) => printHogStringOutput(arg)).join(' ')) + actions.print(index, args) }, }, }) @@ -160,7 +160,7 @@ export const hogReplLogic = kea([ : (result.state?.stack?.length ?? 0) > 0 ? 
result.state?.stack?.[result.state.stack.length - 1] : 'null' - actions.setResult(index, printHogStringOutput(response)) + actions.setResult(index, response) actions.setVMState(index, result.state) } catch (error: any) { // Handle errors @@ -174,12 +174,26 @@ export const hogReplLogic = kea([ }, })), actionToUrl(({ values }) => { - const fn = (): [string, undefined, Record, { replace: true }] | undefined => { + const fn = (): [string, undefined, Record | undefined, { replace: true }] | undefined => { if (values.replChunks.length > 0) { - const code = [...values.replChunks.map((chunk) => chunk.code), values.currentCode] - .filter((a) => !!a) - .join('\n') - return [urls.debugHog(), undefined, { code }, { replace: true }] + // Chrome has a 2MB limit for the HASH params, set ours at 1MB + const replChunksLength = JSON.stringify(values.replChunks).length + if (replChunksLength > 1024 * 1024) { + // Try with just the code + const newCode = values.replChunks.map((chunk) => chunk.code).join('\n') + if (newCode.length > 1024 * 1024) { + // Still not enough, abort + return [urls.debugHog(), undefined, undefined, { replace: true }] + } + return [urls.debugHog(), undefined, { code: newCode }, { replace: true }] + } + + return [ + urls.debugHog(), + undefined, + { repl: values.replChunks, code: values.currentCode }, + { replace: true }, + ] } } @@ -194,8 +208,9 @@ export const hogReplLogic = kea([ } }), urlToAction(({ actions, values }) => ({ - [urls.debugHog()]: (_, __, { code }) => { - if (code && !values.currentCode && values.replChunks.length === 0) { + [urls.debugHog()]: (_, __, { repl, code }) => { + if ((repl || code) && !values.currentCode && values.replChunks.length === 0) { + actions.setReplChunks(repl) actions.setCurrentCode(code) } }, diff --git a/frontend/src/scenes/experiments/ExperimentForm.tsx b/frontend/src/scenes/experiments/ExperimentForm.tsx index d34838d40224f..a1c3f64c53d50 100644 --- a/frontend/src/scenes/experiments/ExperimentForm.tsx +++ b/frontend/src/scenes/experiments/ExperimentForm.tsx @@ -2,33 +2,31 @@ import './Experiment.scss' import { IconPlusSmall, IconTrash } from '@posthog/icons' import { LemonDivider, LemonInput, LemonTextArea, Tooltip } from '@posthog/lemon-ui' -import { BindLogic, useActions, useValues } from 'kea' +import { useActions, useValues } from 'kea' import { Form, Group } from 'kea-forms' import { ExperimentVariantNumber } from 'lib/components/SeriesGlyph' import { FEATURE_FLAGS, MAX_EXPERIMENT_VARIANTS } from 'lib/constants' -import { IconChevronLeft } from 'lib/lemon-ui/icons' import { LemonButton } from 'lib/lemon-ui/LemonButton' import { LemonField } from 'lib/lemon-ui/LemonField' import { LemonRadio } from 'lib/lemon-ui/LemonRadio' import { LemonSelect } from 'lib/lemon-ui/LemonSelect' import { capitalizeFirstLetter } from 'lib/utils' -import { useEffect } from 'react' import { experimentsLogic } from 'scenes/experiments/experimentsLogic' -import { insightDataLogic } from 'scenes/insights/insightDataLogic' -import { insightLogic } from 'scenes/insights/insightLogic' -import { Query } from '~/queries/Query/Query' -import { InsightType } from '~/types' - -import { EXPERIMENT_INSIGHT_ID } from './constants' import { experimentLogic } from './experimentLogic' -import { ExperimentInsightCreator } from './MetricSelector' -const StepInfo = (): JSX.Element => { - const { experiment, featureFlags } = useValues(experimentLogic) - const { addExperimentGroup, removeExperimentGroup, moveToNextFormStep, setExperimentType } = - useActions(experimentLogic) +const 
ExperimentFormFields = (): JSX.Element => { + const { experiment, featureFlags, groupTypes, aggregationLabel } = useValues(experimentLogic) + const { + addExperimentGroup, + removeExperimentGroup, + setExperiment, + setNewExperimentInsight, + createExperiment, + setExperimentType, + } = useActions(experimentLogic) const { webExperimentsAvailable } = useValues(experimentsLogic) + return (
@@ -91,6 +89,38 @@ const StepInfo = (): JSX.Element => { />
)} +
+

Participant type

+
+ The type on which to aggregate metrics. You can change this at any time during the experiment. +
+ + { + const groupTypeIndex = rawGroupTypeIndex !== -1 ? rawGroupTypeIndex : undefined + + setExperiment({ + parameters: { + ...experiment.parameters, + aggregation_group_type_index: groupTypeIndex ?? undefined, + }, + }) + setNewExperimentInsight() + }} + options={[ + { value: -1, label: 'Persons' }, + ...Array.from(groupTypes.values()).map((groupType) => ({ + value: groupType.group_type_index, + label: capitalizeFirstLetter(aggregationLabel(groupType.group_type_index).plural), + })), + ]} + /> +

Variants

Add up to 9 variants to test against your control.
@@ -187,135 +217,6 @@ const StepInfo = (): JSX.Element => {
)}
- moveToNextFormStep()} - > - Continue - - - ) -} - -const StepGoal = (): JSX.Element => { - const { experiment, experimentInsightType, groupTypes, aggregationLabel } = useValues(experimentLogic) - const { setExperiment, setNewExperimentInsight, createExperiment } = useActions(experimentLogic) - - // insightLogic - const logic = insightLogic({ dashboardItemId: EXPERIMENT_INSIGHT_ID }) - const { insightProps } = useValues(logic) - - // insightDataLogic - const { query } = useValues(insightDataLogic(insightProps)) - - return ( -
-
- {groupTypes.size > 0 && ( -
-

Participant type

-
- The type on which to aggregate metrics. You can change this at any time during the - experiment. -
- - { - const groupTypeIndex = rawGroupTypeIndex !== -1 ? rawGroupTypeIndex : undefined - - setExperiment({ - parameters: { - ...experiment.parameters, - aggregation_group_type_index: groupTypeIndex ?? undefined, - }, - }) - setNewExperimentInsight() - }} - options={[ - { value: -1, label: 'Persons' }, - ...Array.from(groupTypes.values()).map((groupType) => ({ - value: groupType.group_type_index, - label: capitalizeFirstLetter(aggregationLabel(groupType.group_type_index).plural), - })), - ]} - /> -
- )} -
-

Goal type

-
- You can change this at any time during the experiment. -
- -
- { - val && - setNewExperimentInsight({ - insight: val, - properties: experiment?.filters?.properties, - }) - }} - options={[ - { - value: InsightType.FUNNELS, - label: ( -
-
Conversion funnel
-
- Track how many people complete a sequence of actions and/or events -
-
- ), - }, - { - value: InsightType.TRENDS, - label: ( -
-
Trend
-
- Track the total count of a specific event or action. -
-
- ), - }, - ]} - /> -
-
-
-

Goal criteria

-
- {experimentInsightType === InsightType.FUNNELS - ? 'Create the funnel you want to measure.' - : 'Select a single metric to track.'} -
- -
- -
-
-
-

Goal preview

-
- - - -
-
-
{ } export function ExperimentForm(): JSX.Element { - const { currentFormStep, props } = useValues(experimentLogic) - const { setCurrentFormStep } = useActions(experimentLogic) - - const stepComponents = { - 0: , - 1: , - } - const CurrentStepComponent = (currentFormStep && stepComponents[currentFormStep]) || - - useEffect(() => { - setCurrentFormStep(0) - }, []) + const { props } = useValues(experimentLogic) return (
- {currentFormStep > 0 && ( - } - type="secondary" - className="my-4" - onClick={() => { - setCurrentFormStep(currentFormStep - 1) - }} - > - Back - - )}
- {CurrentStepComponent} +
) diff --git a/frontend/src/scenes/experiments/ExperimentView/DistributionTable.tsx b/frontend/src/scenes/experiments/ExperimentView/DistributionTable.tsx index bb07d8914cf41..f6d45ad12d314 100644 --- a/frontend/src/scenes/experiments/ExperimentView/DistributionTable.tsx +++ b/frontend/src/scenes/experiments/ExperimentView/DistributionTable.tsx @@ -1,23 +1,138 @@ import '../Experiment.scss' -import { IconFlag } from '@posthog/icons' -import { LemonButton, LemonDialog, LemonTable, LemonTableColumns } from '@posthog/lemon-ui' +import { IconBalance, IconFlag } from '@posthog/icons' +import { + LemonBanner, + LemonButton, + LemonDialog, + LemonInput, + LemonModal, + LemonTable, + LemonTableColumns, +} from '@posthog/lemon-ui' import { useActions, useValues } from 'kea' import { AuthorizedUrlList } from 'lib/components/AuthorizedUrlList/AuthorizedUrlList' import { AuthorizedUrlListType } from 'lib/components/AuthorizedUrlList/authorizedUrlListLogic' import { IconOpenInApp } from 'lib/lemon-ui/icons' +import { featureFlagLogic, FeatureFlagLogicProps } from 'scenes/feature-flags/featureFlagLogic' -import { sidePanelStateLogic } from '~/layout/navigation-3000/sidepanel/sidePanelStateLogic' -import { MultivariateFlagVariant, SidePanelTab } from '~/types' +import { Experiment, MultivariateFlagVariant } from '~/types' import { experimentLogic } from '../experimentLogic' import { VariantTag } from './components' import { VariantScreenshot } from './VariantScreenshot' +export function DistributionModal({ experimentId }: { experimentId: Experiment['id'] }): JSX.Element { + const { experiment, experimentLoading, isDistributionModalOpen } = useValues(experimentLogic({ experimentId })) + const { closeDistributionModal } = useActions(experimentLogic({ experimentId })) + + const _featureFlagLogic = featureFlagLogic({ id: experiment.feature_flag?.id ?? null } as FeatureFlagLogicProps) + const { featureFlag, areVariantRolloutsValid, variantRolloutSum } = useValues(_featureFlagLogic) + const { setFeatureFlagFilters, distributeVariantsEqually, saveSidebarExperimentFeatureFlag } = + useActions(_featureFlagLogic) + + const handleRolloutPercentageChange = (index: number, value: number | undefined): void => { + if (!featureFlag?.filters?.multivariate || !value) { + return + } + + const updatedVariants = featureFlag.filters.multivariate.variants.map((variant, i) => + i === index ? { ...variant, rollout_percentage: value } : variant + ) + + setFeatureFlagFilters( + { + ...featureFlag.filters, + multivariate: { ...featureFlag.filters.multivariate, variants: updatedVariants }, + }, + null + ) + } + + return ( + + + Cancel + + { + saveSidebarExperimentFeatureFlag(featureFlag) + closeDistributionModal() + }} + type="primary" + loading={experimentLoading} + disabled={!areVariantRolloutsValid} + > + Save + +
+ } + > +
+ + Adjusting variant distribution may impact the validity of your results. Adjust only if you're aware + of how changes will affect your experiment. + + +
+
+

Variant Distribution

+ } + > + Distribute equally + +
+ + {value}, + }, + { + title: 'Rollout Percentage', + dataIndex: 'rollout_percentage', + render: (_, record, index) => ( + handleRolloutPercentageChange(index, value)} + min={0} + max={100} + suffix={%} + /> + ), + }, + ]} + /> + + {!areVariantRolloutsValid && ( +

+ Percentage rollouts must sum to 100 (currently {variantRolloutSum}). +

+ )} +
+
+ + ) +} + export function DistributionTable(): JSX.Element { + const { openDistributionModal } = useActions(experimentLogic) const { experimentId, experiment, experimentResults } = useValues(experimentLogic) const { reportExperimentReleaseConditionsViewed } = useActions(experimentLogic) - const { openSidePanel } = useActions(sidePanelStateLogic) const onSelectElement = (variant: string): void => { LemonDialog.open({ @@ -110,7 +225,7 @@ export function DistributionTable(): JSX.Element { } onClick={() => { - openSidePanel(SidePanelTab.ExperimentFeatureFlag) + openDistributionModal() reportExperimentReleaseConditionsViewed(experiment.id) }} type="secondary" diff --git a/frontend/src/scenes/experiments/ExperimentView/ExperimentView.tsx b/frontend/src/scenes/experiments/ExperimentView/ExperimentView.tsx index de54d1461014b..59b38ffaf3163 100644 --- a/frontend/src/scenes/experiments/ExperimentView/ExperimentView.tsx +++ b/frontend/src/scenes/experiments/ExperimentView/ExperimentView.tsx @@ -1,6 +1,6 @@ import '../Experiment.scss' -import { LemonDivider } from '@posthog/lemon-ui' +import { LemonDivider, LemonTabs } from '@posthog/lemon-ui' import { useActions, useValues } from 'kea' import { WebExperimentImplementationDetails } from 'scenes/experiments/WebExperimentImplementationDetails' @@ -14,19 +14,66 @@ import { ResultsHeader, } from './components' import { DataCollection } from './DataCollection' -import { DistributionTable } from './DistributionTable' +import { DistributionModal, DistributionTable } from './DistributionTable' import { ExperimentExposureModal, ExperimentGoalModal, Goal } from './Goal' import { Info } from './Info' import { Overview } from './Overview' -import { ReleaseConditionsTable } from './ReleaseConditionsTable' +import { ReleaseConditionsModal, ReleaseConditionsTable } from './ReleaseConditionsTable' import { Results } from './Results' import { SecondaryMetricsTable } from './SecondaryMetricsTable' +const ResultsTab = (): JSX.Element => { + const { experiment, experimentResults } = useValues(experimentLogic) + const { updateExperimentSecondaryMetrics } = useActions(experimentLogic) + + const hasResultsInsight = experimentResults && experimentResults.insight + + return ( +
+ {hasResultsInsight ? ( + + ) : ( + <> + {experiment.type === 'web' ? ( + + ) : ( + + )} + + {experiment.start_date && ( +
+ + +
+ )} + + )} + updateExperimentSecondaryMetrics(metrics)} + initialMetrics={experiment.secondary_metrics} + defaultAggregationType={experiment.parameters?.aggregation_group_type_index} + /> +
+ ) +} + +const VariantsTab = (): JSX.Element => { + return ( +
+ + +
+ ) +} + export function ExperimentView(): JSX.Element { - const { experiment, experimentLoading, experimentResultsLoading, experimentId, experimentResults } = + const { experimentLoading, experimentResultsLoading, experimentId, experimentResults, tabKey } = useValues(experimentLogic) - const { updateExperimentSecondaryMetrics } = useActions(experimentLogic) + const { setTabKey } = useActions(experimentLogic) + + const hasResultsInsight = experimentResults && experimentResults.insight return ( <> @@ -39,25 +86,14 @@ export function ExperimentView(): JSX.Element { {experimentResultsLoading ? ( - ) : experimentResults && experimentResults.insight ? ( - <> -
- - -
-
-
- -
- -
- -
-
- - ) : ( <> + {hasResultsInsight ? ( +
+ + +
+ ) : null}
@@ -67,30 +103,28 @@ export function ExperimentView(): JSX.Element {
- {experiment.type === 'web' ? ( - - ) : ( - - )} - - {experiment.start_date && ( -
- - -
- )} + setTabKey(key)} + tabs={[ + { + key: 'results', + label: 'Results', + content: , + }, + { + key: 'variants', + label: 'Variants', + content: , + }, + ]} + /> )} - updateExperimentSecondaryMetrics(metrics)} - initialMetrics={experiment.secondary_metrics} - defaultAggregationType={experiment.parameters?.aggregation_group_type_index} - /> - - + + )} diff --git a/frontend/src/scenes/experiments/ExperimentView/Goal.tsx b/frontend/src/scenes/experiments/ExperimentView/Goal.tsx index 6f4ab76a528a4..776cd61c16d78 100644 --- a/frontend/src/scenes/experiments/ExperimentView/Goal.tsx +++ b/frontend/src/scenes/experiments/ExperimentView/Goal.tsx @@ -1,6 +1,6 @@ import '../Experiment.scss' -import { IconInfo } from '@posthog/icons' +import { IconInfo, IconPlus } from '@posthog/icons' import { LemonButton, LemonDivider, LemonModal, Tooltip } from '@posthog/lemon-ui' import { useActions, useValues } from 'kea' import { Field, Form } from 'kea-forms' @@ -211,7 +211,7 @@ export function ExperimentExposureModal({ experimentId }: { experimentId: Experi } export function Goal(): JSX.Element { - const { experiment, experimentId, experimentInsightType, experimentMathAggregationForTrends } = + const { experiment, experimentId, experimentInsightType, experimentMathAggregationForTrends, hasGoalSet } = useValues(experimentLogic) const { openExperimentGoalModal } = useActions(experimentLogic({ experimentId })) @@ -235,27 +235,44 @@ export function Goal(): JSX.Element { -
-
-
- {experimentInsightType === InsightType.FUNNELS ? 'Conversion goal steps' : 'Trend goal'} + {!hasGoalSet ? ( +
+
+ Add the main goal before launching the experiment.
- - - Change goal + } + type="secondary" + size="small" + data-attr="add-experiment-goal" + onClick={openExperimentGoalModal} + > + Add goal
- {experimentInsightType === InsightType.TRENDS && !experimentMathAggregationForTrends() && ( - <> - -
-
- -
+ ) : ( +
+
+
+ {experimentInsightType === InsightType.FUNNELS ? 'Conversion goal steps' : 'Trend goal'}
- - )} -
+ + + Change goal + +
+ {experimentInsightType === InsightType.TRENDS && !experimentMathAggregationForTrends() && ( + <> + +
+
+ +
+
+ + )} +
+ )}
) } diff --git a/frontend/src/scenes/experiments/ExperimentView/ReleaseConditionsTable.tsx b/frontend/src/scenes/experiments/ExperimentView/ReleaseConditionsTable.tsx index 5b9c8bac492bb..dfe6130db788e 100644 --- a/frontend/src/scenes/experiments/ExperimentView/ReleaseConditionsTable.tsx +++ b/frontend/src/scenes/experiments/ExperimentView/ReleaseConditionsTable.tsx @@ -1,20 +1,68 @@ import '../Experiment.scss' import { IconFlag } from '@posthog/icons' -import { LemonButton, LemonTable, LemonTableColumns, LemonTag } from '@posthog/lemon-ui' +import { LemonBanner, LemonButton, LemonModal, LemonTable, LemonTableColumns, LemonTag } from '@posthog/lemon-ui' import { useActions, useValues } from 'kea' +import { featureFlagLogic, FeatureFlagLogicProps } from 'scenes/feature-flags/featureFlagLogic' +import { FeatureFlagReleaseConditions } from 'scenes/feature-flags/FeatureFlagReleaseConditions' -import { sidePanelStateLogic } from '~/layout/navigation-3000/sidepanel/sidePanelStateLogic' import { groupsModel } from '~/models/groupsModel' -import { FeatureFlagGroupType, SidePanelTab } from '~/types' +import { Experiment, FeatureFlagGroupType } from '~/types' import { experimentLogic } from '../experimentLogic' +export function ReleaseConditionsModal({ experimentId }: { experimentId: Experiment['id'] }): JSX.Element { + const { experiment, isReleaseConditionsModalOpen } = useValues(experimentLogic({ experimentId })) + const { closeReleaseConditionsModal } = useActions(experimentLogic({ experimentId })) + + const _featureFlagLogic = featureFlagLogic({ id: experiment.feature_flag?.id ?? null } as FeatureFlagLogicProps) + const { featureFlag, nonEmptyVariants } = useValues(_featureFlagLogic) + const { setFeatureFlagFilters, saveSidebarExperimentFeatureFlag } = useActions(_featureFlagLogic) + + return ( + + + Cancel + + { + saveSidebarExperimentFeatureFlag(featureFlag) + closeReleaseConditionsModal() + }} + type="primary" + > + Save + +
+ } + > +
+ + Adjusting user targeting may impact the validity of your results. Adjust only if you're aware of how + changes will affect your experiment. + + + +
+ + ) +} + export function ReleaseConditionsTable(): JSX.Element { const { experiment } = useValues(experimentLogic) - const { reportExperimentReleaseConditionsViewed } = useActions(experimentLogic) + const { reportExperimentReleaseConditionsViewed, openReleaseConditionsModal } = useActions(experimentLogic) const { aggregationLabel } = useValues(groupsModel) - const { openSidePanel } = useActions(sidePanelStateLogic) const columns: LemonTableColumns = [ { @@ -67,7 +115,7 @@ export function ReleaseConditionsTable(): JSX.Element { } onClick={() => { - openSidePanel(SidePanelTab.ExperimentFeatureFlag) + openReleaseConditionsModal() reportExperimentReleaseConditionsViewed(experiment.id) }} type="secondary" diff --git a/frontend/src/scenes/experiments/ExperimentView/components.tsx b/frontend/src/scenes/experiments/ExperimentView/components.tsx index 2348635139c67..ddcd2bbd14bd1 100644 --- a/frontend/src/scenes/experiments/ExperimentView/components.tsx +++ b/frontend/src/scenes/experiments/ExperimentView/components.tsx @@ -451,6 +451,7 @@ export function PageHeaderCustom(): JSX.Element { areResultsSignificant, isSingleVariantShipped, featureFlags, + hasGoalSet, } = useValues(experimentLogic) const { launchExperiment, @@ -473,6 +474,9 @@ export function PageHeaderCustom(): JSX.Element { type="primary" data-attr="launch-experiment" onClick={() => launchExperiment()} + disabledReason={ + !hasGoalSet ? 'Add the main goal before launching the experiment' : undefined + } > Launch @@ -746,7 +750,8 @@ export function ActionBanner(): JSX.Element { if (!isExperimentRunning) { return ( - Your experiment is in draft mode. You can edit your variants, adjust release conditions, and{' '} + Your experiment is in draft mode. You can set the goal, edit the variants, adjust release conditions, + and{' '} test your feature flag diff --git a/frontend/src/scenes/experiments/experimentLogic.tsx b/frontend/src/scenes/experiments/experimentLogic.tsx index a928b09db82cd..46f0c259b2fb1 100644 --- a/frontend/src/scenes/experiments/experimentLogic.tsx +++ b/frontend/src/scenes/experiments/experimentLogic.tsx @@ -186,9 +186,12 @@ export const experimentLogic = kea([ closeExperimentCollectionGoalModal: true, openShipVariantModal: true, closeShipVariantModal: true, - setCurrentFormStep: (stepIndex: number) => ({ stepIndex }), - moveToNextFormStep: true, + openDistributionModal: true, + closeDistributionModal: true, + openReleaseConditionsModal: true, + closeReleaseConditionsModal: true, updateExperimentVariantImages: (variantPreviewMediaIds: Record) => ({ variantPreviewMediaIds }), + setTabKey: (tabKey: string) => ({ tabKey }), }), reducers({ experiment: [ @@ -332,6 +335,20 @@ export const experimentLogic = kea([ closeShipVariantModal: () => false, }, ], + isDistributionModalOpen: [ + false, + { + openDistributionModal: () => true, + closeDistributionModal: () => false, + }, + ], + isReleaseConditionsModalOpen: [ + false, + { + openReleaseConditionsModal: () => true, + closeReleaseConditionsModal: () => false, + }, + ], experimentValuesChangedLocally: [ false, { @@ -340,10 +357,10 @@ export const experimentLogic = kea([ updateExperiment: () => false, }, ], - currentFormStep: [ - 0, + tabKey: [ + 'results', { - setCurrentFormStep: (_, { stepIndex }) => stepIndex, + setTabKey: (_, { tabKey }) => tabKey, }, ], }), @@ -351,6 +368,16 @@ export const experimentLogic = kea([ createExperiment: async ({ draft }) => { const { recommendedRunningTime, recommendedSampleSize, minimumDetectableEffect } = values + 
actions.touchExperimentField('name') + actions.touchExperimentField('feature_flag_key') + values.experiment.parameters.feature_flag_variants.forEach((_, i) => + actions.touchExperimentField(`parameters.feature_flag_variants.${i}.key`) + ) + + if (hasFormErrors(values.experimentErrors)) { + return + } + // Minimum Detectable Effect is calculated based on a loaded insight // Terminate if the insight did not manage to load in time if (!minimumDetectableEffect) { @@ -503,8 +530,6 @@ export const experimentLogic = kea([ loadExperimentSuccess: async ({ experiment }) => { experiment && actions.reportExperimentViewed(experiment) - actions.setNewExperimentInsight(experiment?.filters) - if (experiment?.start_date) { actions.loadExperimentResults() actions.loadSecondaryMetricResults() @@ -697,20 +722,6 @@ export const experimentLogic = kea([ openExperimentExposureModal: async () => { actions.setExperimentExposureInsight(values.experiment?.parameters?.custom_exposure_filter) }, - moveToNextFormStep: async () => { - const { currentFormStep } = values - if (currentFormStep === 0) { - actions.touchExperimentField('name') - actions.touchExperimentField('feature_flag_key') - values.experiment.parameters.feature_flag_variants.forEach((_, i) => - actions.touchExperimentField(`parameters.feature_flag_variants.${i}.key`) - ) - } - - if (!hasFormErrors(values.experimentErrors)) { - actions.setCurrentFormStep(currentFormStep + 1) - } - }, createExposureCohortSuccess: ({ exposureCohort }) => { if (exposureCohort && exposureCohort.id !== 'new') { cohortsModel.actions.cohortCreated(exposureCohort) @@ -1532,6 +1543,17 @@ export const experimentLogic = kea([ ) }, ], + hasGoalSet: [ + (s) => [s.experiment], + (experiment): boolean => { + const filters = experiment?.filters + return !!( + (filters?.actions && filters.actions.length > 0) || + (filters?.events && filters.events.length > 0) || + (filters?.data_warehouse && filters.data_warehouse.length > 0) + ) + }, + ], }), forms(({ actions }) => ({ experiment: { @@ -1561,7 +1583,6 @@ export const experimentLogic = kea([ const parsedId = id === 'new' ? 
'new' : parseInt(id) if (parsedId === 'new') { actions.resetExperiment() - actions.setNewExperimentInsight() } if (parsedId !== 'new' && parsedId === values.experimentId) { diff --git a/frontend/src/scenes/projectLogic.ts b/frontend/src/scenes/projectLogic.ts index 9e67d253db5ad..8ebb2c2ee207f 100644 --- a/frontend/src/scenes/projectLogic.ts +++ b/frontend/src/scenes/projectLogic.ts @@ -75,7 +75,12 @@ export const projectLogic = kea([ return patchedProject }, createProject: async ({ name }: { name: string }) => { - return await api.create('api/projects/', { name }) + try { + return await api.create('api/projects/', { name }) + } catch (error: any) { + lemonToast.error('Failed to create project') + return values.currentProject + } }, }, ], @@ -83,7 +88,7 @@ export const projectLogic = kea([ selectors({ currentProjectId: [(s) => [s.currentProject], (currentProject) => currentProject?.id || null], }), - listeners(({ actions }) => ({ + listeners(({ actions, values }) => ({ loadCurrentProjectSuccess: ({ currentProject }) => { if (currentProject) { ApiConfig.setCurrentProjectId(currentProject.id) @@ -102,7 +107,7 @@ export const projectLogic = kea([ lemonToast.success('Project has been deleted') }, createProjectSuccess: ({ currentProject }) => { - if (currentProject) { + if (currentProject && currentProject.id !== values.currentProject?.id) { actions.switchTeam(currentProject.id) } }, diff --git a/frontend/src/scenes/surveys/surveyLogic.tsx b/frontend/src/scenes/surveys/surveyLogic.tsx index 12b4b20e8620f..528aac6db6e96 100644 --- a/frontend/src/scenes/surveys/surveyLogic.tsx +++ b/frontend/src/scenes/surveys/surveyLogic.tsx @@ -181,6 +181,11 @@ export const surveyLogic = kea([ setFlagPropertyErrors: (errors: any) => ({ errors }), }), loaders(({ props, actions, values }) => ({ + responseSummary: { + summarize: async ({ questionIndex }: { questionIndex?: number }) => { + return api.surveys.summarize_responses(props.id, questionIndex) + }, + }, survey: { loadSurvey: async () => { if (props.id && props.id !== 'new') { diff --git a/frontend/src/scenes/surveys/surveyViewViz.tsx b/frontend/src/scenes/surveys/surveyViewViz.tsx index 324c53958dca3..a2ab8db7c32f8 100644 --- a/frontend/src/scenes/surveys/surveyViewViz.tsx +++ b/frontend/src/scenes/surveys/surveyViewViz.tsx @@ -1,10 +1,21 @@ -import { IconInfo } from '@posthog/icons' -import { LemonTable } from '@posthog/lemon-ui' +import { + IconInfo, + IconSparkles, + IconThumbsDown, + IconThumbsDownFilled, + IconThumbsUp, + IconThumbsUpFilled, +} from '@posthog/icons' +import { LemonButton, LemonTable, Spinner } from '@posthog/lemon-ui' import { BindLogic, useActions, useValues } from 'kea' +import { FlaggedFeature } from 'lib/components/FlaggedFeature' +import { FEATURE_FLAGS } from 'lib/constants' import { dayjs } from 'lib/dayjs' import { LemonDivider } from 'lib/lemon-ui/LemonDivider' +import { LemonMarkdown } from 'lib/lemon-ui/LemonMarkdown' import { Tooltip } from 'lib/lemon-ui/Tooltip' import { humanFriendlyNumber } from 'lib/utils' +import posthog from 'posthog-js' import { useEffect, useState } from 'react' import { insightLogic } from 'scenes/insights/insightLogic' import { LineGraph } from 'scenes/insights/views/LineGraph/LineGraph' @@ -577,15 +588,19 @@ export function OpenTextViz({ <> ) : ( <> - -
-
Open text
- -
random selection
- -
-
+
+ +
+
Open text
+ +
random selection
+ +
+
+ +
{question.question}
+
{surveyOpenTextResults[questionIndex].events.map((event, i) => { const personProp = { @@ -617,3 +632,87 @@ export function OpenTextViz({
) } + +function ResponseSummariesButton({ questionIndex }: { questionIndex: number | undefined }): JSX.Element { + const { summarize } = useActions(surveyLogic) + const { responseSummary, responseSummaryLoading } = useValues(surveyLogic) + + return ( + + summarize({ questionIndex })} + disabledReason={ + responseSummaryLoading ? 'Let me think...' : responseSummary ? 'already summarized' : undefined + } + icon={} + > + {responseSummaryLoading ? ( + <> + Let me think... + + + ) : ( + <>Summarize responses + )} + + + ) +} + +function ResponseSummariesDisplay(): JSX.Element { + const { survey, responseSummary } = useValues(surveyLogic) + + return ( + + {responseSummary ? ( + <> +

Responses summary

+ {responseSummary.content} + + + + ) : null} +
+ ) +} + +function ResponseSummaryFeedback({ surveyId }: { surveyId: string }): JSX.Element { + const [rating, setRating] = useState<'good' | 'bad' | null>(null) + + function submitRating(newRating: 'good' | 'bad'): void { + if (rating) { + return // Already rated + } + setRating(newRating) + // capture the rating passed in, not the state value, which is still null in this closure + posthog.capture('chat rating', { + survey_id: surveyId, + answer_rating: newRating, + }) + } + + return ( +
+ {rating === null ? <>Summaries are generated by AI. What did you think? : null} + {rating !== 'bad' && ( + : } + type="tertiary" + size="small" + tooltip="Good summary" + onClick={() => submitRating('good')} + /> + )} + {rating !== 'good' && ( + : } + type="tertiary" + size="small" + tooltip="Bad summary" + onClick={() => submitRating('bad')} + /> + )} +
+ ) +} diff --git a/frontend/src/toolbar/Toolbar.stories.tsx b/frontend/src/toolbar/Toolbar.stories.tsx index 593681ba4f32c..fc140a3727f7e 100644 --- a/frontend/src/toolbar/Toolbar.stories.tsx +++ b/frontend/src/toolbar/Toolbar.stories.tsx @@ -55,7 +55,6 @@ const BasicTemplate: StoryFn = (props) => { userIntent: undefined, dataAttributes: ['data-attr'], apiURL: '/', - jsURL: 'http://localhost:8234/', userEmail: 'foobar@posthog.com', } useToolbarStyles() @@ -68,7 +67,6 @@ const BasicTemplate: StoryFn = (props) => { }, toolbarParams: { toolbarVersion: 'toolbar', - jsURL: 'http://localhost:8234/', }, isAuthenticated: props.unauthenticated ?? true, supportedCompression: ['gzip', 'gzip-js', 'lz64'], diff --git a/frontend/src/toolbar/ToolbarApp.tsx b/frontend/src/toolbar/ToolbarApp.tsx index 39d2d15afe8b7..f623736bd9c2b 100644 --- a/frontend/src/toolbar/ToolbarApp.tsx +++ b/frontend/src/toolbar/ToolbarApp.tsx @@ -13,7 +13,7 @@ import { TOOLBAR_ID } from './utils' type HTMLElementWithShadowRoot = HTMLElement & { shadowRoot: ShadowRoot } export function ToolbarApp(props: ToolbarProps = {}): JSX.Element { - const { jsURL } = useValues(toolbarConfigLogic(props)) + const { apiURL } = useValues(toolbarConfigLogic(props)) const shadowRef = useRef(null) const [didLoadStyles, setDidLoadStyles] = useState(false) @@ -32,7 +32,7 @@ export function ToolbarApp(props: ToolbarProps = {}): JSX.Element { // this ensures that we bust the cache periodically const timestampToNearestFiveMinutes = Math.floor(Date.now() / fiveMinutesInMillis) * fiveMinutesInMillis - styleLink.href = `${jsURL}/static/toolbar.css?t=${timestampToNearestFiveMinutes}` + styleLink.href = `${apiURL}/static/toolbar.css?t=${timestampToNearestFiveMinutes}` styleLink.onload = () => setDidLoadStyles(true) const shadowRoot = shadowRef.current?.shadowRoot || window.document.getElementById(TOOLBAR_ID)?.shadowRoot diff --git a/frontend/src/toolbar/index.tsx b/frontend/src/toolbar/index.tsx index e5ae6fa344cc8..7df891f45aa98 100644 --- a/frontend/src/toolbar/index.tsx +++ b/frontend/src/toolbar/index.tsx @@ -25,7 +25,6 @@ import { ToolbarParams } from '~/types' {...toolbarParams} actionId={parseInt(String(toolbarParams.actionId))} experimentId={parseInt(String(toolbarParams.experimentId))} - jsURL={toolbarParams.jsURL || toolbarParams.apiURL} posthog={posthog} /> ) diff --git a/frontend/src/toolbar/stats/currentPageLogic.test.ts b/frontend/src/toolbar/stats/currentPageLogic.test.ts index c943d482ebba1..1aad6eef2e674 100644 --- a/frontend/src/toolbar/stats/currentPageLogic.test.ts +++ b/frontend/src/toolbar/stats/currentPageLogic.test.ts @@ -1,7 +1,7 @@ import { withoutPostHogInit } from '~/toolbar/stats/currentPageLogic' const posthogInitHashParam = - '__posthog={%22action%22:%20%22ph_authorize%22,%20%22token%22:%20%the-ph-token%22,%20%22temporaryToken%22:%20%the-posthog-token%22,%20%22actionId%22:%20null,%20%22userIntent%22:%20%22heatmaps%22,%20%22toolbarVersion%22:%20%22toolbar%22,%20%22apiURL%22:%20%22https://eu.posthog.com%22,%20%22dataAttributes%22:%20[%22data-attr%22],%20%22jsURL%22:%20%22https://app-static.eu.posthog.com%22,%20%22instrument%22:%20true,%20%22userEmail%22:%20%user-email@gmail.com%22,%20%22distinctId%22:%20%the-distinct-id%22}' + 
'__posthog={%22action%22:%20%22ph_authorize%22,%20%22token%22:%20%the-ph-token%22,%20%22temporaryToken%22:%20%the-posthog-token%22,%20%22actionId%22:%20null,%20%22userIntent%22:%20%22heatmaps%22,%20%22toolbarVersion%22:%20%22toolbar%22,%20%22apiURL%22:%20%22https://eu.posthog.com%22,%20%22dataAttributes%22:%20[%22data-attr%22],%20%22instrument%22:%20true,%20%22userEmail%22:%20%user-email@gmail.com%22,%20%22distinctId%22:%20%the-distinct-id%22}' describe('current page logic', () => { describe('cleaning href', () => { diff --git a/frontend/src/toolbar/toolbarConfigLogic.ts b/frontend/src/toolbar/toolbarConfigLogic.ts index e31442b5b2743..3ab336677f682 100644 --- a/frontend/src/toolbar/toolbarConfigLogic.ts +++ b/frontend/src/toolbar/toolbarConfigLogic.ts @@ -41,11 +41,6 @@ export const toolbarConfigLogic = kea([ (s) => [s.props], (props: ToolbarProps) => `${props.apiURL?.endsWith('/') ? props.apiURL.replace(/\/+$/, '') : props.apiURL}`, ], - jsURL: [ - (s) => [s.props, s.apiURL], - (props: ToolbarProps, apiUrl) => - `${props.jsURL ? (props.jsURL.endsWith('/') ? props.jsURL.replace(/\/+$/, '') : props.jsURL) : apiUrl}`, - ], dataAttributes: [(s) => [s.props], (props): string[] => props.dataAttributes ?? []], isAuthenticated: [(s) => [s.temporaryToken], (temporaryToken) => !!temporaryToken], }), diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 7a481e31fa24d..6b6c2b161c442 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -626,7 +626,6 @@ export type ExperimentIdType = number | 'new' | 'web' /* sync with posthog-js */ export interface ToolbarParams { apiURL?: string - jsURL?: string token?: string /** public posthog-js token */ temporaryToken?: string /** private temporary user token */ actionId?: number @@ -2193,7 +2192,8 @@ export enum RetentionPeriod { Month = 'Month', } -export type BreakdownKeyType = string | number | (string | number)[] | null +// eslint-disable-next-line @typescript-eslint/no-duplicate-type-constituents +export type BreakdownKeyType = integer | string | number | (integer | string | number)[] | null /** * Legacy breakdown. 
@@ -4413,7 +4413,6 @@ export enum SidePanelTab { Discussion = 'discussion', Status = 'status', Exports = 'exports', - ExperimentFeatureFlag = 'experiment-feature-flag', } export interface SourceFieldOauthConfig { diff --git a/mypy-baseline.txt b/mypy-baseline.txt index 93573c831c0fe..a2ab36ff3afea 100644 --- a/mypy-baseline.txt +++ b/mypy-baseline.txt @@ -617,13 +617,12 @@ posthog/warehouse/api/external_data_schema.py:0: note: def [_T] get(self, Type, posthog/warehouse/api/table.py:0: error: Unused "type: ignore" comment [unused-ignore] posthog/warehouse/api/table.py:0: error: Unused "type: ignore" comment [unused-ignore] posthog/warehouse/api/table.py:0: error: Unused "type: ignore" comment [unused-ignore] -posthog/temporal/data_imports/workflow_activities/sync_new_schemas.py:0: error: Argument 1 has incompatible type "str"; expected "Type" [arg-type] posthog/temporal/data_imports/workflow_activities/sync_new_schemas.py:0: error: No overload variant of "get" of "dict" matches argument types "str", "tuple[()]" [call-overload] posthog/temporal/data_imports/workflow_activities/sync_new_schemas.py:0: note: Possible overload variants: posthog/temporal/data_imports/workflow_activities/sync_new_schemas.py:0: note: def get(self, Type, /) -> Sequence[str] | None posthog/temporal/data_imports/workflow_activities/sync_new_schemas.py:0: note: def get(self, Type, Sequence[str], /) -> Sequence[str] posthog/temporal/data_imports/workflow_activities/sync_new_schemas.py:0: note: def [_T] get(self, Type, _T, /) -> Sequence[str] | _T -posthog/temporal/data_imports/workflow_activities/sync_new_schemas.py:0: error: Argument "source_id" has incompatible type "str"; expected "UUID" [arg-type] +posthog/temporal/data_imports/workflow_activities/sync_new_schemas.py:0: error: Argument "source_id" to "sync_old_schemas_with_new_schemas" has incompatible type "str"; expected "UUID" [arg-type] posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a return type annotation [no-untyped-def] posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation [no-untyped-def] posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def] @@ -796,6 +795,11 @@ posthog/temporal/tests/batch_exports/test_batch_exports.py:0: error: TypedDict k posthog/temporal/data_modeling/run_workflow.py:0: error: Dict entry 20 has incompatible type "str": "Literal['complex']"; expected "str": "Literal['text', 'double', 'bool', 'timestamp', 'bigint', 'binary', 'json', 'decimal', 'wei', 'date', 'time']" [dict-item] posthog/temporal/data_modeling/run_workflow.py:0: error: Dict entry 21 has incompatible type "str": "Literal['complex']"; expected "str": "Literal['text', 'double', 'bool', 'timestamp', 'bigint', 'binary', 'json', 'decimal', 'wei', 'date', 'time']" [dict-item] posthog/temporal/data_modeling/run_workflow.py:0: error: Dict entry 22 has incompatible type "str": "Literal['complex']"; expected "str": "Literal['text', 'double', 'bool', 'timestamp', 'bigint', 'binary', 'json', 'decimal', 'wei', 'date', 'time']" [dict-item] +posthog/temporal/data_imports/pipelines/pipeline_sync.py:0: error: "FilesystemDestinationClientConfiguration" has no attribute "delta_jobs_per_write" [attr-defined] +posthog/temporal/data_imports/pipelines/pipeline_sync.py:0: error: "type[FilesystemDestinationClientConfiguration]" has no attribute "delta_jobs_per_write" [attr-defined] +posthog/temporal/data_imports/pipelines/pipeline_sync.py:0: 
error: Incompatible types in assignment (expression has type "object", variable has type "DataWarehouseCredential | Combinable | None") [assignment] +posthog/temporal/data_imports/pipelines/pipeline_sync.py:0: error: Incompatible types in assignment (expression has type "object", variable has type "str | int | Combinable") [assignment] +posthog/temporal/data_imports/pipelines/pipeline_sync.py:0: error: Incompatible types in assignment (expression has type "dict[str, dict[str, str | bool]] | dict[str, str]", variable has type "dict[str, dict[str, str]]") [assignment] posthog/session_recordings/session_recording_api.py:0: error: Argument "team_id" to "get_realtime_snapshots" has incompatible type "int"; expected "str" [arg-type] posthog/session_recordings/session_recording_api.py:0: error: Value of type variable "SupportsRichComparisonT" of "sorted" cannot be "str | None" [type-var] posthog/session_recordings/session_recording_api.py:0: error: Argument 1 to "get" of "dict" has incompatible type "str | None"; expected "str" [arg-type] @@ -826,12 +830,6 @@ posthog/temporal/tests/batch_exports/test_snowflake_batch_export_workflow.py:0: posthog/temporal/tests/batch_exports/test_snowflake_batch_export_workflow.py:0: error: List item 0 has incompatible type "tuple[str, str, int, int, int, int, str, int]"; expected "tuple[str, str, int, int, str, str, str, str]" [list-item] posthog/temporal/tests/batch_exports/test_s3_batch_export_workflow.py:0: error: "tuple[Any, ...]" has no attribute "last_uploaded_part_timestamp" [attr-defined] posthog/temporal/tests/batch_exports/test_s3_batch_export_workflow.py:0: error: "tuple[Any, ...]" has no attribute "upload_state" [attr-defined] -posthog/temporal/data_imports/workflow_activities/import_data.py:0: error: Argument "job_type" to "PipelineInputs" has incompatible type "str"; expected "Type" [arg-type] -posthog/temporal/data_imports/pipelines/pipeline_sync.py:0: error: "FilesystemDestinationClientConfiguration" has no attribute "delta_jobs_per_write" [attr-defined] -posthog/temporal/data_imports/pipelines/pipeline_sync.py:0: error: "type[FilesystemDestinationClientConfiguration]" has no attribute "delta_jobs_per_write" [attr-defined] -posthog/temporal/data_imports/pipelines/pipeline_sync.py:0: error: Incompatible types in assignment (expression has type "object", variable has type "DataWarehouseCredential | Combinable | None") [assignment] -posthog/temporal/data_imports/pipelines/pipeline_sync.py:0: error: Incompatible types in assignment (expression has type "object", variable has type "str | int | Combinable") [assignment] -posthog/temporal/data_imports/pipelines/pipeline_sync.py:0: error: Incompatible types in assignment (expression has type "dict[str, dict[str, str | bool]] | dict[str, str]", variable has type "dict[str, dict[str, str]]") [assignment] posthog/migrations/0237_remove_timezone_from_teams.py:0: error: Argument 2 to "RunPython" has incompatible type "Callable[[Migration, Any], None]"; expected "_CodeCallable | None" [arg-type] posthog/migrations/0228_fix_tile_layouts.py:0: error: Argument 2 to "RunPython" has incompatible type "Callable[[Migration, Any], None]"; expected "_CodeCallable | None" [arg-type] posthog/api/plugin_log_entry.py:0: error: Name "timezone.datetime" is not defined [name-defined] @@ -839,6 +837,7 @@ posthog/api/plugin_log_entry.py:0: error: Module "django.utils.timezone" does no posthog/api/plugin_log_entry.py:0: error: Name "timezone.datetime" is not defined [name-defined] posthog/api/plugin_log_entry.py:0: error: Module 
"django.utils.timezone" does not explicitly export attribute "datetime" [attr-defined] posthog/temporal/tests/batch_exports/test_redshift_batch_export_workflow.py:0: error: Incompatible types in assignment (expression has type "str | int", variable has type "int") [assignment] +posthog/temporal/data_imports/external_data_job.py:0: error: Argument "status" to "update_external_job_status" has incompatible type "str"; expected "Status" [arg-type] posthog/api/sharing.py:0: error: Item "None" of "list[Any] | None" has no attribute "__iter__" (not iterable) [union-attr] posthog/api/test/batch_exports/conftest.py:0: error: Signature of "run" incompatible with supertype "Worker" [override] posthog/api/test/batch_exports/conftest.py:0: note: Superclass: @@ -850,10 +849,10 @@ posthog/temporal/tests/external_data/test_external_data_job.py:0: error: Invalid posthog/temporal/tests/external_data/test_external_data_job.py:0: error: Invalid index type "str" for "dict[Type, Sequence[str]]"; expected type "Type" [index] posthog/temporal/tests/external_data/test_external_data_job.py:0: error: Invalid index type "str" for "dict[Type, Sequence[str]]"; expected type "Type" [index] posthog/temporal/tests/external_data/test_external_data_job.py:0: error: Invalid index type "str" for "dict[Type, Sequence[str]]"; expected type "Type" [index] +posthog/temporal/tests/data_imports/test_end_to_end.py:0: error: Unused "type: ignore" comment [unused-ignore] posthog/api/test/test_team.py:0: error: "HttpResponse" has no attribute "json" [attr-defined] posthog/api/test/test_team.py:0: error: "HttpResponse" has no attribute "json" [attr-defined] posthog/test/test_middleware.py:0: error: Incompatible types in assignment (expression has type "_MonkeyPatchedWSGIResponse", variable has type "_MonkeyPatchedResponse") [assignment] -posthog/temporal/tests/data_imports/test_end_to_end.py:0: error: Unused "type: ignore" comment [unused-ignore] posthog/management/commands/test/test_create_batch_export_from_app.py:0: error: Incompatible return value type (got "dict[str, Collection[str]]", expected "dict[str, str]") [return-value] posthog/management/commands/test/test_create_batch_export_from_app.py:0: error: Incompatible types in assignment (expression has type "dict[str, Collection[str]]", variable has type "dict[str, str]") [assignment] posthog/management/commands/test/test_create_batch_export_from_app.py:0: error: Unpacked dict entry 1 has incompatible type "str"; expected "SupportsKeysAndGetItem[str, str]" [dict-item] diff --git a/posthog/api/person.py b/posthog/api/person.py index b410de3d7da68..862a62b5cea91 100644 --- a/posthog/api/person.py +++ b/posthog/api/person.py @@ -420,12 +420,12 @@ def bulk_delete(self, request: request.Request, pk=None, **kwargs): This endpoint allows you to bulk delete persons, either by the PostHog person IDs or by distinct IDs. You can pass in a maximum of 100 IDs per call. 
""" if distinct_ids := request.data.get("distinct_ids"): - if len(distinct_ids) > 100: - raise ValidationError("You can only pass 100 distinct_ids in one call") + if len(distinct_ids) > 1000: + raise ValidationError("You can only pass 1000 distinct_ids in one call") persons = self.get_queryset().filter(persondistinctid__distinct_id__in=distinct_ids) elif ids := request.data.get("ids"): - if len(ids) > 100: - raise ValidationError("You can only pass 100 ids in one call") + if len(ids) > 1000: + raise ValidationError("You can only pass 1000 ids in one call") persons = self.get_queryset().filter(uuid__in=ids) else: raise ValidationError("You need to specify either distinct_ids or ids") @@ -438,7 +438,7 @@ def bulk_delete(self, request: request.Request, pk=None, **kwargs): team_id=self.team_id, user=cast(User, request.user), was_impersonated=is_impersonated_session(request), - item_id=person.id, + item_id=person.pk, scope="Person", activity="deleted", detail=Detail(name=str(person.uuid)), diff --git a/posthog/api/survey.py b/posthog/api/survey.py index 4864612c2b438..bfd2dd7d9f87b 100644 --- a/posthog/api/survey.py +++ b/posthog/api/survey.py @@ -1,18 +1,24 @@ +import os from contextlib import contextmanager +from datetime import datetime, timedelta from typing import Any, cast from urllib.parse import urlparse import nh3 +import posthoganalytics +from django.conf import settings +from django.core.cache import cache from django.db.models import Min from django.http import HttpResponse, JsonResponse from django.utils.text import slugify from django.views.decorators.csrf import csrf_exempt from loginas.utils import is_impersonated_session from nanoid import generate -from rest_framework import request, serializers, status, viewsets +from rest_framework import request, serializers, status, viewsets, exceptions from rest_framework.request import Request from rest_framework.response import Response +from ee.surveys.summaries.summarize_surveys import summarize_survey_responses from posthog.api.action import ActionSerializer from posthog.api.feature_flag import ( BEHAVIOURAL_COHORT_FOUND_ERROR_CODE, @@ -23,6 +29,7 @@ from posthog.api.shared import UserBasicSerializer from posthog.api.utils import action, get_token from posthog.client import sync_execute +from posthog.cloud_utils import is_cloud from posthog.constants import AvailableFeature from posthog.event_usage import report_user_action from posthog.exceptions import generate_exception_response @@ -646,6 +653,67 @@ def activity(self, request: request.Request, **kwargs): ) return activity_page_response(activity_page, limit, page, request) + @action(methods=["POST"], detail=True, required_scopes=["survey:read"]) + def summarize_responses(self, request: request.Request, **kwargs): + if not request.user.is_authenticated: + raise exceptions.NotAuthenticated() + + user = cast(User, request.user) + + survey_id = kwargs["pk"] + + if not Survey.objects.filter(id=survey_id, team_id=self.team_id).exists(): + return Response(status=status.HTTP_404_NOT_FOUND) + + survey = self.get_object() + + cache_key = f'summarize_survey_responses_{self.team.pk}_{self.kwargs["pk"]}' + # Check if the response is cached + cached_response = cache.get(cache_key) + if cached_response is not None: + return Response(cached_response) + + environment_is_allowed = settings.DEBUG or is_cloud() + has_openai_api_key = bool(os.environ.get("OPENAI_API_KEY")) + if not environment_is_allowed or not has_openai_api_key: + raise exceptions.ValidationError("session summary is only supported in 
PostHog Cloud") + + if not posthoganalytics.feature_enabled("ai-survey-response-summary", str(user.distinct_id)): + raise exceptions.ValidationError("survey response summary is not enabled for this user") + + end_date: datetime = (survey.end_date or datetime.now()).replace( + hour=0, minute=0, second=0, microsecond=0 + ) + timedelta(days=1) + + try: + question_index_param = request.query_params.get("question_index", None) + question_index = int(question_index_param) if question_index_param else None + except (ValueError, TypeError): + question_index = None + + summary = summarize_survey_responses( + survey_id=survey_id, + question_index=question_index, + survey_start=(survey.start_date or survey.created_at).replace(hour=0, minute=0, second=0, microsecond=0), + survey_end=end_date, + team=self.team, + user=user, + ) + timings = summary.pop("timings", None) + cache.set(cache_key, summary, timeout=30) + + posthoganalytics.capture( + event="survey response summarized", distinct_id=str(user.distinct_id), properties=summary + ) + + # let the browser cache for half the time we cache on the server + r = Response(summary, headers={"Cache-Control": "max-age=15"}) + if timings: + r.headers["Server-Timing"] = ", ".join( + f"{key};dur={round(duration, ndigits=2)}" for key, duration in timings.items() + ) + return r + class SurveyConfigSerializer(serializers.ModelSerializer): class Meta: diff --git a/posthog/api/team.py b/posthog/api/team.py index 257a10f459d38..70e0bd4199380 100644 --- a/posthog/api/team.py +++ b/posthog/api/team.py @@ -64,13 +64,31 @@ def has_permission(self, request: request.Request, view) -> bool: return False if not request.data.get("is_demo"): - # if we're not requesting to make a demo project - # and if the org already has more than 1 non-demo project (need to be able to make the initial project) - # and the org isn't allowed to make multiple projects - if organization.teams.exclude(is_demo=True).count() >= 1 and not organization.is_feature_available( + has_organization_projects_feature = organization.is_feature_available( AvailableFeature.ORGANIZATIONS_PROJECTS - ): - return False + ) + current_non_demo_project_count = organization.teams.exclude(is_demo=True).count() + + allowed_project_count = next( + ( + feature.get("limit") + for feature in organization.available_product_features or [] + if feature.get("key") == AvailableFeature.ORGANIZATIONS_PROJECTS + ), + None, + ) + + if has_organization_projects_feature: + # If allowed_project_count is None then the user is allowed unlimited projects + if allowed_project_count is None: + return True + # Check current limit against allowed limit + if current_non_demo_project_count >= allowed_project_count: + return False + else: + # If the org doesn't have the feature, they can only have one non-demo project + if current_non_demo_project_count >= 1: + return False else: # if we ARE requesting to make a demo project # but the org already has a demo project diff --git a/posthog/api/test/test_team.py b/posthog/api/test/test_team.py index 6992c1822ef97..0040ddd257e2d 100644 --- a/posthog/api/test/test_team.py +++ b/posthog/api/test/test_team.py @@ -1284,3 +1284,63 @@ def test_teams_outside_personal_api_key_scoped_organizations_not_listed(self): {team_in_other_org.id}, "Only the team belonging to the scoped organization should be listed, the other one should be excluded", ) + + def test_can_create_team_with_valid_project_limit(self): + self.organization_membership.level = OrganizationMembership.Level.ADMIN + self.organization_membership.save() 
+ self.organization.available_product_features = [ + { + "key": AvailableFeature.ORGANIZATIONS_PROJECTS, + "name": "Organizations Projects", + "limit": 5, + } + ] + self.organization.save() + self.assertEqual(Team.objects.count(), 1) + + response = self.client.post("/api/projects/@current/environments/", {"name": "New Project"}) + self.assertEqual(response.status_code, 201) + self.assertEqual(Team.objects.count(), 2) + + def test_cant_create_team_when_at_project_limit(self): + self.organization_membership.level = OrganizationMembership.Level.ADMIN + self.organization_membership.save() + self.organization.available_product_features = [ + { + "key": AvailableFeature.ORGANIZATIONS_PROJECTS, + "name": "Organizations Projects", + "limit": 1, + } + ] + self.organization.save() + self.assertEqual(Team.objects.count(), 1) + + response = self.client.post("/api/projects/@current/environments/", {"name": "New Project"}) + self.assertEqual(response.status_code, 403) + response_data = response.json() + self.assertDictContainsSubset( + { + "type": "authentication_error", + "code": "permission_denied", + "detail": "You must upgrade your PostHog plan to be able to create and manage multiple projects or environments.", + }, + response_data, + ) + self.assertEqual(Team.objects.count(), 1) + + def test_can_create_team_with_unlimited_projects_feature(self): + self.organization_membership.level = OrganizationMembership.Level.ADMIN + self.organization_membership.save() + self.organization.available_product_features = [ + {"key": AvailableFeature.ORGANIZATIONS_PROJECTS, "name": "Organizations Projects", "limit": None} + ] + self.organization.save() + self.assertEqual(Team.objects.count(), 1) + + response = self.client.post("/api/projects/@current/environments/", {"name": "New Project"}) + self.assertEqual(response.status_code, 201) + self.assertEqual(Team.objects.count(), 2) + + response = self.client.post("/api/projects/@current/environments/", {"name": "New Project 2"}) + self.assertEqual(response.status_code, 201) + self.assertEqual(Team.objects.count(), 3) diff --git a/posthog/api/test/test_user.py b/posthog/api/test/test_user.py index 4ee05190e8158..1c9d3b96aec04 100644 --- a/posthog/api/test/test_user.py +++ b/posthog/api/test/test_user.py @@ -864,11 +864,11 @@ def test_redirect_user_to_site_with_toolbar(self, patched_token): ) self.assertEqual(response.status_code, status.HTTP_302_FOUND) locationHeader = response.headers.get("location", "not found") - self.assertIn("%22jsURL%22%3A%20%22http%3A%2F%2Flocalhost%3A8234%22", locationHeader) + self.assertIn("22apiURL%22%3A%20%22http%3A%2F%2Ftestserver%22", locationHeader) self.maxDiff = None self.assertEqual( unquote(locationHeader), - 'http://127.0.0.1:8000#__posthog={"action": "ph_authorize", "token": "token123", "temporaryToken": "tokenvalue", "actionId": null, "experimentId": null, "userIntent": "add-action", "toolbarVersion": "toolbar", "apiURL": "http://testserver", "dataAttributes": ["data-attr"], "jsURL": "http://localhost:8234"}', + 'http://127.0.0.1:8000#__posthog={"action": "ph_authorize", "token": "token123", "temporaryToken": "tokenvalue", "actionId": null, "experimentId": null, "userIntent": "add-action", "toolbarVersion": "toolbar", "apiURL": "http://testserver", "dataAttributes": ["data-attr"]}', ) @patch("posthog.api.user.secrets.token_urlsafe") @@ -883,11 +883,11 @@ def test_redirect_user_to_site_with_experiments_toolbar(self, patched_token): ) self.assertEqual(response.status_code, status.HTTP_302_FOUND) locationHeader = 
response.headers.get("location", "not found") - self.assertIn("%22jsURL%22%3A%20%22http%3A%2F%2Flocalhost%3A8234%22", locationHeader) + self.assertIn("22apiURL%22%3A%20%22http%3A%2F%2Ftestserver%22", locationHeader) self.maxDiff = None self.assertEqual( unquote(locationHeader), - 'http://127.0.0.1:8000#__posthog={"action": "ph_authorize", "token": "token123", "temporaryToken": "tokenvalue", "actionId": null, "experimentId": "12", "userIntent": "edit-experiment", "toolbarVersion": "toolbar", "apiURL": "http://testserver", "dataAttributes": ["data-attr"], "jsURL": "http://localhost:8234"}', + 'http://127.0.0.1:8000#__posthog={"action": "ph_authorize", "token": "token123", "temporaryToken": "tokenvalue", "actionId": null, "experimentId": "12", "userIntent": "edit-experiment", "toolbarVersion": "toolbar", "apiURL": "http://testserver", "dataAttributes": ["data-attr"]}', ) @patch("posthog.api.user.secrets.token_urlsafe") diff --git a/posthog/api/user.py b/posthog/api/user.py index 92cfa08e01895..451c0b3470e01 100644 --- a/posthog/api/user.py +++ b/posthog/api/user.py @@ -63,7 +63,6 @@ from posthog.tasks import user_identify from posthog.tasks.email import send_email_change_emails from posthog.user_permissions import UserPermissions -from posthog.utils import get_js_url REDIRECT_TO_SITE_COUNTER = Counter("posthog_redirect_to_site", "Redirect to site") REDIRECT_TO_SITE_FAILED_COUNTER = Counter("posthog_redirect_to_site_failed", "Redirect to site failed") @@ -518,9 +517,6 @@ def redirect_to_site(request): "dataAttributes": team.data_attributes, } - if get_js_url(request): - params["jsURL"] = get_js_url(request) - if not settings.TEST and not os.environ.get("OPT_OUT_CAPTURE"): params["instrument"] = True params["userEmail"] = request.user.email diff --git a/posthog/cdp/templates/google_ads/template_google_ads.py b/posthog/cdp/templates/google_ads/template_google_ads.py index ff577988ce025..3743ca93db541 100644 --- a/posthog/cdp/templates/google_ads/template_google_ads.py +++ b/posthog/cdp/templates/google_ads/template_google_ads.py @@ -1,5 +1,7 @@ from posthog.cdp.templates.hog_function_template import HogFunctionTemplate +# Based on https://developers.google.com/google-ads/api/reference/rpc/v17/ClickConversion + template: HogFunctionTemplate = HogFunctionTemplate( status="alpha", type="destination", @@ -14,6 +16,25 @@ return } +let body := { + 'conversions': [ + { + 'gclid': inputs.gclid, + 'conversion_action': f'customers/{replaceAll(inputs.customerId, '-', '')}/conversionActions/{replaceAll(inputs.conversionActionId, 'AW-', '')}', + 'conversion_date_time': inputs.conversionDateTime + } + ], + 'partialFailure': true, + 'validateOnly': true +} + +if (not empty(inputs.conversionValue)) { + body.conversions[1].conversion_value := inputs.conversionValue +} +if (not empty(inputs.currencyCode)) { + body.conversions[1].currency_code := inputs.currencyCode +} + let res := fetch(f'https://googleads.googleapis.com/v17/customers/{replaceAll(inputs.customerId, '-', '')}:uploadClickConversions', { 'method': 'POST', 'headers': { @@ -21,23 +42,12 @@ 'Content-Type': 'application/json', 'developer-token': inputs.developerToken }, - 'body': { - 'conversions': [ - { - 'gclid': inputs.gclid, - 'conversionAction': f'customers/{replaceAll(inputs.customerId, '-', '')}/conversionActions/{replaceAll(inputs.conversionActionId, 'AW-', '')}', - 'conversionDateTime': inputs.conversionDateTime - } - ], - 'partialFailure': true, - 'validateOnly': true - } + 'body': body }) if (res.status >= 400) { throw Error(f'Error from 
googleads.googleapis.com (status {res.status}): {res.body}') } - """.strip(), inputs_schema=[ { @@ -90,6 +100,24 @@ "secret": False, "required": True, }, + { + "key": "conversionValue", + "type": "string", + "label": "Conversion value", + "description": "The value of the conversion for the advertiser.", + "default": "", + "secret": False, + "required": False, + }, + { + "key": "currencyCode", + "type": "string", + "label": "Currency code", + "description": "Currency associated with the conversion value. This is the ISO 4217 3-character currency code. For example: USD, EUR.", + "default": "", + "secret": False, + "required": False, + }, ], filters={ "events": [], diff --git a/posthog/cdp/templates/google_ads/test_template_google_ads.py b/posthog/cdp/templates/google_ads/test_template_google_ads.py index 7e40cb4fb9f20..0c5ef98abb143 100644 --- a/posthog/cdp/templates/google_ads/test_template_google_ads.py +++ b/posthog/cdp/templates/google_ads/test_template_google_ads.py @@ -18,6 +18,7 @@ def _inputs(self, **kwargs): "conversionActionId": "AW-123456789", "gclid": "89y4thuergnjkd34oihroh3uhg39uwhgt9", "conversionDateTime": "2024-10-10 16:32:45+02:00", + "currencyCode": "USD", } inputs.update(kwargs) return inputs @@ -29,23 +30,24 @@ def test_function_works(self): ( "https://googleads.googleapis.com/v17/customers/1231231234:uploadClickConversions", { + "method": "POST", + "headers": { + "Authorization": "Bearer oauth-1234", + "Content-Type": "application/json", + "developer-token": "developer-token1234", + }, "body": { "conversions": [ { "gclid": "89y4thuergnjkd34oihroh3uhg39uwhgt9", - "conversionAction": f"customers/1231231234/conversionActions/123456789", - "conversionDateTime": "2024-10-10 16:32:45+02:00", + "conversion_action": f"customers/1231231234/conversionActions/123456789", + "conversion_date_time": "2024-10-10 16:32:45+02:00", + "currency_code": "USD", } ], "partialFailure": True, "validateOnly": True, }, - "method": "POST", - "headers": { - "Authorization": "Bearer oauth-1234", - "Content-Type": "application/json", - "developer-token": "developer-token1234", - }, }, ) ) diff --git a/posthog/cdp/templates/hubspot/template_hubspot.py b/posthog/cdp/templates/hubspot/template_hubspot.py index cf70dcd8cf3d6..19cb2bff1e37f 100644 --- a/posthog/cdp/templates/hubspot/template_hubspot.py +++ b/posthog/cdp/templates/hubspot/template_hubspot.py @@ -110,8 +110,8 @@ return } -if (not match(event.event, '^([a-z])([a-z0-9_-])+$')) { - throw Error(f'Event name must start with a letter and can only contain lowercase letters, numbers, underscores, and hyphens. Not sending event: {event.event}') +if (not match(inputs.eventName, '^([a-z])([a-z0-9_-])+$')) { + throw Error(f'Event name must start with a letter and can only contain lowercase letters, numbers, underscores, and hyphens. 
Not sending event: {inputs.eventName}') return } @@ -139,7 +139,7 @@ } } -let eventSchema := fetch(f'https://api.hubapi.com/events/v3/event-definitions/{event.event}/?includeProperties=true', { +let eventSchema := fetch(f'https://api.hubapi.com/events/v3/event-definitions/{inputs.eventName}/?includeProperties=true', { 'method': 'GET', 'headers': { 'Authorization': f'Bearer {inputs.oauth.access_token}', @@ -213,9 +213,9 @@ if (eventSchema.status >= 400) { let body := { - 'label': event.event, - 'name': event.event, - 'description': f'{event.event} - (created by PostHog)', + 'label': inputs.eventName, + 'name': inputs.eventName, + 'description': f'{inputs.eventName} - (created by PostHog)', 'primaryObject': 'CONTACT', 'propertyDefinitions': [] } @@ -252,7 +252,7 @@ if (not empty(missingProperties)) { for (let i, obj in missingProperties) { - let res := fetch(f'https://api.hubapi.com/events/v3/event-definitions/{event.event}/property', { + let res := fetch(f'https://api.hubapi.com/events/v3/event-definitions/{inputs.eventName}/property', { 'method': 'POST', 'headers': { 'Authorization': f'Bearer {inputs.oauth.access_token}', @@ -299,6 +299,15 @@ "secret": False, "required": True, }, + { + "key": "eventName", + "type": "string", + "label": "Event Name", + "description": "Hubspot only allows events that start with a letter and can only contain lowercase letters, numbers, underscores, and hyphens.", + "default": "{replaceAll(replaceAll(trim(lower(event.event)), '$', ''), ' ', '_')}", + "secret": False, + "required": True, + }, { "key": "email", "type": "string", diff --git a/posthog/cdp/templates/hubspot/test_template_hubspot.py b/posthog/cdp/templates/hubspot/test_template_hubspot.py index c1192a89813fb..d92005f0ce399 100644 --- a/posthog/cdp/templates/hubspot/test_template_hubspot.py +++ b/posthog/cdp/templates/hubspot/test_template_hubspot.py @@ -91,6 +91,7 @@ class TestTemplateHubspotEvent(BaseHogFunctionTemplateTest): def _inputs(self, **kwargs): inputs = { "oauth": {"access_token": "TOKEN"}, + "eventName": "purchase", "email": "example@posthog.com", "include_all_properties": False, "properties": { @@ -126,9 +127,9 @@ def test_body_includes_all_properties_if_set(self): self.mock_fetch_response = lambda *args: EVENT_DEFINITION_RESPONSE # type: ignore self.run_function( - inputs=self._inputs(include_all_properties=False), + inputs=self._inputs(include_all_properties=False, event="purchase"), globals={ - "event": {"event": "purchase", "properties": {"product": "CDP"}}, + "event": {"properties": {"product": "CDP"}}, }, ) @@ -158,10 +159,9 @@ def test_new_event_creation(self): } self.run_function( - inputs=self._inputs(include_all_properties=True), + inputs=self._inputs(include_all_properties=True, eventName="sign_up"), globals={ "event": { - "event": "sign_up", "properties": {"price": 50, "currency": "USD", "expressDelivery": True}, }, }, @@ -246,10 +246,9 @@ def test_new_property_creation(self): } self.run_function( - inputs=self._inputs(include_all_properties=True), + inputs=self._inputs(include_all_properties=True, event="purchase"), globals={ "event": { - "event": "purchase", "properties": {"price": 50, "currency": "USD", "expressDelivery": True, "location": "Planet Earth"}, }, }, @@ -333,10 +332,9 @@ def test_requires_correct_property_types(self): } with pytest.raises(UncaughtHogVMException) as e: self.run_function( - inputs=self._inputs(include_all_properties=True), + inputs=self._inputs(include_all_properties=True, event="purchase"), globals={ "event": { - "event": "purchase", 
"properties": {"price": "50 coins"}, }, }, @@ -361,10 +359,9 @@ def test_allowed_event_names(self): ]: if allowed: self.run_function( - inputs=self._inputs(), + inputs=self._inputs(eventName=event_name), globals={ "event": { - "event": event_name, "properties": {"url": "https://example.com", "$browser": "Chrome"}, }, }, @@ -376,7 +373,7 @@ def test_allowed_event_names(self): else: with pytest.raises(UncaughtHogVMException) as e: self.run_function( - inputs=self._inputs(), + inputs=self._inputs(eventName=event_name), globals={ "event": { "event": event_name, diff --git a/posthog/hogql/bytecode.py b/posthog/hogql/bytecode.py index 27bdd54c6c295..70360b069d4d4 100644 --- a/posthog/hogql/bytecode.py +++ b/posthog/hogql/bytecode.py @@ -1,5 +1,6 @@ import dataclasses from datetime import timedelta +from enum import StrEnum from typing import Any, Optional, cast, TYPE_CHECKING from collections.abc import Callable @@ -827,6 +828,45 @@ def visit_tuple(self, node: ast.Tuple): response.append(len(node.exprs)) return response + def visit_hogqlx_tag(self, node: ast.HogQLXTag): + response = [] + response.extend(self._visit_hogqlx_value("__hx_tag")) + response.extend(self._visit_hogqlx_value(node.kind)) + for attribute in node.attributes: + response.extend(self._visit_hogqlx_value(attribute.name)) + response.extend(self._visit_hogqlx_value(attribute.value)) + response.append(Operation.DICT) + response.append(len(node.attributes) + 1) + return response + + def _visit_hogqlx_value(self, value: Any) -> list[Any]: + if isinstance(value, AST): + return self.visit(value) + if isinstance(value, list): + elems = [] + for v in value: + elems.extend(self._visit_hogqlx_value(v)) + return [*elems, Operation.ARRAY, len(value)] + if isinstance(value, dict): + elems = [] + for k, v in value.items(): + elems.extend(self._visit_hogqlx_value(k)) + elems.extend(self._visit_hogqlx_value(v)) + return [*elems, Operation.DICT, len(value.items())] + if isinstance(value, StrEnum): + return [Operation.STRING, value.value] + if isinstance(value, int): + return [Operation.INTEGER, value] + if isinstance(value, float): + return [Operation.FLOAT, value] + if isinstance(value, str): + return [Operation.STRING, value] + if value is True: + return [Operation.TRUE] + if value is False: + return [Operation.FALSE] + return [Operation.NULL] + def execute_hog( source_code: str, diff --git a/posthog/hogql/test/test_bytecode.py b/posthog/hogql/test/test_bytecode.py index eb41205140366..860acb7cdec1f 100644 --- a/posthog/hogql/test/test_bytecode.py +++ b/posthog/hogql/test/test_bytecode.py @@ -263,3 +263,9 @@ def test_bytecode_in_repl(self): create_bytecode(parse_program("let a:=1"), in_repl=True).bytecode, [_H, HOGQL_BYTECODE_VERSION, op.INTEGER, 1], ) + + def test_bytecode_hogqlx(self): + self.assertEqual( + execute_hog("", team=self.team).result, + {"__hx_tag": "Sparkline", "data": [1, 2, 3]}, + ) diff --git a/posthog/hogql_queries/insights/funnels/base.py b/posthog/hogql_queries/insights/funnels/base.py index 87d7c2a4b0597..bc70b527e84f7 100644 --- a/posthog/hogql_queries/insights/funnels/base.py +++ b/posthog/hogql_queries/insights/funnels/base.py @@ -634,6 +634,7 @@ def _breakdown_other_subquery(self) -> ast.SelectQuery: ], select_from=ast.JoinExpr(table=select_query), group_by=[ast.Field(chain=["final_prop"])], + limit=ast.Constant(value=self.get_breakdown_limit() + 1), ) def _get_steps_conditions(self, length: int) -> ast.Expr: diff --git a/posthog/hogql_queries/insights/funnels/funnel_udf.py 
b/posthog/hogql_queries/insights/funnels/funnel_udf.py index 3d55d89aa05ff..ac4fda03069d3 100644 --- a/posthog/hogql_queries/insights/funnels/funnel_udf.py +++ b/posthog/hogql_queries/insights/funnels/funnel_udf.py @@ -1,6 +1,7 @@ from typing import cast, Optional from posthog.hogql import ast +from posthog.hogql.constants import DEFAULT_RETURNED_ROWS from posthog.hogql.parser import parse_select, parse_expr from posthog.hogql_queries.insights.funnels.base import FunnelBase from posthog.schema import BreakdownType, BreakdownAttributionType @@ -144,7 +145,7 @@ def get_query(self) -> ast.SelectQuery: SELECT {step_results}, {conversion_time_arrays}, - rowNumberInBlock() as row_number, + rowNumberInAllBlocks() as row_number, {final_prop} as final_prop FROM {{inner_select}} @@ -179,6 +180,7 @@ def get_query(self) -> ast.SelectQuery: FROM {{s}} GROUP BY final_prop + LIMIT {self.get_breakdown_limit() + 1 if use_breakdown_limit else DEFAULT_RETURNED_ROWS} """, {"s": s}, ) @@ -211,8 +213,8 @@ def _get_funnel_person_step_condition(self) -> ast.Expr: raise ValueError("Missing both funnelStep and funnelCustomSteps") if funnelStepBreakdown is not None: - if isinstance(funnelStepBreakdown, int) and breakdownType != "cohort": - funnelStepBreakdown = str(funnelStepBreakdown) + if isinstance(funnelStepBreakdown, int | float) and breakdownType != "cohort": + funnelStepBreakdown = str(int(funnelStepBreakdown)) conditions.append( parse_expr( diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr index c16a172389dee..2315f2b51ebf6 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr @@ -1085,14 +1085,14 @@ and isNull(max(max_steps)))) GROUP BY prop) GROUP BY final_prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 + LIMIT 26 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=23622320128, + allow_experimental_analyzer=1 ''' # --- # name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step @@ -1189,14 +1189,14 @@ and isNull(max(max_steps)))) GROUP BY prop) GROUP BY final_prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 + LIMIT 26 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=23622320128, + allow_experimental_analyzer=1 ''' # --- # name: TestFunnelBreakdown.test_funnel_step_multiple_breakdown_snapshot @@ -1286,14 +1286,14 @@ and isNull(max(max_steps)))) GROUP BY prop) GROUP BY final_prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - 
max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 + LIMIT 26 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=23622320128, + allow_experimental_analyzer=1 ''' # --- # name: TestFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events @@ -1424,14 +1424,14 @@ and isNull(max(max_steps)))) GROUP BY prop) GROUP BY final_prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 + LIMIT 26 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=23622320128, + allow_experimental_analyzer=1 ''' # --- # name: TestFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events_poe_v2 @@ -1562,14 +1562,14 @@ and isNull(max(max_steps)))) GROUP BY prop) GROUP BY final_prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 + LIMIT 26 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=23622320128, + allow_experimental_analyzer=1 ''' # --- # name: TestFunnelGroupBreakdown.test_funnel_breakdown_group @@ -1707,585 +1707,577 @@ and isNull(max(max_steps)))) GROUP BY prop) GROUP BY final_prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 + LIMIT 26 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=23622320128, + allow_experimental_analyzer=1 ''' # --- # name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.1 ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.2 - ''' - - SELECT aggregation_target AS actor_id + SELECT persons.id, + persons.id AS id FROM - (SELECT aggregation_target, - 
steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - avg(step_2_conversion_time) step_2_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , - prop + (SELECT aggregation_target AS actor_id FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time, - step_2_conversion_time , - prop + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + prop AS prop, + min(step_1_conversion_time) AS step_1_conversion_time, + min(step_2_conversion_time) AS step_2_conversion_time FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY - AND latest_1 <= latest_2 - AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - if(isNotNull(latest_2) - AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time, - prop + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + max(steps) OVER (PARTITION BY aggregation_target, + prop) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + step_2_conversion_time AS step_2_conversion_time, + prop AS prop FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - prop + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, + prop AS prop, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1)) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_1, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, + prop AS prop FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - if(latest_2 < latest_1, NULL, latest_2) as latest_2 , - prop + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + prop AS prop FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, 
- prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2, + prop AS prop FROM - (SELECT *, - prop_vals as prop + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + prop AS prop FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'sign up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'play movie', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'buy', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) ))))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2, 3] - AND arrayFlatten(array(prop)) = arrayFlatten(array('finance')) - ORDER BY aggregation_target - LIMIT 100 - OFFSET 0 SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000 - ''' -# --- -# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.3 - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY 
group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, + prop_basic AS prop_basic, + prop, + prop_vals AS prop_vals, + prop_vals AS prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'play movie'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(equals(e.event, 'buy'), 1, 0) AS step_2, + if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, + ifNull(toString(e__group_0.properties___industry), '') AS prop_basic, + prop_basic AS prop, + argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 99999), equals(index, 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 99999), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps, + prop + HAVING ifNull(equals(steps, max(max_steps)), isNull(steps) + and isNull(max(max_steps)))) + WHERE and(ifNull(in(steps, [1, 2, 3]), 0), ifNull(equals(arrayFlatten(array(prop)), arrayFlatten(array('finance'))), isNull(arrayFlatten(array(prop))) + and isNull(arrayFlatten(array('finance'))))) + ORDER BY aggregation_target ASC) AS source + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 99999) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) 
AS persons ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0, + allow_experimental_analyzer=1 ''' # --- -# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.4 +# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.2 ''' - - SELECT aggregation_target AS actor_id + SELECT persons.id, + persons.id AS id FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - avg(step_2_conversion_time) step_2_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , - prop + (SELECT aggregation_target AS actor_id FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time, - step_2_conversion_time , - prop + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + prop AS prop, + min(step_1_conversion_time) AS step_1_conversion_time, + min(step_2_conversion_time) AS step_2_conversion_time FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY - AND latest_1 <= latest_2 - AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - if(isNotNull(latest_2) - AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time, - prop + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + max(steps) OVER (PARTITION BY aggregation_target, + prop) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + step_2_conversion_time AS step_2_conversion_time, + prop AS prop FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - prop + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, + prop AS prop, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1)) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_1, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, + prop AS prop FROM - (SELECT 
aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - if(latest_2 < latest_1, NULL, latest_2) as latest_2 , - prop + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + prop AS prop FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2, + prop AS prop FROM - (SELECT *, - prop_vals as prop + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + prop AS prop FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'sign up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'play movie', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'buy', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) ))))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps 
= max(max_steps)) - WHERE steps IN [2, 3] - AND arrayFlatten(array(prop)) = arrayFlatten(array('finance')) - ORDER BY aggregation_target - LIMIT 100 - OFFSET 0 SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000 - ''' -# --- -# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.5 - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, + prop_basic AS prop_basic, + prop, + prop_vals AS prop_vals, + prop_vals AS prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'play movie'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(equals(e.event, 'buy'), 1, 0) AS step_2, + if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, + ifNull(toString(e__group_0.properties___industry), '') AS prop_basic, + prop_basic AS prop, + argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 99999), equals(index, 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 99999), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps, + prop + HAVING ifNull(equals(steps, max(max_steps)), isNull(steps) + and 
isNull(max(max_steps)))) + WHERE and(ifNull(in(steps, [2, 3]), 0), ifNull(equals(arrayFlatten(array(prop)), arrayFlatten(array('finance'))), isNull(arrayFlatten(array(prop))) + and isNull(arrayFlatten(array('finance'))))) + ORDER BY aggregation_target ASC) AS source + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 99999) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0, + allow_experimental_analyzer=1 ''' # --- -# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.6 +# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.3 ''' - - SELECT aggregation_target AS actor_id + SELECT persons.id, + persons.id AS id FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - avg(step_2_conversion_time) step_2_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , - prop + (SELECT aggregation_target AS actor_id FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time, - step_2_conversion_time , - prop + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + prop AS prop, + min(step_1_conversion_time) AS step_1_conversion_time, + min(step_2_conversion_time) AS step_2_conversion_time FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY - AND latest_1 <= latest_2 - AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - if(isNotNull(latest_2) - AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time, - prop + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + max(steps) OVER (PARTITION BY aggregation_target, + prop) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + step_2_conversion_time AS step_2_conversion_time, + prop AS prop FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - prop + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, + prop AS prop, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_0, 'UTC'), 
toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1)) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_1, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, + prop AS prop FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - if(latest_2 < latest_1, NULL, latest_2) as latest_2 , - prop + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + prop AS prop FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2, + prop AS prop FROM - (SELECT *, - prop_vals as prop + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + prop AS prop FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'sign up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'play movie', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'buy', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = 
pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) ))))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2, 3] - AND arrayFlatten(array(prop)) = arrayFlatten(array('technology')) - ORDER BY aggregation_target - LIMIT 100 - OFFSET 0 SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000 - ''' -# --- -# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.7 - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, + prop_basic AS prop_basic, + prop, + prop_vals AS prop_vals, + prop_vals AS prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'play movie'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(equals(e.event, 'buy'), 1, 0) AS step_2, + if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, + ifNull(toString(e__group_0.properties___industry), '') AS prop_basic, + prop_basic AS prop, + argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 99999), equals(index, 0)) + GROUP BY groups.group_type_index, + 
groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 99999), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps, + prop + HAVING ifNull(equals(steps, max(max_steps)), isNull(steps) + and isNull(max(max_steps)))) + WHERE and(ifNull(in(steps, [1, 2, 3]), 0), ifNull(equals(arrayFlatten(array(prop)), arrayFlatten(array('technology'))), isNull(arrayFlatten(array(prop))) + and isNull(arrayFlatten(array('technology'))))) + ORDER BY aggregation_target ASC) AS source + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 99999) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0, + allow_experimental_analyzer=1 ''' # --- -# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.8 +# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.4 ''' - - SELECT aggregation_target AS actor_id + SELECT persons.id, + persons.id AS id FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - avg(step_2_conversion_time) step_2_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , - prop + (SELECT aggregation_target AS actor_id FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time, - step_2_conversion_time , - prop + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + prop AS prop, + min(step_1_conversion_time) AS step_1_conversion_time, + min(step_2_conversion_time) AS step_2_conversion_time FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY - AND latest_1 <= latest_2 - AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - if(isNotNull(latest_2) - AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time, - prop + (SELECT aggregation_target AS aggregation_target, + steps AS steps, + prop AS prop, + max(steps) OVER (PARTITION BY aggregation_target, + prop) AS max_steps, + step_1_conversion_time AS step_1_conversion_time, + step_2_conversion_time AS step_2_conversion_time, + prop AS prop FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - 
step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - prop + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, + prop AS prop, + if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1)) AS steps, + if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, + if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_1, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, + prop AS prop FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - if(latest_2 < latest_1, NULL, latest_2) as latest_2 , - prop + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + prop AS prop FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2, + prop AS prop FROM - (SELECT *, - prop_vals as prop + (SELECT aggregation_target AS aggregation_target, + timestamp AS timestamp, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + min(latest_1) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, + step_2 AS step_2, + min(latest_2) OVER (PARTITION BY aggregation_target, + prop + ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, + prop AS prop FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'sign up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'play movie', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'buy', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as 
prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) ))))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - WHERE steps IN [2, 3] - AND arrayFlatten(array(prop)) = arrayFlatten(array('technology')) - ORDER BY aggregation_target - LIMIT 100 - OFFSET 0 SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000 + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + step_0 AS step_0, + latest_0 AS latest_0, + step_1 AS step_1, + latest_1 AS latest_1, + step_2 AS step_2, + latest_2 AS latest_2, + prop_basic AS prop_basic, + prop, + prop_vals AS prop_vals, + prop_vals AS prop + FROM + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, + if(equals(e.event, 'play movie'), 1, 0) AS step_1, + if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, + if(equals(e.event, 'buy'), 1, 0) AS step_2, + if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, + ifNull(toString(e__group_0.properties___industry), '') AS prop_basic, + prop_basic AS prop, + argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 99999), equals(index, 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 99999), 
and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))))) + WHERE ifNull(equals(step_0, 1), 0))) + GROUP BY aggregation_target, + steps, + prop + HAVING ifNull(equals(steps, max(max_steps)), isNull(steps) + and isNull(max(max_steps)))) + WHERE and(ifNull(in(steps, [2, 3]), 0), ifNull(equals(arrayFlatten(array(prop)), arrayFlatten(array('technology'))), isNull(arrayFlatten(array(prop))) + and isNull(arrayFlatten(array('technology'))))) + ORDER BY aggregation_target ASC) AS source + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 99999) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0, + allow_experimental_analyzer=1 ''' # --- diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_breakdowns_by_current_url.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_breakdowns_by_current_url.ambr index 5bb342e37abed..e01e48e4c1f7f 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_breakdowns_by_current_url.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_breakdowns_by_current_url.ambr @@ -86,7 +86,7 @@ and isNull(max(max_steps)))) GROUP BY prop) GROUP BY final_prop - LIMIT 100 SETTINGS readonly=2, + LIMIT 101 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1, format_csv_allow_double_quotes=0, @@ -183,7 +183,7 @@ and isNull(max(max_steps)))) GROUP BY prop) GROUP BY final_prop - LIMIT 100 SETTINGS readonly=2, + LIMIT 101 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1, format_csv_allow_double_quotes=0, diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation_actors.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation_actors.ambr index 044feee2b0a90..ca6d26d135828 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation_actors.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation_actors.ambr @@ -642,385 +642,3 @@ max_bytes_before_external_group_by=0 ''' # --- -# name: TestFunnelCorrelationActors.test_strict_funnel_correlation_with_recordings - ''' - SELECT persons.id, - persons.id AS id, - source.matching_events AS matching_events - FROM - (SELECT funnel_actors.actor_id AS actor_id, - any(funnel_actors.matching_events) AS matching_events - FROM - (SELECT aggregation_target AS actor_id, - final_matching_events AS matching_events, - timestamp AS timestamp, - steps AS steps, - final_timestamp AS final_timestamp, - first_timestamp AS first_timestamp - FROM - (SELECT groupArray(10)(step_0_matching_event) AS step_0_matching_events, - 
groupArray(10)(step_1_matching_event) AS step_1_matching_events, - groupArray(10)(final_matching_event) AS final_matching_events, - aggregation_target AS aggregation_target, - steps AS steps, - argMax(latest_0, steps) AS timestamp, - argMax(latest_1, steps) AS final_timestamp, - argMax(latest_0, steps) AS first_timestamp, - avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, - median(step_1_conversion_time) AS step_1_median_conversion_time_inner - FROM - (SELECT tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, - tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, - if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, - aggregation_target AS aggregation_target, - steps AS steps, - max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, - step_1_conversion_time AS step_1_conversion_time, - latest_0 AS latest_0, - latest_1 AS latest_1, - latest_0 AS latest_0 - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - uuid_0 AS uuid_0, - `$session_id_0` AS `$session_id_0`, - `$window_id_0` AS `$window_id_0`, - step_1 AS step_1, - latest_1 AS latest_1, - uuid_1 AS uuid_1, - `$session_id_1` AS `$session_id_1`, - `$window_id_1` AS `$window_id_1`, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, - tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, - tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, - if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - uuid_0 AS uuid_0, - `$session_id_0` AS `$session_id_0`, - `$window_id_0` AS `$window_id_0`, - step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS latest_1, - min(uuid_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS uuid_1, - min(`$session_id_1`) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS `$session_id_1`, - min(`$window_id_1`) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS `$window_id_1` - FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, - e.uuid AS uuid, - if(equals(e.event, '$pageview'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, - if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, - if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, - if(equals(e.event, 'insight analyzed'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, - if(ifNull(equals(step_1, 1), 0), 
uuid, NULL) AS uuid_1, - if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, - if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` - FROM events AS e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 2) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - LEFT JOIN - (SELECT person.id AS id, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'foo'), ''), 'null'), '^"|"$', '') AS properties___foo - FROM person - WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__person ON equals(if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id), e__person.id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), ifNull(equals(e__person.properties___foo, 'bar'), 0)))) - WHERE ifNull(equals(step_0, 1), 0))) - GROUP BY aggregation_target, - steps - HAVING ifNull(equals(steps, max(max_steps)), isNull(steps) - and isNull(max(max_steps)))) - WHERE ifNull(in(steps, [1, 2]), 0) - ORDER BY aggregation_target ASC) AS funnel_actors - WHERE ifNull(equals(funnel_actors.steps, 2), 0) - GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source - INNER JOIN - (SELECT person.id AS id - FROM person - WHERE and(equals(person.team_id, 2), in(id, - (SELECT source.actor_id AS actor_id - FROM - (SELECT funnel_actors.actor_id AS actor_id, any(funnel_actors.matching_events) AS matching_events - FROM - (SELECT aggregation_target AS actor_id, final_matching_events AS matching_events, timestamp AS timestamp, steps AS steps, final_timestamp AS final_timestamp, first_timestamp AS first_timestamp - FROM - (SELECT groupArray(10)(step_0_matching_event) AS step_0_matching_events, groupArray(10)(step_1_matching_event) AS step_1_matching_events, groupArray(10)(final_matching_event) AS final_matching_events, aggregation_target AS aggregation_target, steps AS steps, argMax(latest_0, steps) AS timestamp, argMax(latest_1, steps) AS final_timestamp, argMax(latest_0, steps) AS first_timestamp, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner - FROM - (SELECT tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, aggregation_target AS aggregation_target, steps AS steps, 
max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, latest_0 AS latest_0, latest_1 AS latest_1, latest_0 AS latest_0 - FROM - (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event - FROM - (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS latest_1, min(uuid_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS uuid_1, min(`$session_id_1`) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS `$session_id_1`, min(`$window_id_1`) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS `$window_id_1` - FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, e.uuid AS uuid, if(equals(e.event, '$pageview'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'insight analyzed'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` - FROM events AS e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 2) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - LEFT JOIN - (SELECT person.id AS id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'foo'), ''), 'null'), '^"|"$', '') AS properties___foo - FROM person - WHERE 
and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__person ON equals(if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id), e__person.id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), ifNull(equals(e__person.properties___foo, 'bar'), 0)))) - WHERE ifNull(equals(step_0, 1), 0))) - GROUP BY aggregation_target, steps - HAVING ifNull(equals(steps, max(max_steps)), isNull(steps) - and isNull(max(max_steps)))) - WHERE ifNull(in(steps, [1, 2]), 0) - ORDER BY aggregation_target ASC) AS funnel_actors - WHERE ifNull(equals(funnel_actors.steps, 2), 0) - GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source))) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC - LIMIT 101 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 - ''' -# --- -# name: TestFunnelCorrelationActors.test_strict_funnel_correlation_with_recordings.1 - ''' - SELECT DISTINCT session_replay_events.session_id AS session_id - FROM session_replay_events - WHERE and(equals(session_replay_events.team_id, 2), ifNull(greaterOrEquals(toTimeZone(session_replay_events.min_first_timestamp, 'UTC'), minus(toDateTime64('2021-01-02 00:00:00.000000', 6, 'UTC'), toIntervalDay(21))), 0), in(session_replay_events.session_id, ['s2'])) - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 - ''' -# --- -# name: TestFunnelCorrelationActors.test_strict_funnel_correlation_with_recordings.2 - ''' - SELECT persons.id, - persons.id AS id, - source.matching_events AS matching_events - FROM - (SELECT funnel_actors.actor_id AS actor_id, - any(funnel_actors.matching_events) AS matching_events - FROM - (SELECT aggregation_target AS actor_id, - final_matching_events AS matching_events, - timestamp AS timestamp, - steps AS steps, - final_timestamp AS final_timestamp, - first_timestamp AS first_timestamp - FROM - (SELECT groupArray(10)(step_0_matching_event) AS step_0_matching_events, - groupArray(10)(step_1_matching_event) AS step_1_matching_events, - groupArray(10)(final_matching_event) AS final_matching_events, - aggregation_target AS aggregation_target, - steps AS steps, - argMax(latest_0, steps) AS timestamp, - argMax(latest_1, steps) AS final_timestamp, - argMax(latest_0, steps) AS first_timestamp, - avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, - 
median(step_1_conversion_time) AS step_1_median_conversion_time_inner - FROM - (SELECT tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, - tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, - if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, - aggregation_target AS aggregation_target, - steps AS steps, - max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, - step_1_conversion_time AS step_1_conversion_time, - latest_0 AS latest_0, - latest_1 AS latest_1, - latest_0 AS latest_0 - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - uuid_0 AS uuid_0, - `$session_id_0` AS `$session_id_0`, - `$window_id_0` AS `$window_id_0`, - step_1 AS step_1, - latest_1 AS latest_1, - uuid_1 AS uuid_1, - `$session_id_1` AS `$session_id_1`, - `$window_id_1` AS `$window_id_1`, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, - tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, - tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, - if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - uuid_0 AS uuid_0, - `$session_id_0` AS `$session_id_0`, - `$window_id_0` AS `$window_id_0`, - step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS latest_1, - min(uuid_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS uuid_1, - min(`$session_id_1`) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS `$session_id_1`, - min(`$window_id_1`) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS `$window_id_1` - FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, - e.uuid AS uuid, - if(equals(e.event, '$pageview'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, - if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, - if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, - if(equals(e.event, 'insight analyzed'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, - if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, - if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, - if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` - FROM events AS e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM 
person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 2) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - LEFT JOIN - (SELECT person.id AS id, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'foo'), ''), 'null'), '^"|"$', '') AS properties___foo - FROM person - WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__person ON equals(if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id), e__person.id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), ifNull(equals(e__person.properties___foo, 'bar'), 0)))) - WHERE ifNull(equals(step_0, 1), 0))) - GROUP BY aggregation_target, - steps - HAVING ifNull(equals(steps, max(max_steps)), isNull(steps) - and isNull(max(max_steps)))) - WHERE ifNull(in(steps, [1, 2]), 0) - ORDER BY aggregation_target ASC) AS funnel_actors - WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) - GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source - INNER JOIN - (SELECT person.id AS id - FROM person - WHERE and(equals(person.team_id, 2), in(id, - (SELECT source.actor_id AS actor_id - FROM - (SELECT funnel_actors.actor_id AS actor_id, any(funnel_actors.matching_events) AS matching_events - FROM - (SELECT aggregation_target AS actor_id, final_matching_events AS matching_events, timestamp AS timestamp, steps AS steps, final_timestamp AS final_timestamp, first_timestamp AS first_timestamp - FROM - (SELECT groupArray(10)(step_0_matching_event) AS step_0_matching_events, groupArray(10)(step_1_matching_event) AS step_1_matching_events, groupArray(10)(final_matching_event) AS final_matching_events, aggregation_target AS aggregation_target, steps AS steps, argMax(latest_0, steps) AS timestamp, argMax(latest_1, steps) AS final_timestamp, argMax(latest_0, steps) AS first_timestamp, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner - FROM - (SELECT tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, aggregation_target AS aggregation_target, steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, latest_0 AS latest_0, latest_1 AS latest_1, latest_0 AS latest_0 - FROM - (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, 
step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event - FROM - (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS latest_1, min(uuid_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS uuid_1, min(`$session_id_1`) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS `$session_id_1`, min(`$window_id_1`) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS `$window_id_1` - FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, e.uuid AS uuid, if(equals(e.event, '$pageview'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'insight analyzed'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1` - FROM events AS e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 2) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - LEFT JOIN - (SELECT person.id AS id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'foo'), ''), 'null'), '^"|"$', '') AS properties___foo - FROM person - WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) 
SETTINGS optimize_aggregation_in_order=1) AS e__person ON equals(if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id), e__person.id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), ifNull(equals(e__person.properties___foo, 'bar'), 0)))) - WHERE ifNull(equals(step_0, 1), 0))) - GROUP BY aggregation_target, steps - HAVING ifNull(equals(steps, max(max_steps)), isNull(steps) - and isNull(max(max_steps)))) - WHERE ifNull(in(steps, [1, 2]), 0) - ORDER BY aggregation_target ASC) AS funnel_actors - WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) - GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source))) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC - LIMIT 101 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 - ''' -# --- -# name: TestFunnelCorrelationActors.test_strict_funnel_correlation_with_recordings.3 - ''' - SELECT DISTINCT session_replay_events.session_id AS session_id - FROM session_replay_events - WHERE and(equals(session_replay_events.team_id, 2), ifNull(greaterOrEquals(toTimeZone(session_replay_events.min_first_timestamp, 'UTC'), minus(toDateTime64('2021-01-02 00:00:00.000000', 6, 'UTC'), toIntervalDay(21))), 0), in(session_replay_events.session_id, ['s3'])) - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 - ''' -# --- diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation_actors_udf.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation_actors_udf.ambr index 8c6788fe66107..3f3fd82910546 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation_actors_udf.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation_actors_udf.ambr @@ -397,205 +397,3 @@ max_bytes_before_external_group_by=0 ''' # --- -# name: TestFunnelCorrelationsActorsUDF.test_strict_funnel_correlation_with_recordings - ''' - SELECT sum(step_1) AS step_1, - sum(step_2) AS step_2, - arrayMap(x -> if(isNaN(x), NULL, x), [avgArrayOrNull(step_1_conversion_times)])[1] AS step_1_average_conversion_time, - arrayMap(x -> if(isNaN(x), NULL, x), [medianArrayOrNull(step_1_conversion_times)])[1] AS step_1_median_conversion_time, - groupArray(row_number) AS row_number, - final_prop AS final_prop - FROM - (SELECT countIf(ifNull(ifNull(equals(step_reached, 0), 0), 0)) AS step_1, - countIf(ifNull(ifNull(equals(step_reached, 1), 0), 0)) AS step_2, - groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times, - rowNumberInBlock() AS row_number, - breakdown AS final_prop - FROM - (SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, [], 
arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1)])))) AS events_array, - arrayJoin(aggregate_funnel_array(2, 1209600, 'first_touch', 'strict', [[]], arrayFilter((x, x2) -> not(and(empty(x.4), empty(x2.4), ifNull(equals(x.3, x2.3), isNull(x.3) - and isNull(x2.3)), ifNull(greater(x.1, x2.1), 0))), events_array, arrayRotateRight(events_array, 1)))) AS af_tuple, - af_tuple.1 AS step_reached, - plus(af_tuple.1, 1) AS steps, - af_tuple.2 AS breakdown, - af_tuple.3 AS timings, - aggregation_target AS aggregation_target - FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, - e.uuid AS uuid, - e.`$session_id` AS `$session_id`, - e.`$window_id` AS `$window_id`, - if(equals(e.event, '$pageview'), 1, 0) AS step_0, - if(equals(e.event, 'insight analyzed'), 1, 0) AS step_1 - FROM events AS e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 2) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))))) - GROUP BY aggregation_target - HAVING ifNull(greaterOrEquals(step_reached, 0), 0)) - GROUP BY breakdown - ORDER BY step_2 DESC, step_1 DESC) - GROUP BY final_prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 - ''' -# --- -# name: TestFunnelCorrelationsActorsUDF.test_strict_funnel_correlation_with_recordings.1 - ''' - SELECT DISTINCT session_replay_events.session_id AS session_id - FROM session_replay_events - WHERE and(equals(session_replay_events.team_id, 2), ifNull(greaterOrEquals(toTimeZone(session_replay_events.min_first_timestamp, 'UTC'), minus(toDateTime64('2021-01-02 00:00:00.000000', 6, 'UTC'), toIntervalDay(21))), 0), in(session_replay_events.session_id, ['s2'])) - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 - ''' -# --- -# name: TestFunnelCorrelationsActorsUDF.test_strict_funnel_correlation_with_recordings.2 - ''' - SELECT persons.id, - persons.id AS id, - source.matching_events AS matching_events - FROM - (SELECT funnel_actors.actor_id AS actor_id, - any(funnel_actors.matching_events) AS matching_events - FROM - (SELECT aggregation_target AS actor_id, - matched_events_array[plus(step_reached, 1)] AS matching_events, - (matched_events_array[1][1]).1 AS timestamp, - nullIf((matched_events_array[2][1]).1, 0) AS final_timestamp, - (matched_events_array[1][1]).1 AS first_timestamp, - steps AS steps, - final_timestamp, - first_timestamp - FROM - (SELECT arraySort(t -> t.1, 
groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, [], arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1)])))) AS events_array, - arrayJoin(aggregate_funnel_array_v1(2, 1209600, 'first_touch', 'strict', [[]], arrayFilter((x, x2) -> not(and(empty(x.4), empty(x2.4), ifNull(equals(x.3, x2.3), isNull(x.3) - and isNull(x2.3)), ifNull(greater(x.1, x2.1), 0))), events_array, arrayRotateRight(events_array, 1)))) AS af_tuple, - arrayJoin(aggregate_funnel_array(2, 1209600, 'first_touch', 'strict', [[]], arrayFilter((x, x2) -> not(and(empty(x.4), empty(x2.4), ifNull(equals(x.3, x2.3), isNull(x.3) - and isNull(x2.3)), ifNull(greater(x.1, x2.1), 0))), events_array, arrayRotateRight(events_array, 1)))) AS af_tuple, - af_tuple.1 AS step_reached, - plus(af_tuple.1, 1) AS steps, - af_tuple.2 AS breakdown, - af_tuple.3 AS timings, - af_tuple.4 AS matched_event_uuids_array_array, - groupArray(tuple(timestamp, uuid, `$session_id`, `$window_id`)) AS user_events, - mapFromArrays(arrayMap(x -> x.2, user_events), user_events) AS user_events_map, - arrayMap(matched_event_uuids_array -> arrayMap(event_uuid -> user_events_map[event_uuid], arrayDistinct(matched_event_uuids_array)), matched_event_uuids_array_array) AS matched_events_array, - aggregation_target AS aggregation_target - FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, - e.uuid AS uuid, - e.`$session_id` AS `$session_id`, - e.`$window_id` AS `$window_id`, - if(equals(e.event, '$pageview'), 1, 0) AS step_0, - if(equals(e.event, 'insight analyzed'), 1, 0) AS step_1 - FROM events AS e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 2) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - LEFT JOIN - (SELECT person.id AS id, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'foo'), ''), 'null'), '^"|"$', '') AS properties___foo - FROM person - WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__person ON equals(if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id), e__person.id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), ifNull(equals(e__person.properties___foo, 'bar'), 0))) - GROUP BY aggregation_target - HAVING ifNull(greaterOrEquals(step_reached, 0), 0)) - WHERE ifNull(greaterOrEquals(step_reached, 0), 0) - ORDER BY aggregation_target ASC) AS funnel_actors - WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) - GROUP BY 
funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source - INNER JOIN - (SELECT person.id AS id - FROM person - WHERE and(equals(person.team_id, 2), in(id, - (SELECT source.actor_id AS actor_id - FROM - (SELECT funnel_actors.actor_id AS actor_id, any(funnel_actors.matching_events) AS matching_events - FROM - (SELECT aggregation_target AS actor_id, matched_events_array[plus(step_reached, 1)] AS matching_events, (matched_events_array[1][1]).1 AS timestamp, nullIf((matched_events_array[2][1]).1, 0) AS final_timestamp, (matched_events_array[1][1]).1 AS first_timestamp, steps AS steps, final_timestamp, first_timestamp - FROM - (SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, [], arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1)])))) AS events_array, arrayJoin(aggregate_funnel_array_v1(2, 1209600, 'first_touch', 'strict', [[]], arrayFilter((x, x2) -> not(and(empty(x.4), empty(x2.4), ifNull(equals(x.3, x2.3), isNull(x.3) - and isNull(x2.3)), ifNull(greater(x.1, x2.1), 0))), events_array, arrayRotateRight(events_array, 1)))) AS af_tuple, af_tuple.1 AS step_reached, plus(af_tuple.1, 1) AS steps, af_tuple.2 AS breakdown, af_tuple.3 AS timings, af_tuple.4 AS matched_event_uuids_array_array, groupArray(tuple(timestamp, uuid, `$session_id`, `$window_id`)) AS user_events, mapFromArrays(arrayMap(x -> x.2, user_events), user_events) AS user_events_map, arrayMap(matched_event_uuids_array -> arrayMap(event_uuid -> user_events_map[event_uuid], arrayDistinct(matched_event_uuids_array)), matched_event_uuids_array_array) AS matched_events_array, aggregation_target AS aggregation_target - (SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, [], arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1)])))) AS events_array, arrayJoin(aggregate_funnel_array(2, 1209600, 'first_touch', 'strict', [[]], arrayFilter((x, x2) -> not(and(empty(x.4), empty(x2.4), ifNull(equals(x.3, x2.3), isNull(x.3) - and isNull(x2.3)), ifNull(greater(x.1, x2.1), 0))), events_array, arrayRotateRight(events_array, 1)))) AS af_tuple, af_tuple.1 AS step_reached, plus(af_tuple.1, 1) AS steps, af_tuple.2 AS breakdown, af_tuple.3 AS timings, af_tuple.4 AS matched_event_uuids_array_array, groupArray(tuple(timestamp, uuid, `$session_id`, `$window_id`)) AS user_events, mapFromArrays(arrayMap(x -> x.2, user_events), user_events) AS user_events_map, arrayMap(matched_event_uuids_array -> arrayMap(event_uuid -> user_events_map[event_uuid], arrayDistinct(matched_event_uuids_array)), matched_event_uuids_array_array) AS matched_events_array, aggregation_target AS aggregation_target - FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, e.uuid AS uuid, e.`$session_id` AS `$session_id`, e.`$window_id` AS `$window_id`, if(equals(e.event, '$pageview'), 1, 0) AS step_0, if(equals(e.event, 'insight analyzed'), 1, 0) AS step_1 - FROM events AS e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 2) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS 
optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - LEFT JOIN - (SELECT person.id AS id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'foo'), ''), 'null'), '^"|"$', '') AS properties___foo - FROM person - WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__person ON equals(if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id), e__person.id) - WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), ifNull(equals(e__person.properties___foo, 'bar'), 0))) - GROUP BY aggregation_target - HAVING ifNull(greaterOrEquals(step_reached, 0), 0)) - WHERE ifNull(greaterOrEquals(step_reached, 0), 0) - ORDER BY aggregation_target ASC) AS funnel_actors - WHERE ifNull(notEquals(funnel_actors.steps, 2), 1) - GROUP BY funnel_actors.actor_id - ORDER BY funnel_actors.actor_id ASC) AS source))) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC - LIMIT 101 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 - ''' -# --- -# name: TestFunnelCorrelationsActorsUDF.test_strict_funnel_correlation_with_recordings.3 - ''' - SELECT DISTINCT session_replay_events.session_id AS session_id - FROM session_replay_events - WHERE and(equals(session_replay_events.team_id, 2), ifNull(greaterOrEquals(toTimeZone(session_replay_events.min_first_timestamp, 'UTC'), minus(toDateTime64('2021-01-02 00:00:00.000000', 6, 'UTC'), toIntervalDay(21))), 0), in(session_replay_events.session_id, ['s3'])) - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 - ''' -# --- diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict.ambr index 6cd3cbbd8132e..ff107d7eeb376 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict.ambr @@ -85,14 +85,14 @@ and isNull(max(max_steps)))) GROUP BY prop) GROUP BY final_prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 + LIMIT 26 SETTINGS 
readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=23622320128, + allow_experimental_analyzer=1 ''' # --- # name: TestFunnelStrictStepsBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step @@ -188,14 +188,14 @@ and isNull(max(max_steps)))) GROUP BY prop) GROUP BY final_prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 + LIMIT 26 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=23622320128, + allow_experimental_analyzer=1 ''' # --- # name: TestFunnelStrictStepsBreakdown.test_funnel_step_multiple_breakdown_snapshot @@ -284,14 +284,14 @@ and isNull(max(max_steps)))) GROUP BY prop) GROUP BY final_prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 + LIMIT 26 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=23622320128, + allow_experimental_analyzer=1 ''' # --- # name: TestStrictFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events @@ -399,14 +399,14 @@ and isNull(max(max_steps)))) GROUP BY prop) GROUP BY final_prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 + LIMIT 26 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=23622320128, + allow_experimental_analyzer=1 ''' # --- # name: TestStrictFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events_poe_v2 @@ -514,14 +514,14 @@ and isNull(max(max_steps)))) GROUP BY prop) GROUP BY final_prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 + LIMIT 26 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=23622320128, + allow_experimental_analyzer=1 ''' # --- # name: TestStrictFunnelGroupBreakdown.test_funnel_breakdown_group @@ -636,14 +636,14 @@ and isNull(max(max_steps)))) GROUP BY prop) GROUP BY final_prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - 
max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 + LIMIT 26 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=23622320128, + allow_experimental_analyzer=1 ''' # --- # name: TestStrictFunnelGroupBreakdown.test_funnel_breakdown_group.1 diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict_udf.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict_udf.ambr index a45664788b8e3..8aeadef465f63 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict_udf.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict_udf.ambr @@ -11,7 +11,7 @@ (SELECT countIf(ifNull(ifNull(equals(step_reached, 0), 0), 0)) AS step_1, countIf(ifNull(ifNull(equals(step_reached, 1), 0), 0)) AS step_2, groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times, - rowNumberInBlock() AS row_number, + rowNumberInAllBlocks() AS row_number, if(ifNull(less(row_number, 25), 0), breakdown, ['Other']) AS final_prop FROM (SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, prop, arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1)])))) AS events_array, @@ -62,14 +62,14 @@ GROUP BY breakdown ORDER BY step_2 DESC, step_1 DESC) GROUP BY final_prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 + LIMIT 26 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=23622320128, + allow_experimental_analyzer=1 ''' # --- # name: TestFunnelStrictStepsBreakdownUDF.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step @@ -84,7 +84,7 @@ (SELECT countIf(ifNull(ifNull(equals(step_reached, 0), 0), 0)) AS step_1, countIf(ifNull(ifNull(equals(step_reached, 1), 0), 0)) AS step_2, groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times, - rowNumberInBlock() AS row_number, + rowNumberInAllBlocks() AS row_number, if(ifNull(less(row_number, 25), 0), breakdown, ['Other']) AS final_prop FROM (SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, prop, arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1)])))) AS events_array, @@ -142,14 +142,14 @@ GROUP BY breakdown ORDER BY step_2 DESC, step_1 DESC) GROUP BY final_prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 + LIMIT 26 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=23622320128, + allow_experimental_analyzer=1 ''' # --- # 
name: TestFunnelStrictStepsBreakdownUDF.test_funnel_step_multiple_breakdown_snapshot @@ -164,7 +164,7 @@ (SELECT countIf(ifNull(ifNull(equals(step_reached, 0), 0), 0)) AS step_1, countIf(ifNull(ifNull(equals(step_reached, 1), 0), 0)) AS step_2, groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times, - rowNumberInBlock() AS row_number, + rowNumberInAllBlocks() AS row_number, if(ifNull(less(row_number, 25), 0), breakdown, ['Other']) AS final_prop FROM (SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, prop, arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1)])))) AS events_array, @@ -215,14 +215,14 @@ GROUP BY breakdown ORDER BY step_2 DESC, step_1 DESC) GROUP BY final_prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 + LIMIT 26 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=23622320128, + allow_experimental_analyzer=1 ''' # --- # name: TestStrictFunnelGroupBreakdownUDF.test_funnel_aggregate_by_groups_breakdown_group_person_on_events @@ -242,7 +242,7 @@ countIf(ifNull(ifNull(equals(step_reached, 2), 0), 0)) AS step_3, groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times, groupArrayIf(timings[2], ifNull(greater(timings[2], 0), 0)) AS step_2_conversion_times, - rowNumberInBlock() AS row_number, + rowNumberInAllBlocks() AS row_number, if(ifNull(less(row_number, 25), 0), breakdown, 'Other') AS final_prop FROM (SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, prop, arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1), multiply(3, step_2)])))) AS events_array, @@ -296,14 +296,14 @@ GROUP BY breakdown ORDER BY step_3 DESC, step_2 DESC, step_1 DESC) GROUP BY final_prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 + LIMIT 26 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=23622320128, + allow_experimental_analyzer=1 ''' # --- # name: TestStrictFunnelGroupBreakdownUDF.test_funnel_aggregate_by_groups_breakdown_group_person_on_events_poe_v2 @@ -323,7 +323,7 @@ countIf(ifNull(ifNull(equals(step_reached, 2), 0), 0)) AS step_3, groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times, groupArrayIf(timings[2], ifNull(greater(timings[2], 0), 0)) AS step_2_conversion_times, - rowNumberInBlock() AS row_number, + rowNumberInAllBlocks() AS row_number, if(ifNull(less(row_number, 25), 0), breakdown, 'Other') AS final_prop FROM (SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, prop, arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1), multiply(3, step_2)])))) AS events_array, @@ -377,14 +377,14 @@ 
GROUP BY breakdown ORDER BY step_3 DESC, step_2 DESC, step_1 DESC) GROUP BY final_prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 + LIMIT 26 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=23622320128, + allow_experimental_analyzer=1 ''' # --- # name: TestStrictFunnelGroupBreakdownUDF.test_funnel_breakdown_group @@ -404,7 +404,7 @@ countIf(ifNull(ifNull(equals(step_reached, 2), 0), 0)) AS step_3, groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times, groupArrayIf(timings[2], ifNull(greater(timings[2], 0), 0)) AS step_2_conversion_times, - rowNumberInBlock() AS row_number, + rowNumberInAllBlocks() AS row_number, if(ifNull(less(row_number, 25), 0), breakdown, 'Other') AS final_prop FROM (SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, prop, arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1), multiply(3, step_2)])))) AS events_array, @@ -465,14 +465,14 @@ GROUP BY breakdown ORDER BY step_3 DESC, step_2 DESC, step_1 DESC) GROUP BY final_prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 + LIMIT 26 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=23622320128, + allow_experimental_analyzer=1 ''' # --- # name: TestStrictFunnelGroupBreakdownUDF.test_funnel_breakdown_group.1 diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_trends_persons.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_trends_persons.ambr deleted file mode 100644 index cdfb24412bf92..0000000000000 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_trends_persons.ambr +++ /dev/null @@ -1,520 +0,0 @@ -# serializer version: 1 -# name: TestFunnelTrendsPersons.test_funnel_trend_persons_returns_recordings - ''' - SELECT persons.id, - persons.id AS id, - source.matching_events AS matching_events - FROM - (SELECT aggregation_target AS actor_id, - step_1_matching_events AS matching_events - FROM - (SELECT aggregation_target AS aggregation_target, - toStartOfDay(timestamp) AS entrance_period_start, - max(steps) AS steps_completed, - groupArray(10)(step_0_matching_event) AS step_0_matching_events, - groupArray(10)(step_1_matching_event) AS step_1_matching_events, - groupArray(10)(step_2_matching_event) AS step_2_matching_events, - groupArray(10)(final_matching_event) AS final_matching_events - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - uuid_0 AS uuid_0, - `$session_id_0` AS `$session_id_0`, - `$window_id_0` AS `$window_id_0`, - step_1 AS step_1, - latest_1 AS latest_1, - uuid_1 AS uuid_1, - `$session_id_1` AS `$session_id_1`, - `$window_id_1` AS `$window_id_1`, - step_2 
AS step_2, - latest_2 AS latest_2, - uuid_2 AS uuid_2, - `$session_id_2` AS `$session_id_2`, - `$window_id_2` AS `$window_id_2`, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1)) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, - if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_1, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, - tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, - tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, - tuple(latest_2, uuid_2, `$session_id_2`, `$window_id_2`) AS step_2_matching_event, - if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, if(isNull(latest_2), step_1_matching_event, step_2_matching_event))) AS final_matching_event - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - uuid_0 AS uuid_0, - `$session_id_0` AS `$session_id_0`, - `$window_id_0` AS `$window_id_0`, - step_1 AS step_1, - latest_1 AS latest_1, - uuid_1 AS uuid_1, - `$session_id_1` AS `$session_id_1`, - `$window_id_1` AS `$window_id_1`, - step_2 AS step_2, - min(latest_2) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, - last_value(uuid_2) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_2, - last_value(`$session_id_2`) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_2`, - last_value(`$window_id_2`) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_2` - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - uuid_0 AS uuid_0, - `$session_id_0` AS `$session_id_0`, - `$window_id_0` AS `$window_id_0`, - step_1 AS step_1, - latest_1 AS latest_1, - uuid_1 AS uuid_1, - `$session_id_1` AS `$session_id_1`, - `$window_id_1` AS `$window_id_1`, - step_2 AS step_2, - if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2, - if(ifNull(less(latest_2, latest_1), 0), NULL, uuid_2) AS uuid_2, - if(ifNull(less(latest_2, latest_1), 0), NULL, `$session_id_2`) AS `$session_id_2`, - if(ifNull(less(latest_2, latest_1), 0), NULL, `$window_id_2`) AS `$window_id_2` - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - uuid_0 AS uuid_0, - `$session_id_0` AS `$session_id_0`, - `$window_id_0` AS `$window_id_0`, - step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, - last_value(uuid_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC 
ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, - last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, - last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`, - step_2 AS step_2, - min(latest_2) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, - last_value(uuid_2) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_2, - last_value(`$session_id_2`) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_2`, - last_value(`$window_id_2`) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_2` - FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, - e.uuid AS uuid, - if(equals(e.event, 'step one'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, - if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, - if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, - if(equals(e.event, 'step two'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, - if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, - if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, - if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`, - if(equals(e.event, 'step three'), 1, 0) AS step_2, - if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, - if(ifNull(equals(step_2, 1), 0), uuid, NULL) AS uuid_2, - if(ifNull(equals(step_2, 1), 0), e.`$session_id`, NULL) AS `$session_id_2`, - if(ifNull(equals(step_2, 1), 0), e.`$window_id`, NULL) AS `$window_id_2` - FROM events AS e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 2) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-07 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('step one', 'step three', 'step two'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))) - WHERE ifNull(equals(step_0, 1), 0)) - WHERE ifNull(equals(entrance_period_start, toDateTime64('2021-05-01 00:00:00.000000', 6, 'UTC')), 0) - GROUP BY aggregation_target, - entrance_period_start) - WHERE ifNull(greaterOrEquals(steps_completed, 2), 0) - ORDER BY aggregation_target ASC) AS source - INNER JOIN - (SELECT person.id AS id - FROM person - WHERE equals(person.team_id, 2) - GROUP 
BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC - LIMIT 101 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=1 - ''' -# --- -# name: TestFunnelTrendsPersons.test_funnel_trend_persons_returns_recordings.1 - ''' - SELECT DISTINCT session_replay_events.session_id AS session_id - FROM session_replay_events - WHERE and(equals(session_replay_events.team_id, 2), ifNull(greaterOrEquals(toTimeZone(session_replay_events.min_first_timestamp, 'UTC'), minus(toDateTime64('2021-05-01 00:00:00.000000', 6, 'UTC'), toIntervalDay(21))), 0), in(session_replay_events.session_id, ['s1b'])) - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 - ''' -# --- -# name: TestFunnelTrendsPersons.test_funnel_trend_persons_with_drop_off - ''' - SELECT persons.id, - persons.id AS id, - source.matching_events AS matching_events - FROM - (SELECT aggregation_target AS actor_id, - final_matching_events AS matching_events - FROM - (SELECT aggregation_target AS aggregation_target, - toStartOfDay(timestamp) AS entrance_period_start, - max(steps) AS steps_completed, - groupArray(10)(step_0_matching_event) AS step_0_matching_events, - groupArray(10)(step_1_matching_event) AS step_1_matching_events, - groupArray(10)(step_2_matching_event) AS step_2_matching_events, - groupArray(10)(final_matching_event) AS final_matching_events - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - uuid_0 AS uuid_0, - `$session_id_0` AS `$session_id_0`, - `$window_id_0` AS `$window_id_0`, - step_1 AS step_1, - latest_1 AS latest_1, - uuid_1 AS uuid_1, - `$session_id_1` AS `$session_id_1`, - `$window_id_1` AS `$window_id_1`, - step_2 AS step_2, - latest_2 AS latest_2, - uuid_2 AS uuid_2, - `$session_id_2` AS `$session_id_2`, - `$window_id_2` AS `$window_id_2`, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1)) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, - if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_1, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, - tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, - tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, - tuple(latest_2, uuid_2, `$session_id_2`, `$window_id_2`) AS 
step_2_matching_event, - if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, if(isNull(latest_2), step_1_matching_event, step_2_matching_event))) AS final_matching_event - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - uuid_0 AS uuid_0, - `$session_id_0` AS `$session_id_0`, - `$window_id_0` AS `$window_id_0`, - step_1 AS step_1, - latest_1 AS latest_1, - uuid_1 AS uuid_1, - `$session_id_1` AS `$session_id_1`, - `$window_id_1` AS `$window_id_1`, - step_2 AS step_2, - min(latest_2) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, - last_value(uuid_2) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_2, - last_value(`$session_id_2`) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_2`, - last_value(`$window_id_2`) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_2` - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - uuid_0 AS uuid_0, - `$session_id_0` AS `$session_id_0`, - `$window_id_0` AS `$window_id_0`, - step_1 AS step_1, - latest_1 AS latest_1, - uuid_1 AS uuid_1, - `$session_id_1` AS `$session_id_1`, - `$window_id_1` AS `$window_id_1`, - step_2 AS step_2, - if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2, - if(ifNull(less(latest_2, latest_1), 0), NULL, uuid_2) AS uuid_2, - if(ifNull(less(latest_2, latest_1), 0), NULL, `$session_id_2`) AS `$session_id_2`, - if(ifNull(less(latest_2, latest_1), 0), NULL, `$window_id_2`) AS `$window_id_2` - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - uuid_0 AS uuid_0, - `$session_id_0` AS `$session_id_0`, - `$window_id_0` AS `$window_id_0`, - step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, - last_value(uuid_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, - last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, - last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`, - step_2 AS step_2, - min(latest_2) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, - last_value(uuid_2) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_2, - last_value(`$session_id_2`) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_2`, - last_value(`$window_id_2`) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_2` - FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, - 
e.uuid AS uuid, - if(equals(e.event, 'step one'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, - if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, - if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, - if(equals(e.event, 'step two'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, - if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, - if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, - if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`, - if(equals(e.event, 'step three'), 1, 0) AS step_2, - if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, - if(ifNull(equals(step_2, 1), 0), uuid, NULL) AS uuid_2, - if(ifNull(equals(step_2, 1), 0), e.`$session_id`, NULL) AS `$session_id_2`, - if(ifNull(equals(step_2, 1), 0), e.`$window_id`, NULL) AS `$window_id_2` - FROM events AS e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 2) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-07 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('step one', 'step three', 'step two'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))) - WHERE ifNull(equals(step_0, 1), 0)) - WHERE ifNull(equals(entrance_period_start, toDateTime64('2021-05-01 00:00:00.000000', 6, 'UTC')), 0) - GROUP BY aggregation_target, - entrance_period_start) - WHERE and(ifNull(greaterOrEquals(steps_completed, 1), 0), ifNull(less(steps_completed, 3), 0)) - ORDER BY aggregation_target ASC) AS source - INNER JOIN - (SELECT person.id AS id - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC - LIMIT 101 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=1 - ''' -# --- -# name: TestFunnelTrendsPersons.test_funnel_trend_persons_with_drop_off.1 - ''' - SELECT DISTINCT session_replay_events.session_id AS session_id - FROM session_replay_events - WHERE and(equals(session_replay_events.team_id, 2), ifNull(greaterOrEquals(toTimeZone(session_replay_events.min_first_timestamp, 'UTC'), minus(toDateTime64('2021-05-01 00:00:00.000000', 6, 'UTC'), toIntervalDay(21))), 0), in(session_replay_events.session_id, ['s1a'])) - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - 
allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 - ''' -# --- -# name: TestFunnelTrendsPersons.test_funnel_trend_persons_with_no_to_step - ''' - SELECT persons.id, - persons.id AS id, - source.matching_events AS matching_events - FROM - (SELECT aggregation_target AS actor_id, - final_matching_events AS matching_events - FROM - (SELECT aggregation_target AS aggregation_target, - toStartOfDay(timestamp) AS entrance_period_start, - max(steps) AS steps_completed, - groupArray(10)(step_0_matching_event) AS step_0_matching_events, - groupArray(10)(step_1_matching_event) AS step_1_matching_events, - groupArray(10)(step_2_matching_event) AS step_2_matching_events, - groupArray(10)(final_matching_event) AS final_matching_events - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - uuid_0 AS uuid_0, - `$session_id_0` AS `$session_id_0`, - `$window_id_0` AS `$window_id_0`, - step_1 AS step_1, - latest_1 AS latest_1, - uuid_1 AS uuid_1, - `$session_id_1` AS `$session_id_1`, - `$window_id_1` AS `$window_id_1`, - step_2 AS step_2, - latest_2 AS latest_2, - uuid_2 AS uuid_2, - `$session_id_2` AS `$session_id_2`, - `$window_id_2` AS `$window_id_2`, - if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1)) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, - if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_1, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time, - tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, - tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, - tuple(latest_2, uuid_2, `$session_id_2`, `$window_id_2`) AS step_2_matching_event, - if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, if(isNull(latest_2), step_1_matching_event, step_2_matching_event))) AS final_matching_event - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - uuid_0 AS uuid_0, - `$session_id_0` AS `$session_id_0`, - `$window_id_0` AS `$window_id_0`, - step_1 AS step_1, - latest_1 AS latest_1, - uuid_1 AS uuid_1, - `$session_id_1` AS `$session_id_1`, - `$window_id_1` AS `$window_id_1`, - step_2 AS step_2, - min(latest_2) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, - last_value(uuid_2) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_2, - last_value(`$session_id_2`) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_2`, - last_value(`$window_id_2`) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC 
ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_2` - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - uuid_0 AS uuid_0, - `$session_id_0` AS `$session_id_0`, - `$window_id_0` AS `$window_id_0`, - step_1 AS step_1, - latest_1 AS latest_1, - uuid_1 AS uuid_1, - `$session_id_1` AS `$session_id_1`, - `$window_id_1` AS `$window_id_1`, - step_2 AS step_2, - if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2, - if(ifNull(less(latest_2, latest_1), 0), NULL, uuid_2) AS uuid_2, - if(ifNull(less(latest_2, latest_1), 0), NULL, `$session_id_2`) AS `$session_id_2`, - if(ifNull(less(latest_2, latest_1), 0), NULL, `$window_id_2`) AS `$window_id_2` - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - uuid_0 AS uuid_0, - `$session_id_0` AS `$session_id_0`, - `$window_id_0` AS `$window_id_0`, - step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1, - last_value(uuid_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1, - last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`, - last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`, - step_2 AS step_2, - min(latest_2) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2, - last_value(uuid_2) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_2, - last_value(`$session_id_2`) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_2`, - last_value(`$window_id_2`) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_2` - FROM - (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, - if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, - e.uuid AS uuid, - if(equals(e.event, 'step one'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, - if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, - if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, - if(equals(e.event, 'step two'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, - if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, - if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, - if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`, - if(equals(e.event, 'step three'), 1, 0) AS step_2, - if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2, - if(ifNull(equals(step_2, 1), 0), uuid, NULL) AS uuid_2, - if(ifNull(equals(step_2, 1), 0), e.`$session_id`, NULL) AS `$session_id_2`, - if(ifNull(equals(step_2, 1), 0), e.`$window_id`, NULL) AS `$window_id_2` - FROM events AS e - LEFT OUTER JOIN - (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS 
person_id, - person_distinct_id_overrides.distinct_id AS distinct_id - FROM person_distinct_id_overrides - WHERE equals(person_distinct_id_overrides.team_id, 2) - GROUP BY person_distinct_id_overrides.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-07 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('step one', 'step three', 'step two'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))) - WHERE ifNull(equals(step_0, 1), 0)) - WHERE ifNull(equals(entrance_period_start, toDateTime64('2021-05-01 00:00:00.000000', 6, 'UTC')), 0) - GROUP BY aggregation_target, - entrance_period_start) - WHERE ifNull(greaterOrEquals(steps_completed, 3), 0) - ORDER BY aggregation_target ASC) AS source - INNER JOIN - (SELECT person.id AS id - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) - ORDER BY persons.id ASC - LIMIT 101 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0, - allow_experimental_analyzer=1 - ''' -# --- -# name: TestFunnelTrendsPersons.test_funnel_trend_persons_with_no_to_step.1 - ''' - SELECT DISTINCT session_replay_events.session_id AS session_id - FROM session_replay_events - WHERE and(equals(session_replay_events.team_id, 2), ifNull(greaterOrEquals(toTimeZone(session_replay_events.min_first_timestamp, 'UTC'), minus(toDateTime64('2021-05-01 00:00:00.000000', 6, 'UTC'), toIntervalDay(21))), 0), in(session_replay_events.session_id, ['s1c'])) - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=0 - ''' -# --- diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_udf.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_udf.ambr index db1d9211dad93..76914332b11c4 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_udf.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_udf.ambr @@ -16,7 +16,7 @@ countIf(ifNull(ifNull(equals(step_reached, 2), 0), 0)) AS step_3, groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times, groupArrayIf(timings[2], ifNull(greater(timings[2], 0), 0)) AS step_2_conversion_times, - rowNumberInBlock() AS row_number, + rowNumberInAllBlocks() AS row_number, breakdown AS final_prop FROM (SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, [], arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1), multiply(3, step_2)])))) AS events_array, @@ -150,7 +150,7 @@ 
countIf(ifNull(ifNull(equals(step_reached, 2), 0), 0)) AS step_3, groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times, groupArrayIf(timings[2], ifNull(greater(timings[2], 0), 0)) AS step_2_conversion_times, - rowNumberInBlock() AS row_number, + rowNumberInAllBlocks() AS row_number, breakdown AS final_prop FROM (SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, [], arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1), multiply(3, step_2)])))) AS events_array, @@ -229,7 +229,7 @@ (SELECT countIf(ifNull(ifNull(equals(step_reached, 0), 0), 0)) AS step_1, countIf(ifNull(ifNull(equals(step_reached, 1), 0), 0)) AS step_2, groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times, - rowNumberInBlock() AS row_number, + rowNumberInAllBlocks() AS row_number, breakdown AS final_prop FROM (SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, [], arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1)])))) AS events_array, @@ -295,7 +295,7 @@ countIf(ifNull(ifNull(equals(step_reached, 2), 0), 0)) AS step_3, groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times, groupArrayIf(timings[2], ifNull(greater(timings[2], 0), 0)) AS step_2_conversion_times, - rowNumberInBlock() AS row_number, + rowNumberInAllBlocks() AS row_number, breakdown AS final_prop FROM (SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, [], arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1), multiply(3, step_2)])))) AS events_array, @@ -590,7 +590,7 @@ (SELECT countIf(ifNull(ifNull(equals(step_reached, 0), 0), 0)) AS step_1, countIf(ifNull(ifNull(equals(step_reached, 1), 0), 0)) AS step_2, groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times, - rowNumberInBlock() AS row_number, + rowNumberInAllBlocks() AS row_number, breakdown AS final_prop FROM (SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, [], arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1)])))) AS events_array, @@ -651,7 +651,7 @@ (SELECT countIf(ifNull(ifNull(equals(step_reached, 0), 0), 0)) AS step_1, countIf(ifNull(ifNull(equals(step_reached, 1), 0), 0)) AS step_2, groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times, - rowNumberInBlock() AS row_number, + rowNumberInAllBlocks() AS row_number, breakdown AS final_prop FROM (SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, [], arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1)])))) AS events_array, @@ -709,7 +709,7 @@ (SELECT countIf(ifNull(ifNull(equals(step_reached, 0), 0), 0)) AS step_1, countIf(ifNull(ifNull(equals(step_reached, 1), 0), 0)) AS step_2, groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times, - rowNumberInBlock() AS row_number, + rowNumberInAllBlocks() AS row_number, if(ifNull(less(row_number, 25), 0), breakdown, ['Other']) AS final_prop FROM (SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, prop, arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1)])))) AS events_array, @@ -760,14 +760,14 @@ GROUP BY breakdown ORDER BY step_2 DESC, step_1 DESC) GROUP BY final_prop - 
LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 + LIMIT 26 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=23622320128, + allow_experimental_analyzer=1 ''' # --- # name: TestFunnelBreakdownUDF.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step @@ -782,7 +782,7 @@ (SELECT countIf(ifNull(ifNull(equals(step_reached, 0), 0), 0)) AS step_1, countIf(ifNull(ifNull(equals(step_reached, 1), 0), 0)) AS step_2, groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times, - rowNumberInBlock() AS row_number, + rowNumberInAllBlocks() AS row_number, if(ifNull(less(row_number, 25), 0), breakdown, ['Other']) AS final_prop FROM (SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, prop, arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1)])))) AS events_array, @@ -840,14 +840,14 @@ GROUP BY breakdown ORDER BY step_2 DESC, step_1 DESC) GROUP BY final_prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 + LIMIT 26 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=23622320128, + allow_experimental_analyzer=1 ''' # --- # name: TestFunnelBreakdownUDF.test_funnel_step_multiple_breakdown_snapshot @@ -862,7 +862,7 @@ (SELECT countIf(ifNull(ifNull(equals(step_reached, 0), 0), 0)) AS step_1, countIf(ifNull(ifNull(equals(step_reached, 1), 0), 0)) AS step_2, groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times, - rowNumberInBlock() AS row_number, + rowNumberInAllBlocks() AS row_number, if(ifNull(less(row_number, 25), 0), breakdown, ['Other']) AS final_prop FROM (SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, prop, arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1)])))) AS events_array, @@ -913,14 +913,14 @@ GROUP BY breakdown ORDER BY step_2 DESC, step_1 DESC) GROUP BY final_prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 + LIMIT 26 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=23622320128, + allow_experimental_analyzer=1 ''' # --- # name: TestFunnelGroupBreakdownUDF.test_funnel_aggregate_by_groups_breakdown_group_person_on_events @@ -940,7 +940,7 @@ countIf(ifNull(ifNull(equals(step_reached, 2), 0), 0)) AS step_3, groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times, 
groupArrayIf(timings[2], ifNull(greater(timings[2], 0), 0)) AS step_2_conversion_times, - rowNumberInBlock() AS row_number, + rowNumberInAllBlocks() AS row_number, if(ifNull(less(row_number, 25), 0), breakdown, 'Other') AS final_prop FROM (SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, prop, arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1), multiply(3, step_2)])))) AS events_array, @@ -994,14 +994,14 @@ GROUP BY breakdown ORDER BY step_3 DESC, step_2 DESC, step_1 DESC) GROUP BY final_prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 + LIMIT 26 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=23622320128, + allow_experimental_analyzer=1 ''' # --- # name: TestFunnelGroupBreakdownUDF.test_funnel_aggregate_by_groups_breakdown_group_person_on_events_poe_v2 @@ -1021,7 +1021,7 @@ countIf(ifNull(ifNull(equals(step_reached, 2), 0), 0)) AS step_3, groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times, groupArrayIf(timings[2], ifNull(greater(timings[2], 0), 0)) AS step_2_conversion_times, - rowNumberInBlock() AS row_number, + rowNumberInAllBlocks() AS row_number, if(ifNull(less(row_number, 25), 0), breakdown, 'Other') AS final_prop FROM (SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, prop, arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1), multiply(3, step_2)])))) AS events_array, @@ -1075,14 +1075,14 @@ GROUP BY breakdown ORDER BY step_3 DESC, step_2 DESC, step_1 DESC) GROUP BY final_prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 + LIMIT 26 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=23622320128, + allow_experimental_analyzer=1 ''' # --- # name: TestFunnelGroupBreakdownUDF.test_funnel_breakdown_group @@ -1102,7 +1102,7 @@ countIf(ifNull(ifNull(equals(step_reached, 2), 0), 0)) AS step_3, groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times, groupArrayIf(timings[2], ifNull(greater(timings[2], 0), 0)) AS step_2_conversion_times, - rowNumberInBlock() AS row_number, + rowNumberInAllBlocks() AS row_number, if(ifNull(less(row_number, 25), 0), breakdown, 'Other') AS final_prop FROM (SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, prop, arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1), multiply(3, step_2)])))) AS events_array, @@ -1163,585 +1163,345 @@ GROUP BY breakdown ORDER BY step_3 DESC, step_2 DESC, step_1 DESC) GROUP BY final_prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - 
max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 + LIMIT 26 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=23622320128, + allow_experimental_analyzer=1 ''' # --- # name: TestFunnelGroupBreakdownUDF.test_funnel_breakdown_group.1 ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 - ''' -# --- -# name: TestFunnelGroupBreakdownUDF.test_funnel_breakdown_group.2 - ''' - - SELECT aggregation_target AS actor_id + SELECT persons.id, + persons.id AS id FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - avg(step_2_conversion_time) step_2_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , - prop + (SELECT aggregation_target AS actor_id FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time, - step_2_conversion_time , - prop + (SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, prop, arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1), multiply(3, step_2)])))) AS events_array, + arrayJoin(aggregate_funnel_v3(3, 1209600, 'first_touch', 'ordered', groupUniqArray(prop), arrayFilter((x, x_before, x_after) -> not(and(ifNull(lessOrEquals(length(x.4), 1), 0), ifNull(equals(x.4, x_before.4), isNull(x.4) + and isNull(x_before.4)), ifNull(equals(x.4, x_after.4), isNull(x.4) + and isNull(x_after.4)), ifNull(equals(x.3, x_before.3), isNull(x.3) + and isNull(x_before.3)), ifNull(equals(x.3, x_after.3), isNull(x.3) + and isNull(x_after.3)), ifNull(greater(x.1, x_before.1), 0), ifNull(less(x.1, x_after.1), 0))), events_array, arrayRotateRight(events_array, 1), arrayRotateLeft(events_array, 1)))) AS af_tuple, + af_tuple.1 AS step_reached, + plus(af_tuple.1, 1) AS steps, + af_tuple.2 AS breakdown, + af_tuple.3 AS timings, + aggregation_target AS aggregation_target FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY - AND latest_1 <= latest_2 - AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - if(isNotNull(latest_2) - AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time, - prop + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + uuid AS uuid, + 
`$session_id` AS `$session_id`, + `$window_id` AS `$window_id`, + step_0 AS step_0, + step_1 AS step_1, + step_2 AS step_2, + prop_basic AS prop_basic, + prop, + prop_vals AS prop_vals, + prop_vals AS prop FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - if(latest_2 < latest_1, NULL, latest_2) as latest_2 , - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop - FROM - (SELECT *, - prop_vals as prop - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'sign up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'play movie', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'buy', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) ))))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2, 3] - AND arrayFlatten(array(prop)) = arrayFlatten(array('finance')) - ORDER BY aggregation_target - LIMIT 100 - OFFSET 0 SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000 - ''' -# --- -# name: TestFunnelGroupBreakdownUDF.test_funnel_breakdown_group.3 - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - 
GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, + e.uuid AS uuid, + e.`$session_id` AS `$session_id`, + e.`$window_id` AS `$window_id`, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(equals(e.event, 'play movie'), 1, 0) AS step_1, + if(equals(e.event, 'buy'), 1, 0) AS step_2, + ifNull(toString(e__group_0.properties___industry), '') AS prop_basic, + prop_basic AS prop, + argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 99999), equals(index, 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 99999), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))) + GROUP BY aggregation_target + HAVING ifNull(greaterOrEquals(step_reached, 0), 0)) + WHERE and(ifNull(greaterOrEquals(step_reached, 0), 0), ifNull(equals(arrayFlatten(array(breakdown)), arrayFlatten(array('finance'))), isNull(arrayFlatten(array(breakdown))) + and isNull(arrayFlatten(array('finance'))))) + ORDER BY aggregation_target ASC) AS source + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 99999) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0, + allow_experimental_analyzer=1 ''' # --- -# name: TestFunnelGroupBreakdownUDF.test_funnel_breakdown_group.4 +# name: 
TestFunnelGroupBreakdownUDF.test_funnel_breakdown_group.2 ''' - - SELECT aggregation_target AS actor_id + SELECT persons.id, + persons.id AS id FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - avg(step_2_conversion_time) step_2_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , - prop + (SELECT aggregation_target AS actor_id FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time, - step_2_conversion_time , - prop + (SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, prop, arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1), multiply(3, step_2)])))) AS events_array, + arrayJoin(aggregate_funnel_v3(3, 1209600, 'first_touch', 'ordered', groupUniqArray(prop), arrayFilter((x, x_before, x_after) -> not(and(ifNull(lessOrEquals(length(x.4), 1), 0), ifNull(equals(x.4, x_before.4), isNull(x.4) + and isNull(x_before.4)), ifNull(equals(x.4, x_after.4), isNull(x.4) + and isNull(x_after.4)), ifNull(equals(x.3, x_before.3), isNull(x.3) + and isNull(x_before.3)), ifNull(equals(x.3, x_after.3), isNull(x.3) + and isNull(x_after.3)), ifNull(greater(x.1, x_before.1), 0), ifNull(less(x.1, x_after.1), 0))), events_array, arrayRotateRight(events_array, 1), arrayRotateLeft(events_array, 1)))) AS af_tuple, + af_tuple.1 AS step_reached, + plus(af_tuple.1, 1) AS steps, + af_tuple.2 AS breakdown, + af_tuple.3 AS timings, + aggregation_target AS aggregation_target FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY - AND latest_1 <= latest_2 - AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - if(isNotNull(latest_2) - AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time, - prop + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + uuid AS uuid, + `$session_id` AS `$session_id`, + `$window_id` AS `$window_id`, + step_0 AS step_0, + step_1 AS step_1, + step_2 AS step_2, + prop_basic AS prop_basic, + prop, + prop_vals AS prop_vals, + prop_vals AS prop FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - if(latest_2 < latest_1, NULL, latest_2) as latest_2 , - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop - FROM - (SELECT *, - prop_vals as prop - FROM - (SELECT e.timestamp as timestamp, - 
if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'sign up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'play movie', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'buy', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) ))))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - WHERE steps IN [2, 3] - AND arrayFlatten(array(prop)) = arrayFlatten(array('finance')) - ORDER BY aggregation_target - LIMIT 100 - OFFSET 0 SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000 - ''' -# --- -# name: TestFunnelGroupBreakdownUDF.test_funnel_breakdown_group.5 - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, + e.uuid AS uuid, + e.`$session_id` AS `$session_id`, + e.`$window_id` AS `$window_id`, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(equals(e.event, 'play movie'), 1, 0) AS step_1, + if(equals(e.event, 'buy'), 1, 0) AS step_2, + ifNull(toString(e__group_0.properties___industry), '') AS prop_basic, + prop_basic AS prop, + argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM 
person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 99999), equals(index, 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 99999), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))) + GROUP BY aggregation_target + HAVING ifNull(greaterOrEquals(step_reached, 0), 0)) + WHERE and(ifNull(greaterOrEquals(step_reached, 1), 0), ifNull(equals(arrayFlatten(array(breakdown)), arrayFlatten(array('finance'))), isNull(arrayFlatten(array(breakdown))) + and isNull(arrayFlatten(array('finance'))))) + ORDER BY aggregation_target ASC) AS source + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 99999) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0, + allow_experimental_analyzer=1 ''' # --- -# name: TestFunnelGroupBreakdownUDF.test_funnel_breakdown_group.6 +# name: TestFunnelGroupBreakdownUDF.test_funnel_breakdown_group.3 ''' - - SELECT aggregation_target AS actor_id + SELECT persons.id, + persons.id AS id FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - avg(step_2_conversion_time) step_2_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , - prop + (SELECT aggregation_target AS actor_id FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time, - step_2_conversion_time , - prop + (SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, prop, arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1), multiply(3, step_2)])))) AS events_array, + arrayJoin(aggregate_funnel_v3(3, 1209600, 'first_touch', 'ordered', groupUniqArray(prop), arrayFilter((x, x_before, x_after) -> not(and(ifNull(lessOrEquals(length(x.4), 1), 0), ifNull(equals(x.4, x_before.4), isNull(x.4) + and isNull(x_before.4)), ifNull(equals(x.4, 
x_after.4), isNull(x.4) + and isNull(x_after.4)), ifNull(equals(x.3, x_before.3), isNull(x.3) + and isNull(x_before.3)), ifNull(equals(x.3, x_after.3), isNull(x.3) + and isNull(x_after.3)), ifNull(greater(x.1, x_before.1), 0), ifNull(less(x.1, x_after.1), 0))), events_array, arrayRotateRight(events_array, 1), arrayRotateLeft(events_array, 1)))) AS af_tuple, + af_tuple.1 AS step_reached, + plus(af_tuple.1, 1) AS steps, + af_tuple.2 AS breakdown, + af_tuple.3 AS timings, + aggregation_target AS aggregation_target FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY - AND latest_1 <= latest_2 - AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - if(isNotNull(latest_2) - AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time, - prop + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + uuid AS uuid, + `$session_id` AS `$session_id`, + `$window_id` AS `$window_id`, + step_0 AS step_0, + step_1 AS step_1, + step_2 AS step_2, + prop_basic AS prop_basic, + prop, + prop_vals AS prop_vals, + prop_vals AS prop FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - if(latest_2 < latest_1, NULL, latest_2) as latest_2 , - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop - FROM - (SELECT *, - prop_vals as prop - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'sign up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'play movie', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'buy', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - 
(SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) ))))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - WHERE steps IN [1, 2, 3] - AND arrayFlatten(array(prop)) = arrayFlatten(array('technology')) - ORDER BY aggregation_target - LIMIT 100 - OFFSET 0 SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000 - ''' -# --- -# name: TestFunnelGroupBreakdownUDF.test_funnel_breakdown_group.7 - ''' - - SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value, - count(*) as count - FROM events e - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, + e.uuid AS uuid, + e.`$session_id` AS `$session_id`, + e.`$window_id` AS `$window_id`, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(equals(e.event, 'play movie'), 1, 0) AS step_1, + if(equals(e.event, 'buy'), 1, 0) AS step_2, + ifNull(toString(e__group_0.properties___industry), '') AS prop_basic, + prop_basic AS prop, + argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 99999), equals(index, 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 99999), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), 
ifNull(equals(step_2, 1), 0))))) + GROUP BY aggregation_target + HAVING ifNull(greaterOrEquals(step_reached, 0), 0)) + WHERE and(ifNull(greaterOrEquals(step_reached, 0), 0), ifNull(equals(arrayFlatten(array(breakdown)), arrayFlatten(array('technology'))), isNull(arrayFlatten(array(breakdown))) + and isNull(arrayFlatten(array('technology'))))) + ORDER BY aggregation_target ASC) AS source + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 99999) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0, + allow_experimental_analyzer=1 ''' # --- -# name: TestFunnelGroupBreakdownUDF.test_funnel_breakdown_group.8 +# name: TestFunnelGroupBreakdownUDF.test_funnel_breakdown_group.4 ''' - - SELECT aggregation_target AS actor_id + SELECT persons.id, + persons.id AS id FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - avg(step_2_conversion_time) step_2_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner, - median(step_2_conversion_time) step_2_median_conversion_time_inner , - prop + (SELECT aggregation_target AS actor_id FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time, - step_2_conversion_time , - prop + (SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, prop, arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1), multiply(3, step_2)])))) AS events_array, + arrayJoin(aggregate_funnel_v3(3, 1209600, 'first_touch', 'ordered', groupUniqArray(prop), arrayFilter((x, x_before, x_after) -> not(and(ifNull(lessOrEquals(length(x.4), 1), 0), ifNull(equals(x.4, x_before.4), isNull(x.4) + and isNull(x_before.4)), ifNull(equals(x.4, x_after.4), isNull(x.4) + and isNull(x_after.4)), ifNull(equals(x.3, x_before.3), isNull(x.3) + and isNull(x_before.3)), ifNull(equals(x.3, x_after.3), isNull(x.3) + and isNull(x_after.3)), ifNull(greater(x.1, x_before.1), 0), ifNull(less(x.1, x_after.1), 0))), events_array, arrayRotateRight(events_array, 1), arrayRotateLeft(events_array, 1)))) AS af_tuple, + af_tuple.1 AS step_reached, + plus(af_tuple.1, 1) AS steps, + af_tuple.2 AS breakdown, + af_tuple.3 AS timings, + aggregation_target AS aggregation_target FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY - AND latest_1 <= latest_2 - AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - if(isNotNull(latest_2) - AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time, - prop + (SELECT timestamp AS timestamp, + aggregation_target AS aggregation_target, + 
uuid AS uuid, + `$session_id` AS `$session_id`, + `$window_id` AS `$window_id`, + step_0 AS step_0, + step_1 AS step_1, + step_2 AS step_2, + prop_basic AS prop_basic, + prop, + prop_vals AS prop_vals, + prop_vals AS prop FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - latest_1, - step_2, - if(latest_2 < latest_1, NULL, latest_2) as latest_2 , - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1, - step_2, - min(latest_2) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 , - if(has(['technology', 'finance'], prop), prop, 'Other') as prop - FROM - (SELECT *, - prop_vals as prop - FROM - (SELECT e.timestamp as timestamp, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target, - if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id, - if(event = 'sign up', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = 'play movie', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - if(event = 'buy', 1, 0) as step_2, - if(step_2 = 1, timestamp, null) as latest_2, - replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals - FROM events e - LEFT OUTER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 99999 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - LEFT JOIN - (SELECT group_key, - argMax(group_properties, _timestamp) AS group_properties_0 - FROM groups - WHERE team_id = 99999 - AND group_type_index = 0 - GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key - WHERE team_id = 99999 - AND event IN ['buy', 'play movie', 'sign up'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') - AND (step_0 = 1 - OR step_1 = 1 - OR step_2 = 1) ))))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max(max_steps)) - WHERE steps IN [2, 3] - AND arrayFlatten(array(prop)) = arrayFlatten(array('technology')) - ORDER BY aggregation_target - LIMIT 100 - OFFSET 0 SETTINGS max_ast_elements=1000000, - max_expanded_ast_elements=1000000 + (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, + if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, + e.uuid AS uuid, + e.`$session_id` AS `$session_id`, + e.`$window_id` AS `$window_id`, + if(equals(e.event, 'sign up'), 1, 0) AS step_0, + if(equals(e.event, 'play movie'), 1, 0) AS step_1, + if(equals(e.event, 
'buy'), 1, 0) AS step_2, + ifNull(toString(e__group_0.properties___industry), '') AS prop_basic, + prop_basic AS prop, + argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals + FROM events AS e + LEFT OUTER JOIN + (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, + person_distinct_id_overrides.distinct_id AS distinct_id + FROM person_distinct_id_overrides + WHERE equals(person_distinct_id_overrides.team_id, 99999) + GROUP BY person_distinct_id_overrides.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id) + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 99999), equals(index, 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key) + WHERE and(equals(e.team_id, 99999), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))) + GROUP BY aggregation_target + HAVING ifNull(greaterOrEquals(step_reached, 0), 0)) + WHERE and(ifNull(greaterOrEquals(step_reached, 1), 0), ifNull(equals(arrayFlatten(array(breakdown)), arrayFlatten(array('technology'))), isNull(arrayFlatten(array(breakdown))) + and isNull(arrayFlatten(array('technology'))))) + ORDER BY aggregation_target ASC) AS source + INNER JOIN + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 99999) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0, + allow_experimental_analyzer=1 ''' # --- diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_unordered.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_unordered.ambr index 7982e95c56bed..2c4f5e6564765 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_unordered.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_unordered.ambr @@ -139,14 +139,14 @@ and isNull(max(max_steps)))) GROUP BY prop) GROUP BY final_prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 + LIMIT 26 SETTINGS readonly=2, + max_execution_time=60, + 
allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=23622320128, + allow_experimental_analyzer=1 ''' # --- # name: TestFunnelUnorderedStepsBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step @@ -303,14 +303,14 @@ and isNull(max(max_steps)))) GROUP BY prop) GROUP BY final_prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 + LIMIT 26 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=23622320128, + allow_experimental_analyzer=1 ''' # --- # name: TestFunnelUnorderedStepsBreakdown.test_funnel_step_multiple_breakdown_snapshot @@ -453,14 +453,14 @@ and isNull(max(max_steps)))) GROUP BY prop) GROUP BY final_prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 + LIMIT 26 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=23622320128, + allow_experimental_analyzer=1 ''' # --- # name: TestUnorderedFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events @@ -698,14 +698,14 @@ and isNull(max(max_steps)))) GROUP BY prop) GROUP BY final_prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 + LIMIT 26 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=23622320128, + allow_experimental_analyzer=1 ''' # --- # name: TestUnorderedFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events_poe_v2 @@ -943,14 +943,14 @@ and isNull(max(max_steps)))) GROUP BY prop) GROUP BY final_prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, - max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 + LIMIT 26 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=23622320128, + allow_experimental_analyzer=1 ''' # --- # name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group @@ -1209,14 +1209,14 @@ and isNull(max(max_steps)))) GROUP BY prop) GROUP BY final_prop - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=4000000, 
- max_expanded_ast_elements=4000000, - max_bytes_before_external_group_by=23622320128, - allow_experimental_analyzer=1 + LIMIT 26 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=23622320128, + allow_experimental_analyzer=1 ''' # --- # name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.1 diff --git a/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py b/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py index ddef2c9567d6b..ee1122013788b 100644 --- a/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py +++ b/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py @@ -1,8 +1,10 @@ +import ast from collections.abc import Callable from dataclasses import dataclass from datetime import datetime from string import ascii_lowercase from typing import Any, Literal, Optional, Union, cast +from unittest import skip from posthog.constants import INSIGHT_FUNNELS, FunnelOrderType from posthog.hogql_queries.insights.funnels.funnels_query_runner import FunnelsQueryRunner @@ -460,6 +462,7 @@ def test_funnel_step_breakdown_event(self): ) @also_test_with_materialized_columns(["$browser"]) + @skip('Using "Other" as a breakdown is not yet implemented in HogQL Actors Queries') def test_funnel_step_breakdown_event_with_other(self): filters = { "insight": INSIGHT_FUNNELS, @@ -533,7 +536,8 @@ def test_funnel_step_breakdown_event_with_other(self): people = journeys_for(events_by_person, self.team) query = cast(FunnelsQuery, filter_to_query(filters)) - results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + query_runner = FunnelsQueryRunner(query=query, team=self.team) + results = query_runner.calculate().results results = sort_breakdown_funnel_results(results) self._assert_funnel_breakdown_result_is_correct( @@ -597,6 +601,7 @@ def test_funnel_step_breakdown_event_with_other(self): self._get_actor_ids_at_step(filters, 2, "Other"), [people["person1"].uuid], ) + self.assertEqual(2, cast(ast.Constant, query_runner.to_query().limit).value) @also_test_with_materialized_columns(["$browser"]) def test_funnel_step_breakdown_event_no_type(self): @@ -847,6 +852,7 @@ def test_funnel_step_breakdown_limit(self): self.assertEqual([["5"], ["6"], ["7"], ["8"], ["9"], ["Other"]], breakdown_vals) @also_test_with_materialized_columns(["some_breakdown_val"]) + @skip('Using "Other" as a breakdown is not yet implemented in HogQL Actors Queries') def test_funnel_step_custom_breakdown_limit_with_nulls(self): filters = { "insight": INSIGHT_FUNNELS, diff --git a/posthog/hogql_queries/insights/funnels/test/test_funnel.py b/posthog/hogql_queries/insights/funnels/test/test_funnel.py index ceaa344041d58..b92891822cb87 100644 --- a/posthog/hogql_queries/insights/funnels/test/test_funnel.py +++ b/posthog/hogql_queries/insights/funnels/test/test_funnel.py @@ -1,6 +1,6 @@ import uuid from datetime import datetime -from typing import cast +from typing import cast, Any from unittest.mock import Mock, patch from django.test import override_settings @@ -25,12 +25,11 @@ funnel_conversion_time_test_factory, ) from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query -from posthog.models import Action, Element +from posthog.models import Action, Element, Team from posthog.models.cohort.cohort import Cohort from posthog.models.group.util import create_group from 
posthog.models.group_type_mapping import GroupTypeMapping from posthog.models.property_definition import PropertyDefinition -from posthog.queries.funnels import ClickhouseFunnelActors from posthog.schema import ( ActionsNode, ActorsQuery, @@ -60,6 +59,27 @@ snapshot_clickhouse_queries, ) from posthog.test.test_journeys import journeys_for +from posthog.hogql_queries.insights.funnels.test.test_funnel_persons import get_actors + + +class PseudoFunnelActors: + def __init__(self, person_filter: Any, team: Team): + self.filters = person_filter._data + self.team = team + + def get_actors(self): + actors = get_actors( + self.filters, + self.team, + funnel_step=self.filters.get("funnel_step"), + funnel_step_breakdown=self.filters.get("funnel_step_breakdown"), + ) + + return ( + None, + [{"id": x[0]} for x in actors], + None, + ) def _create_action(**kwargs): @@ -75,7 +95,7 @@ class TestFunnelBreakdown( ClickhouseTestMixin, funnel_breakdown_test_factory( # type: ignore FunnelOrderType.ORDERED, - ClickhouseFunnelActors, + PseudoFunnelActors, _create_action, _create_person, ), @@ -89,7 +109,7 @@ class TestFunnelGroupBreakdown( ClickhouseTestMixin, funnel_breakdown_group_test_factory( # type: ignore FunnelOrderType.ORDERED, - ClickhouseFunnelActors, + PseudoFunnelActors, ), ): pass @@ -98,7 +118,7 @@ class TestFunnelGroupBreakdown( @patch("posthoganalytics.feature_enabled", new=Mock(return_value=False)) class TestFunnelConversionTime( ClickhouseTestMixin, - funnel_conversion_time_test_factory(FunnelOrderType.ORDERED, ClickhouseFunnelActors), # type: ignore + funnel_conversion_time_test_factory(FunnelOrderType.ORDERED, PseudoFunnelActors), # type: ignore ): maxDiff = None pass diff --git a/posthog/hogql_queries/insights/funnels/test/test_funnel_udf.py b/posthog/hogql_queries/insights/funnels/test/test_funnel_udf.py index 285dd9d127fac..48faf02ddc06f 100644 --- a/posthog/hogql_queries/insights/funnels/test/test_funnel_udf.py +++ b/posthog/hogql_queries/insights/funnels/test/test_funnel_udf.py @@ -13,7 +13,6 @@ ) from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query from posthog.models import Action -from posthog.queries.funnels import ClickhouseFunnelActors from posthog.schema import FunnelsQuery, FunnelsQueryResponse from posthog.test.base import ( ClickhouseTestMixin, @@ -21,7 +20,7 @@ _create_person, ) from posthog.test.test_journeys import journeys_for -from test_funnel import funnel_test_factory +from test_funnel import funnel_test_factory, PseudoFunnelActors from posthog.hogql_queries.insights.funnels.test.conversion_time_cases import ( funnel_conversion_time_test_factory, ) @@ -43,7 +42,7 @@ class TestFunnelBreakdownUDF( ClickhouseTestMixin, funnel_breakdown_test_factory( # type: ignore FunnelOrderType.ORDERED, - ClickhouseFunnelActors, + PseudoFunnelActors, _create_action, _create_person, ), @@ -57,7 +56,7 @@ class TestFunnelGroupBreakdownUDF( ClickhouseTestMixin, funnel_breakdown_group_test_factory( # type: ignore FunnelOrderType.ORDERED, - ClickhouseFunnelActors, + PseudoFunnelActors, ), ): pass @@ -199,7 +198,7 @@ def test_excluded_after_time_expires(self): @patch("posthoganalytics.feature_enabled", new=Mock(side_effect=use_udf_funnel_flag_side_effect)) class TestFunnelConversionTimeUDF( ClickhouseTestMixin, - funnel_conversion_time_test_factory(FunnelOrderType.ORDERED, ClickhouseFunnelActors), # type: ignore + funnel_conversion_time_test_factory(FunnelOrderType.ORDERED, PseudoFunnelActors), # type: ignore ): maxDiff = None pass diff --git a/posthog/schema.py 
b/posthog/schema.py index 2c7b546c928a7..8f3a71cf4ea99 100644 --- a/posthog/schema.py +++ b/posthog/schema.py @@ -6058,7 +6058,7 @@ class FunnelsActorsQuery(BaseModel): " negative for dropped of persons." ), ) - funnelStepBreakdown: Optional[Union[str, float, list[Union[str, float]]]] = Field( + funnelStepBreakdown: Optional[Union[int, str, float, list[Union[int, str, float]]]] = Field( default=None, description=( "The breakdown value for which to get persons for. This is an array for person and event properties, a" diff --git a/posthog/temporal/common/heartbeat_sync.py b/posthog/temporal/common/heartbeat_sync.py index 35ac79515b9f4..cf775c3bf5cb0 100644 --- a/posthog/temporal/common/heartbeat_sync.py +++ b/posthog/temporal/common/heartbeat_sync.py @@ -11,6 +11,8 @@ def __init__(self, details: tuple[Any, ...] = (), factor: int = 12, logger: Opti self.details: tuple[Any, ...] = details self.factor = factor self.logger = logger + self.stop_event: Optional[threading.Event] = None + self.heartbeat_thread: Optional[threading.Thread] = None def log_debug(self, message: str, exc_info: Optional[Any] = None) -> None: if self.logger: diff --git a/posthog/temporal/data_imports/__init__.py b/posthog/temporal/data_imports/__init__.py index cabeaf433d4e1..c59f20b05d8cf 100644 --- a/posthog/temporal/data_imports/__init__.py +++ b/posthog/temporal/data_imports/__init__.py @@ -2,10 +2,8 @@ ExternalDataJobWorkflow, create_external_data_job_model_activity, create_source_templates, - import_data_activity, import_data_activity_sync, update_external_data_job_model, - check_schedule_activity, check_billing_limits_activity, sync_new_schemas_activity, ) @@ -15,10 +13,8 @@ ACTIVITIES = [ create_external_data_job_model_activity, update_external_data_job_model, - import_data_activity, import_data_activity_sync, create_source_templates, - check_schedule_activity, check_billing_limits_activity, sync_new_schemas_activity, ] diff --git a/posthog/temporal/data_imports/external_data_job.py b/posthog/temporal/data_imports/external_data_job.py index 1820f462093ca..0bccbf9b95fa9 100644 --- a/posthog/temporal/data_imports/external_data_job.py +++ b/posthog/temporal/data_imports/external_data_job.py @@ -8,7 +8,6 @@ # TODO: remove dependency from posthog.temporal.batch_exports.base import PostHogWorkflow -from posthog.temporal.data_imports.util import is_posthog_team from posthog.temporal.data_imports.workflow_activities.check_billing_limits import ( CheckBillingLimitsActivityInputs, check_billing_limits_activity, @@ -23,28 +22,19 @@ CreateExternalDataJobModelActivityInputs, create_external_data_job_model_activity, ) -from posthog.temporal.data_imports.workflow_activities.import_data import ImportDataActivityInputs, import_data_activity +from posthog.temporal.data_imports.workflow_activities.import_data_sync import ImportDataActivityInputs from posthog.utils import get_machine_id -from posthog.warehouse.data_load.service import ( - a_delete_external_data_schedule, - a_external_data_workflow_exists, - a_sync_external_data_job_workflow, - a_trigger_external_data_workflow, -) from posthog.warehouse.data_load.source_templates import create_warehouse_templates_for_source from posthog.warehouse.external_data_source.jobs import ( - aget_running_job_for_schema, - aupdate_external_job_status, + update_external_job_status, ) from posthog.warehouse.models import ( ExternalDataJob, - get_active_schemas_for_source_id, ExternalDataSource, - get_external_data_source, ) -from posthog.temporal.common.logger import bind_temporal_worker_logger 
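The heartbeat_sync.py hunk above initialises stop_event and heartbeat_thread to None in the constructor so teardown can tell whether the heartbeat thread was ever started. A rough, self-contained sketch of that shape; the class name and fixed interval are placeholders, and the real HeartbeaterSync also carries a details tuple and a factor and emits Temporal activity heartbeats on each tick:

import threading
from typing import Any, Optional


class HeartbeaterSketch:
    def __init__(self, interval_seconds: float = 10.0) -> None:
        self.interval_seconds = interval_seconds
        # Initialised up front so __exit__ can safely no-op if __enter__ never ran.
        self.stop_event: Optional[threading.Event] = None
        self.heartbeat_thread: Optional[threading.Thread] = None

    def __enter__(self) -> "HeartbeaterSketch":
        self.stop_event = threading.Event()
        self.heartbeat_thread = threading.Thread(target=self._beat, daemon=True)
        self.heartbeat_thread.start()
        return self

    def _beat(self) -> None:
        assert self.stop_event is not None
        while not self.stop_event.wait(self.interval_seconds):
            pass  # the real class calls the Temporal heartbeat API here

    def __exit__(self, *exc: Any) -> None:
        if self.stop_event is not None:
            self.stop_event.set()
        if self.heartbeat_thread is not None:
            self.heartbeat_thread.join(timeout=self.interval_seconds)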
-from posthog.warehouse.models.external_data_schema import aupdate_should_sync +from posthog.temporal.common.logger import bind_temporal_worker_logger_sync +from posthog.warehouse.models.external_data_schema import update_should_sync Non_Retryable_Schema_Errors: dict[ExternalDataSource.Type, list[str]] = { @@ -76,11 +66,15 @@ class UpdateExternalDataJobStatusInputs: @activity.defn -async def update_external_data_job_model(inputs: UpdateExternalDataJobStatusInputs) -> None: - logger = await bind_temporal_worker_logger(team_id=inputs.team_id) +def update_external_data_job_model(inputs: UpdateExternalDataJobStatusInputs) -> None: + logger = bind_temporal_worker_logger_sync(team_id=inputs.team_id) if inputs.job_id is None: - job: ExternalDataJob | None = await aget_running_job_for_schema(inputs.schema_id) + job: ExternalDataJob | None = ( + ExternalDataJob.objects.filter(schema_id=inputs.schema_id, status=ExternalDataJob.Status.RUNNING) + .order_by("-created_at") + .first() + ) if job is None: logger.info("No job to update status on") return @@ -94,7 +88,7 @@ async def update_external_data_job_model(inputs: UpdateExternalDataJobStatusInpu f"External data job failed for external data schema {inputs.schema_id} with error: {inputs.internal_error}" ) - source: ExternalDataSource = await get_external_data_source(inputs.source_id) + source: ExternalDataSource = ExternalDataSource.objects.get(pk=inputs.source_id) non_retryable_errors = Non_Retryable_Schema_Errors.get(ExternalDataSource.Type(source.source_type)) if non_retryable_errors is not None: @@ -113,9 +107,9 @@ async def update_external_data_job_model(inputs: UpdateExternalDataJobStatusInpu "error": inputs.internal_error, }, ) - await aupdate_should_sync(schema_id=inputs.schema_id, team_id=inputs.team_id, should_sync=False) + update_should_sync(schema_id=inputs.schema_id, team_id=inputs.team_id, should_sync=False) - await aupdate_external_job_status( + update_external_job_status( job_id=job_id, status=inputs.status, latest_error=inputs.latest_error, @@ -134,34 +128,8 @@ class CreateSourceTemplateInputs: @activity.defn -async def create_source_templates(inputs: CreateSourceTemplateInputs) -> None: - await create_warehouse_templates_for_source(team_id=inputs.team_id, run_id=inputs.run_id) - - -@activity.defn -async def check_schedule_activity(inputs: ExternalDataWorkflowInputs) -> bool: - logger = await bind_temporal_worker_logger(team_id=inputs.team_id) - - # Creates schedules for all schemas if they don't exist yet, and then remove itself as a source schedule - if inputs.external_data_schema_id is None: - logger.info("Schema ID is none, creating schedules for schemas...") - schemas = await get_active_schemas_for_source_id( - team_id=inputs.team_id, source_id=inputs.external_data_source_id - ) - for schema in schemas: - if await a_external_data_workflow_exists(schema.id): - await a_trigger_external_data_workflow(schema) - logger.info(f"Schedule exists for schema {schema.id}. Triggered schedule") - else: - await a_sync_external_data_job_workflow(schema, create=True) - logger.info(f"Created schedule for schema {schema.id}") - # Delete the source schedule in favour of the schema schedules - await a_delete_external_data_schedule(ExternalDataSource(id=inputs.external_data_source_id)) - logger.info(f"Deleted schedule for source {inputs.external_data_source_id}") - return True - - logger.info("Schema ID is set. 
Continuing...") - return False +def create_source_templates(inputs: CreateSourceTemplateInputs) -> None: + create_warehouse_templates_for_source(team_id=inputs.team_id, run_id=inputs.run_id) # TODO: update retry policies @@ -174,21 +142,6 @@ def parse_inputs(inputs: list[str]) -> ExternalDataWorkflowInputs: @workflow.run async def run(self, inputs: ExternalDataWorkflowInputs): - should_exit = await workflow.execute_activity( - check_schedule_activity, - inputs, - start_to_close_timeout=dt.timedelta(minutes=1), - retry_policy=RetryPolicy( - initial_interval=dt.timedelta(seconds=10), - maximum_interval=dt.timedelta(seconds=60), - maximum_attempts=0, - non_retryable_error_types=["NotNullViolation", "IntegrityError"], - ), - ) - - if should_exit: - return - assert inputs.external_data_schema_id is not None update_inputs = UpdateExternalDataJobStatusInputs( @@ -262,24 +215,12 @@ async def run(self, inputs: ExternalDataWorkflowInputs): else {"start_to_close_timeout": dt.timedelta(hours=12), "retry_policy": RetryPolicy(maximum_attempts=3)} ) - if is_posthog_team(inputs.team_id) and ( - source_type == ExternalDataSource.Type.POSTGRES or source_type == ExternalDataSource.Type.BIGQUERY - ): - # Sync activity for testing - await workflow.execute_activity( - import_data_activity_sync, - job_inputs, - heartbeat_timeout=dt.timedelta(minutes=5), - **timeout_params, - ) # type: ignore - else: - # Async activity for everyone else - await workflow.execute_activity( - import_data_activity, - job_inputs, - heartbeat_timeout=dt.timedelta(minutes=5), - **timeout_params, - ) # type: ignore + await workflow.execute_activity( + import_data_activity_sync, + job_inputs, + heartbeat_timeout=dt.timedelta(minutes=5), + **timeout_params, + ) # type: ignore # Create source templates await workflow.execute_activity( diff --git a/posthog/temporal/data_imports/pipelines/pipeline.py b/posthog/temporal/data_imports/pipelines/pipeline.py deleted file mode 100644 index 24099e698fb7c..0000000000000 --- a/posthog/temporal/data_imports/pipelines/pipeline.py +++ /dev/null @@ -1,266 +0,0 @@ -from concurrent.futures import ThreadPoolExecutor -from dataclasses import dataclass -from typing import Literal -from uuid import UUID - -import dlt -from django.conf import settings -from dlt.pipeline.exceptions import PipelineStepFailed - -from asgiref.sync import async_to_sync -import asyncio -from posthog.settings.base_variables import TEST -from structlog.typing import FilteringBoundLogger -from dlt.common.libs.deltalake import get_delta_tables -from dlt.load.exceptions import LoadClientJobRetry -from dlt.sources import DltSource -from deltalake.exceptions import DeltaError -from collections import Counter - -from posthog.warehouse.data_load.validate_schema import update_last_synced_at, validate_schema_and_update_table -from posthog.warehouse.models.external_data_job import ExternalDataJob, get_external_data_job -from posthog.warehouse.models.external_data_schema import ExternalDataSchema, aget_schema_by_id -from posthog.warehouse.models.external_data_source import ExternalDataSource -from posthog.warehouse.models.table import DataWarehouseTable -from posthog.temporal.data_imports.util import prepare_s3_files_for_querying - - -@dataclass -class PipelineInputs: - source_id: UUID - run_id: str - schema_id: UUID - dataset_name: str - job_type: ExternalDataSource.Type - team_id: int - - -class DataImportPipeline: - loader_file_format: Literal["parquet"] = "parquet" - - def __init__( - self, - inputs: PipelineInputs, - source: DltSource, - 
logger: FilteringBoundLogger, - reset_pipeline: bool, - incremental: bool = False, - ): - self.inputs = inputs - self.logger = logger - - self._incremental = incremental - self.refresh_dlt = reset_pipeline - self.should_chunk_pipeline = ( - incremental - and inputs.job_type != ExternalDataSource.Type.POSTGRES - and inputs.job_type != ExternalDataSource.Type.MYSQL - and inputs.job_type != ExternalDataSource.Type.MSSQL - and inputs.job_type != ExternalDataSource.Type.SNOWFLAKE - and inputs.job_type != ExternalDataSource.Type.BIGQUERY - ) - - if self.should_chunk_pipeline: - # Incremental syncs: Assuming each page is 100 items for now so bound each run at 50_000 items - self.source = source.add_limit(500) - else: - self.source = source - - def _get_pipeline_name(self): - return f"{self.inputs.job_type}_pipeline_{self.inputs.team_id}_run_{self.inputs.schema_id}" - - def _get_destination(self): - if TEST: - credentials = { - "aws_access_key_id": settings.AIRBYTE_BUCKET_KEY, - "aws_secret_access_key": settings.AIRBYTE_BUCKET_SECRET, - "endpoint_url": settings.OBJECT_STORAGE_ENDPOINT, - "region_name": settings.AIRBYTE_BUCKET_REGION, - "AWS_ALLOW_HTTP": "true", - "AWS_S3_ALLOW_UNSAFE_RENAME": "true", - } - else: - credentials = { - "aws_access_key_id": settings.AIRBYTE_BUCKET_KEY, - "aws_secret_access_key": settings.AIRBYTE_BUCKET_SECRET, - "region_name": settings.AIRBYTE_BUCKET_REGION, - "AWS_S3_ALLOW_UNSAFE_RENAME": "true", - } - - return dlt.destinations.filesystem( - credentials=credentials, - bucket_url=settings.BUCKET_URL, # type: ignore - ) - - def _create_pipeline(self): - pipeline_name = self._get_pipeline_name() - destination = self._get_destination() - - dlt.config["normalize.parquet_normalizer.add_dlt_load_id"] = True - dlt.config["normalize.parquet_normalizer.add_dlt_id"] = True - - return dlt.pipeline( - pipeline_name=pipeline_name, destination=destination, dataset_name=self.inputs.dataset_name, progress="log" - ) - - async def _prepare_s3_files_for_querying(self, file_uris: list[str]): - job: ExternalDataJob = await get_external_data_job(job_id=self.inputs.run_id) - schema: ExternalDataSchema = await aget_schema_by_id(self.inputs.schema_id, self.inputs.team_id) - - prepare_s3_files_for_querying(job.folder_path(), schema.name, file_uris) - - def _run(self) -> dict[str, int]: - if self.refresh_dlt: - self.logger.info("Pipeline getting a full refresh due to reset_pipeline being set") - - pipeline = self._create_pipeline() - - total_counts: Counter[str] = Counter({}) - - # Do chunking for incremental syncing on API based endpoints (e.g. 
not sql databases) - if self.should_chunk_pipeline: - # will get overwritten - counts: Counter[str] = Counter({"start": 1}) - pipeline_runs = 0 - - while counts: - self.logger.info(f"Running incremental (non-sql) pipeline, run ${pipeline_runs}") - - try: - pipeline.run( - self.source, - loader_file_format=self.loader_file_format, - refresh="drop_sources" if self.refresh_dlt and pipeline_runs == 0 else None, - ) - except PipelineStepFailed as e: - # Remove once DLT support writing empty Delta files - if isinstance(e.exception, LoadClientJobRetry): - if "Generic S3 error" not in e.exception.retry_message: - raise - elif isinstance(e.exception, DeltaError): - if e.exception.args[0] != "Generic error: No data source supplied to write command.": - raise - else: - raise - - if pipeline.last_trace.last_normalize_info is not None: - row_counts = pipeline.last_trace.last_normalize_info.row_counts - else: - row_counts = {} - # Remove any DLT tables from the counts - filtered_rows = dict(filter(lambda pair: not pair[0].startswith("_dlt"), row_counts.items())) - counts = Counter(filtered_rows) - total_counts = counts + total_counts - - if total_counts.total() > 0: - delta_tables = get_delta_tables(pipeline) - - table_format = DataWarehouseTable.TableFormat.DeltaS3Wrapper - - # Workaround while we fix msising table_format on DLT resource - if len(delta_tables.values()) == 0: - table_format = DataWarehouseTable.TableFormat.Delta - - # There should only ever be one table here - for table in delta_tables.values(): - self.logger.info("Compacting delta table") - table.optimize.compact() - table.vacuum(retention_hours=24, enforce_retention_duration=False, dry_run=False) - - file_uris = table.file_uris() - self.logger.info(f"Preparing S3 files - total parquet files: {len(file_uris)}") - async_to_sync(self._prepare_s3_files_for_querying)(file_uris) - - self.logger.info(f"Table format: {table_format}") - - async_to_sync(validate_schema_and_update_table)( - run_id=self.inputs.run_id, - team_id=self.inputs.team_id, - schema_id=self.inputs.schema_id, - table_schema=self.source.schema.tables, - row_count=total_counts.total(), - table_format=table_format, - ) - else: - self.logger.info("No table_counts, skipping validate_schema_and_update_table") - - pipeline_runs = pipeline_runs + 1 - else: - self.logger.info("Running standard pipeline") - try: - pipeline.run( - self.source, - loader_file_format=self.loader_file_format, - refresh="drop_sources" if self.refresh_dlt else None, - ) - except PipelineStepFailed as e: - # Remove once DLT support writing empty Delta files - if isinstance(e.exception, LoadClientJobRetry): - if "Generic S3 error" not in e.exception.retry_message: - raise - elif isinstance(e.exception, DeltaError): - if e.exception.args[0] != "Generic error: No data source supplied to write command.": - raise - else: - raise - - if pipeline.last_trace.last_normalize_info is not None: - row_counts = pipeline.last_trace.last_normalize_info.row_counts - else: - row_counts = {} - - filtered_rows = dict(filter(lambda pair: not pair[0].startswith("_dlt"), row_counts.items())) - counts = Counter(filtered_rows) - total_counts = total_counts + counts - - if total_counts.total() > 0: - delta_tables = get_delta_tables(pipeline) - - table_format = DataWarehouseTable.TableFormat.DeltaS3Wrapper - - # Workaround while we fix msising table_format on DLT resource - if len(delta_tables.values()) == 0: - table_format = DataWarehouseTable.TableFormat.Delta - - # There should only ever be one table here - for table in 
delta_tables.values(): - self.logger.info("Compacting delta table") - table.optimize.compact() - table.vacuum(retention_hours=24, enforce_retention_duration=False, dry_run=False) - - file_uris = table.file_uris() - self.logger.info(f"Preparing S3 files - total parquet files: {len(file_uris)}") - async_to_sync(self._prepare_s3_files_for_querying)(file_uris) - - self.logger.info(f"Table format: {table_format}") - - async_to_sync(validate_schema_and_update_table)( - run_id=self.inputs.run_id, - team_id=self.inputs.team_id, - schema_id=self.inputs.schema_id, - table_schema=self.source.schema.tables, - row_count=total_counts.total(), - table_format=table_format, - ) - else: - self.logger.info("No table_counts, skipping validate_schema_and_update_table") - - # Update last_synced_at on schema - async_to_sync(update_last_synced_at)( - job_id=self.inputs.run_id, schema_id=str(self.inputs.schema_id), team_id=self.inputs.team_id - ) - - # Cleanup: delete local state from the file system - pipeline.drop() - - return dict(total_counts) - - async def run(self) -> dict[str, int]: - try: - # Use a dedicated thread pool to not interfere with the heartbeater thread - with ThreadPoolExecutor(max_workers=5) as pipeline_executor: - loop = asyncio.get_event_loop() - return await loop.run_in_executor(pipeline_executor, self._run) - except PipelineStepFailed as e: - self.logger.exception(f"Data import failed for endpoint with exception {e}", exc_info=e) - raise diff --git a/posthog/temporal/data_imports/pipelines/pipeline_sync.py b/posthog/temporal/data_imports/pipelines/pipeline_sync.py index ac6d31433a808..e3ca8a4ecbdaa 100644 --- a/posthog/temporal/data_imports/pipelines/pipeline_sync.py +++ b/posthog/temporal/data_imports/pipelines/pipeline_sync.py @@ -1,3 +1,4 @@ +from dataclasses import dataclass from typing import Any, Literal, Optional from collections.abc import Iterator, Sequence import uuid @@ -34,7 +35,6 @@ from clickhouse_driver.errors import ServerException from posthog.temporal.common.logger import bind_temporal_worker_logger_sync -from posthog.temporal.data_imports.pipelines.pipeline import PipelineInputs from posthog.warehouse.data_load.validate_schema import dlt_to_hogql_type from posthog.warehouse.models.credential import get_or_create_datawarehouse_credential from posthog.warehouse.models.external_data_job import ExternalDataJob @@ -44,6 +44,16 @@ from posthog.temporal.data_imports.util import prepare_s3_files_for_querying +@dataclass +class PipelineInputs: + source_id: uuid.UUID + run_id: str + schema_id: uuid.UUID + dataset_name: str + job_type: ExternalDataSource.Type + team_id: int + + class DataImportPipelineSync: loader_file_format: Literal["parquet"] = "parquet" @@ -141,16 +151,19 @@ def _iter_chunks(self, lst: list[Any], n: int) -> Iterator[list[Any]]: yield lst[i : i + n] # Monkey patch to fix large memory consumption until https://github.com/dlt-hub/dlt/pull/2031 gets merged in - FilesystemDestinationClientConfiguration.delta_jobs_per_write = 1 - FilesystemClient.create_table_chain_completed_followup_jobs = create_table_chain_completed_followup_jobs # type: ignore - FilesystemClient._iter_chunks = _iter_chunks # type: ignore + # This only works on incremental syncs right now though + if self._incremental: + FilesystemDestinationClientConfiguration.delta_jobs_per_write = 1 + FilesystemClient.create_table_chain_completed_followup_jobs = create_table_chain_completed_followup_jobs # type: ignore + FilesystemClient._iter_chunks = _iter_chunks # type: ignore + + 
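The monkey patches above and the dlt.config assignments just below all serve the same goal of keeping memory bounded while loading. A small sketch that gathers those knobs in one helper, with values copied from this diff; the comments are my reading of the settings rather than dlt documentation:

import dlt


def apply_memory_friendly_load_settings() -> None:
    # Rotate writer files early so no single table buffers unbounded data.
    dlt.config["data_writer.file_max_items"] = 500_000
    dlt.config["data_writer.file_max_bytes"] = 500_000_000  # 500 MB
    # Load one table at a time instead of fanning out across all tables at once.
    dlt.config["loader_parallelism_strategy"] = "table-sequential"
    # Mirrors the monkey-patched delta_jobs_per_write=1: one delta job per write.
    dlt.config["delta_jobs_per_write"] = 1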
dlt.config["data_writer.file_max_items"] = 500_000 + dlt.config["data_writer.file_max_bytes"] = 500_000_000 # 500 MB + dlt.config["loader_parallelism_strategy"] = "table-sequential" + dlt.config["delta_jobs_per_write"] = 1 dlt.config["normalize.parquet_normalizer.add_dlt_load_id"] = True dlt.config["normalize.parquet_normalizer.add_dlt_id"] = True - dlt.config["data_writer.file_max_items"] = 500_000 - dlt.config["data_writer.file_max_bytes"] = 500_000_000 # 500 MB - dlt.config["loader_parallelism_strategy"] = "table-sequential" - dlt.config["delta_jobs_per_write"] = 1 return dlt.pipeline( pipeline_name=pipeline_name, destination=destination, dataset_name=self.inputs.dataset_name, progress="log" diff --git a/posthog/temporal/data_imports/pipelines/test/test_pipeline.py b/posthog/temporal/data_imports/pipelines/test/test_pipeline_sync.py similarity index 73% rename from posthog/temporal/data_imports/pipelines/test/test_pipeline.py rename to posthog/temporal/data_imports/pipelines/test/test_pipeline_sync.py index 965b77ca5f9ae..3b265f54a352a 100644 --- a/posthog/temporal/data_imports/pipelines/test/test_pipeline.py +++ b/posthog/temporal/data_imports/pipelines/test/test_pipeline_sync.py @@ -4,8 +4,7 @@ import pytest import structlog -from asgiref.sync import sync_to_async -from posthog.temporal.data_imports.pipelines.pipeline import DataImportPipeline, PipelineInputs +from posthog.temporal.data_imports.pipelines.pipeline_sync import DataImportPipelineSync, PipelineInputs from posthog.temporal.data_imports.pipelines.stripe import stripe_source from posthog.test.base import APIBaseTest from posthog.warehouse.models.external_data_job import ExternalDataJob @@ -14,8 +13,8 @@ class TestDataImportPipeline(APIBaseTest): - async def _create_pipeline(self, schema_name: str, incremental: bool): - source = await sync_to_async(ExternalDataSource.objects.create)( + def _create_pipeline(self, schema_name: str, incremental: bool): + source = ExternalDataSource.objects.create( source_id=str(uuid.uuid4()), connection_id=str(uuid.uuid4()), destination_id=str(uuid.uuid4()), @@ -23,13 +22,13 @@ async def _create_pipeline(self, schema_name: str, incremental: bool): status="running", source_type="Stripe", ) - schema = await sync_to_async(ExternalDataSchema.objects.create)( + schema = ExternalDataSchema.objects.create( name=schema_name, team_id=self.team.pk, source_id=source.pk, source=source, ) - job = await sync_to_async(ExternalDataJob.objects.create)( + job = ExternalDataJob.objects.create( team_id=self.team.pk, pipeline_id=source.pk, pipeline=source, @@ -40,7 +39,7 @@ async def _create_pipeline(self, schema_name: str, incremental: bool): workflow_id=str(uuid.uuid4()), ) - pipeline = DataImportPipeline( + pipeline = DataImportPipelineSync( inputs=PipelineInputs( source_id=source.pk, run_id=str(job.pk), @@ -65,45 +64,43 @@ async def _create_pipeline(self, schema_name: str, incremental: bool): return pipeline @pytest.mark.django_db(transaction=True) - @pytest.mark.asyncio - async def test_pipeline_non_incremental(self): + def test_pipeline_non_incremental(self): def mock_create_pipeline(local_self: Any): mock = MagicMock() mock.last_trace.last_normalize_info.row_counts = {"customer": 1} return mock with ( - patch.object(DataImportPipeline, "_create_pipeline", mock_create_pipeline), + patch.object(DataImportPipelineSync, "_create_pipeline", mock_create_pipeline), patch( - "posthog.temporal.data_imports.pipelines.pipeline.validate_schema_and_update_table" + 
"posthog.temporal.data_imports.pipelines.pipeline_sync.validate_schema_and_update_table_sync" ) as mock_validate_schema_and_update_table, - patch("posthog.temporal.data_imports.pipelines.pipeline.get_delta_tables"), - patch("posthog.temporal.data_imports.pipelines.pipeline.update_last_synced_at"), + patch("posthog.temporal.data_imports.pipelines.pipeline_sync.get_delta_tables"), + patch("posthog.temporal.data_imports.pipelines.pipeline_sync.update_last_synced_at_sync"), ): - pipeline = await self._create_pipeline("Customer", False) - res = await pipeline.run() + pipeline = self._create_pipeline("Customer", False) + res = pipeline.run() assert res.get("customer") == 1 assert mock_validate_schema_and_update_table.call_count == 1 @pytest.mark.django_db(transaction=True) - @pytest.mark.asyncio - async def test_pipeline_incremental(self): + def test_pipeline_incremental(self): def mock_create_pipeline(local_self: Any): mock = MagicMock() type(mock.last_trace.last_normalize_info).row_counts = PropertyMock(side_effect=[{"customer": 1}, {}]) return mock with ( - patch.object(DataImportPipeline, "_create_pipeline", mock_create_pipeline), + patch.object(DataImportPipelineSync, "_create_pipeline", mock_create_pipeline), patch( - "posthog.temporal.data_imports.pipelines.pipeline.validate_schema_and_update_table" + "posthog.temporal.data_imports.pipelines.pipeline_sync.validate_schema_and_update_table_sync" ) as mock_validate_schema_and_update_table, - patch("posthog.temporal.data_imports.pipelines.pipeline.get_delta_tables"), - patch("posthog.temporal.data_imports.pipelines.pipeline.update_last_synced_at"), + patch("posthog.temporal.data_imports.pipelines.pipeline_sync.get_delta_tables"), + patch("posthog.temporal.data_imports.pipelines.pipeline_sync.update_last_synced_at_sync"), ): - pipeline = await self._create_pipeline("Customer", True) - res = await pipeline.run() + pipeline = self._create_pipeline("Customer", True) + res = pipeline.run() assert res.get("customer") == 1 assert mock_validate_schema_and_update_table.call_count == 2 diff --git a/posthog/temporal/data_imports/workflow_activities/create_job_model.py b/posthog/temporal/data_imports/workflow_activities/create_job_model.py index 8d3577cf1ff23..02eb6aee7d52a 100644 --- a/posthog/temporal/data_imports/workflow_activities/create_job_model.py +++ b/posthog/temporal/data_imports/workflow_activities/create_job_model.py @@ -1,19 +1,15 @@ import dataclasses import uuid -from asgiref.sync import sync_to_async from temporalio import activity # TODO: remove dependency -from posthog.warehouse.external_data_source.jobs import ( - acreate_external_data_job, -) -from posthog.warehouse.models import ExternalDataSource +from posthog.warehouse.models import ExternalDataJob, ExternalDataSource from posthog.warehouse.models.external_data_schema import ( ExternalDataSchema, ) -from posthog.temporal.common.logger import bind_temporal_worker_logger +from posthog.temporal.common.logger import bind_temporal_worker_logger_sync @dataclasses.dataclass @@ -24,25 +20,27 @@ class CreateExternalDataJobModelActivityInputs: @activity.defn -async def create_external_data_job_model_activity( +def create_external_data_job_model_activity( inputs: CreateExternalDataJobModelActivityInputs, ) -> tuple[str, bool, str]: - logger = await bind_temporal_worker_logger(team_id=inputs.team_id) + logger = bind_temporal_worker_logger_sync(team_id=inputs.team_id) try: - job = await acreate_external_data_job( + job = ExternalDataJob.objects.create( team_id=inputs.team_id, - 
external_data_source_id=inputs.source_id, - external_data_schema_id=inputs.schema_id, + pipeline_id=inputs.source_id, + schema_id=inputs.schema_id, + status=ExternalDataJob.Status.RUNNING, + rows_synced=0, workflow_id=activity.info().workflow_id, workflow_run_id=activity.info().workflow_run_id, ) - schema = await sync_to_async(ExternalDataSchema.objects.get)(team_id=inputs.team_id, id=inputs.schema_id) + schema = ExternalDataSchema.objects.get(team_id=inputs.team_id, id=inputs.schema_id) schema.status = ExternalDataSchema.Status.RUNNING - await sync_to_async(schema.save)() + schema.save() - source = await sync_to_async(ExternalDataSource.objects.get)(team_id=inputs.team_id, id=schema.source_id) + source: ExternalDataSource = schema.source logger.info( f"Created external data job for external data source {inputs.source_id}", diff --git a/posthog/temporal/data_imports/workflow_activities/import_data.py b/posthog/temporal/data_imports/workflow_activities/import_data.py deleted file mode 100644 index 26ce621f99a3d..0000000000000 --- a/posthog/temporal/data_imports/workflow_activities/import_data.py +++ /dev/null @@ -1,434 +0,0 @@ -import dataclasses -import uuid -from datetime import datetime -from typing import Any - -from structlog.typing import FilteringBoundLogger -from temporalio import activity - -from posthog.temporal.common.heartbeat import Heartbeater -from posthog.temporal.common.logger import bind_temporal_worker_logger -from posthog.temporal.data_imports.pipelines.bigquery import delete_table -from posthog.temporal.data_imports.pipelines.helpers import aremove_reset_pipeline, aupdate_job_count - -from posthog.temporal.data_imports.pipelines.pipeline import DataImportPipeline, PipelineInputs -from posthog.temporal.data_imports.util import is_posthog_team -from posthog.warehouse.models import ( - ExternalDataJob, - ExternalDataSource, - get_external_data_job, -) -from posthog.warehouse.models.external_data_schema import ( - ExternalDataSchema, - aget_schema_by_id, -) -from posthog.warehouse.models.ssh_tunnel import SSHTunnel - - -@dataclasses.dataclass -class ImportDataActivityInputs: - team_id: int - schema_id: uuid.UUID - source_id: uuid.UUID - run_id: str - - -@activity.defn -async def import_data_activity(inputs: ImportDataActivityInputs): - async with Heartbeater(factor=30): # Every 10 secs - model: ExternalDataJob = await get_external_data_job( - job_id=inputs.run_id, - ) - - logger = await bind_temporal_worker_logger(team_id=inputs.team_id) - - logger.debug("Running *ASYNC* import_data") - - job_inputs = PipelineInputs( - source_id=inputs.source_id, - schema_id=inputs.schema_id, - run_id=inputs.run_id, - team_id=inputs.team_id, - job_type=model.pipeline.source_type, - dataset_name=model.folder_path(), - ) - - reset_pipeline = model.pipeline.job_inputs.get("reset_pipeline", "False") == "True" - - schema: ExternalDataSchema = await aget_schema_by_id(inputs.schema_id, inputs.team_id) - - endpoints = [schema.name] - - source = None - if model.pipeline.source_type == ExternalDataSource.Type.STRIPE: - from posthog.temporal.data_imports.pipelines.stripe import stripe_source - - stripe_secret_key = model.pipeline.job_inputs.get("stripe_secret_key", None) - account_id = model.pipeline.job_inputs.get("stripe_account_id", None) - if not stripe_secret_key: - raise ValueError(f"Stripe secret key not found for job {model.id}") - - source = stripe_source( - api_key=stripe_secret_key, - account_id=account_id, - endpoint=schema.name, - team_id=inputs.team_id, - job_id=inputs.run_id, - 
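Both the removed async activity here and import_data_activity_sync later in the diff follow the same pattern: branch on the source type, build a dlt source for the schema, and hand everything to a single _run helper. A table-driven sketch of that dispatch, with hypothetical builder stubs standing in for stripe_source, hubspot, zendesk_source and the rest:

from typing import Any, Callable

SourceBuilder = Callable[[dict[str, Any], str], dict[str, Any]]


def _stripe_stub(job_inputs: dict[str, Any], endpoint: str) -> dict[str, Any]:
    return {"kind": "stripe", "endpoint": endpoint, "account": job_inputs.get("stripe_account_id")}


def _hubspot_stub(job_inputs: dict[str, Any], endpoint: str) -> dict[str, Any]:
    return {"kind": "hubspot", "endpoint": endpoint}


BUILDERS: dict[str, SourceBuilder] = {
    "Stripe": _stripe_stub,
    "Hubspot": _hubspot_stub,
}


def build_source(source_type: str, job_inputs: dict[str, Any], endpoint: str) -> dict[str, Any]:
    try:
        return BUILDERS[source_type](job_inputs, endpoint)
    except KeyError:
        raise ValueError(f"Source type {source_type} not supported") from None

The real activities use a long if/elif chain and return a DltSource that goes straight into _run; the registry above is only a compressed view of that branching.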
is_incremental=schema.is_incremental, - ) - - return await _run( - job_inputs=job_inputs, - source=source, - logger=logger, - inputs=inputs, - schema=schema, - reset_pipeline=reset_pipeline, - ) - elif model.pipeline.source_type == ExternalDataSource.Type.HUBSPOT: - from posthog.temporal.data_imports.pipelines.hubspot import hubspot - from posthog.temporal.data_imports.pipelines.hubspot.auth import ( - hubspot_refresh_access_token, - ) - - hubspot_access_code = model.pipeline.job_inputs.get("hubspot_secret_key", None) - refresh_token = model.pipeline.job_inputs.get("hubspot_refresh_token", None) - if not refresh_token: - raise ValueError(f"Hubspot refresh token not found for job {model.id}") - - if not hubspot_access_code: - hubspot_access_code = hubspot_refresh_access_token(refresh_token) - - source = hubspot( - api_key=hubspot_access_code, - refresh_token=refresh_token, - endpoints=tuple(endpoints), - ) - - return await _run( - job_inputs=job_inputs, - source=source, - logger=logger, - inputs=inputs, - schema=schema, - reset_pipeline=reset_pipeline, - ) - elif model.pipeline.source_type in [ - ExternalDataSource.Type.POSTGRES, - ExternalDataSource.Type.MYSQL, - ExternalDataSource.Type.MSSQL, - ]: - if is_posthog_team(inputs.team_id): - from posthog.temporal.data_imports.pipelines.sql_database_v2 import ( - sql_source_for_type, - ) - else: - from posthog.temporal.data_imports.pipelines.sql_database import ( - sql_source_for_type, - ) - - host = model.pipeline.job_inputs.get("host") - port = model.pipeline.job_inputs.get("port") - user = model.pipeline.job_inputs.get("user") - password = model.pipeline.job_inputs.get("password") - database = model.pipeline.job_inputs.get("database") - pg_schema = model.pipeline.job_inputs.get("schema") - - using_ssh_tunnel = str(model.pipeline.job_inputs.get("ssh_tunnel_enabled", False)) == "True" - ssh_tunnel_host = model.pipeline.job_inputs.get("ssh_tunnel_host") - ssh_tunnel_port = model.pipeline.job_inputs.get("ssh_tunnel_port") - ssh_tunnel_auth_type = model.pipeline.job_inputs.get("ssh_tunnel_auth_type") - ssh_tunnel_auth_type_username = model.pipeline.job_inputs.get("ssh_tunnel_auth_type_username") - ssh_tunnel_auth_type_password = model.pipeline.job_inputs.get("ssh_tunnel_auth_type_password") - ssh_tunnel_auth_type_passphrase = model.pipeline.job_inputs.get("ssh_tunnel_auth_type_passphrase") - ssh_tunnel_auth_type_private_key = model.pipeline.job_inputs.get("ssh_tunnel_auth_type_private_key") - - ssh_tunnel = SSHTunnel( - enabled=using_ssh_tunnel, - host=ssh_tunnel_host, - port=ssh_tunnel_port, - auth_type=ssh_tunnel_auth_type, - username=ssh_tunnel_auth_type_username, - password=ssh_tunnel_auth_type_password, - passphrase=ssh_tunnel_auth_type_passphrase, - private_key=ssh_tunnel_auth_type_private_key, - ) - - if ssh_tunnel.enabled: - with ssh_tunnel.get_tunnel(host, int(port)) as tunnel: - if tunnel is None: - raise Exception("Can't open tunnel to SSH server") - - source = sql_source_for_type( - source_type=ExternalDataSource.Type(model.pipeline.source_type), - host=tunnel.local_bind_host, - port=tunnel.local_bind_port, - user=user, - password=password, - database=database, - sslmode="prefer", - schema=pg_schema, - table_names=endpoints, - incremental_field=schema.sync_type_config.get("incremental_field") - if schema.is_incremental - else None, - incremental_field_type=schema.sync_type_config.get("incremental_field_type") - if schema.is_incremental - else None, - team_id=inputs.team_id, - ) - - return await _run( - job_inputs=job_inputs, - 
source=source, - logger=logger, - inputs=inputs, - schema=schema, - reset_pipeline=reset_pipeline, - ) - - source = sql_source_for_type( - source_type=ExternalDataSource.Type(model.pipeline.source_type), - host=host, - port=port, - user=user, - password=password, - database=database, - sslmode="prefer", - schema=pg_schema, - table_names=endpoints, - incremental_field=schema.sync_type_config.get("incremental_field") if schema.is_incremental else None, - incremental_field_type=schema.sync_type_config.get("incremental_field_type") - if schema.is_incremental - else None, - team_id=inputs.team_id, - ) - - return await _run( - job_inputs=job_inputs, - source=source, - logger=logger, - inputs=inputs, - schema=schema, - reset_pipeline=reset_pipeline, - ) - elif model.pipeline.source_type == ExternalDataSource.Type.SNOWFLAKE: - if is_posthog_team(inputs.team_id): - from posthog.temporal.data_imports.pipelines.sql_database_v2 import ( - snowflake_source, - ) - else: - from posthog.temporal.data_imports.pipelines.sql_database import ( - snowflake_source, - ) - - account_id = model.pipeline.job_inputs.get("account_id") - user = model.pipeline.job_inputs.get("user") - password = model.pipeline.job_inputs.get("password") - database = model.pipeline.job_inputs.get("database") - warehouse = model.pipeline.job_inputs.get("warehouse") - sf_schema = model.pipeline.job_inputs.get("schema") - role = model.pipeline.job_inputs.get("role") - - source = snowflake_source( - account_id=account_id, - user=user, - password=password, - database=database, - schema=sf_schema, - warehouse=warehouse, - role=role, - table_names=endpoints, - incremental_field=schema.sync_type_config.get("incremental_field") if schema.is_incremental else None, - incremental_field_type=schema.sync_type_config.get("incremental_field_type") - if schema.is_incremental - else None, - ) - - return await _run( - job_inputs=job_inputs, - source=source, - logger=logger, - inputs=inputs, - schema=schema, - reset_pipeline=reset_pipeline, - ) - elif model.pipeline.source_type == ExternalDataSource.Type.SALESFORCE: - from posthog.models.integration import aget_integration_by_id - from posthog.temporal.data_imports.pipelines.salesforce import ( - salesforce_source, - ) - from posthog.temporal.data_imports.pipelines.salesforce.auth import ( - salesforce_refresh_access_token, - ) - - salesforce_integration_id = model.pipeline.job_inputs.get("salesforce_integration_id", None) - - if not salesforce_integration_id: - raise ValueError(f"Salesforce integration not found for job {model.id}") - - integration = await aget_integration_by_id(integration_id=salesforce_integration_id, team_id=inputs.team_id) - salesforce_refresh_token = integration.refresh_token - - if not salesforce_refresh_token: - raise ValueError(f"Salesforce refresh token not found for job {model.id}") - - salesforce_access_token = integration.access_token - - if not salesforce_access_token: - salesforce_access_token = salesforce_refresh_access_token(salesforce_refresh_token) - - salesforce_instance_url = integration.config.get("instance_url") - - source = salesforce_source( - instance_url=salesforce_instance_url, - access_token=salesforce_access_token, - refresh_token=salesforce_refresh_token, - endpoint=schema.name, - team_id=inputs.team_id, - job_id=inputs.run_id, - is_incremental=schema.is_incremental, - ) - - return await _run( - job_inputs=job_inputs, - source=source, - logger=logger, - inputs=inputs, - schema=schema, - reset_pipeline=reset_pipeline, - ) - - elif model.pipeline.source_type == 
ExternalDataSource.Type.ZENDESK: - from posthog.temporal.data_imports.pipelines.zendesk import zendesk_source - - source = zendesk_source( - subdomain=model.pipeline.job_inputs.get("zendesk_subdomain"), - api_key=model.pipeline.job_inputs.get("zendesk_api_key"), - email_address=model.pipeline.job_inputs.get("zendesk_email_address"), - endpoint=schema.name, - team_id=inputs.team_id, - job_id=inputs.run_id, - is_incremental=schema.is_incremental, - ) - - return await _run( - job_inputs=job_inputs, - source=source, - logger=logger, - inputs=inputs, - schema=schema, - reset_pipeline=reset_pipeline, - ) - elif model.pipeline.source_type == ExternalDataSource.Type.VITALLY: - from posthog.temporal.data_imports.pipelines.vitally import vitally_source - - source = vitally_source( - secret_token=model.pipeline.job_inputs.get("secret_token"), - region=model.pipeline.job_inputs.get("region"), - subdomain=model.pipeline.job_inputs.get("subdomain"), - endpoint=schema.name, - team_id=inputs.team_id, - job_id=inputs.run_id, - is_incremental=schema.is_incremental, - ) - - return await _run( - job_inputs=job_inputs, - source=source, - logger=logger, - inputs=inputs, - schema=schema, - reset_pipeline=reset_pipeline, - ) - elif model.pipeline.source_type == ExternalDataSource.Type.BIGQUERY: - from posthog.temporal.data_imports.pipelines.sql_database_v2 import ( - bigquery_source, - ) - - dataset_id = model.pipeline.job_inputs.get("dataset_id") - project_id = model.pipeline.job_inputs.get("project_id") - private_key = model.pipeline.job_inputs.get("private_key") - private_key_id = model.pipeline.job_inputs.get("private_key_id") - client_email = model.pipeline.job_inputs.get("client_email") - token_uri = model.pipeline.job_inputs.get("token_uri") - - destination_table = f"{project_id}.{dataset_id}.__posthog_import_{inputs.run_id}_{str(datetime.now().timestamp()).replace('.', '')}" - try: - source = bigquery_source( - dataset_id=dataset_id, - project_id=project_id, - private_key=private_key, - private_key_id=private_key_id, - client_email=client_email, - token_uri=token_uri, - table_name=schema.name, - bq_destination_table_id=destination_table, - incremental_field=schema.sync_type_config.get("incremental_field") - if schema.is_incremental - else None, - incremental_field_type=schema.sync_type_config.get("incremental_field_type") - if schema.is_incremental - else None, - ) - - await _run( - job_inputs=job_inputs, - source=source, - logger=logger, - inputs=inputs, - schema=schema, - reset_pipeline=reset_pipeline, - ) - except: - raise - finally: - # Delete the destination table (if it exists) after we're done with it - delete_table( - table_id=destination_table, - project_id=project_id, - private_key=private_key, - private_key_id=private_key_id, - client_email=client_email, - token_uri=token_uri, - ) - logger.info(f"Deleting bigquery temp destination table: {destination_table}") - elif model.pipeline.source_type == ExternalDataSource.Type.CHARGEBEE: - from posthog.temporal.data_imports.pipelines.chargebee import ( - chargebee_source, - ) - - source = chargebee_source( - api_key=model.pipeline.job_inputs.get("api_key"), - site_name=model.pipeline.job_inputs.get("site_name"), - endpoint=schema.name, - team_id=inputs.team_id, - job_id=inputs.run_id, - is_incremental=schema.is_incremental, - ) - - return await _run( - job_inputs=job_inputs, - source=source, - logger=logger, - inputs=inputs, - schema=schema, - reset_pipeline=reset_pipeline, - ) - else: - raise ValueError(f"Source type {model.pipeline.source_type} not 
supported") - - -async def _run( - job_inputs: PipelineInputs, - source: Any, - logger: FilteringBoundLogger, - inputs: ImportDataActivityInputs, - schema: ExternalDataSchema, - reset_pipeline: bool, -): - table_row_counts = await DataImportPipeline(job_inputs, source, logger, reset_pipeline, schema.is_incremental).run() - total_rows_synced = sum(table_row_counts.values()) - - await aupdate_job_count(inputs.run_id, inputs.team_id, total_rows_synced) - await aremove_reset_pipeline(inputs.source_id) diff --git a/posthog/temporal/data_imports/workflow_activities/import_data_sync.py b/posthog/temporal/data_imports/workflow_activities/import_data_sync.py index 9fc9489fabc94..ddb242483ab31 100644 --- a/posthog/temporal/data_imports/workflow_activities/import_data_sync.py +++ b/posthog/temporal/data_imports/workflow_activities/import_data_sync.py @@ -1,3 +1,5 @@ +import dataclasses +import uuid from datetime import datetime from typing import Any @@ -5,13 +7,12 @@ from temporalio import activity +from posthog.models.integration import Integration from posthog.temporal.common.heartbeat_sync import HeartbeaterSync from posthog.temporal.data_imports.pipelines.bigquery import delete_table -from posthog.temporal.data_imports.pipelines.pipeline import PipelineInputs -from posthog.temporal.data_imports.pipelines.pipeline_sync import DataImportPipelineSync +from posthog.temporal.data_imports.pipelines.pipeline_sync import DataImportPipelineSync, PipelineInputs from posthog.temporal.data_imports.util import is_posthog_team -from posthog.temporal.data_imports.workflow_activities.import_data import ImportDataActivityInputs from posthog.warehouse.models import ( ExternalDataJob, ExternalDataSource, @@ -22,6 +23,14 @@ from posthog.warehouse.models.ssh_tunnel import SSHTunnel +@dataclasses.dataclass +class ImportDataActivityInputs: + team_id: int + schema_id: uuid.UUID + source_id: uuid.UUID + run_id: str + + @activity.defn def import_data_activity_sync(inputs: ImportDataActivityInputs): logger = bind_temporal_worker_logger_sync(team_id=inputs.team_id) @@ -53,7 +62,60 @@ def import_data_activity_sync(inputs: ImportDataActivityInputs): endpoints = [schema.name] source = None - if model.pipeline.source_type in [ + if model.pipeline.source_type == ExternalDataSource.Type.STRIPE: + from posthog.temporal.data_imports.pipelines.stripe import stripe_source + + stripe_secret_key = model.pipeline.job_inputs.get("stripe_secret_key", None) + account_id = model.pipeline.job_inputs.get("stripe_account_id", None) + if not stripe_secret_key: + raise ValueError(f"Stripe secret key not found for job {model.id}") + + source = stripe_source( + api_key=stripe_secret_key, + account_id=account_id, + endpoint=schema.name, + team_id=inputs.team_id, + job_id=inputs.run_id, + is_incremental=schema.is_incremental, + ) + + return _run( + job_inputs=job_inputs, + source=source, + logger=logger, + inputs=inputs, + schema=schema, + reset_pipeline=reset_pipeline, + ) + elif model.pipeline.source_type == ExternalDataSource.Type.HUBSPOT: + from posthog.temporal.data_imports.pipelines.hubspot import hubspot + from posthog.temporal.data_imports.pipelines.hubspot.auth import ( + hubspot_refresh_access_token, + ) + + hubspot_access_code = model.pipeline.job_inputs.get("hubspot_secret_key", None) + refresh_token = model.pipeline.job_inputs.get("hubspot_refresh_token", None) + if not refresh_token: + raise ValueError(f"Hubspot refresh token not found for job {model.id}") + + if not hubspot_access_code: + hubspot_access_code = 
hubspot_refresh_access_token(refresh_token) + + source = hubspot( + api_key=hubspot_access_code, + refresh_token=refresh_token, + endpoints=tuple(endpoints), + ) + + return _run( + job_inputs=job_inputs, + source=source, + logger=logger, + inputs=inputs, + schema=schema, + reset_pipeline=reset_pipeline, + ) + elif model.pipeline.source_type in [ ExternalDataSource.Type.POSTGRES, ExternalDataSource.Type.MYSQL, ExternalDataSource.Type.MSSQL, @@ -140,6 +202,134 @@ def import_data_activity_sync(inputs: ImportDataActivityInputs): team_id=inputs.team_id, ) + return _run( + job_inputs=job_inputs, + source=source, + logger=logger, + inputs=inputs, + schema=schema, + reset_pipeline=reset_pipeline, + ) + elif model.pipeline.source_type == ExternalDataSource.Type.SNOWFLAKE: + if is_posthog_team(inputs.team_id): + from posthog.temporal.data_imports.pipelines.sql_database_v2 import ( + snowflake_source, + ) + else: + from posthog.temporal.data_imports.pipelines.sql_database import ( + snowflake_source, + ) + + account_id = model.pipeline.job_inputs.get("account_id") + user = model.pipeline.job_inputs.get("user") + password = model.pipeline.job_inputs.get("password") + database = model.pipeline.job_inputs.get("database") + warehouse = model.pipeline.job_inputs.get("warehouse") + sf_schema = model.pipeline.job_inputs.get("schema") + role = model.pipeline.job_inputs.get("role") + + source = snowflake_source( + account_id=account_id, + user=user, + password=password, + database=database, + schema=sf_schema, + warehouse=warehouse, + role=role, + table_names=endpoints, + incremental_field=schema.sync_type_config.get("incremental_field") if schema.is_incremental else None, + incremental_field_type=schema.sync_type_config.get("incremental_field_type") + if schema.is_incremental + else None, + ) + + return _run( + job_inputs=job_inputs, + source=source, + logger=logger, + inputs=inputs, + schema=schema, + reset_pipeline=reset_pipeline, + ) + elif model.pipeline.source_type == ExternalDataSource.Type.SALESFORCE: + from posthog.temporal.data_imports.pipelines.salesforce import ( + salesforce_source, + ) + from posthog.temporal.data_imports.pipelines.salesforce.auth import ( + salesforce_refresh_access_token, + ) + + salesforce_integration_id = model.pipeline.job_inputs.get("salesforce_integration_id", None) + + if not salesforce_integration_id: + raise ValueError(f"Salesforce integration not found for job {model.id}") + + integration = Integration.objects.get(id=salesforce_integration_id, team_id=inputs.team_id) + salesforce_refresh_token = integration.refresh_token + + if not salesforce_refresh_token: + raise ValueError(f"Salesforce refresh token not found for job {model.id}") + + salesforce_access_token = integration.access_token + + if not salesforce_access_token: + salesforce_access_token = salesforce_refresh_access_token(salesforce_refresh_token) + + salesforce_instance_url = integration.config.get("instance_url") + + source = salesforce_source( + instance_url=salesforce_instance_url, + access_token=salesforce_access_token, + refresh_token=salesforce_refresh_token, + endpoint=schema.name, + team_id=inputs.team_id, + job_id=inputs.run_id, + is_incremental=schema.is_incremental, + ) + + return _run( + job_inputs=job_inputs, + source=source, + logger=logger, + inputs=inputs, + schema=schema, + reset_pipeline=reset_pipeline, + ) + + elif model.pipeline.source_type == ExternalDataSource.Type.ZENDESK: + from posthog.temporal.data_imports.pipelines.zendesk import zendesk_source + + source = zendesk_source( + 
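The Hubspot and Salesforce branches above share a small fallback: use the stored access token if one exists, otherwise mint a fresh one from the refresh token, and fail early when even the refresh token is missing. Distilled into a sketch, with the refresh callable standing in for hubspot_refresh_access_token or salesforce_refresh_access_token:

from typing import Callable, Optional


def resolve_access_token(
    stored_access_token: Optional[str],
    refresh_token: Optional[str],
    refresh: Callable[[str], str],
) -> str:
    if not refresh_token:
        raise ValueError("refresh token not found")
    if stored_access_token:
        return stored_access_token
    return refresh(refresh_token)


# Example with a dummy refresh function:
assert resolve_access_token(None, "rt_123", lambda rt: f"access-for-{rt}") == "access-for-rt_123"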
subdomain=model.pipeline.job_inputs.get("zendesk_subdomain"), + api_key=model.pipeline.job_inputs.get("zendesk_api_key"), + email_address=model.pipeline.job_inputs.get("zendesk_email_address"), + endpoint=schema.name, + team_id=inputs.team_id, + job_id=inputs.run_id, + is_incremental=schema.is_incremental, + ) + + return _run( + job_inputs=job_inputs, + source=source, + logger=logger, + inputs=inputs, + schema=schema, + reset_pipeline=reset_pipeline, + ) + elif model.pipeline.source_type == ExternalDataSource.Type.VITALLY: + from posthog.temporal.data_imports.pipelines.vitally import vitally_source + + source = vitally_source( + secret_token=model.pipeline.job_inputs.get("secret_token"), + region=model.pipeline.job_inputs.get("region"), + subdomain=model.pipeline.job_inputs.get("subdomain"), + endpoint=schema.name, + team_id=inputs.team_id, + job_id=inputs.run_id, + is_incremental=schema.is_incremental, + ) + return _run( job_inputs=job_inputs, source=source, @@ -198,6 +388,28 @@ def import_data_activity_sync(inputs: ImportDataActivityInputs): token_uri=token_uri, ) logger.info(f"Deleting bigquery temp destination table: {destination_table}") + elif model.pipeline.source_type == ExternalDataSource.Type.CHARGEBEE: + from posthog.temporal.data_imports.pipelines.chargebee import ( + chargebee_source, + ) + + source = chargebee_source( + api_key=model.pipeline.job_inputs.get("api_key"), + site_name=model.pipeline.job_inputs.get("site_name"), + endpoint=schema.name, + team_id=inputs.team_id, + job_id=inputs.run_id, + is_incremental=schema.is_incremental, + ) + + return _run( + job_inputs=job_inputs, + source=source, + logger=logger, + inputs=inputs, + schema=schema, + reset_pipeline=reset_pipeline, + ) else: raise ValueError(f"Source type {model.pipeline.source_type} not supported") diff --git a/posthog/temporal/data_imports/workflow_activities/sync_new_schemas.py b/posthog/temporal/data_imports/workflow_activities/sync_new_schemas.py index 34e27b0cd49ff..2bc916d3ec9d4 100644 --- a/posthog/temporal/data_imports/workflow_activities/sync_new_schemas.py +++ b/posthog/temporal/data_imports/workflow_activities/sync_new_schemas.py @@ -1,9 +1,8 @@ import dataclasses -from asgiref.sync import sync_to_async from temporalio import activity -from posthog.temporal.common.logger import bind_temporal_worker_logger +from posthog.temporal.common.logger import bind_temporal_worker_logger_sync from posthog.temporal.data_imports.pipelines.schemas import PIPELINE_TYPE_SCHEMA_DEFAULT_MAPPING from posthog.warehouse.models import sync_old_schemas_with_new_schemas, ExternalDataSource @@ -21,12 +20,12 @@ class SyncNewSchemasActivityInputs: @activity.defn -async def sync_new_schemas_activity(inputs: SyncNewSchemasActivityInputs) -> None: - logger = await bind_temporal_worker_logger(team_id=inputs.team_id) +def sync_new_schemas_activity(inputs: SyncNewSchemasActivityInputs) -> None: + logger = bind_temporal_worker_logger_sync(team_id=inputs.team_id) logger.info("Syncing new -> old schemas") - source = await sync_to_async(ExternalDataSource.objects.get)(team_id=inputs.team_id, id=inputs.source_id) + source = ExternalDataSource.objects.get(team_id=inputs.team_id, id=inputs.source_id) schemas_to_sync: list[str] = [] @@ -65,8 +64,8 @@ async def sync_new_schemas_activity(inputs: SyncNewSchemasActivityInputs) -> Non private_key=ssh_tunnel_auth_type_private_key, ) - sql_schemas = await sync_to_async(get_sql_schemas_for_source_type)( - source.source_type, host, port, database, user, password, db_schema, ssh_tunnel + sql_schemas 
= get_sql_schemas_for_source_type( + ExternalDataSource.Type(source.source_type), host, port, database, user, password, db_schema, ssh_tunnel ) schemas_to_sync = list(sql_schemas.keys()) @@ -82,9 +81,7 @@ async def sync_new_schemas_activity(inputs: SyncNewSchemasActivityInputs) -> Non sf_schema = source.job_inputs.get("schema") role = source.job_inputs.get("role") - sql_schemas = await sync_to_async(get_snowflake_schemas)( - account_id, database, warehouse, user, password, sf_schema, role - ) + sql_schemas = get_snowflake_schemas(account_id, database, warehouse, user, password, sf_schema, role) schemas_to_sync = list(sql_schemas.keys()) else: @@ -92,7 +89,7 @@ async def sync_new_schemas_activity(inputs: SyncNewSchemasActivityInputs) -> Non # TODO: this could cause a race condition where each schema worker creates the missing schema - schemas_created = await sync_to_async(sync_old_schemas_with_new_schemas)( + schemas_created = sync_old_schemas_with_new_schemas( schemas_to_sync, source_id=inputs.source_id, team_id=inputs.team_id, diff --git a/posthog/temporal/tests/batch_exports/test_import_data.py b/posthog/temporal/tests/batch_exports/test_import_data.py index 229f063cc9b43..93d20fbd44b23 100644 --- a/posthog/temporal/tests/batch_exports/test_import_data.py +++ b/posthog/temporal/tests/batch_exports/test_import_data.py @@ -1,9 +1,9 @@ from typing import Any from unittest import mock import pytest -from asgiref.sync import sync_to_async from posthog.models.team.team import Team -from posthog.temporal.data_imports.workflow_activities.import_data import ImportDataActivityInputs, import_data_activity +from posthog.temporal.data_imports import import_data_activity_sync +from posthog.temporal.data_imports.workflow_activities.import_data_sync import ImportDataActivityInputs from posthog.warehouse.models.credential import DataWarehouseCredential from posthog.warehouse.models.external_data_job import ExternalDataJob from posthog.warehouse.models.external_data_schema import ExternalDataSchema @@ -12,8 +12,8 @@ from posthog.warehouse.models.table import DataWarehouseTable -async def _setup(team: Team, job_inputs: dict[Any, Any]) -> ImportDataActivityInputs: - source = await sync_to_async(ExternalDataSource.objects.create)( +def _setup(team: Team, job_inputs: dict[Any, Any]) -> ImportDataActivityInputs: + source = ExternalDataSource.objects.create( team=team, source_id="source_id", connection_id="connection_id", @@ -21,10 +21,8 @@ async def _setup(team: Team, job_inputs: dict[Any, Any]) -> ImportDataActivityIn source_type=ExternalDataSource.Type.POSTGRES, job_inputs=job_inputs, ) - credentials = await sync_to_async(DataWarehouseCredential.objects.create)( - access_key="blah", access_secret="blah", team=team - ) - warehouse_table = await sync_to_async(DataWarehouseTable.objects.create)( + credentials = DataWarehouseCredential.objects.create(access_key="blah", access_secret="blah", team=team) + warehouse_table = DataWarehouseTable.objects.create( name="table_1", format="Parquet", team=team, @@ -34,7 +32,7 @@ async def _setup(team: Team, job_inputs: dict[Any, Any]) -> ImportDataActivityIn url_pattern="https://bucket.s3/data/*", columns={"id": {"hogql": "StringDatabaseField", "clickhouse": "Nullable(String)", "schema_valid": True}}, ) - schema = await sync_to_async(ExternalDataSchema.objects.create)( + schema = ExternalDataSchema.objects.create( team=team, name="table_1", source=source, @@ -43,7 +41,7 @@ async def _setup(team: Team, job_inputs: dict[Any, Any]) -> ImportDataActivityIn 
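At its core, the sync_new_schemas_activity shown above diffs the schema names it just discovered against the ones already stored and hands the missing names to sync_old_schemas_with_new_schemas. Reduced to an in-memory sketch; matching rules and the actual row creation live in the real helper:

def schemas_to_create(discovered: list[str], existing: list[str]) -> list[str]:
    existing_names = set(existing)
    return [name for name in discovered if name not in existing_names]


assert schemas_to_create(["Customer", "Invoice", "Charge"], ["Customer"]) == ["Invoice", "Charge"]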
status=ExternalDataSchema.Status.COMPLETED, last_synced_at="2024-01-01", ) - job = await sync_to_async(ExternalDataJob.objects.create)( + job = ExternalDataJob.objects.create( team=team, pipeline=source, schema=schema, @@ -56,8 +54,7 @@ async def _setup(team: Team, job_inputs: dict[Any, Any]) -> ImportDataActivityIn @pytest.mark.django_db(transaction=True) -@pytest.mark.asyncio -async def test_postgres_source_without_ssh_tunnel(activity_environment, team, **kwargs): +def test_postgres_source_without_ssh_tunnel(activity_environment, team, **kwargs): job_inputs = { "host": "host.com", "port": 5432, @@ -67,15 +64,15 @@ async def test_postgres_source_without_ssh_tunnel(activity_environment, team, ** "schema": "schema", } - activity_inputs = await _setup(team, job_inputs) + activity_inputs = _setup(team, job_inputs) with ( mock.patch( "posthog.temporal.data_imports.pipelines.sql_database_v2.sql_source_for_type" ) as sql_source_for_type, - mock.patch("posthog.temporal.data_imports.workflow_activities.import_data._run"), + mock.patch("posthog.temporal.data_imports.workflow_activities.import_data_sync._run"), ): - await activity_environment.run(import_data_activity, activity_inputs) + activity_environment.run(import_data_activity_sync, activity_inputs) sql_source_for_type.assert_called_once_with( source_type=ExternalDataSource.Type.POSTGRES, @@ -94,8 +91,7 @@ async def test_postgres_source_without_ssh_tunnel(activity_environment, team, ** @pytest.mark.django_db(transaction=True) -@pytest.mark.asyncio -async def test_postgres_source_with_ssh_tunnel_disabled(activity_environment, team, **kwargs): +def test_postgres_source_with_ssh_tunnel_disabled(activity_environment, team, **kwargs): job_inputs = { "host": "host.com", "port": "5432", @@ -108,15 +104,15 @@ async def test_postgres_source_with_ssh_tunnel_disabled(activity_environment, te "ssh_tunnel_port": "", } - activity_inputs = await _setup(team, job_inputs) + activity_inputs = _setup(team, job_inputs) with ( mock.patch( "posthog.temporal.data_imports.pipelines.sql_database_v2.sql_source_for_type" ) as sql_source_for_type, - mock.patch("posthog.temporal.data_imports.workflow_activities.import_data._run"), + mock.patch("posthog.temporal.data_imports.workflow_activities.import_data_sync._run"), ): - await activity_environment.run(import_data_activity, activity_inputs) + activity_environment.run(import_data_activity_sync, activity_inputs) sql_source_for_type.assert_called_once_with( source_type=ExternalDataSource.Type.POSTGRES, @@ -136,7 +132,7 @@ async def test_postgres_source_with_ssh_tunnel_disabled(activity_environment, te @pytest.mark.django_db(transaction=True) @pytest.mark.asyncio -async def test_postgres_source_with_ssh_tunnel_enabled(activity_environment, team, **kwargs): +def test_postgres_source_with_ssh_tunnel_enabled(activity_environment, team, **kwargs): job_inputs = { "host": "host.com", "port": "5432", @@ -152,7 +148,7 @@ async def test_postgres_source_with_ssh_tunnel_enabled(activity_environment, tea "ssh_tunnel_auth_type_password": "password", } - activity_inputs = await _setup(team, job_inputs) + activity_inputs = _setup(team, job_inputs) def mock_get_tunnel(self_class, host, port): class MockedTunnel: @@ -171,10 +167,10 @@ def __exit__(self, exc_type, exc_value, exc_traceback): mock.patch( "posthog.temporal.data_imports.pipelines.sql_database_v2.sql_source_for_type" ) as sql_source_for_type_v2, - mock.patch("posthog.temporal.data_imports.workflow_activities.import_data._run"), + 
mock.patch("posthog.temporal.data_imports.workflow_activities.import_data_sync._run"), mock.patch.object(SSHTunnel, "get_tunnel", mock_get_tunnel), ): - await activity_environment.run(import_data_activity, activity_inputs) + activity_environment.run(import_data_activity_sync, activity_inputs) sql_source_for_type_v2.assert_called_once_with( source_type=ExternalDataSource.Type.POSTGRES, diff --git a/posthog/temporal/tests/data_imports/test_end_to_end.py b/posthog/temporal/tests/data_imports/test_end_to_end.py index 786d6fdd56596..cb29cbafa5d78 100644 --- a/posthog/temporal/tests/data_imports/test_end_to_end.py +++ b/posthog/temporal/tests/data_imports/test_end_to_end.py @@ -870,10 +870,11 @@ def get_jobs(): return list(jobs) - with mock.patch( - "posthog.temporal.data_imports.workflow_activities.create_job_model.acreate_external_data_job", - ) as acreate_external_data_job: - acreate_external_data_job.side_effect = Exception("Ruhoh!") + with mock.patch.object( + ExternalDataJob.objects, + "create", + ) as create_external_data_job: + create_external_data_job.side_effect = Exception("Ruhoh!") with pytest.raises(Exception): await _execute_run(workflow_id, inputs, stripe_customer["data"]) diff --git a/posthog/temporal/tests/external_data/test_external_data_job.py b/posthog/temporal/tests/external_data/test_external_data_job.py index d554fe81fc5e1..f931c97f93943 100644 --- a/posthog/temporal/tests/external_data/test_external_data_job.py +++ b/posthog/temporal/tests/external_data/test_external_data_job.py @@ -6,9 +6,9 @@ from asgiref.sync import sync_to_async from django.test import override_settings +from posthog.temporal.data_imports import import_data_activity_sync from posthog.temporal.data_imports.external_data_job import ( UpdateExternalDataJobStatusInputs, - check_schedule_activity, create_source_templates, update_external_data_job_model, ) @@ -16,58 +16,55 @@ ExternalDataJobWorkflow, ExternalDataWorkflowInputs, ) +from posthog.temporal.data_imports.pipelines.pipeline_sync import DataImportPipelineSync from posthog.temporal.data_imports.workflow_activities.check_billing_limits import check_billing_limits_activity from posthog.temporal.data_imports.workflow_activities.create_job_model import ( CreateExternalDataJobModelActivityInputs, create_external_data_job_model_activity, ) -from posthog.temporal.data_imports.workflow_activities.import_data import ImportDataActivityInputs, import_data_activity +from posthog.temporal.data_imports.workflow_activities.import_data_sync import ImportDataActivityInputs from posthog.temporal.data_imports.workflow_activities.sync_new_schemas import ( SyncNewSchemasActivityInputs, sync_new_schemas_activity, ) -from posthog.warehouse.external_data_source.jobs import acreate_external_data_job from posthog.warehouse.models import ( get_latest_run_if_exists, ExternalDataJob, ExternalDataSource, ExternalDataSchema, - get_external_data_job, ) from posthog.temporal.data_imports.pipelines.schemas import ( PIPELINE_TYPE_SCHEMA_DEFAULT_MAPPING, ) from posthog.models import Team -from posthog.temporal.data_imports.pipelines.pipeline import DataImportPipeline from temporalio.testing import WorkflowEnvironment from temporalio.common import RetryPolicy from temporalio.worker import UnsandboxedWorkflowRunner, Worker from posthog.constants import DATA_WAREHOUSE_TASK_QUEUE import pytest_asyncio -import aioboto3 +import boto3 import functools from django.conf import settings from dlt.sources.helpers.rest_client.client import RESTClient from 
dlt.common.configuration.specs.aws_credentials import AwsCredentials -import asyncio import psycopg from posthog.warehouse.models.external_data_schema import get_all_schemas_for_source_id BUCKET_NAME = "test-pipeline" -SESSION = aioboto3.Session() +SESSION = boto3.Session() create_test_client = functools.partial(SESSION.client, endpoint_url=settings.OBJECT_STORAGE_ENDPOINT) -async def delete_all_from_s3(minio_client, bucket_name: str, key_prefix: str): +def delete_all_from_s3(minio_client, bucket_name: str, key_prefix: str): """Delete all objects in bucket_name under key_prefix.""" - response = await minio_client.list_objects_v2(Bucket=bucket_name, Prefix=key_prefix) + response = minio_client.list_objects_v2(Bucket=bucket_name, Prefix=key_prefix) if "Contents" in response: for obj in response["Contents"]: if "Key" in obj: - await minio_client.delete_object(Bucket=bucket_name, Key=obj["Key"]) + minio_client.delete_object(Bucket=bucket_name, Key=obj["Key"]) @pytest.fixture @@ -76,28 +73,29 @@ def bucket_name(request) -> str: return BUCKET_NAME -@pytest_asyncio.fixture -async def minio_client(bucket_name): +@pytest.fixture +def minio_client(bucket_name): """Manage an S3 client to interact with a MinIO bucket. Yields the client after creating a bucket. Upon resuming, we delete the contents and the bucket itself. """ - async with create_test_client( + minio_client = create_test_client( "s3", aws_access_key_id=settings.OBJECT_STORAGE_ACCESS_KEY_ID, aws_secret_access_key=settings.OBJECT_STORAGE_SECRET_ACCESS_KEY, - ) as minio_client: - try: - await minio_client.head_bucket(Bucket=bucket_name) - except: - await minio_client.create_bucket(Bucket=bucket_name) + ) - yield minio_client + try: + minio_client.head_bucket(Bucket=bucket_name) + except: + minio_client.create_bucket(Bucket=bucket_name) - await delete_all_from_s3(minio_client, bucket_name, key_prefix="/") + yield minio_client - await minio_client.delete_bucket(Bucket=bucket_name) + delete_all_from_s3(minio_client, bucket_name, key_prefix="/") + + minio_client.delete_bucket(Bucket=bucket_name) @pytest.fixture @@ -127,8 +125,8 @@ async def postgres_connection(postgres_config, setup_postgres_test_db): await connection.close() -async def _create_schema(schema_name: str, source: ExternalDataSource, team: Team, table_id: Optional[str] = None): - return await sync_to_async(ExternalDataSchema.objects.create)( +def _create_schema(schema_name: str, source: ExternalDataSource, team: Team, table_id: Optional[str] = None): + return ExternalDataSchema.objects.create( name=schema_name, team_id=team.pk, source_id=source.pk, @@ -136,46 +134,64 @@ async def _create_schema(schema_name: str, source: ExternalDataSource, team: Tea ) +def _create_external_data_job( + external_data_source_id: uuid.UUID, + external_data_schema_id: uuid.UUID, + workflow_id: str, + workflow_run_id: str, + team_id: int, +) -> ExternalDataJob: + job = ExternalDataJob.objects.create( + team_id=team_id, + pipeline_id=external_data_source_id, + schema_id=external_data_schema_id, + status=ExternalDataJob.Status.RUNNING, + rows_synced=0, + workflow_id=workflow_id, + workflow_run_id=workflow_run_id, + ) + + return job + + @pytest.mark.django_db(transaction=True) -@pytest.mark.asyncio -async def test_create_external_job_activity(activity_environment, team, **kwargs): +def test_create_external_job_activity(activity_environment, team, **kwargs): """ Test that the create external job activity creates a new job """ - new_source = await sync_to_async(ExternalDataSource.objects.create)( - 
source_id=uuid.uuid4(), - connection_id=uuid.uuid4(), - destination_id=uuid.uuid4(), + new_source = ExternalDataSource.objects.create( + source_id=str(uuid.uuid4()), + connection_id=str(uuid.uuid4()), + destination_id=str(uuid.uuid4()), team=team, status="running", source_type="Stripe", ) - test_1_schema = await _create_schema("test-1", new_source, team) + test_1_schema = _create_schema("test-1", new_source, team) inputs = CreateExternalDataJobModelActivityInputs( team_id=team.id, source_id=new_source.pk, schema_id=test_1_schema.id ) - run_id, _, __ = await activity_environment.run(create_external_data_job_model_activity, inputs) + run_id, _, __ = activity_environment.run(create_external_data_job_model_activity, inputs) runs = ExternalDataJob.objects.filter(id=run_id) - assert await sync_to_async(runs.exists)() + assert runs.exists() @pytest.mark.django_db(transaction=True) -@pytest.mark.asyncio -async def test_create_external_job_activity_schemas_exist(activity_environment, team, **kwargs): - new_source = await sync_to_async(ExternalDataSource.objects.create)( - source_id=uuid.uuid4(), - connection_id=uuid.uuid4(), - destination_id=uuid.uuid4(), +def test_create_external_job_activity_schemas_exist(activity_environment, team, **kwargs): + new_source = ExternalDataSource.objects.create( + source_id=str(uuid.uuid4()), + connection_id=str(uuid.uuid4()), + destination_id=str(uuid.uuid4()), team=team, status="running", source_type="Stripe", ) - schema = await sync_to_async(ExternalDataSchema.objects.create)( + schema = ExternalDataSchema.objects.create( name=PIPELINE_TYPE_SCHEMA_DEFAULT_MAPPING[new_source.source_type][0], team_id=team.id, source_id=new_source.pk, @@ -183,25 +199,24 @@ async def test_create_external_job_activity_schemas_exist(activity_environment, inputs = CreateExternalDataJobModelActivityInputs(team_id=team.id, source_id=new_source.pk, schema_id=schema.id) - run_id, _, __ = await activity_environment.run(create_external_data_job_model_activity, inputs) + run_id, _, __ = activity_environment.run(create_external_data_job_model_activity, inputs) runs = ExternalDataJob.objects.filter(id=run_id) - assert await sync_to_async(runs.exists)() + assert runs.exists() @pytest.mark.django_db(transaction=True) -@pytest.mark.asyncio -async def test_create_external_job_activity_update_schemas(activity_environment, team, **kwargs): - new_source = await sync_to_async(ExternalDataSource.objects.create)( - source_id=uuid.uuid4(), - connection_id=uuid.uuid4(), - destination_id=uuid.uuid4(), +def test_create_external_job_activity_update_schemas(activity_environment, team, **kwargs): + new_source = ExternalDataSource.objects.create( + source_id=str(uuid.uuid4()), + connection_id=str(uuid.uuid4()), + destination_id=str(uuid.uuid4()), team=team, status="running", source_type="Stripe", ) - await sync_to_async(ExternalDataSchema.objects.create)( + ExternalDataSchema.objects.create( name=PIPELINE_TYPE_SCHEMA_DEFAULT_MAPPING[new_source.source_type][0], team_id=team.id, source_id=new_source.pk, @@ -210,36 +225,35 @@ async def test_create_external_job_activity_update_schemas(activity_environment, inputs = SyncNewSchemasActivityInputs(source_id=str(new_source.pk), team_id=team.id) - await activity_environment.run(sync_new_schemas_activity, inputs) + activity_environment.run(sync_new_schemas_activity, inputs) - all_schemas = await sync_to_async(get_all_schemas_for_source_id)(new_source.pk, team.id) + all_schemas = get_all_schemas_for_source_id(new_source.pk, team.id) assert len(all_schemas) == 
len(PIPELINE_TYPE_SCHEMA_DEFAULT_MAPPING[ExternalDataSource.Type.STRIPE]) @pytest.mark.django_db(transaction=True) -@pytest.mark.asyncio -async def test_update_external_job_activity(activity_environment, team, **kwargs): +def test_update_external_job_activity(activity_environment, team, **kwargs): """ Test that the update external job activity updates the job status """ - new_source = await sync_to_async(ExternalDataSource.objects.create)( - source_id=uuid.uuid4(), - connection_id=uuid.uuid4(), - destination_id=uuid.uuid4(), + new_source = ExternalDataSource.objects.create( + source_id=str(uuid.uuid4()), + connection_id=str(uuid.uuid4()), + destination_id=str(uuid.uuid4()), team=team, status="running", source_type="Stripe", ) - schema = await sync_to_async(ExternalDataSchema.objects.create)( + schema = ExternalDataSchema.objects.create( name=PIPELINE_TYPE_SCHEMA_DEFAULT_MAPPING[new_source.source_type][0], team_id=team.id, source_id=new_source.pk, should_sync=True, ) - new_job = await acreate_external_data_job( + new_job = _create_external_data_job( team_id=team.id, external_data_source_id=new_source.pk, workflow_id=activity_environment.info.workflow_id, @@ -257,34 +271,33 @@ async def test_update_external_job_activity(activity_environment, team, **kwargs team_id=team.id, ) - await activity_environment.run(update_external_data_job_model, inputs) - await sync_to_async(new_job.refresh_from_db)() - await sync_to_async(schema.refresh_from_db)() + activity_environment.run(update_external_data_job_model, inputs) + new_job.refresh_from_db() + schema.refresh_from_db() assert new_job.status == ExternalDataJob.Status.COMPLETED assert schema.status == ExternalDataJob.Status.COMPLETED @pytest.mark.django_db(transaction=True) -@pytest.mark.asyncio -async def test_update_external_job_activity_with_retryable_error(activity_environment, team, **kwargs): - new_source = await sync_to_async(ExternalDataSource.objects.create)( - source_id=uuid.uuid4(), - connection_id=uuid.uuid4(), - destination_id=uuid.uuid4(), +def test_update_external_job_activity_with_retryable_error(activity_environment, team, **kwargs): + new_source = ExternalDataSource.objects.create( + source_id=str(uuid.uuid4()), + connection_id=str(uuid.uuid4()), + destination_id=str(uuid.uuid4()), team=team, status="running", source_type="Stripe", ) - schema = await sync_to_async(ExternalDataSchema.objects.create)( + schema = ExternalDataSchema.objects.create( name=PIPELINE_TYPE_SCHEMA_DEFAULT_MAPPING[new_source.source_type][0], team_id=team.id, source_id=new_source.pk, should_sync=True, ) - new_job = await acreate_external_data_job( + new_job = _create_external_data_job( team_id=team.id, external_data_source_id=new_source.pk, workflow_id=activity_environment.info.workflow_id, @@ -302,9 +315,9 @@ async def test_update_external_job_activity_with_retryable_error(activity_enviro team_id=team.id, ) - await activity_environment.run(update_external_data_job_model, inputs) - await sync_to_async(new_job.refresh_from_db)() - await sync_to_async(schema.refresh_from_db)() + activity_environment.run(update_external_data_job_model, inputs) + new_job.refresh_from_db() + schema.refresh_from_db() assert new_job.status == ExternalDataJob.Status.COMPLETED assert schema.status == ExternalDataJob.Status.COMPLETED @@ -312,25 +325,24 @@ async def test_update_external_job_activity_with_retryable_error(activity_enviro @pytest.mark.django_db(transaction=True) -@pytest.mark.asyncio -async def test_update_external_job_activity_with_non_retryable_error(activity_environment, 
team, **kwargs): - new_source = await sync_to_async(ExternalDataSource.objects.create)( - source_id=uuid.uuid4(), - connection_id=uuid.uuid4(), - destination_id=uuid.uuid4(), +def test_update_external_job_activity_with_non_retryable_error(activity_environment, team, **kwargs): + new_source = ExternalDataSource.objects.create( + source_id=str(uuid.uuid4()), + connection_id=str(uuid.uuid4()), + destination_id=str(uuid.uuid4()), team=team, status="running", source_type="Postgres", ) - schema = await sync_to_async(ExternalDataSchema.objects.create)( + schema = ExternalDataSchema.objects.create( name="test_123", team_id=team.id, source_id=new_source.pk, should_sync=True, ) - new_job = await acreate_external_data_job( + new_job = _create_external_data_job( team_id=team.id, external_data_source_id=new_source.pk, workflow_id=activity_environment.info.workflow_id, @@ -348,10 +360,10 @@ async def test_update_external_job_activity_with_non_retryable_error(activity_en team_id=team.id, ) with mock.patch("posthog.warehouse.models.external_data_schema.external_data_workflow_exists", return_value=False): - await activity_environment.run(update_external_data_job_model, inputs) + activity_environment.run(update_external_data_job_model, inputs) - await sync_to_async(new_job.refresh_from_db)() - await sync_to_async(schema.refresh_from_db)() + new_job.refresh_from_db() + schema.refresh_from_db() assert new_job.status == ExternalDataJob.Status.COMPLETED assert schema.status == ExternalDataJob.Status.COMPLETED @@ -359,22 +371,21 @@ async def test_update_external_job_activity_with_non_retryable_error(activity_en @pytest.mark.django_db(transaction=True) -@pytest.mark.asyncio -async def test_run_stripe_job(activity_environment, team, minio_client, **kwargs): - async def setup_job_1(): - new_source = await sync_to_async(ExternalDataSource.objects.create)( - source_id=uuid.uuid4(), - connection_id=uuid.uuid4(), - destination_id=uuid.uuid4(), +def test_run_stripe_job(activity_environment, team, minio_client, **kwargs): + def setup_job_1(): + new_source = ExternalDataSource.objects.create( + source_id=str(uuid.uuid4()), + connection_id=str(uuid.uuid4()), + destination_id=str(uuid.uuid4()), team=team, status="running", source_type="Stripe", job_inputs={"stripe_secret_key": "test-key", "stripe_account_id": "acct_id"}, ) - customer_schema = await _create_schema("Customer", new_source, team) + customer_schema = _create_schema("Customer", new_source, team) - new_job: ExternalDataJob = await sync_to_async(ExternalDataJob.objects.create)( + new_job: ExternalDataJob = ExternalDataJob.objects.create( team_id=team.id, pipeline_id=new_source.pk, status=ExternalDataJob.Status.RUNNING, @@ -382,7 +393,7 @@ async def setup_job_1(): schema=customer_schema, ) - new_job = await get_external_data_job(new_job.id) + new_job = ExternalDataJob.objects.get(id=new_job.id) inputs = ImportDataActivityInputs( team_id=team.id, @@ -393,20 +404,20 @@ async def setup_job_1(): return new_job, inputs - async def setup_job_2(): - new_source = await sync_to_async(ExternalDataSource.objects.create)( - source_id=uuid.uuid4(), - connection_id=uuid.uuid4(), - destination_id=uuid.uuid4(), + def setup_job_2(): + new_source = ExternalDataSource.objects.create( + source_id=str(uuid.uuid4()), + connection_id=str(uuid.uuid4()), + destination_id=str(uuid.uuid4()), team=team, status="running", source_type="Stripe", job_inputs={"stripe_secret_key": "test-key", "stripe_account_id": "acct_id"}, ) - charge_schema = await _create_schema("Charge", new_source, team) + 
charge_schema = _create_schema("Charge", new_source, team) - new_job: ExternalDataJob = await sync_to_async(ExternalDataJob.objects.create)( + new_job: ExternalDataJob = ExternalDataJob.objects.create( team_id=team.id, pipeline_id=new_source.pk, status=ExternalDataJob.Status.RUNNING, @@ -414,7 +425,7 @@ async def setup_job_2(): schema=charge_schema, ) - new_job = await get_external_data_job(new_job.id) + new_job = ExternalDataJob.objects.get(id=new_job.id) inputs = ImportDataActivityInputs( team_id=team.id, @@ -425,8 +436,8 @@ async def setup_job_2(): return new_job, inputs - job_1, job_1_inputs = await setup_job_1() - job_2, job_2_inputs = await setup_job_2() + job_1, job_1_inputs = setup_job_1() + job_2, job_2_inputs = setup_job_2() def mock_customers_paginate( class_self, @@ -504,14 +515,10 @@ def mock_to_object_store_rs_credentials(class_self): mock.patch.object(AwsCredentials, "to_session_credentials", mock_to_session_credentials), mock.patch.object(AwsCredentials, "to_object_store_rs_credentials", mock_to_object_store_rs_credentials), ): - await asyncio.gather( - activity_environment.run(import_data_activity, job_1_inputs), - ) + activity_environment.run(import_data_activity_sync, job_1_inputs) - folder_path = await sync_to_async(job_1.folder_path)() - job_1_customer_objects = await minio_client.list_objects_v2( - Bucket=BUCKET_NAME, Prefix=f"{folder_path}/customer/" - ) + folder_path = job_1.folder_path() + job_1_customer_objects = minio_client.list_objects_v2(Bucket=BUCKET_NAME, Prefix=f"{folder_path}/customer/") assert len(job_1_customer_objects["Contents"]) == 2 @@ -531,33 +538,28 @@ def mock_to_object_store_rs_credentials(class_self): mock.patch.object(AwsCredentials, "to_session_credentials", mock_to_session_credentials), mock.patch.object(AwsCredentials, "to_object_store_rs_credentials", mock_to_object_store_rs_credentials), ): - await asyncio.gather( - activity_environment.run(import_data_activity, job_2_inputs), - ) + activity_environment.run(import_data_activity_sync, job_2_inputs) - job_2_charge_objects = await minio_client.list_objects_v2( - Bucket=BUCKET_NAME, Prefix=f"{job_2.folder_path()}/charge/" - ) + job_2_charge_objects = minio_client.list_objects_v2(Bucket=BUCKET_NAME, Prefix=f"{job_2.folder_path()}/charge/") assert len(job_2_charge_objects["Contents"]) == 2 @pytest.mark.django_db(transaction=True) -@pytest.mark.asyncio -async def test_run_stripe_job_row_count_update(activity_environment, team, minio_client, **kwargs): - async def setup_job_1(): - new_source = await sync_to_async(ExternalDataSource.objects.create)( - source_id=uuid.uuid4(), - connection_id=uuid.uuid4(), - destination_id=uuid.uuid4(), +def test_run_stripe_job_row_count_update(activity_environment, team, minio_client, **kwargs): + def setup_job_1(): + new_source = ExternalDataSource.objects.create( + source_id=str(uuid.uuid4()), + connection_id=str(uuid.uuid4()), + destination_id=str(uuid.uuid4()), team=team, status="running", source_type="Stripe", job_inputs={"stripe_secret_key": "test-key", "stripe_account_id": "acct_id"}, ) - customer_schema = await _create_schema("Customer", new_source, team) + customer_schema = _create_schema("Customer", new_source, team) - new_job: ExternalDataJob = await sync_to_async(ExternalDataJob.objects.create)( + new_job: ExternalDataJob = ExternalDataJob.objects.create( team_id=team.id, pipeline_id=new_source.pk, status=ExternalDataJob.Status.RUNNING, @@ -565,9 +567,9 @@ async def setup_job_1(): schema=customer_schema, ) - new_job = await sync_to_async( - 
ExternalDataJob.objects.filter(id=new_job.id).prefetch_related("pipeline").prefetch_related("schema").get - )() + new_job = ( + ExternalDataJob.objects.filter(id=new_job.id).prefetch_related("pipeline").prefetch_related("schema").get() + ) inputs = ImportDataActivityInputs( team_id=team.id, @@ -578,7 +580,7 @@ async def setup_job_1(): return new_job, inputs - job_1, job_1_inputs = await setup_job_1() + job_1, job_1_inputs = setup_job_1() def mock_customers_paginate( class_self, @@ -636,18 +638,14 @@ def mock_to_object_store_rs_credentials(class_self): mock.patch.object(AwsCredentials, "to_session_credentials", mock_to_session_credentials), mock.patch.object(AwsCredentials, "to_object_store_rs_credentials", mock_to_object_store_rs_credentials), ): - await asyncio.gather( - activity_environment.run(import_data_activity, job_1_inputs), - ) + activity_environment.run(import_data_activity_sync, job_1_inputs) - folder_path = await sync_to_async(job_1.folder_path)() - job_1_customer_objects = await minio_client.list_objects_v2( - Bucket=BUCKET_NAME, Prefix=f"{folder_path}/customer/" - ) + folder_path = job_1.folder_path() + job_1_customer_objects = minio_client.list_objects_v2(Bucket=BUCKET_NAME, Prefix=f"{folder_path}/customer/") assert len(job_1_customer_objects["Contents"]) == 2 - await sync_to_async(job_1.refresh_from_db)() + job_1.refresh_from_db() assert job_1.rows_synced == 1 @@ -680,24 +678,30 @@ async def test_external_data_job_workflow_with_schema(team, **kwargs): external_data_schema_id=schema.id, ) - async def mock_async_func(inputs): + def mock_func(inputs): return {} with ( mock.patch("posthog.warehouse.models.table.DataWarehouseTable.get_columns", return_value={"id": "string"}), - mock.patch.object(DataImportPipeline, "run", mock_async_func), + mock.patch.object(DataImportPipelineSync, "run", mock_func), ): - with override_settings(AIRBYTE_BUCKET_KEY="test-key", AIRBYTE_BUCKET_SECRET="test-secret"): + with override_settings( + BUCKET_URL=f"s3://{BUCKET_NAME}", + AIRBYTE_BUCKET_KEY=settings.OBJECT_STORAGE_ACCESS_KEY_ID, + AIRBYTE_BUCKET_SECRET=settings.OBJECT_STORAGE_SECRET_ACCESS_KEY, + AIRBYTE_BUCKET_REGION="us-east-1", + AIRBYTE_BUCKET_DOMAIN="objectstorage:19000", + BUCKET_NAME=BUCKET_NAME, + ): async with await WorkflowEnvironment.start_time_skipping() as activity_environment: async with Worker( activity_environment.client, task_queue=DATA_WAREHOUSE_TASK_QUEUE, workflows=[ExternalDataJobWorkflow], activities=[ - check_schedule_activity, create_external_data_job_model_activity, update_external_data_job_model, - import_data_activity, + import_data_activity_sync, create_source_templates, check_billing_limits_activity, sync_new_schemas_activity, @@ -752,7 +756,7 @@ async def setup_job_1(): }, ) - posthog_test_schema = await _create_schema("posthog_test", new_source, team) + posthog_test_schema = await sync_to_async(_create_schema)("posthog_test", new_source, team) new_job: ExternalDataJob = await sync_to_async(ExternalDataJob.objects.create)( team_id=team.id, @@ -806,127 +810,8 @@ def mock_to_object_store_rs_credentials(class_self): mock.patch.object(AwsCredentials, "to_session_credentials", mock_to_session_credentials), mock.patch.object(AwsCredentials, "to_object_store_rs_credentials", mock_to_object_store_rs_credentials), ): - await asyncio.gather( - activity_environment.run(import_data_activity, job_1_inputs), - ) + await sync_to_async(activity_environment.run)(import_data_activity_sync, job_1_inputs) folder_path = await sync_to_async(job_1.folder_path)() - job_1_team_objects = 
await minio_client.list_objects_v2( - Bucket=BUCKET_NAME, Prefix=f"{folder_path}/posthog_test/" - ) + job_1_team_objects = minio_client.list_objects_v2(Bucket=BUCKET_NAME, Prefix=f"{folder_path}/posthog_test/") assert len(job_1_team_objects["Contents"]) == 2 - - -@pytest.mark.django_db(transaction=True) -@pytest.mark.asyncio -async def test_check_schedule_activity_with_schema_id(activity_environment, team, **kwargs): - new_source = await sync_to_async(ExternalDataSource.objects.create)( - source_id=uuid.uuid4(), - connection_id=uuid.uuid4(), - destination_id=uuid.uuid4(), - team=team, - status="running", - source_type="Stripe", - job_inputs={"stripe_secret_key": "test-key", "stripe_account_id": "acct_id"}, - ) - - test_1_schema = await _create_schema("test-1", new_source, team) - - should_exit = await activity_environment.run( - check_schedule_activity, - ExternalDataWorkflowInputs( - team_id=team.id, - external_data_source_id=new_source.id, - external_data_schema_id=test_1_schema.id, - ), - ) - - assert should_exit is False - - -@pytest.mark.django_db(transaction=True) -@pytest.mark.asyncio -async def test_check_schedule_activity_with_missing_schema_id_but_with_schedule(activity_environment, team, **kwargs): - new_source = await sync_to_async(ExternalDataSource.objects.create)( - source_id=uuid.uuid4(), - connection_id=uuid.uuid4(), - destination_id=uuid.uuid4(), - team=team, - status="running", - source_type="Stripe", - job_inputs={"stripe_secret_key": "test-key", "stripe_account_id": "acct_id"}, - ) - - await sync_to_async(ExternalDataSchema.objects.create)( - name="test-1", - team_id=team.id, - source_id=new_source.pk, - should_sync=True, - ) - - with ( - mock.patch( - "posthog.temporal.data_imports.external_data_job.a_external_data_workflow_exists", return_value=True - ), - mock.patch( - "posthog.temporal.data_imports.external_data_job.a_delete_external_data_schedule", return_value=True - ), - mock.patch( - "posthog.temporal.data_imports.external_data_job.a_trigger_external_data_workflow" - ) as mock_a_trigger_external_data_workflow, - ): - should_exit = await activity_environment.run( - check_schedule_activity, - ExternalDataWorkflowInputs( - team_id=team.id, - external_data_source_id=new_source.id, - external_data_schema_id=None, - ), - ) - - assert should_exit is True - assert mock_a_trigger_external_data_workflow.call_count == 1 - - -@pytest.mark.django_db(transaction=True) -@pytest.mark.asyncio -async def test_check_schedule_activity_with_missing_schema_id_and_no_schedule(activity_environment, team, **kwargs): - new_source = await sync_to_async(ExternalDataSource.objects.create)( - source_id=uuid.uuid4(), - connection_id=uuid.uuid4(), - destination_id=uuid.uuid4(), - team=team, - status="running", - source_type="Stripe", - job_inputs={"stripe_secret_key": "test-key", "stripe_account_id": "acct_id"}, - ) - - await sync_to_async(ExternalDataSchema.objects.create)( - name="test-1", - team_id=team.id, - source_id=new_source.pk, - should_sync=True, - ) - - with ( - mock.patch( - "posthog.temporal.data_imports.external_data_job.a_external_data_workflow_exists", return_value=False - ), - mock.patch( - "posthog.temporal.data_imports.external_data_job.a_delete_external_data_schedule", return_value=True - ), - mock.patch( - "posthog.temporal.data_imports.external_data_job.a_sync_external_data_job_workflow" - ) as mock_a_sync_external_data_job_workflow, - ): - should_exit = await activity_environment.run( - check_schedule_activity, - ExternalDataWorkflowInputs( - team_id=team.id, - 
external_data_source_id=new_source.id, - external_data_schema_id=None, - ), - ) - - assert should_exit is True - assert mock_a_sync_external_data_job_workflow.call_count == 1 diff --git a/posthog/warehouse/data_load/source_templates.py b/posthog/warehouse/data_load/source_templates.py index 5a7d515bc8536..6b993e00d3d97 100644 --- a/posthog/warehouse/data_load/source_templates.py +++ b/posthog/warehouse/data_load/source_templates.py @@ -1,11 +1,9 @@ -from posthog.temporal.common.logger import bind_temporal_worker_logger -from posthog.warehouse.models.external_data_job import ExternalDataJob, get_external_data_job, get_latest_run_if_exists +from posthog.temporal.common.logger import bind_temporal_worker_logger_sync +from posthog.warehouse.models.external_data_job import ExternalDataJob from posthog.warehouse.models.external_data_source import ExternalDataSource from posthog.warehouse.models.join import DataWarehouseJoin -from posthog.warehouse.util import database_sync_to_async -@database_sync_to_async def database_operations(team_id: int, table_prefix: str) -> None: customer_join_exists = ( DataWarehouseJoin.objects.filter( @@ -54,11 +52,18 @@ def database_operations(team_id: int, table_prefix: str) -> None: ) -async def create_warehouse_templates_for_source(team_id: int, run_id: str) -> None: - logger = await bind_temporal_worker_logger(team_id=team_id) +def create_warehouse_templates_for_source(team_id: int, run_id: str) -> None: + logger = bind_temporal_worker_logger_sync(team_id=team_id) - job: ExternalDataJob = await get_external_data_job(job_id=run_id) - last_successful_job: ExternalDataJob | None = await get_latest_run_if_exists(job.team_id, job.pipeline_id) + job: ExternalDataJob = ExternalDataJob.objects.get(pk=run_id) + last_successful_job: ExternalDataJob | None = ( + ExternalDataJob.objects.filter( + team_id=job.team_id, pipeline_id=job.pipeline_id, status=ExternalDataJob.Status.COMPLETED + ) + .prefetch_related("pipeline") + .order_by("-created_at") + .first() + ) source: ExternalDataSource.Type = job.pipeline.source_type @@ -71,7 +76,7 @@ async def create_warehouse_templates_for_source(team_id: int, run_id: str) -> No table_prefix = job.pipeline.prefix or "" - await database_operations(team_id, table_prefix) + database_operations(team_id, table_prefix) logger.info( f"Created warehouse template for job {run_id}", diff --git a/posthog/warehouse/external_data_source/jobs.py b/posthog/warehouse/external_data_source/jobs.py index d21210f2ec097..b7d37eb746270 100644 --- a/posthog/warehouse/external_data_source/jobs.py +++ b/posthog/warehouse/external_data_source/jobs.py @@ -1,4 +1,3 @@ -from uuid import UUID from posthog.warehouse.util import database_sync_to_async from posthog.warehouse.models.external_data_job import ExternalDataJob from posthog.warehouse.models.external_data_schema import ExternalDataSchema @@ -9,27 +8,6 @@ def get_external_data_source(team_id: str, external_data_source_id: str) -> Exte return ExternalDataSource.objects.get(team_id=team_id, id=external_data_source_id) -@database_sync_to_async -def acreate_external_data_job( - external_data_source_id: UUID, - external_data_schema_id: UUID, - workflow_id: str, - workflow_run_id: str, - team_id: int, -) -> ExternalDataJob: - job = ExternalDataJob.objects.create( - team_id=team_id, - pipeline_id=external_data_source_id, - schema_id=external_data_schema_id, - status=ExternalDataJob.Status.RUNNING, - rows_synced=0, - workflow_id=workflow_id, - workflow_run_id=workflow_run_id, - ) - - return job - - 
@database_sync_to_async def aget_running_job_for_schema(schema_id: str) -> ExternalDataJob | None: return ( @@ -39,8 +17,7 @@ def aget_running_job_for_schema(schema_id: str) -> ExternalDataJob | None: ) -@database_sync_to_async -def aupdate_external_job_status( +def update_external_job_status( job_id: str, team_id: int, status: ExternalDataJob.Status, latest_error: str | None ) -> ExternalDataJob: model = ExternalDataJob.objects.get(id=job_id, team_id=team_id) diff --git a/posthog/warehouse/models/external_data_schema.py b/posthog/warehouse/models/external_data_schema.py index c90a5c2e472bb..3bcbc6c658f7f 100644 --- a/posthog/warehouse/models/external_data_schema.py +++ b/posthog/warehouse/models/external_data_schema.py @@ -99,8 +99,7 @@ def aget_schema_by_id(schema_id: str, team_id: int) -> ExternalDataSchema | None ) -@database_sync_to_async -def aupdate_should_sync(schema_id: str, team_id: int, should_sync: bool) -> ExternalDataSchema | None: +def update_should_sync(schema_id: str, team_id: int, should_sync: bool) -> ExternalDataSchema | None: schema = ExternalDataSchema.objects.get(id=schema_id, team_id=team_id) schema.should_sync = should_sync schema.save() @@ -119,15 +118,6 @@ def aupdate_should_sync(schema_id: str, team_id: int, should_sync: bool) -> Exte return schema -@database_sync_to_async -def get_active_schemas_for_source_id(source_id: uuid.UUID, team_id: int): - return list( - ExternalDataSchema.objects.exclude(deleted=True) - .filter(team_id=team_id, source_id=source_id, should_sync=True) - .all() - ) - - def get_all_schemas_for_source_id(source_id: uuid.UUID, team_id: int): return list(ExternalDataSchema.objects.exclude(deleted=True).filter(team_id=team_id, source_id=source_id).all()) diff --git a/rust/cymbal/src/frames/mod.rs b/rust/cymbal/src/frames/mod.rs index 09b12ff625b89..01ba7d13e4e97 100644 --- a/rust/cymbal/src/frames/mod.rs +++ b/rust/cymbal/src/frames/mod.rs @@ -32,14 +32,9 @@ impl RawFrame { res } - pub fn needs_symbols(&self) -> bool { - // For now, we only support JS, so this is always true - true - } - - pub fn symbol_set_ref(&self) -> String { + pub fn symbol_set_ref(&self) -> Option<String> { let RawFrame::JavaScript(raw) = self; - raw.source_url().map(String::from).unwrap_or_default() + raw.source_url().map(String::from).ok() } pub fn frame_id(&self) -> String { diff --git a/rust/cymbal/src/frames/resolver.rs b/rust/cymbal/src/frames/resolver.rs index 6a10c68c67208..b93da78b6b8ed 100644 --- a/rust/cymbal/src/frames/resolver.rs +++ b/rust/cymbal/src/frames/resolver.rs @@ -35,10 +35,6 @@ impl Resolver { return Ok(result.contents); } - if !frame.needs_symbols() { - return frame.resolve(team_id, catalog).await; - } - if let Some(result) = ErrorTrackingStackFrame::load(pool, team_id, &frame.frame_id()).await? { @@ -48,7 +44,11 @@ impl Resolver { let resolved = frame.resolve(team_id, catalog).await?; - let set = SymbolSetRecord::load(pool, team_id, &frame.symbol_set_ref()).await?; + let set = if let Some(set_ref) = frame.symbol_set_ref() { + SymbolSetRecord::load(pool, team_id, &set_ref).await?
+ } else { + None + }; let record = ErrorTrackingStackFrame::new( frame.frame_id(), @@ -212,7 +212,7 @@ mod test { // get the symbol set let set_ref = frame.symbol_set_ref(); - let set = SymbolSetRecord::load(&pool, 0, &set_ref) + let set = SymbolSetRecord::load(&pool, 0, &set_ref.unwrap()) .await .unwrap() .unwrap(); diff --git a/rust/cymbal/src/main.rs b/rust/cymbal/src/main.rs index 8fca47a17f34b..fc18cfbf946b2 100644 --- a/rust/cymbal/src/main.rs +++ b/rust/cymbal/src/main.rs @@ -14,7 +14,7 @@ use cymbal::{ }; use envconfig::Envconfig; use tokio::task::JoinHandle; -use tracing::{error, info}; +use tracing::{error, info, warn}; use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter, Layer}; common_alloc::used!(); @@ -83,7 +83,7 @@ async fn main() -> Result<(), Error> { offset.store().unwrap(); if event.event != "$exception" { - error!("event of type {}", event.event); + warn!("event of type {}", event.event); continue; } @@ -96,13 +96,17 @@ async fn main() -> Result<(), Error> { Ok(r) => r, Err(err) => { metrics::counter!(ERRORS, "cause" => "invalid_exception_properties").increment(1); - error!("Error parsing properties: {:?}", err); + error!( + "Error parsing properties: {:?} from properties {:?}", + err, properties + ); continue; } }; let Some(mut exception_list) = properties.exception_list else { // Known issue that $exception_list didn't exist on old clients + metrics::counter!(ERRORS, "cause" => "no_exception_list").increment(1); continue; }; @@ -155,6 +159,6 @@ async fn main() -> Result<(), Error> { let _fingerprint = fingerprinting::generate_fingerprint(&exception_list); metrics::counter!(STACK_PROCESSED).increment(1); - whole_loop.label("had_frame", "true").fin(); + whole_loop.label("finished", "true").fin(); } } diff --git a/rust/cymbal/src/types/mod.rs b/rust/cymbal/src/types/mod.rs index 6a329c75572d2..317262c52aff0 100644 --- a/rust/cymbal/src/types/mod.rs +++ b/rust/cymbal/src/types/mod.rs @@ -48,17 +48,6 @@ pub struct Exception { pub struct ErrProps { #[serde(rename = "$exception_list")] pub exception_list: Option<Vec<Exception>>, // Required from exception producers - we will not process events without this. Optional to support older clients, should eventually be removed - #[serde(skip_serializing_if = "Option::is_none")] - #[serde(rename = "$exception_type")] - pub exception_type: Option<String>, // legacy, overridden by exception_list - #[serde(skip_serializing_if = "Option::is_none")] - #[serde(rename = "$exception_message")] - pub exception_message: Option<String>, // legacy, overridden by exception_list - #[serde(skip_serializing_if = "Option::is_none")] - #[serde(rename = "$exception_stack_trace_raw")] - pub exception_stack_trace_raw: Option<String>, // Not all exceptions have a stack trace - #[serde(rename = "$exception_level")] - pub exception_level: Option<String>, // We generally don't touch this, but we break it out explicitly for users. Not all exceptions have a level #[serde(flatten)] // A catch-all for all the properties we don't "care" about pub other: HashMap<String, Value>, } @@ -148,11 +137,6 @@ mod test { assert!(frame.in_app); assert_eq!(frame.line, 64); assert_eq!(frame.column, 15003); - - assert_eq!(props.exception_type, None); - assert_eq!(props.exception_message, None); - assert_eq!(props.exception_stack_trace_raw, None); - assert_eq!(props.exception_level, Some("error".to_string())); } #[test]