diff --git a/.github/workflows/rust-docker-build.yml b/.github/workflows/rust-docker-build.yml
index 960dbd62d6015..acdcf0b342714 100644
--- a/.github/workflows/rust-docker-build.yml
+++ b/.github/workflows/rust-docker-build.yml
@@ -107,8 +107,6 @@ jobs:
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
platforms: linux/arm64,linux/amd64
- cache-from: type=gha
- cache-to: type=gha,mode=max
build-args: BIN=${{ matrix.image }}
- name: Container image digest
diff --git a/cypress/e2e/experiments.cy.ts b/cypress/e2e/experiments.cy.ts
index 5f432e1e41dc1..9e661be34591e 100644
--- a/cypress/e2e/experiments.cy.ts
+++ b/cypress/e2e/experiments.cy.ts
@@ -42,25 +42,6 @@ describe('Experiments', () => {
.type('test-variant-2')
.should('have.value', 'test-variant-2')
- // Continue to step 2
- cy.get('[data-attr="continue-experiment-creation"]').click()
-
- // Goal type selection is visible
- cy.get('[data-attr="experiment-goal-type-select"]')
- .should('be.visible')
- .within(() => {
- cy.contains('Conversion funnel').should('be.visible')
- cy.contains('Trend').should('be.visible')
- })
-
- // Goal input is visible
- cy.get('[data-attr="experiment-goal-input"]')
- .should('be.visible')
- .within(() => {
- cy.get('li.ActionFilterRow').should('exist')
- cy.get('button').contains('Add funnel step').should('exist')
- })
-
// Save experiment
cy.get('[data-attr="save-experiment"]').first().click()
})
@@ -98,10 +79,19 @@ describe('Experiments', () => {
.type('test-variant-2')
.should('have.value', 'test-variant-2')
- // Continue creation
- cy.get('[data-attr="continue-experiment-creation"]').first().click()
// Save experiment
cy.get('[data-attr="save-experiment"]').first().click()
+
+ // Set the experiment goal once the experiment is drafted
+ cy.get('[data-attr="add-experiment-goal"]').click()
+
+ // Wait for the goal modal to open, add a funnel step, then save the goal
+ cy.get('.LemonModal__layout').should('be.visible')
+ cy.contains('Change experiment goal').should('be.visible')
+ cy.get('.LemonModal__footer').contains('button', 'Save').should('have.attr', 'aria-disabled', 'true')
+ cy.get('.LemonModal__content').contains('button', 'Add funnel step').click()
+ cy.get('.LemonModal__footer').contains('button', 'Save').should('not.have.attr', 'aria-disabled', 'true')
+ cy.get('.LemonModal__footer').contains('button', 'Save').click()
}
it('create, launch and stop experiment with new ui', () => {
diff --git a/cypress/fixtures/api/decide.js b/cypress/fixtures/api/decide.js
index 51c24fe885117..102f1211152c1 100644
--- a/cypress/fixtures/api/decide.js
+++ b/cypress/fixtures/api/decide.js
@@ -5,7 +5,6 @@ export function decideResponse(featureFlags) {
},
toolbarParams: {
toolbarVersion: 'toolbar',
- jsURL: 'http://localhost:8234/',
},
isAuthenticated: true,
supportedCompression: ['gzip', 'gzip-js', 'lz64'],
diff --git a/ee/clickhouse/views/experiments.py b/ee/clickhouse/views/experiments.py
index b40f43f7fd1ad..dc4a3170b93e8 100644
--- a/ee/clickhouse/views/experiments.py
+++ b/ee/clickhouse/views/experiments.py
@@ -283,8 +283,10 @@ def validate_parameters(self, value):
return value
def create(self, validated_data: dict, *args: Any, **kwargs: Any) -> Experiment:
- if not validated_data.get("filters"):
- raise ValidationError("Filters are required to create an Experiment")
+ is_draft = "start_date" not in validated_data or validated_data["start_date"] is None
+
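+ # Drafts (no start_date) may be created without filters; a goal must be added before launch (enforced in update() below)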
+ if not validated_data.get("filters") and not is_draft:
+ raise ValidationError("Filters are required when creating a launched experiment")
saved_metrics_data = validated_data.pop("saved_metrics_ids", [])
@@ -299,8 +301,6 @@ def create(self, validated_data: dict, *args: Any, **kwargs: Any) -> Experiment:
feature_flag_key = validated_data.pop("get_feature_flag_key")
- is_draft = "start_date" not in validated_data or validated_data["start_date"] is None
-
properties = validated_data["filters"].get("properties", [])
if properties:
@@ -369,6 +369,14 @@ def create(self, validated_data: dict, *args: Any, **kwargs: Any) -> Experiment:
return experiment
def update(self, instance: Experiment, validated_data: dict, *args: Any, **kwargs: Any) -> Experiment:
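+ # Block launching (setting start_date) an experiment that still has no goal metric and isn't being given one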
+ if (
+ not instance.filters.get("events")
+ and not instance.filters.get("actions")
+ and validated_data.get("start_date")
+ and not validated_data.get("filters")
+ ):
+ raise ValidationError("Filters are required when launching an experiment")
+
update_saved_metrics = "saved_metrics_ids" in validated_data
saved_metrics_data = validated_data.pop("saved_metrics_ids", []) or []
diff --git a/ee/clickhouse/views/test/test_clickhouse_experiments.py b/ee/clickhouse/views/test/test_clickhouse_experiments.py
index b82ac89727754..676193f58513e 100644
--- a/ee/clickhouse/views/test/test_clickhouse_experiments.py
+++ b/ee/clickhouse/views/test/test_clickhouse_experiments.py
@@ -681,30 +681,13 @@ def test_invalid_create(self):
"end_date": None,
"feature_flag_key": ff_key,
"parameters": {},
- "filters": {}, # also invalid
+ "filters": {},
},
)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
self.assertEqual(response.json()["detail"], "This field may not be null.")
- ff_key = "a-b-tests"
- response = self.client.post(
- f"/api/projects/{self.team.id}/experiments/",
- {
- "name": "None",
- "description": "",
- "start_date": None,
- "end_date": None,
- "feature_flag_key": ff_key,
- "parameters": {},
- "filters": {}, # still invalid
- },
- )
-
- self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
- self.assertEqual(response.json()["detail"], "Filters are required to create an Experiment")
-
def test_invalid_update(self):
# Draft experiment
ff_key = "a-b-tests"
@@ -808,7 +791,12 @@ def test_draft_experiment_doesnt_have_FF_active_even_after_updates(self):
# Now update
response = self.client.patch(
f"/api/projects/{self.team.id}/experiments/{id}",
- {"description": "Bazinga", "filters": {}},
+ {
+ "description": "Bazinga",
+ "filters": {
+ "events": [{"id": "$pageview"}],
+ },
+ },
)
self.assertEqual(response.status_code, status.HTTP_200_OK)
@@ -839,7 +827,7 @@ def test_launching_draft_experiment_activates_FF(self):
"end_date": None,
"feature_flag_key": ff_key,
"parameters": {},
- "filters": {"events": []},
+ "filters": {"events": [{"id": "$pageview"}]},
},
)
@@ -1732,6 +1720,148 @@ def test_create_experiment_updates_feature_flag_cache(self):
},
)
+ def test_create_draft_experiment_with_filters(self) -> None:
+ ff_key = "a-b-tests"
+ response = self.client.post(
+ f"/api/projects/{self.team.id}/experiments/",
+ {
+ "name": "Test Experiment",
+ "description": "",
+ "start_date": None,
+ "end_date": None,
+ "feature_flag_key": ff_key,
+ "parameters": None,
+ "filters": {
+ "events": [
+ {"order": 0, "id": "$pageview"},
+ {"order": 1, "id": "$pageleave"},
+ ],
+ "properties": [],
+ },
+ },
+ )
+
+ self.assertEqual(response.status_code, status.HTTP_201_CREATED)
+ self.assertEqual(response.json()["name"], "Test Experiment")
+ self.assertEqual(response.json()["feature_flag_key"], ff_key)
+
+ def test_create_launched_experiment_with_filters(self) -> None:
+ ff_key = "a-b-tests"
+ response = self.client.post(
+ f"/api/projects/{self.team.id}/experiments/",
+ {
+ "name": "Test Experiment",
+ "description": "",
+ "start_date": "2021-12-01T10:23",
+ "end_date": None,
+ "feature_flag_key": ff_key,
+ "parameters": None,
+ "filters": {
+ "events": [
+ {"order": 0, "id": "$pageview"},
+ {"order": 1, "id": "$pageleave"},
+ ],
+ "properties": [],
+ },
+ },
+ )
+
+ self.assertEqual(response.status_code, status.HTTP_201_CREATED)
+ self.assertEqual(response.json()["name"], "Test Experiment")
+ self.assertEqual(response.json()["feature_flag_key"], ff_key)
+
+ def test_create_draft_experiment_without_filters(self) -> None:
+ ff_key = "a-b-tests"
+ response = self.client.post(
+ f"/api/projects/{self.team.id}/experiments/",
+ {
+ "name": "Test Experiment",
+ "description": "",
+ "start_date": None,
+ "end_date": None,
+ "feature_flag_key": ff_key,
+ "parameters": None,
+ "filters": {},
+ },
+ )
+
+ self.assertEqual(response.status_code, status.HTTP_201_CREATED)
+ self.assertEqual(response.json()["name"], "Test Experiment")
+ self.assertEqual(response.json()["feature_flag_key"], ff_key)
+
+ def test_create_launched_experiment_without_filters(self) -> None:
+ ff_key = "a-b-tests"
+ response = self.client.post(
+ f"/api/projects/{self.team.id}/experiments/",
+ {
+ "name": "Test Experiment",
+ "description": "",
+ "start_date": "2021-12-01T10:23",
+ "end_date": None,
+ "feature_flag_key": ff_key,
+ "parameters": None,
+ "filters": {},
+ },
+ )
+
+ self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+ self.assertEqual(response.json()["detail"], "Filters are required when creating a launched experiment")
+
+ def test_launch_draft_experiment_without_filters(self) -> None:
+ ff_key = "a-b-tests"
+ response = self.client.post(
+ f"/api/projects/{self.team.id}/experiments/",
+ {
+ "name": "Test Experiment",
+ "description": "",
+ "start_date": None,
+ "end_date": None,
+ "feature_flag_key": ff_key,
+ "parameters": None,
+ "filters": {},
+ },
+ )
+
+ self.assertEqual(response.status_code, status.HTTP_201_CREATED)
+ draft_exp = response.json()
+
+ response = self.client.patch(
+ f"/api/projects/{self.team.id}/experiments/{draft_exp['id']}",
+ {
+ "name": "Test Experiment",
+ "description": "",
+ "start_date": "2021-12-01T10:23",
+ "end_date": None,
+ "feature_flag_key": ff_key,
+ "parameters": None,
+ "filters": {},
+ },
+ )
+
+ self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+ self.assertEqual(response.json()["detail"], "Filters are required when launching an experiment")
+
+ response = self.client.patch(
+ f"/api/projects/{self.team.id}/experiments/{draft_exp['id']}",
+ {
+ "name": "Test Experiment",
+ "description": "",
+ "start_date": "2021-12-01T10:23",
+ "end_date": None,
+ "feature_flag_key": ff_key,
+ "parameters": None,
+ "filters": {
+ "events": [
+ {"order": 0, "id": "$pageview"},
+ {"order": 1, "id": "$pageleave"},
+ ],
+ "properties": [],
+ },
+ },
+ )
+
+ self.assertEqual(response.status_code, status.HTTP_200_OK)
+
class TestExperimentAuxiliaryEndpoints(ClickhouseTestMixin, APILicensedTest):
def _generate_experiment(self, start_date="2024-01-01T10:23", extra_parameters=None):
diff --git a/ee/surveys/summaries/summarize_surveys.py b/ee/surveys/summaries/summarize_surveys.py
new file mode 100644
index 0000000000000..1e4b088484f55
--- /dev/null
+++ b/ee/surveys/summaries/summarize_surveys.py
@@ -0,0 +1,135 @@
+import json
+
+import openai
+
+from datetime import datetime
+from typing import Optional, cast
+
+from posthog.hogql import ast
+from posthog.hogql.parser import parse_select
+from posthog.hogql_queries.insights.paginators import HogQLHasMorePaginator
+from posthog.schema import HogQLQueryResponse
+from posthog.utils import get_instance_region
+
+from prometheus_client import Histogram
+
+from posthog.api.activity_log import ServerTimingsGathered
+from posthog.models import Team, User
+
+import structlog
+
+logger = structlog.get_logger(__name__)
+
+TOKENS_IN_PROMPT_HISTOGRAM = Histogram(
+ "posthog_survey_summary_tokens_in_prompt_histogram",
+ "histogram of the number of tokens in the prompt used to generate a survey summary",
+ buckets=[
+ 0,
+ 10,
+ 50,
+ 100,
+ 500,
+ 1000,
+ 2000,
+ 3000,
+ 4000,
+ 5000,
+ 6000,
+ 7000,
+ 8000,
+ 10000,
+ 20000,
+ 30000,
+ 40000,
+ 50000,
+ 100000,
+ 128000,
+ float("inf"),
+ ],
+)
+
+
+def prepare_data(query_response: HogQLQueryResponse) -> list[str]:
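+ """Collect the non-empty $survey_response* values from each event's properties JSON."""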
+ response_values = []
+ properties_list: list[dict] = [json.loads(x[1]) for x in query_response.results]
+ for props in properties_list:
+ response_values.extend([value for key, value in props.items() if key.startswith("$survey_response") and value])
+ return response_values
+
+
+def summarize_survey_responses(
+ survey_id: str, question_index: Optional[int], survey_start: datetime, survey_end: datetime, team: Team, user: User
+):
+ timer = ServerTimingsGathered()
+
+ with timer("prepare_query"):
+ paginator = HogQLHasMorePaginator(limit=100, offset=0)
+ q = parse_select(
+ """
+ SELECT distinct_id, properties
+ FROM events
+ WHERE event == 'survey sent'
+ AND properties.$survey_id = {survey_id}
+ -- e.g. `$survey_response` or `$survey_response_2`
+ AND trim(JSONExtractString(properties, {survey_response_property})) != ''
+ AND timestamp >= {start_date}
+ AND timestamp <= {end_date}
+ """,
+ {
+ "survey_id": ast.Constant(value=survey_id),
+ "survey_response_property": ast.Constant(
+ value=f"$survey_response_{question_index}" if question_index else "$survey_response"
+ ),
+ "start_date": ast.Constant(value=survey_start),
+ "end_date": ast.Constant(value=survey_end),
+ },
+ )
+
+ with timer("run_query"):
+ query_response = paginator.execute_hogql_query(
+ team=team,
+ query_type="survey_response_list_query",
+ query=cast(ast.SelectQuery, q),
+ )
+
+ with timer("llm_api_prep"):
+ instance_region = get_instance_region() or "HOBBY"
+ prepared_data = prepare_data(query_response)
+
+ with timer("openai_completion"):
+ result = openai.chat.completions.create(
+ model="gpt-4o-mini", # allows 128k tokens
+ temperature=0.7,
+ messages=[
+ {
+ "role": "system",
+ "content": """
+ You are a product manager's assistant. You summarise survey responses from users for the product manager.
+ You don't do any other tasks.
+ """,
+ },
+ {
+ "role": "user",
+ "content": f"""the survey responses are {prepared_data}.""",
+ },
+ {
+ "role": "user",
+ "content": """
+ generate a one or two paragraph summary of the survey response.
+ only summarize, the goal is to identify real user pain points and needs
+ use bullet points to identify the themes, and highlight quotes to bring them to life
+ we're trying to identify what to work on
+ use language that is as concise and simple as possible.
+ generate no text other than the summary.
+ the aim is to let people see themes in the responses received. return the text in github flavoured markdown format""",
+ },
+ ],
+ user=f"{instance_region}/{user.pk}",
+ )
+
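+ # Record the prompt token count so prompt sizes show up in the histogram defined above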
+ usage = result.usage.prompt_tokens if result.usage else None
+ if usage:
+ TOKENS_IN_PROMPT_HISTOGRAM.observe(usage)
+
+ content: str = result.choices[0].message.content or ""
+ return {"content": content, "timings": timer.get_all_timings()}
diff --git a/frontend/__snapshots__/scenes-app-experiments--complete-funnel-experiment--dark.png b/frontend/__snapshots__/scenes-app-experiments--complete-funnel-experiment--dark.png
index 30d04ac4dcce1..574e2b03d57cb 100644
Binary files a/frontend/__snapshots__/scenes-app-experiments--complete-funnel-experiment--dark.png and b/frontend/__snapshots__/scenes-app-experiments--complete-funnel-experiment--dark.png differ
diff --git a/frontend/__snapshots__/scenes-app-experiments--complete-funnel-experiment--light.png b/frontend/__snapshots__/scenes-app-experiments--complete-funnel-experiment--light.png
index 7f1fb1ff1cf1d..3bedb60084fea 100644
Binary files a/frontend/__snapshots__/scenes-app-experiments--complete-funnel-experiment--light.png and b/frontend/__snapshots__/scenes-app-experiments--complete-funnel-experiment--light.png differ
diff --git a/frontend/__snapshots__/scenes-app-experiments--running-trend-experiment--dark.png b/frontend/__snapshots__/scenes-app-experiments--running-trend-experiment--dark.png
index 63381344731b3..610ec0666ad31 100644
Binary files a/frontend/__snapshots__/scenes-app-experiments--running-trend-experiment--dark.png and b/frontend/__snapshots__/scenes-app-experiments--running-trend-experiment--dark.png differ
diff --git a/frontend/__snapshots__/scenes-app-experiments--running-trend-experiment--light.png b/frontend/__snapshots__/scenes-app-experiments--running-trend-experiment--light.png
index 4c45d979edb38..5261fb6c96120 100644
Binary files a/frontend/__snapshots__/scenes-app-experiments--running-trend-experiment--light.png and b/frontend/__snapshots__/scenes-app-experiments--running-trend-experiment--light.png differ
diff --git a/frontend/__snapshots__/scenes-app-experiments--running-trend-experiment-many-variants--dark.png b/frontend/__snapshots__/scenes-app-experiments--running-trend-experiment-many-variants--dark.png
index 61d46bdc53b8f..b38b4908dda88 100644
Binary files a/frontend/__snapshots__/scenes-app-experiments--running-trend-experiment-many-variants--dark.png and b/frontend/__snapshots__/scenes-app-experiments--running-trend-experiment-many-variants--dark.png differ
diff --git a/frontend/__snapshots__/scenes-app-experiments--running-trend-experiment-many-variants--light.png b/frontend/__snapshots__/scenes-app-experiments--running-trend-experiment-many-variants--light.png
index c0d84c532e486..b8c9d02a5be0d 100644
Binary files a/frontend/__snapshots__/scenes-app-experiments--running-trend-experiment-many-variants--light.png and b/frontend/__snapshots__/scenes-app-experiments--running-trend-experiment-many-variants--light.png differ
diff --git a/frontend/__snapshots__/scenes-app-sidepanels--side-panel-support-no-email--dark.png b/frontend/__snapshots__/scenes-app-sidepanels--side-panel-support-no-email--dark.png
index 19af04e29656e..1f6f791e5e7a2 100644
Binary files a/frontend/__snapshots__/scenes-app-sidepanels--side-panel-support-no-email--dark.png and b/frontend/__snapshots__/scenes-app-sidepanels--side-panel-support-no-email--dark.png differ
diff --git a/frontend/__snapshots__/scenes-app-sidepanels--side-panel-support-no-email--light.png b/frontend/__snapshots__/scenes-app-sidepanels--side-panel-support-no-email--light.png
index 3b910e9bc5a3c..cf41e5aa74972 100644
Binary files a/frontend/__snapshots__/scenes-app-sidepanels--side-panel-support-no-email--light.png and b/frontend/__snapshots__/scenes-app-sidepanels--side-panel-support-no-email--light.png differ
diff --git a/frontend/src/layout/navigation-3000/sidepanel/SidePanel.tsx b/frontend/src/layout/navigation-3000/sidepanel/SidePanel.tsx
index 972ca1515de48..34c18f4fc6ff2 100644
--- a/frontend/src/layout/navigation-3000/sidepanel/SidePanel.tsx
+++ b/frontend/src/layout/navigation-3000/sidepanel/SidePanel.tsx
@@ -1,6 +1,6 @@
import './SidePanel.scss'
-import { IconEllipsis, IconFeatures, IconFlag, IconGear, IconInfo, IconNotebook, IconSupport } from '@posthog/icons'
+import { IconEllipsis, IconFeatures, IconGear, IconInfo, IconNotebook, IconSupport } from '@posthog/icons'
import { LemonButton, LemonMenu, LemonMenuItems, LemonModal } from '@posthog/lemon-ui'
import clsx from 'clsx'
import { useActions, useValues } from 'kea'
@@ -20,7 +20,6 @@ import { SidePanelActivation, SidePanelActivationIcon } from './panels/activatio
import { SidePanelActivity, SidePanelActivityIcon } from './panels/activity/SidePanelActivity'
import { SidePanelDiscussion, SidePanelDiscussionIcon } from './panels/discussion/SidePanelDiscussion'
import { SidePanelDocs } from './panels/SidePanelDocs'
-import { SidePanelExperimentFeatureFlag } from './panels/SidePanelExperimentFeatureFlag'
import { SidePanelFeaturePreviews } from './panels/SidePanelFeaturePreviews'
import { SidePanelSettings } from './panels/SidePanelSettings'
import { SidePanelStatus, SidePanelStatusIcon } from './panels/SidePanelStatus'
@@ -88,11 +87,6 @@ export const SIDE_PANEL_TABS: Record<
Content: SidePanelStatus,
noModalSupport: true,
},
- [SidePanelTab.ExperimentFeatureFlag]: {
- label: 'Release conditions',
- Icon: IconFlag,
- Content: SidePanelExperimentFeatureFlag,
- },
}
const DEFAULT_WIDTH = 512
diff --git a/frontend/src/layout/navigation-3000/sidepanel/panels/SidePanelExperimentFeatureFlag.tsx b/frontend/src/layout/navigation-3000/sidepanel/panels/SidePanelExperimentFeatureFlag.tsx
deleted file mode 100644
index 5002f2bd78929..0000000000000
--- a/frontend/src/layout/navigation-3000/sidepanel/panels/SidePanelExperimentFeatureFlag.tsx
+++ /dev/null
@@ -1,163 +0,0 @@
-import { IconBalance } from '@posthog/icons'
-import { LemonBanner, LemonButton, LemonDivider, LemonInput, LemonTable, Link, Spinner } from '@posthog/lemon-ui'
-import { useActions, useValues } from 'kea'
-import { router } from 'kea-router'
-import { useEffect, useMemo } from 'react'
-import { experimentLogic } from 'scenes/experiments/experimentLogic'
-import { featureFlagLogic, FeatureFlagLogicProps } from 'scenes/feature-flags/featureFlagLogic'
-import { FeatureFlagReleaseConditions } from 'scenes/feature-flags/FeatureFlagReleaseConditions'
-import { urls } from 'scenes/urls'
-
-import { sidePanelStateLogic } from '../sidePanelStateLogic'
-
-export const SidePanelExperimentFeatureFlag = (): JSX.Element => {
- const { closeSidePanel } = useActions(sidePanelStateLogic)
- const { currentLocation } = useValues(router)
-
- useEffect(() => {
- // Side panel state is persisted in local storage, so we need to check if we're on the experiment page,
- // otherwise close the side panel
- const isExperimentPath = /^\/project\/[0-9]+\/experiments\/[0-9]+/.test(currentLocation.pathname)
- if (!isExperimentPath) {
- closeSidePanel()
- }
- }, [currentLocation, closeSidePanel])
-
- // Retrieve experiment ID from URL
- const experimentId = useMemo(() => {
- const match = currentLocation.pathname.match(/\/experiments\/(\d+)/)
- return match ? parseInt(match[1]) : null
- }, [currentLocation.pathname])
-
- const { experiment } = useValues(experimentLogic({ experimentId: experimentId ?? 'new' }))
-
- const _featureFlagLogic = featureFlagLogic({ id: experiment.feature_flag?.id ?? null } as FeatureFlagLogicProps)
- const { featureFlag, areVariantRolloutsValid, variantRolloutSum, featureFlagLoading, nonEmptyVariants } =
- useValues(_featureFlagLogic)
- const { setFeatureFlagFilters, saveSidebarExperimentFeatureFlag, distributeVariantsEqually } =
- useActions(_featureFlagLogic)
-
- const variants = featureFlag?.filters?.multivariate?.variants || []
-
- const handleRolloutPercentageChange = (index: number, value: number | undefined): void => {
- if (!featureFlag?.filters?.multivariate || !value) {
- return
- }
-
- const updatedVariants = featureFlag.filters.multivariate.variants.map((variant, i) =>
- i === index ? { ...variant, rollout_percentage: value } : variant
- )
-
- const updatedFilters = {
- ...featureFlag.filters,
- multivariate: { ...featureFlag.filters.multivariate, variants: updatedVariants },
- }
-
- setFeatureFlagFilters(updatedFilters, null)
- }
-
- if (featureFlagLoading || !featureFlag.id) {
- return (
-
-
-
- )
- }
-
- return (
-
-
-
-
- Adjusting variant distribution or user targeting may impact the validity of your results. Adjust
- only if you're aware of how changes will affect your experiment.
-
-
- For full feature flag settings, go to{' '}
-
- {experiment.feature_flag?.key}
- {' '}
- .
-
-
-
-
-
Experiment variants
-
{value} ,
- width: '50%',
- },
- {
- title: (
-
- Rollout Percentage
-
-
-
-
- ),
- dataIndex: 'rollout_percentage',
- key: 'rollout_percentage',
- render: (_, record, index) => (
- {
- if (changedValue !== null) {
- const valueInt =
- changedValue !== undefined ? parseInt(changedValue.toString()) : 0
- if (!isNaN(valueInt)) {
- handleRolloutPercentageChange(index, changedValue)
- }
- }
- }}
- min={0}
- max={100}
- suffix={% }
- />
- ),
- },
- ]}
- />
- {variants.length > 0 && !areVariantRolloutsValid && (
-
- Percentage rollouts for variants must sum to 100 (currently {variantRolloutSum}
- ).
-
- )}
-
-
-
-
-
- {
- saveSidebarExperimentFeatureFlag(featureFlag)
- }}
- >
- Save
-
-
-
- )
-}
diff --git a/frontend/src/layout/navigation-3000/sidepanel/panels/SidePanelSupport.tsx b/frontend/src/layout/navigation-3000/sidepanel/panels/SidePanelSupport.tsx
index f1f6a5a1e8a7b..58e8f37d7faf0 100644
--- a/frontend/src/layout/navigation-3000/sidepanel/panels/SidePanelSupport.tsx
+++ b/frontend/src/layout/navigation-3000/sidepanel/panels/SidePanelSupport.tsx
@@ -4,7 +4,6 @@ import {
IconDatabase,
IconDecisionTree,
IconFeatures,
- IconFlask,
IconHelmet,
IconMap,
IconMessage,
@@ -59,11 +58,6 @@ const PRODUCTS = [
slug: 'feature-flags',
icon: ,
},
- {
- name: 'Experiments',
- slug: 'experiments',
- icon: ,
- },
{
name: 'Surveys',
slug: 'surveys',
diff --git a/frontend/src/layout/navigation-3000/sidepanel/sidePanelLogic.tsx b/frontend/src/layout/navigation-3000/sidepanel/sidePanelLogic.tsx
index 2a4add974a1d9..029b34b6cbf4a 100644
--- a/frontend/src/layout/navigation-3000/sidepanel/sidePanelLogic.tsx
+++ b/frontend/src/layout/navigation-3000/sidepanel/sidePanelLogic.tsx
@@ -1,5 +1,4 @@
import { connect, kea, path, selectors } from 'kea'
-import { router } from 'kea-router'
import { FEATURE_FLAGS } from 'lib/constants'
import { featureFlagLogic } from 'lib/logic/featureFlagLogic'
import { preflightLogic } from 'scenes/PreflightCheck/preflightLogic'
@@ -40,8 +39,6 @@ export const sidePanelLogic = kea([
['status'],
userLogic,
['hasAvailableFeature'],
- router,
- ['currentLocation'],
],
actions: [sidePanelStateLogic, ['closeSidePanel', 'openSidePanel']],
}),
@@ -52,7 +49,6 @@ export const sidePanelLogic = kea([
(isCloudOrDev, isReady, hasCompletedAllTasks, featureflags) => {
const tabs: SidePanelTab[] = []
- tabs.push(SidePanelTab.ExperimentFeatureFlag)
tabs.push(SidePanelTab.Notebooks)
tabs.push(SidePanelTab.Docs)
if (isCloudOrDev) {
@@ -78,24 +74,8 @@ export const sidePanelLogic = kea([
],
visibleTabs: [
- (s) => [
- s.enabledTabs,
- s.selectedTab,
- s.sidePanelOpen,
- s.unreadCount,
- s.status,
- s.hasAvailableFeature,
- s.currentLocation,
- ],
- (
- enabledTabs,
- selectedTab,
- sidePanelOpen,
- unreadCount,
- status,
- hasAvailableFeature,
- currentLocation
- ): SidePanelTab[] => {
+ (s) => [s.enabledTabs, s.selectedTab, s.sidePanelOpen, s.unreadCount, s.status, s.hasAvailableFeature],
+ (enabledTabs, selectedTab, sidePanelOpen, unreadCount, status, hasAvailableFeature): SidePanelTab[] => {
return enabledTabs.filter((tab) => {
if (tab === selectedTab && sidePanelOpen) {
return true
@@ -118,10 +98,6 @@ export const sidePanelLogic = kea([
return false
}
- if (tab === SidePanelTab.ExperimentFeatureFlag) {
- return /^\/project\/[0-9]+\/experiments\/[0-9]+/.test(currentLocation.pathname)
- }
-
return true
})
},
diff --git a/frontend/src/lib/api.ts b/frontend/src/lib/api.ts
index 0d0ddd38d5e7c..d91ab7592dea2 100644
--- a/frontend/src/lib/api.ts
+++ b/frontend/src/lib/api.ts
@@ -2144,6 +2144,13 @@ const api = {
async getResponsesCount(): Promise<{ [key: string]: number }> {
return await new ApiRequest().surveysResponsesCount().get()
},
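+ // Request an AI-generated summary of a survey's responses; questionIndex scopes it to a single question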
+ async summarize_responses(surveyId: Survey['id'], questionIndex: number | undefined): Promise {
+ let apiRequest = new ApiRequest().survey(surveyId).withAction('summarize_responses')
+ if (questionIndex !== undefined) {
+ apiRequest = apiRequest.withQueryString('questionIndex=' + questionIndex)
+ }
+ return await apiRequest.create()
+ },
},
dataWarehouseTables: {
diff --git a/frontend/src/lib/components/Hogfetti/Hogfetti.tsx b/frontend/src/lib/components/Hogfetti/Hogfetti.tsx
index be24513044011..a071eb7c13d54 100644
--- a/frontend/src/lib/components/Hogfetti/Hogfetti.tsx
+++ b/frontend/src/lib/components/Hogfetti/Hogfetti.tsx
@@ -106,7 +106,7 @@ export const useHogfetti = (options: HogfettiOptions = {}): HogfettiHook => {
const trigger = useCallback((): void => {
const centerX = Math.random() * dimensions.width
- const centerY = Math.random() * dimensions.height
+ const centerY = Math.random() * dimensions.height * 0.5
const newParticles = Array.from({ length: count }, () => createParticle(centerX, centerY))
setParticleSets((prev) => [...prev, newParticles])
diff --git a/frontend/src/lib/constants.tsx b/frontend/src/lib/constants.tsx
index 29873afc7d87e..35ab2bcddc1bf 100644
--- a/frontend/src/lib/constants.tsx
+++ b/frontend/src/lib/constants.tsx
@@ -228,6 +228,7 @@ export const FEATURE_FLAGS = {
DEAD_CLICKS_AUTOCAPTURE: 'dead-clicks-autocapture', // owner: @pauldambra #team-replay
ONBOARDING_PRODUCT_MULTISELECT: 'onboarding-product-multiselect', // owner: @danielbachhuber #team-experiments
EDIT_DWH_SOURCE_CONFIG: 'edit_dwh_source_config', // owner: @Gilbert09 #team-data-warehouse
+ AI_SURVEY_RESPONSE_SUMMARY: 'ai-survey-response-summary', // owner: @pauldambra
} as const
export type FeatureFlagKey = (typeof FEATURE_FLAGS)[keyof typeof FEATURE_FLAGS]
diff --git a/frontend/src/queries/schema.json b/frontend/src/queries/schema.json
index f9534dac0ec48..70330efd9efad 100644
--- a/frontend/src/queries/schema.json
+++ b/frontend/src/queries/schema.json
@@ -806,6 +806,9 @@
},
"BreakdownKeyType": {
"anyOf": [
+ {
+ "type": "integer"
+ },
{
"type": "string"
},
@@ -814,7 +817,17 @@
},
{
"items": {
- "type": ["string", "number"]
+ "anyOf": [
+ {
+ "type": "integer"
+ },
+ {
+ "type": "string"
+ },
+ {
+ "type": "number"
+ }
+ ]
},
"type": "array"
},
diff --git a/frontend/src/scenes/billing/UnsubscribeSurveyModal.tsx b/frontend/src/scenes/billing/UnsubscribeSurveyModal.tsx
index ea2912c20e42f..79d2447260270 100644
--- a/frontend/src/scenes/billing/UnsubscribeSurveyModal.tsx
+++ b/frontend/src/scenes/billing/UnsubscribeSurveyModal.tsx
@@ -110,7 +110,7 @@ export const UnsubscribeSurveyModal = ({
{
resetUnsubscribeModalStep()
reportSurveyDismissed(surveyID)
diff --git a/frontend/src/scenes/debug/hog/HogRepl.tsx b/frontend/src/scenes/debug/hog/HogRepl.tsx
index dc374c9e38ab3..72538355b0c21 100644
--- a/frontend/src/scenes/debug/hog/HogRepl.tsx
+++ b/frontend/src/scenes/debug/hog/HogRepl.tsx
@@ -1,14 +1,73 @@
-import { LemonButton } from '@posthog/lemon-ui'
+import { printHogStringOutput } from '@posthog/hogvm'
+import { LemonButton, LemonTable, LemonTabs } from '@posthog/lemon-ui'
import { useActions, useValues } from 'kea'
+import { JSONViewer } from 'lib/components/JSONViewer'
import { CodeEditorInline } from 'lib/monaco/CodeEditorInline'
+import React, { useState } from 'react'
import { SceneExport } from 'scenes/sceneTypes'
+import { renderHogQLX } from '~/queries/nodes/HogQLX/render'
+
import { hogReplLogic, ReplChunk as ReplChunkType } from './hogReplLogic'
+export interface ReplResultsTableProps {
+ response: {
+ results: any[][]
+ columns: string[]
+ }
+}
+
+export function ReplResultsTable({ response }: ReplResultsTableProps): JSX.Element {
+ const [activeTab, setActiveTab] = useState<'table' | 'json'>('table')
+ return (
+
+
({ dataIndex: index, title: col }))}
+ dataSource={response.results}
+ />
+ ),
+ },
+ {
+ key: 'json',
+ label: 'JSON',
+ content: ,
+ },
+ {
+ key: 'raw',
+ label: 'Raw',
+ content: {printHogStringOutput(response)}
,
+ },
+ ]}
+ />
+
+ )
+}
+
+function printRichHogOutput(arg: any): JSX.Element | string {
+ if (typeof arg === 'object' && arg !== null) {
+ if ('__hx_tag' in arg) {
+ return renderHogQLX(arg)
+ }
+ if ('results' in arg && 'columns' in arg && Array.isArray(arg.results) && Array.isArray(arg.columns)) {
+ return
+ }
+ }
+ return printHogStringOutput(arg)
+}
+
interface ReplChunkProps {
chunk: ReplChunkType
editFromHere: () => void
}
+
export function ReplChunk({
chunk: { code, result, print, error, status },
editFromHere,
@@ -40,7 +99,7 @@ export function ReplChunk({
)}
- {print ? (
+ {print && Array.isArray(print) ? (
#
-
{print}
+
+ {print.map((line, index) => (
+
+ {line.map((arg, argIndex) => (
+
+ {printRichHogOutput(arg)}
+ {argIndex < line.length - 1 ? ' ' : ''}
+
+ ))}
+
+ ))}
+
) : null}
- {status === 'success' && (
+ {status === 'success' && result !== undefined && (
{'<'}
-
{String(result)}
+
{printRichHogOutput(result)}
)}
{status === 'error' && (
diff --git a/frontend/src/scenes/debug/hog/hogReplLogic.ts b/frontend/src/scenes/debug/hog/hogReplLogic.ts
index 182c90c64cead..ae3b09eb19531 100644
--- a/frontend/src/scenes/debug/hog/hogReplLogic.ts
+++ b/frontend/src/scenes/debug/hog/hogReplLogic.ts
@@ -1,4 +1,4 @@
-import { newHogCallable, newHogClosure, printHogStringOutput, VMState } from '@posthog/hogvm'
+import { newHogCallable, newHogClosure, VMState } from '@posthog/hogvm'
import { actions, kea, listeners, path, reducers, selectors } from 'kea'
import { actionToUrl, urlToAction } from 'kea-router'
import api from 'lib/api'
@@ -10,7 +10,7 @@ import type { hogReplLogicType } from './hogReplLogicType'
export interface ReplChunk {
code: string
result?: string
- print?: string
+ print?: any[][]
error?: string
bytecode?: any[]
locals?: any[]
@@ -23,7 +23,7 @@ export const hogReplLogic = kea([
actions({
runCode: (code: string) => ({ code }),
setResult: (index: number, result?: string, error?: string) => ({ index, result, error }),
- print: (index: number, line?: string) => ({ index, line }),
+ print: (index: number, line: any[]) => ({ index, line }),
setBytecode: (index: number, bytecode: any[], locals: any[]) => ({ index, bytecode, locals }),
setVMState: (index: number, state: any) => ({ index, state }),
setCurrentCode: (code: string) => ({ code }),
@@ -46,7 +46,7 @@ export const hogReplLogic = kea([
state.map((chunk, i) => (i === index ? { ...chunk, bytecode, locals } : chunk)),
print: (state, { index, line }) =>
state.map((chunk, i) =>
- i === index ? { ...chunk, print: (chunk.print ? chunk.print + '\n' : '') + line } : chunk
+ i === index ? { ...chunk, print: [...(chunk.print ?? []), line] } : chunk
),
setVMState: (state, { index, state: vmState }) =>
state.map((chunk, i) => (i === index ? { ...chunk, state: vmState } : chunk)),
@@ -148,7 +148,7 @@ export const hogReplLogic = kea([
repl: true,
functions: {
print: (...args: any[]) => {
- actions.print(index, args.map((arg) => printHogStringOutput(arg)).join(' '))
+ actions.print(index, args)
},
},
})
@@ -160,7 +160,7 @@ export const hogReplLogic = kea([
: (result.state?.stack?.length ?? 0) > 0
? result.state?.stack?.[result.state.stack.length - 1]
: 'null'
- actions.setResult(index, printHogStringOutput(response))
+ actions.setResult(index, response)
actions.setVMState(index, result.state)
} catch (error: any) {
// Handle errors
@@ -174,12 +174,26 @@ export const hogReplLogic = kea([
},
})),
actionToUrl(({ values }) => {
- const fn = (): [string, undefined, Record, { replace: true }] | undefined => {
+ const fn = (): [string, undefined, Record | undefined, { replace: true }] | undefined => {
if (values.replChunks.length > 0) {
- const code = [...values.replChunks.map((chunk) => chunk.code), values.currentCode]
- .filter((a) => !!a)
- .join('\n')
- return [urls.debugHog(), undefined, { code }, { replace: true }]
+ // Chrome has a 2MB limit for the HASH params, set ours at 1MB
+ const replChunksLength = JSON.stringify(values.replChunks).length
+ if (replChunksLength > 1024 * 1024) {
+ // Try with just the code
+ const newCode = values.replChunks.map((chunk) => chunk.code).join('\n')
+ if (newCode.length > 1024 * 1024) {
+ // Still not enough, abort
+ return [urls.debugHog(), undefined, undefined, { replace: true }]
+ }
+ return [urls.debugHog(), undefined, { code: newCode }, { replace: true }]
+ }
+
+ return [
+ urls.debugHog(),
+ undefined,
+ { repl: values.replChunks, code: values.currentCode },
+ { replace: true },
+ ]
}
}
@@ -194,8 +208,9 @@ export const hogReplLogic = kea([
}
}),
urlToAction(({ actions, values }) => ({
- [urls.debugHog()]: (_, __, { code }) => {
- if (code && !values.currentCode && values.replChunks.length === 0) {
+ [urls.debugHog()]: (_, __, { repl, code }) => {
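+ // Restore the saved REPL session (chunks and pending code) from the URL hash on first load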
+ if ((repl || code) && !values.currentCode && values.replChunks.length === 0) {
+ actions.setReplChunks(repl)
actions.setCurrentCode(code)
}
},
diff --git a/frontend/src/scenes/experiments/ExperimentForm.tsx b/frontend/src/scenes/experiments/ExperimentForm.tsx
index d34838d40224f..a1c3f64c53d50 100644
--- a/frontend/src/scenes/experiments/ExperimentForm.tsx
+++ b/frontend/src/scenes/experiments/ExperimentForm.tsx
@@ -2,33 +2,31 @@ import './Experiment.scss'
import { IconPlusSmall, IconTrash } from '@posthog/icons'
import { LemonDivider, LemonInput, LemonTextArea, Tooltip } from '@posthog/lemon-ui'
-import { BindLogic, useActions, useValues } from 'kea'
+import { useActions, useValues } from 'kea'
import { Form, Group } from 'kea-forms'
import { ExperimentVariantNumber } from 'lib/components/SeriesGlyph'
import { FEATURE_FLAGS, MAX_EXPERIMENT_VARIANTS } from 'lib/constants'
-import { IconChevronLeft } from 'lib/lemon-ui/icons'
import { LemonButton } from 'lib/lemon-ui/LemonButton'
import { LemonField } from 'lib/lemon-ui/LemonField'
import { LemonRadio } from 'lib/lemon-ui/LemonRadio'
import { LemonSelect } from 'lib/lemon-ui/LemonSelect'
import { capitalizeFirstLetter } from 'lib/utils'
-import { useEffect } from 'react'
import { experimentsLogic } from 'scenes/experiments/experimentsLogic'
-import { insightDataLogic } from 'scenes/insights/insightDataLogic'
-import { insightLogic } from 'scenes/insights/insightLogic'
-import { Query } from '~/queries/Query/Query'
-import { InsightType } from '~/types'
-
-import { EXPERIMENT_INSIGHT_ID } from './constants'
import { experimentLogic } from './experimentLogic'
-import { ExperimentInsightCreator } from './MetricSelector'
-const StepInfo = (): JSX.Element => {
- const { experiment, featureFlags } = useValues(experimentLogic)
- const { addExperimentGroup, removeExperimentGroup, moveToNextFormStep, setExperimentType } =
- useActions(experimentLogic)
+const ExperimentFormFields = (): JSX.Element => {
+ const { experiment, featureFlags, groupTypes, aggregationLabel } = useValues(experimentLogic)
+ const {
+ addExperimentGroup,
+ removeExperimentGroup,
+ setExperiment,
+ setNewExperimentInsight,
+ createExperiment,
+ setExperimentType,
+ } = useActions(experimentLogic)
const { webExperimentsAvailable } = useValues(experimentsLogic)
+
return (
@@ -91,6 +89,38 @@ const StepInfo = (): JSX.Element => {
/>
)}
+
+
Participant type
+
+ The type on which to aggregate metrics. You can change this at any time during the experiment.
+
+
+
{
+ const groupTypeIndex = rawGroupTypeIndex !== -1 ? rawGroupTypeIndex : undefined
+
+ setExperiment({
+ parameters: {
+ ...experiment.parameters,
+ aggregation_group_type_index: groupTypeIndex ?? undefined,
+ },
+ })
+ setNewExperimentInsight()
+ }}
+ options={[
+ { value: -1, label: 'Persons' },
+ ...Array.from(groupTypes.values()).map((groupType) => ({
+ value: groupType.group_type_index,
+ label: capitalizeFirstLetter(aggregationLabel(groupType.group_type_index).plural),
+ })),
+ ]}
+ />
+
Variants
Add up to 9 variants to test against your control.
@@ -187,135 +217,6 @@ const StepInfo = (): JSX.Element => {
)}
- moveToNextFormStep()}
- >
- Continue
-
-
- )
-}
-
-const StepGoal = (): JSX.Element => {
- const { experiment, experimentInsightType, groupTypes, aggregationLabel } = useValues(experimentLogic)
- const { setExperiment, setNewExperimentInsight, createExperiment } = useActions(experimentLogic)
-
- // insightLogic
- const logic = insightLogic({ dashboardItemId: EXPERIMENT_INSIGHT_ID })
- const { insightProps } = useValues(logic)
-
- // insightDataLogic
- const { query } = useValues(insightDataLogic(insightProps))
-
- return (
-
-
- {groupTypes.size > 0 && (
-
-
Participant type
-
- The type on which to aggregate metrics. You can change this at any time during the
- experiment.
-
-
-
{
- const groupTypeIndex = rawGroupTypeIndex !== -1 ? rawGroupTypeIndex : undefined
-
- setExperiment({
- parameters: {
- ...experiment.parameters,
- aggregation_group_type_index: groupTypeIndex ?? undefined,
- },
- })
- setNewExperimentInsight()
- }}
- options={[
- { value: -1, label: 'Persons' },
- ...Array.from(groupTypes.values()).map((groupType) => ({
- value: groupType.group_type_index,
- label: capitalizeFirstLetter(aggregationLabel(groupType.group_type_index).plural),
- })),
- ]}
- />
-
- )}
-
-
Goal type
-
- You can change this at any time during the experiment.
-
-
-
-
{
- val &&
- setNewExperimentInsight({
- insight: val,
- properties: experiment?.filters?.properties,
- })
- }}
- options={[
- {
- value: InsightType.FUNNELS,
- label: (
-
-
Conversion funnel
-
- Track how many people complete a sequence of actions and/or events
-
-
- ),
- },
- {
- value: InsightType.TRENDS,
- label: (
-
-
Trend
-
- Track the total count of a specific event or action.
-
-
- ),
- },
- ]}
- />
-
-
-
-
Goal criteria
-
- {experimentInsightType === InsightType.FUNNELS
- ? 'Create the funnel you want to measure.'
- : 'Select a single metric to track.'}
-
-
-
-
-
-
-
-
Goal preview
-
-
-
-
-
-
-
{
}
export function ExperimentForm(): JSX.Element {
- const { currentFormStep, props } = useValues(experimentLogic)
- const { setCurrentFormStep } = useActions(experimentLogic)
-
- const stepComponents = {
- 0: ,
- 1: ,
- }
- const CurrentStepComponent = (currentFormStep && stepComponents[currentFormStep]) ||
-
- useEffect(() => {
- setCurrentFormStep(0)
- }, [])
+ const { props } = useValues(experimentLogic)
return (
- {currentFormStep > 0 && (
- }
- type="secondary"
- className="my-4"
- onClick={() => {
- setCurrentFormStep(currentFormStep - 1)
- }}
- >
- Back
-
- )}
)
diff --git a/frontend/src/scenes/experiments/ExperimentView/DistributionTable.tsx b/frontend/src/scenes/experiments/ExperimentView/DistributionTable.tsx
index bb07d8914cf41..f6d45ad12d314 100644
--- a/frontend/src/scenes/experiments/ExperimentView/DistributionTable.tsx
+++ b/frontend/src/scenes/experiments/ExperimentView/DistributionTable.tsx
@@ -1,23 +1,138 @@
import '../Experiment.scss'
-import { IconFlag } from '@posthog/icons'
-import { LemonButton, LemonDialog, LemonTable, LemonTableColumns } from '@posthog/lemon-ui'
+import { IconBalance, IconFlag } from '@posthog/icons'
+import {
+ LemonBanner,
+ LemonButton,
+ LemonDialog,
+ LemonInput,
+ LemonModal,
+ LemonTable,
+ LemonTableColumns,
+} from '@posthog/lemon-ui'
import { useActions, useValues } from 'kea'
import { AuthorizedUrlList } from 'lib/components/AuthorizedUrlList/AuthorizedUrlList'
import { AuthorizedUrlListType } from 'lib/components/AuthorizedUrlList/authorizedUrlListLogic'
import { IconOpenInApp } from 'lib/lemon-ui/icons'
+import { featureFlagLogic, FeatureFlagLogicProps } from 'scenes/feature-flags/featureFlagLogic'
-import { sidePanelStateLogic } from '~/layout/navigation-3000/sidepanel/sidePanelStateLogic'
-import { MultivariateFlagVariant, SidePanelTab } from '~/types'
+import { Experiment, MultivariateFlagVariant } from '~/types'
import { experimentLogic } from '../experimentLogic'
import { VariantTag } from './components'
import { VariantScreenshot } from './VariantScreenshot'
+export function DistributionModal({ experimentId }: { experimentId: Experiment['id'] }): JSX.Element {
+ const { experiment, experimentLoading, isDistributionModalOpen } = useValues(experimentLogic({ experimentId }))
+ const { closeDistributionModal } = useActions(experimentLogic({ experimentId }))
+
+ const _featureFlagLogic = featureFlagLogic({ id: experiment.feature_flag?.id ?? null } as FeatureFlagLogicProps)
+ const { featureFlag, areVariantRolloutsValid, variantRolloutSum } = useValues(_featureFlagLogic)
+ const { setFeatureFlagFilters, distributeVariantsEqually, saveSidebarExperimentFeatureFlag } =
+ useActions(_featureFlagLogic)
+
+ const handleRolloutPercentageChange = (index: number, value: number | undefined): void => {
+ if (!featureFlag?.filters?.multivariate || !value) {
+ return
+ }
+
+ const updatedVariants = featureFlag.filters.multivariate.variants.map((variant, i) =>
+ i === index ? { ...variant, rollout_percentage: value } : variant
+ )
+
+ setFeatureFlagFilters(
+ {
+ ...featureFlag.filters,
+ multivariate: { ...featureFlag.filters.multivariate, variants: updatedVariants },
+ },
+ null
+ )
+ }
+
+ return (
+
+
+ Cancel
+
+ {
+ saveSidebarExperimentFeatureFlag(featureFlag)
+ closeDistributionModal()
+ }}
+ type="primary"
+ loading={experimentLoading}
+ disabled={!areVariantRolloutsValid}
+ >
+ Save
+
+
+ }
+ >
+
+
+ Adjusting variant distribution may impact the validity of your results. Adjust only if you're aware
+ of how changes will affect your experiment.
+
+
+
+
+
Variant Distribution
+ }
+ >
+ Distribute equally
+
+
+
+
{value} ,
+ },
+ {
+ title: 'Rollout Percentage',
+ dataIndex: 'rollout_percentage',
+ render: (_, record, index) => (
+ handleRolloutPercentageChange(index, value)}
+ min={0}
+ max={100}
+ suffix={% }
+ />
+ ),
+ },
+ ]}
+ />
+
+ {!areVariantRolloutsValid && (
+
+ Percentage rollouts must sum to 100 (currently {variantRolloutSum}).
+
+ )}
+
+
+
+ )
+}
+
export function DistributionTable(): JSX.Element {
+ const { openDistributionModal } = useActions(experimentLogic)
const { experimentId, experiment, experimentResults } = useValues(experimentLogic)
const { reportExperimentReleaseConditionsViewed } = useActions(experimentLogic)
- const { openSidePanel } = useActions(sidePanelStateLogic)
const onSelectElement = (variant: string): void => {
LemonDialog.open({
@@ -110,7 +225,7 @@ export function DistributionTable(): JSX.Element {
}
onClick={() => {
- openSidePanel(SidePanelTab.ExperimentFeatureFlag)
+ openDistributionModal()
reportExperimentReleaseConditionsViewed(experiment.id)
}}
type="secondary"
diff --git a/frontend/src/scenes/experiments/ExperimentView/ExperimentView.tsx b/frontend/src/scenes/experiments/ExperimentView/ExperimentView.tsx
index de54d1461014b..59b38ffaf3163 100644
--- a/frontend/src/scenes/experiments/ExperimentView/ExperimentView.tsx
+++ b/frontend/src/scenes/experiments/ExperimentView/ExperimentView.tsx
@@ -1,6 +1,6 @@
import '../Experiment.scss'
-import { LemonDivider } from '@posthog/lemon-ui'
+import { LemonDivider, LemonTabs } from '@posthog/lemon-ui'
import { useActions, useValues } from 'kea'
import { WebExperimentImplementationDetails } from 'scenes/experiments/WebExperimentImplementationDetails'
@@ -14,19 +14,66 @@ import {
ResultsHeader,
} from './components'
import { DataCollection } from './DataCollection'
-import { DistributionTable } from './DistributionTable'
+import { DistributionModal, DistributionTable } from './DistributionTable'
import { ExperimentExposureModal, ExperimentGoalModal, Goal } from './Goal'
import { Info } from './Info'
import { Overview } from './Overview'
-import { ReleaseConditionsTable } from './ReleaseConditionsTable'
+import { ReleaseConditionsModal, ReleaseConditionsTable } from './ReleaseConditionsTable'
import { Results } from './Results'
import { SecondaryMetricsTable } from './SecondaryMetricsTable'
+const ResultsTab = (): JSX.Element => {
+ const { experiment, experimentResults } = useValues(experimentLogic)
+ const { updateExperimentSecondaryMetrics } = useActions(experimentLogic)
+
+ const hasResultsInsight = experimentResults && experimentResults.insight
+
+ return (
+
+ {hasResultsInsight ? (
+
+ ) : (
+ <>
+ {experiment.type === 'web' ? (
+
+ ) : (
+
+ )}
+
+ {experiment.start_date && (
+
+
+
+
+ )}
+ >
+ )}
+
updateExperimentSecondaryMetrics(metrics)}
+ initialMetrics={experiment.secondary_metrics}
+ defaultAggregationType={experiment.parameters?.aggregation_group_type_index}
+ />
+
+ )
+}
+
+const VariantsTab = (): JSX.Element => {
+ return (
+
+
+
+
+ )
+}
+
export function ExperimentView(): JSX.Element {
- const { experiment, experimentLoading, experimentResultsLoading, experimentId, experimentResults } =
+ const { experimentLoading, experimentResultsLoading, experimentId, experimentResults, tabKey } =
useValues(experimentLogic)
- const { updateExperimentSecondaryMetrics } = useActions(experimentLogic)
+ const { setTabKey } = useActions(experimentLogic)
+
+ const hasResultsInsight = experimentResults && experimentResults.insight
return (
<>
@@ -39,25 +86,14 @@ export function ExperimentView(): JSX.Element {
{experimentResultsLoading ? (
- ) : experimentResults && experimentResults.insight ? (
- <>
-
-
-
-
-
-
- >
) : (
<>
+ {hasResultsInsight ? (
+
+
+
+
+ ) : null}
@@ -67,30 +103,28 @@ export function ExperimentView(): JSX.Element {
- {experiment.type === 'web' ? (
-
- ) : (
-
- )}
-
- {experiment.start_date && (
-
-
-
-
- )}
+ setTabKey(key)}
+ tabs={[
+ {
+ key: 'results',
+ label: 'Results',
+ content: ,
+ },
+ {
+ key: 'variants',
+ label: 'Variants',
+ content: ,
+ },
+ ]}
+ />
>
)}
- updateExperimentSecondaryMetrics(metrics)}
- initialMetrics={experiment.secondary_metrics}
- defaultAggregationType={experiment.parameters?.aggregation_group_type_index}
- />
-
-
+
+
>
)}
diff --git a/frontend/src/scenes/experiments/ExperimentView/Goal.tsx b/frontend/src/scenes/experiments/ExperimentView/Goal.tsx
index 6f4ab76a528a4..776cd61c16d78 100644
--- a/frontend/src/scenes/experiments/ExperimentView/Goal.tsx
+++ b/frontend/src/scenes/experiments/ExperimentView/Goal.tsx
@@ -1,6 +1,6 @@
import '../Experiment.scss'
-import { IconInfo } from '@posthog/icons'
+import { IconInfo, IconPlus } from '@posthog/icons'
import { LemonButton, LemonDivider, LemonModal, Tooltip } from '@posthog/lemon-ui'
import { useActions, useValues } from 'kea'
import { Field, Form } from 'kea-forms'
@@ -211,7 +211,7 @@ export function ExperimentExposureModal({ experimentId }: { experimentId: Experi
}
export function Goal(): JSX.Element {
- const { experiment, experimentId, experimentInsightType, experimentMathAggregationForTrends } =
+ const { experiment, experimentId, experimentInsightType, experimentMathAggregationForTrends, hasGoalSet } =
useValues(experimentLogic)
const { openExperimentGoalModal } = useActions(experimentLogic({ experimentId }))
@@ -235,27 +235,44 @@ export function Goal(): JSX.Element {
-
-
-
- {experimentInsightType === InsightType.FUNNELS ? 'Conversion goal steps' : 'Trend goal'}
+ {!hasGoalSet ? (
+
+
+ Add the main goal before launching the experiment.
-
-
- Change goal
+ }
+ type="secondary"
+ size="small"
+ data-attr="add-experiment-goal"
+ onClick={openExperimentGoalModal}
+ >
+ Add goal
- {experimentInsightType === InsightType.TRENDS && !experimentMathAggregationForTrends() && (
- <>
-
-
-
-
-
+ ) : (
+
+
+
+ {experimentInsightType === InsightType.FUNNELS ? 'Conversion goal steps' : 'Trend goal'}
- >
- )}
-
+
+
+ Change goal
+
+
+ {experimentInsightType === InsightType.TRENDS && !experimentMathAggregationForTrends() && (
+ <>
+
+
+ >
+ )}
+
+ )}
)
}
diff --git a/frontend/src/scenes/experiments/ExperimentView/ReleaseConditionsTable.tsx b/frontend/src/scenes/experiments/ExperimentView/ReleaseConditionsTable.tsx
index 5b9c8bac492bb..dfe6130db788e 100644
--- a/frontend/src/scenes/experiments/ExperimentView/ReleaseConditionsTable.tsx
+++ b/frontend/src/scenes/experiments/ExperimentView/ReleaseConditionsTable.tsx
@@ -1,20 +1,68 @@
import '../Experiment.scss'
import { IconFlag } from '@posthog/icons'
-import { LemonButton, LemonTable, LemonTableColumns, LemonTag } from '@posthog/lemon-ui'
+import { LemonBanner, LemonButton, LemonModal, LemonTable, LemonTableColumns, LemonTag } from '@posthog/lemon-ui'
import { useActions, useValues } from 'kea'
+import { featureFlagLogic, FeatureFlagLogicProps } from 'scenes/feature-flags/featureFlagLogic'
+import { FeatureFlagReleaseConditions } from 'scenes/feature-flags/FeatureFlagReleaseConditions'
-import { sidePanelStateLogic } from '~/layout/navigation-3000/sidepanel/sidePanelStateLogic'
import { groupsModel } from '~/models/groupsModel'
-import { FeatureFlagGroupType, SidePanelTab } from '~/types'
+import { Experiment, FeatureFlagGroupType } from '~/types'
import { experimentLogic } from '../experimentLogic'
+export function ReleaseConditionsModal({ experimentId }: { experimentId: Experiment['id'] }): JSX.Element {
+ const { experiment, isReleaseConditionsModalOpen } = useValues(experimentLogic({ experimentId }))
+ const { closeReleaseConditionsModal } = useActions(experimentLogic({ experimentId }))
+
+ const _featureFlagLogic = featureFlagLogic({ id: experiment.feature_flag?.id ?? null } as FeatureFlagLogicProps)
+ const { featureFlag, nonEmptyVariants } = useValues(_featureFlagLogic)
+ const { setFeatureFlagFilters, saveSidebarExperimentFeatureFlag } = useActions(_featureFlagLogic)
+
+ return (
+
+
+ Cancel
+
+ {
+ saveSidebarExperimentFeatureFlag(featureFlag)
+ closeReleaseConditionsModal()
+ }}
+ type="primary"
+ >
+ Save
+
+
+ }
+ >
+
+
+ Adjusting user targeting may impact the validity of your results. Adjust only if you're aware of how
+ changes will affect your experiment.
+
+
+
+
+
+ )
+}
+
export function ReleaseConditionsTable(): JSX.Element {
const { experiment } = useValues(experimentLogic)
- const { reportExperimentReleaseConditionsViewed } = useActions(experimentLogic)
+ const { reportExperimentReleaseConditionsViewed, openReleaseConditionsModal } = useActions(experimentLogic)
const { aggregationLabel } = useValues(groupsModel)
- const { openSidePanel } = useActions(sidePanelStateLogic)
const columns: LemonTableColumns<FeatureFlagGroupType> = [
{
@@ -67,7 +115,7 @@ export function ReleaseConditionsTable(): JSX.Element {
}
onClick={() => {
- openSidePanel(SidePanelTab.ExperimentFeatureFlag)
+ openReleaseConditionsModal()
reportExperimentReleaseConditionsViewed(experiment.id)
}}
type="secondary"
diff --git a/frontend/src/scenes/experiments/ExperimentView/components.tsx b/frontend/src/scenes/experiments/ExperimentView/components.tsx
index 2348635139c67..ddcd2bbd14bd1 100644
--- a/frontend/src/scenes/experiments/ExperimentView/components.tsx
+++ b/frontend/src/scenes/experiments/ExperimentView/components.tsx
@@ -451,6 +451,7 @@ export function PageHeaderCustom(): JSX.Element {
areResultsSignificant,
isSingleVariantShipped,
featureFlags,
+ hasGoalSet,
} = useValues(experimentLogic)
const {
launchExperiment,
@@ -473,6 +474,9 @@ export function PageHeaderCustom(): JSX.Element {
type="primary"
data-attr="launch-experiment"
onClick={() => launchExperiment()}
+ disabledReason={
+ !hasGoalSet ? 'Add the main goal before launching the experiment' : undefined
+ }
>
Launch
@@ -746,7 +750,8 @@ export function ActionBanner(): JSX.Element {
if (!isExperimentRunning) {
return (
- Your experiment is in draft mode. You can edit your variants, adjust release conditions, and{' '}
+ Your experiment is in draft mode. You can set the goal, edit the variants, adjust release conditions,
+ and{' '}
test your feature flag
diff --git a/frontend/src/scenes/experiments/experimentLogic.tsx b/frontend/src/scenes/experiments/experimentLogic.tsx
index a928b09db82cd..46f0c259b2fb1 100644
--- a/frontend/src/scenes/experiments/experimentLogic.tsx
+++ b/frontend/src/scenes/experiments/experimentLogic.tsx
@@ -186,9 +186,12 @@ export const experimentLogic = kea([
closeExperimentCollectionGoalModal: true,
openShipVariantModal: true,
closeShipVariantModal: true,
- setCurrentFormStep: (stepIndex: number) => ({ stepIndex }),
- moveToNextFormStep: true,
+ openDistributionModal: true,
+ closeDistributionModal: true,
+ openReleaseConditionsModal: true,
+ closeReleaseConditionsModal: true,
updateExperimentVariantImages: (variantPreviewMediaIds: Record) => ({ variantPreviewMediaIds }),
+ setTabKey: (tabKey: string) => ({ tabKey }),
}),
reducers({
experiment: [
@@ -332,6 +335,20 @@ export const experimentLogic = kea([
closeShipVariantModal: () => false,
},
],
+ isDistributionModalOpen: [
+ false,
+ {
+ openDistributionModal: () => true,
+ closeDistributionModal: () => false,
+ },
+ ],
+ isReleaseConditionsModalOpen: [
+ false,
+ {
+ openReleaseConditionsModal: () => true,
+ closeReleaseConditionsModal: () => false,
+ },
+ ],
experimentValuesChangedLocally: [
false,
{
@@ -340,10 +357,10 @@ export const experimentLogic = kea([
updateExperiment: () => false,
},
],
- currentFormStep: [
- 0,
+ tabKey: [
+ 'results',
{
- setCurrentFormStep: (_, { stepIndex }) => stepIndex,
+ setTabKey: (_, { tabKey }) => tabKey,
},
],
}),
@@ -351,6 +368,16 @@ export const experimentLogic = kea([
createExperiment: async ({ draft }) => {
const { recommendedRunningTime, recommendedSampleSize, minimumDetectableEffect } = values
+ actions.touchExperimentField('name')
+ actions.touchExperimentField('feature_flag_key')
+ values.experiment.parameters.feature_flag_variants.forEach((_, i) =>
+ actions.touchExperimentField(`parameters.feature_flag_variants.${i}.key`)
+ )
+
+ if (hasFormErrors(values.experimentErrors)) {
+ return
+ }
+
// Minimum Detectable Effect is calculated based on a loaded insight
// Terminate if the insight did not manage to load in time
if (!minimumDetectableEffect) {
@@ -503,8 +530,6 @@ export const experimentLogic = kea([
loadExperimentSuccess: async ({ experiment }) => {
experiment && actions.reportExperimentViewed(experiment)
- actions.setNewExperimentInsight(experiment?.filters)
-
if (experiment?.start_date) {
actions.loadExperimentResults()
actions.loadSecondaryMetricResults()
@@ -697,20 +722,6 @@ export const experimentLogic = kea([
openExperimentExposureModal: async () => {
actions.setExperimentExposureInsight(values.experiment?.parameters?.custom_exposure_filter)
},
- moveToNextFormStep: async () => {
- const { currentFormStep } = values
- if (currentFormStep === 0) {
- actions.touchExperimentField('name')
- actions.touchExperimentField('feature_flag_key')
- values.experiment.parameters.feature_flag_variants.forEach((_, i) =>
- actions.touchExperimentField(`parameters.feature_flag_variants.${i}.key`)
- )
- }
-
- if (!hasFormErrors(values.experimentErrors)) {
- actions.setCurrentFormStep(currentFormStep + 1)
- }
- },
createExposureCohortSuccess: ({ exposureCohort }) => {
if (exposureCohort && exposureCohort.id !== 'new') {
cohortsModel.actions.cohortCreated(exposureCohort)
@@ -1532,6 +1543,17 @@ export const experimentLogic = kea([
)
},
],
+ hasGoalSet: [
+ (s) => [s.experiment],
+ (experiment): boolean => {
+ const filters = experiment?.filters
+ return !!(
+ (filters?.actions && filters.actions.length > 0) ||
+ (filters?.events && filters.events.length > 0) ||
+ (filters?.data_warehouse && filters.data_warehouse.length > 0)
+ )
+ },
+ ],
}),
forms(({ actions }) => ({
experiment: {
@@ -1561,7 +1583,6 @@ export const experimentLogic = kea([
const parsedId = id === 'new' ? 'new' : parseInt(id)
if (parsedId === 'new') {
actions.resetExperiment()
- actions.setNewExperimentInsight()
}
if (parsedId !== 'new' && parsedId === values.experimentId) {
diff --git a/frontend/src/scenes/projectLogic.ts b/frontend/src/scenes/projectLogic.ts
index 9e67d253db5ad..8ebb2c2ee207f 100644
--- a/frontend/src/scenes/projectLogic.ts
+++ b/frontend/src/scenes/projectLogic.ts
@@ -75,7 +75,12 @@ export const projectLogic = kea([
return patchedProject
},
createProject: async ({ name }: { name: string }) => {
- return await api.create('api/projects/', { name })
+ try {
+ return await api.create('api/projects/', { name })
+ } catch (error: any) {
+ lemonToast.error('Failed to create project')
+ return values.currentProject
+ }
},
},
],
@@ -83,7 +88,7 @@ export const projectLogic = kea([
selectors({
currentProjectId: [(s) => [s.currentProject], (currentProject) => currentProject?.id || null],
}),
- listeners(({ actions }) => ({
+ listeners(({ actions, values }) => ({
loadCurrentProjectSuccess: ({ currentProject }) => {
if (currentProject) {
ApiConfig.setCurrentProjectId(currentProject.id)
@@ -102,7 +107,7 @@ export const projectLogic = kea([
lemonToast.success('Project has been deleted')
},
createProjectSuccess: ({ currentProject }) => {
- if (currentProject) {
+ if (currentProject && currentProject.id !== values.currentProject?.id) {
actions.switchTeam(currentProject.id)
}
},
diff --git a/frontend/src/scenes/surveys/surveyLogic.tsx b/frontend/src/scenes/surveys/surveyLogic.tsx
index 12b4b20e8620f..528aac6db6e96 100644
--- a/frontend/src/scenes/surveys/surveyLogic.tsx
+++ b/frontend/src/scenes/surveys/surveyLogic.tsx
@@ -181,6 +181,11 @@ export const surveyLogic = kea([
setFlagPropertyErrors: (errors: any) => ({ errors }),
}),
loaders(({ props, actions, values }) => ({
+ responseSummary: {
+ summarize: async ({ questionIndex }: { questionIndex?: number }) => {
+ return api.surveys.summarize_responses(props.id, questionIndex)
+ },
+ },
survey: {
loadSurvey: async () => {
if (props.id && props.id !== 'new') {
diff --git a/frontend/src/scenes/surveys/surveyViewViz.tsx b/frontend/src/scenes/surveys/surveyViewViz.tsx
index 324c53958dca3..a2ab8db7c32f8 100644
--- a/frontend/src/scenes/surveys/surveyViewViz.tsx
+++ b/frontend/src/scenes/surveys/surveyViewViz.tsx
@@ -1,10 +1,21 @@
-import { IconInfo } from '@posthog/icons'
-import { LemonTable } from '@posthog/lemon-ui'
+import {
+ IconInfo,
+ IconSparkles,
+ IconThumbsDown,
+ IconThumbsDownFilled,
+ IconThumbsUp,
+ IconThumbsUpFilled,
+} from '@posthog/icons'
+import { LemonButton, LemonTable, Spinner } from '@posthog/lemon-ui'
import { BindLogic, useActions, useValues } from 'kea'
+import { FlaggedFeature } from 'lib/components/FlaggedFeature'
+import { FEATURE_FLAGS } from 'lib/constants'
import { dayjs } from 'lib/dayjs'
import { LemonDivider } from 'lib/lemon-ui/LemonDivider'
+import { LemonMarkdown } from 'lib/lemon-ui/LemonMarkdown'
import { Tooltip } from 'lib/lemon-ui/Tooltip'
import { humanFriendlyNumber } from 'lib/utils'
+import posthog from 'posthog-js'
import { useEffect, useState } from 'react'
import { insightLogic } from 'scenes/insights/insightLogic'
import { LineGraph } from 'scenes/insights/views/LineGraph/LineGraph'
@@ -577,15 +588,19 @@ export function OpenTextViz({
<></>
) : (
<>
-
-
-
Open text
-
-
random selection
-
-
-
+
+
+
+
Open text
+
+
random selection
+
+
+
+
+
{question.question}
+
{surveyOpenTextResults[questionIndex].events.map((event, i) => {
const personProp = {
@@ -617,3 +632,87 @@ export function OpenTextViz({
)
}
+
+function ResponseSummariesButton({ questionIndex }: { questionIndex: number | undefined }): JSX.Element {
+ const { summarize } = useActions(surveyLogic)
+ const { responseSummary, responseSummaryLoading } = useValues(surveyLogic)
+
+ return (
+ <LemonButton
+ onClick={() => summarize({ questionIndex })}
+ disabledReason={
+ responseSummaryLoading ? 'Let me think...' : responseSummary ? 'already summarized' : undefined
+ }
+ icon={<IconSparkles />}
+ >
+ {responseSummaryLoading ? (
+ <>
+ Let me think...
+ <Spinner />
+ </>
+ ) : (
+ <>Summarize responses</>
+ )}
+ </LemonButton>
+
+ )
+}
+
+function ResponseSummariesDisplay(): JSX.Element {
+ const { survey, responseSummary } = useValues(surveyLogic)
+
+ return (
+
+ {responseSummary ? (
+ <>
+ Responses summary
+ <LemonMarkdown>{responseSummary.content}</LemonMarkdown>
+
+ <ResponseSummaryFeedback surveyId={survey.id} />
+ </>
+ ) : null}
+
+ )
+}
+
+function ResponseSummaryFeedback({ surveyId }: { surveyId: string }): JSX.Element {
+ const [rating, setRating] = useState<'good' | 'bad' | null>(null)
+
+ function submitRating(newRating: 'good' | 'bad'): void {
+ if (rating) {
+ return // Already rated
+ }
+ setRating(newRating)
+ posthog.capture('chat rating', {
+ survey_id: surveyId,
+ answer_rating: newRating,
+ })
+ }
+
+ return (
+
+ {rating === null ? <>Summaries are generated by AI. What did you think?</> : null}
+ {rating !== 'bad' && (
+ <LemonButton
+ icon={rating === 'good' ? <IconThumbsUpFilled /> : <IconThumbsUp />}
+ type="tertiary"
+ size="small"
+ tooltip="Good summary"
+ onClick={() => submitRating('good')}
+ />
+ )}
+ {rating !== 'good' && (
+ <LemonButton
+ icon={rating === 'bad' ? <IconThumbsDownFilled /> : <IconThumbsDown />}
+ type="tertiary"
+ size="small"
+ tooltip="Bad summary"
+ onClick={() => submitRating('bad')}
+ />
+ )}
+
+ )
+}
diff --git a/frontend/src/toolbar/Toolbar.stories.tsx b/frontend/src/toolbar/Toolbar.stories.tsx
index 593681ba4f32c..fc140a3727f7e 100644
--- a/frontend/src/toolbar/Toolbar.stories.tsx
+++ b/frontend/src/toolbar/Toolbar.stories.tsx
@@ -55,7 +55,6 @@ const BasicTemplate: StoryFn = (props) => {
userIntent: undefined,
dataAttributes: ['data-attr'],
apiURL: '/',
- jsURL: 'http://localhost:8234/',
userEmail: 'foobar@posthog.com',
}
useToolbarStyles()
@@ -68,7 +67,6 @@ const BasicTemplate: StoryFn = (props) => {
},
toolbarParams: {
toolbarVersion: 'toolbar',
- jsURL: 'http://localhost:8234/',
},
isAuthenticated: props.unauthenticated ?? true,
supportedCompression: ['gzip', 'gzip-js', 'lz64'],
diff --git a/frontend/src/toolbar/ToolbarApp.tsx b/frontend/src/toolbar/ToolbarApp.tsx
index 39d2d15afe8b7..f623736bd9c2b 100644
--- a/frontend/src/toolbar/ToolbarApp.tsx
+++ b/frontend/src/toolbar/ToolbarApp.tsx
@@ -13,7 +13,7 @@ import { TOOLBAR_ID } from './utils'
type HTMLElementWithShadowRoot = HTMLElement & { shadowRoot: ShadowRoot }
export function ToolbarApp(props: ToolbarProps = {}): JSX.Element {
- const { jsURL } = useValues(toolbarConfigLogic(props))
+ const { apiURL } = useValues(toolbarConfigLogic(props))
const shadowRef = useRef(null)
const [didLoadStyles, setDidLoadStyles] = useState(false)
@@ -32,7 +32,7 @@ export function ToolbarApp(props: ToolbarProps = {}): JSX.Element {
// this ensures that we bust the cache periodically
const timestampToNearestFiveMinutes =
Math.floor(Date.now() / fiveMinutesInMillis) * fiveMinutesInMillis
- styleLink.href = `${jsURL}/static/toolbar.css?t=${timestampToNearestFiveMinutes}`
+ styleLink.href = `${apiURL}/static/toolbar.css?t=${timestampToNearestFiveMinutes}`
styleLink.onload = () => setDidLoadStyles(true)
const shadowRoot =
shadowRef.current?.shadowRoot || window.document.getElementById(TOOLBAR_ID)?.shadowRoot
diff --git a/frontend/src/toolbar/index.tsx b/frontend/src/toolbar/index.tsx
index e5ae6fa344cc8..7df891f45aa98 100644
--- a/frontend/src/toolbar/index.tsx
+++ b/frontend/src/toolbar/index.tsx
@@ -25,7 +25,6 @@ import { ToolbarParams } from '~/types'
{...toolbarParams}
actionId={parseInt(String(toolbarParams.actionId))}
experimentId={parseInt(String(toolbarParams.experimentId))}
- jsURL={toolbarParams.jsURL || toolbarParams.apiURL}
posthog={posthog}
/>
)
diff --git a/frontend/src/toolbar/stats/currentPageLogic.test.ts b/frontend/src/toolbar/stats/currentPageLogic.test.ts
index c943d482ebba1..1aad6eef2e674 100644
--- a/frontend/src/toolbar/stats/currentPageLogic.test.ts
+++ b/frontend/src/toolbar/stats/currentPageLogic.test.ts
@@ -1,7 +1,7 @@
import { withoutPostHogInit } from '~/toolbar/stats/currentPageLogic'
const posthogInitHashParam =
- '__posthog={%22action%22:%20%22ph_authorize%22,%20%22token%22:%20%the-ph-token%22,%20%22temporaryToken%22:%20%the-posthog-token%22,%20%22actionId%22:%20null,%20%22userIntent%22:%20%22heatmaps%22,%20%22toolbarVersion%22:%20%22toolbar%22,%20%22apiURL%22:%20%22https://eu.posthog.com%22,%20%22dataAttributes%22:%20[%22data-attr%22],%20%22jsURL%22:%20%22https://app-static.eu.posthog.com%22,%20%22instrument%22:%20true,%20%22userEmail%22:%20%user-email@gmail.com%22,%20%22distinctId%22:%20%the-distinct-id%22}'
+ '__posthog={%22action%22:%20%22ph_authorize%22,%20%22token%22:%20%the-ph-token%22,%20%22temporaryToken%22:%20%the-posthog-token%22,%20%22actionId%22:%20null,%20%22userIntent%22:%20%22heatmaps%22,%20%22toolbarVersion%22:%20%22toolbar%22,%20%22apiURL%22:%20%22https://eu.posthog.com%22,%20%22dataAttributes%22:%20[%22data-attr%22],%20%22instrument%22:%20true,%20%22userEmail%22:%20%user-email@gmail.com%22,%20%22distinctId%22:%20%the-distinct-id%22}'
describe('current page logic', () => {
describe('cleaning href', () => {
diff --git a/frontend/src/toolbar/toolbarConfigLogic.ts b/frontend/src/toolbar/toolbarConfigLogic.ts
index e31442b5b2743..3ab336677f682 100644
--- a/frontend/src/toolbar/toolbarConfigLogic.ts
+++ b/frontend/src/toolbar/toolbarConfigLogic.ts
@@ -41,11 +41,6 @@ export const toolbarConfigLogic = kea([
(s) => [s.props],
(props: ToolbarProps) => `${props.apiURL?.endsWith('/') ? props.apiURL.replace(/\/+$/, '') : props.apiURL}`,
],
- jsURL: [
- (s) => [s.props, s.apiURL],
- (props: ToolbarProps, apiUrl) =>
- `${props.jsURL ? (props.jsURL.endsWith('/') ? props.jsURL.replace(/\/+$/, '') : props.jsURL) : apiUrl}`,
- ],
dataAttributes: [(s) => [s.props], (props): string[] => props.dataAttributes ?? []],
isAuthenticated: [(s) => [s.temporaryToken], (temporaryToken) => !!temporaryToken],
}),
diff --git a/frontend/src/types.ts b/frontend/src/types.ts
index 7a481e31fa24d..6b6c2b161c442 100644
--- a/frontend/src/types.ts
+++ b/frontend/src/types.ts
@@ -626,7 +626,6 @@ export type ExperimentIdType = number | 'new' | 'web'
/* sync with posthog-js */
export interface ToolbarParams {
apiURL?: string
- jsURL?: string
token?: string /** public posthog-js token */
temporaryToken?: string /** private temporary user token */
actionId?: number
@@ -2193,7 +2192,8 @@ export enum RetentionPeriod {
Month = 'Month',
}
-export type BreakdownKeyType = string | number | (string | number)[] | null
+// eslint-disable-next-line @typescript-eslint/no-duplicate-type-constituents
+export type BreakdownKeyType = integer | string | number | (integer | string | number)[] | null
/**
* Legacy breakdown.
@@ -4413,7 +4413,6 @@ export enum SidePanelTab {
Discussion = 'discussion',
Status = 'status',
Exports = 'exports',
- ExperimentFeatureFlag = 'experiment-feature-flag',
}
export interface SourceFieldOauthConfig {
diff --git a/mypy-baseline.txt b/mypy-baseline.txt
index 93573c831c0fe..a2ab36ff3afea 100644
--- a/mypy-baseline.txt
+++ b/mypy-baseline.txt
@@ -617,13 +617,12 @@ posthog/warehouse/api/external_data_schema.py:0: note: def [_T] get(self, Type,
posthog/warehouse/api/table.py:0: error: Unused "type: ignore" comment [unused-ignore]
posthog/warehouse/api/table.py:0: error: Unused "type: ignore" comment [unused-ignore]
posthog/warehouse/api/table.py:0: error: Unused "type: ignore" comment [unused-ignore]
-posthog/temporal/data_imports/workflow_activities/sync_new_schemas.py:0: error: Argument 1 has incompatible type "str"; expected "Type" [arg-type]
posthog/temporal/data_imports/workflow_activities/sync_new_schemas.py:0: error: No overload variant of "get" of "dict" matches argument types "str", "tuple[()]" [call-overload]
posthog/temporal/data_imports/workflow_activities/sync_new_schemas.py:0: note: Possible overload variants:
posthog/temporal/data_imports/workflow_activities/sync_new_schemas.py:0: note: def get(self, Type, /) -> Sequence[str] | None
posthog/temporal/data_imports/workflow_activities/sync_new_schemas.py:0: note: def get(self, Type, Sequence[str], /) -> Sequence[str]
posthog/temporal/data_imports/workflow_activities/sync_new_schemas.py:0: note: def [_T] get(self, Type, _T, /) -> Sequence[str] | _T
-posthog/temporal/data_imports/workflow_activities/sync_new_schemas.py:0: error: Argument "source_id" has incompatible type "str"; expected "UUID" [arg-type]
+posthog/temporal/data_imports/workflow_activities/sync_new_schemas.py:0: error: Argument "source_id" to "sync_old_schemas_with_new_schemas" has incompatible type "str"; expected "UUID" [arg-type]
posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a return type annotation [no-untyped-def]
posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation [no-untyped-def]
posthog/tasks/exports/test/test_csv_exporter.py:0: error: Function is missing a type annotation for one or more arguments [no-untyped-def]
@@ -796,6 +795,11 @@ posthog/temporal/tests/batch_exports/test_batch_exports.py:0: error: TypedDict k
posthog/temporal/data_modeling/run_workflow.py:0: error: Dict entry 20 has incompatible type "str": "Literal['complex']"; expected "str": "Literal['text', 'double', 'bool', 'timestamp', 'bigint', 'binary', 'json', 'decimal', 'wei', 'date', 'time']" [dict-item]
posthog/temporal/data_modeling/run_workflow.py:0: error: Dict entry 21 has incompatible type "str": "Literal['complex']"; expected "str": "Literal['text', 'double', 'bool', 'timestamp', 'bigint', 'binary', 'json', 'decimal', 'wei', 'date', 'time']" [dict-item]
posthog/temporal/data_modeling/run_workflow.py:0: error: Dict entry 22 has incompatible type "str": "Literal['complex']"; expected "str": "Literal['text', 'double', 'bool', 'timestamp', 'bigint', 'binary', 'json', 'decimal', 'wei', 'date', 'time']" [dict-item]
+posthog/temporal/data_imports/pipelines/pipeline_sync.py:0: error: "FilesystemDestinationClientConfiguration" has no attribute "delta_jobs_per_write" [attr-defined]
+posthog/temporal/data_imports/pipelines/pipeline_sync.py:0: error: "type[FilesystemDestinationClientConfiguration]" has no attribute "delta_jobs_per_write" [attr-defined]
+posthog/temporal/data_imports/pipelines/pipeline_sync.py:0: error: Incompatible types in assignment (expression has type "object", variable has type "DataWarehouseCredential | Combinable | None") [assignment]
+posthog/temporal/data_imports/pipelines/pipeline_sync.py:0: error: Incompatible types in assignment (expression has type "object", variable has type "str | int | Combinable") [assignment]
+posthog/temporal/data_imports/pipelines/pipeline_sync.py:0: error: Incompatible types in assignment (expression has type "dict[str, dict[str, str | bool]] | dict[str, str]", variable has type "dict[str, dict[str, str]]") [assignment]
posthog/session_recordings/session_recording_api.py:0: error: Argument "team_id" to "get_realtime_snapshots" has incompatible type "int"; expected "str" [arg-type]
posthog/session_recordings/session_recording_api.py:0: error: Value of type variable "SupportsRichComparisonT" of "sorted" cannot be "str | None" [type-var]
posthog/session_recordings/session_recording_api.py:0: error: Argument 1 to "get" of "dict" has incompatible type "str | None"; expected "str" [arg-type]
@@ -826,12 +830,6 @@ posthog/temporal/tests/batch_exports/test_snowflake_batch_export_workflow.py:0:
posthog/temporal/tests/batch_exports/test_snowflake_batch_export_workflow.py:0: error: List item 0 has incompatible type "tuple[str, str, int, int, int, int, str, int]"; expected "tuple[str, str, int, int, str, str, str, str]" [list-item]
posthog/temporal/tests/batch_exports/test_s3_batch_export_workflow.py:0: error: "tuple[Any, ...]" has no attribute "last_uploaded_part_timestamp" [attr-defined]
posthog/temporal/tests/batch_exports/test_s3_batch_export_workflow.py:0: error: "tuple[Any, ...]" has no attribute "upload_state" [attr-defined]
-posthog/temporal/data_imports/workflow_activities/import_data.py:0: error: Argument "job_type" to "PipelineInputs" has incompatible type "str"; expected "Type" [arg-type]
-posthog/temporal/data_imports/pipelines/pipeline_sync.py:0: error: "FilesystemDestinationClientConfiguration" has no attribute "delta_jobs_per_write" [attr-defined]
-posthog/temporal/data_imports/pipelines/pipeline_sync.py:0: error: "type[FilesystemDestinationClientConfiguration]" has no attribute "delta_jobs_per_write" [attr-defined]
-posthog/temporal/data_imports/pipelines/pipeline_sync.py:0: error: Incompatible types in assignment (expression has type "object", variable has type "DataWarehouseCredential | Combinable | None") [assignment]
-posthog/temporal/data_imports/pipelines/pipeline_sync.py:0: error: Incompatible types in assignment (expression has type "object", variable has type "str | int | Combinable") [assignment]
-posthog/temporal/data_imports/pipelines/pipeline_sync.py:0: error: Incompatible types in assignment (expression has type "dict[str, dict[str, str | bool]] | dict[str, str]", variable has type "dict[str, dict[str, str]]") [assignment]
posthog/migrations/0237_remove_timezone_from_teams.py:0: error: Argument 2 to "RunPython" has incompatible type "Callable[[Migration, Any], None]"; expected "_CodeCallable | None" [arg-type]
posthog/migrations/0228_fix_tile_layouts.py:0: error: Argument 2 to "RunPython" has incompatible type "Callable[[Migration, Any], None]"; expected "_CodeCallable | None" [arg-type]
posthog/api/plugin_log_entry.py:0: error: Name "timezone.datetime" is not defined [name-defined]
@@ -839,6 +837,7 @@ posthog/api/plugin_log_entry.py:0: error: Module "django.utils.timezone" does no
posthog/api/plugin_log_entry.py:0: error: Name "timezone.datetime" is not defined [name-defined]
posthog/api/plugin_log_entry.py:0: error: Module "django.utils.timezone" does not explicitly export attribute "datetime" [attr-defined]
posthog/temporal/tests/batch_exports/test_redshift_batch_export_workflow.py:0: error: Incompatible types in assignment (expression has type "str | int", variable has type "int") [assignment]
+posthog/temporal/data_imports/external_data_job.py:0: error: Argument "status" to "update_external_job_status" has incompatible type "str"; expected "Status" [arg-type]
posthog/api/sharing.py:0: error: Item "None" of "list[Any] | None" has no attribute "__iter__" (not iterable) [union-attr]
posthog/api/test/batch_exports/conftest.py:0: error: Signature of "run" incompatible with supertype "Worker" [override]
posthog/api/test/batch_exports/conftest.py:0: note: Superclass:
@@ -850,10 +849,10 @@ posthog/temporal/tests/external_data/test_external_data_job.py:0: error: Invalid
posthog/temporal/tests/external_data/test_external_data_job.py:0: error: Invalid index type "str" for "dict[Type, Sequence[str]]"; expected type "Type" [index]
posthog/temporal/tests/external_data/test_external_data_job.py:0: error: Invalid index type "str" for "dict[Type, Sequence[str]]"; expected type "Type" [index]
posthog/temporal/tests/external_data/test_external_data_job.py:0: error: Invalid index type "str" for "dict[Type, Sequence[str]]"; expected type "Type" [index]
+posthog/temporal/tests/data_imports/test_end_to_end.py:0: error: Unused "type: ignore" comment [unused-ignore]
posthog/api/test/test_team.py:0: error: "HttpResponse" has no attribute "json" [attr-defined]
posthog/api/test/test_team.py:0: error: "HttpResponse" has no attribute "json" [attr-defined]
posthog/test/test_middleware.py:0: error: Incompatible types in assignment (expression has type "_MonkeyPatchedWSGIResponse", variable has type "_MonkeyPatchedResponse") [assignment]
-posthog/temporal/tests/data_imports/test_end_to_end.py:0: error: Unused "type: ignore" comment [unused-ignore]
posthog/management/commands/test/test_create_batch_export_from_app.py:0: error: Incompatible return value type (got "dict[str, Collection[str]]", expected "dict[str, str]") [return-value]
posthog/management/commands/test/test_create_batch_export_from_app.py:0: error: Incompatible types in assignment (expression has type "dict[str, Collection[str]]", variable has type "dict[str, str]") [assignment]
posthog/management/commands/test/test_create_batch_export_from_app.py:0: error: Unpacked dict entry 1 has incompatible type "str"; expected "SupportsKeysAndGetItem[str, str]" [dict-item]
diff --git a/posthog/api/person.py b/posthog/api/person.py
index b410de3d7da68..862a62b5cea91 100644
--- a/posthog/api/person.py
+++ b/posthog/api/person.py
@@ -420,12 +420,12 @@ def bulk_delete(self, request: request.Request, pk=None, **kwargs):
- This endpoint allows you to bulk delete persons, either by the PostHog person IDs or by distinct IDs. You can pass in a maximum of 100 IDs per call.
+ This endpoint allows you to bulk delete persons, either by the PostHog person IDs or by distinct IDs. You can pass in a maximum of 1000 IDs per call.
"""
if distinct_ids := request.data.get("distinct_ids"):
- if len(distinct_ids) > 100:
- raise ValidationError("You can only pass 100 distinct_ids in one call")
+ if len(distinct_ids) > 1000:
+ raise ValidationError("You can only pass 1000 distinct_ids in one call")
persons = self.get_queryset().filter(persondistinctid__distinct_id__in=distinct_ids)
elif ids := request.data.get("ids"):
- if len(ids) > 100:
- raise ValidationError("You can only pass 100 ids in one call")
+ if len(ids) > 1000:
+ raise ValidationError("You can only pass 1000 ids in one call")
persons = self.get_queryset().filter(uuid__in=ids)
else:
raise ValidationError("You need to specify either distinct_ids or ids")
@@ -438,7 +438,7 @@ def bulk_delete(self, request: request.Request, pk=None, **kwargs):
team_id=self.team_id,
user=cast(User, request.user),
was_impersonated=is_impersonated_session(request),
- item_id=person.id,
+ item_id=person.pk,
scope="Person",
activity="deleted",
detail=Detail(name=str(person.uuid)),
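Aside (not part of the patch): with the per-call cap raised from 100 to 1000 IDs, larger deletions still need to be batched by the caller. A minimal client-side sketch, assuming the bulk delete route is POST /api/projects/<project_id>/persons/bulk_delete/ and a personal API key with the appropriate scope (both are assumptions, adjust to your setup):

import requests

def bulk_delete_distinct_ids(host: str, project_id: int, api_key: str, distinct_ids: list[str]) -> None:
    # Send at most 1000 distinct_ids per request, matching the limit enforced above.
    url = f"{host}/api/projects/{project_id}/persons/bulk_delete/"
    headers = {"Authorization": f"Bearer {api_key}"}
    for start in range(0, len(distinct_ids), 1000):
        batch = distinct_ids[start:start + 1000]
        response = requests.post(url, headers=headers, json={"distinct_ids": batch})
        response.raise_for_status()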
diff --git a/posthog/api/survey.py b/posthog/api/survey.py
index 4864612c2b438..bfd2dd7d9f87b 100644
--- a/posthog/api/survey.py
+++ b/posthog/api/survey.py
@@ -1,18 +1,24 @@
+import os
from contextlib import contextmanager
+from datetime import datetime, timedelta
from typing import Any, cast
from urllib.parse import urlparse
import nh3
+import posthoganalytics
+from django.conf import settings
+from django.core.cache import cache
from django.db.models import Min
from django.http import HttpResponse, JsonResponse
from django.utils.text import slugify
from django.views.decorators.csrf import csrf_exempt
from loginas.utils import is_impersonated_session
from nanoid import generate
-from rest_framework import request, serializers, status, viewsets
+from rest_framework import request, serializers, status, viewsets, exceptions
from rest_framework.request import Request
from rest_framework.response import Response
+from ee.surveys.summaries.summarize_surveys import summarize_survey_responses
from posthog.api.action import ActionSerializer
from posthog.api.feature_flag import (
BEHAVIOURAL_COHORT_FOUND_ERROR_CODE,
@@ -23,6 +29,7 @@
from posthog.api.shared import UserBasicSerializer
from posthog.api.utils import action, get_token
from posthog.client import sync_execute
+from posthog.cloud_utils import is_cloud
from posthog.constants import AvailableFeature
from posthog.event_usage import report_user_action
from posthog.exceptions import generate_exception_response
@@ -646,6 +653,67 @@ def activity(self, request: request.Request, **kwargs):
)
return activity_page_response(activity_page, limit, page, request)
+ @action(methods=["POST"], detail=True, required_scopes=["survey:read"])
+ def summarize_responses(self, request: request.Request, **kwargs):
+ if not request.user.is_authenticated:
+ raise exceptions.NotAuthenticated()
+
+ user = cast(User, request.user)
+
+ survey_id = kwargs["pk"]
+
+ if not Survey.objects.filter(id=survey_id, team_id=self.team_id).exists():
+ return Response(status=status.HTTP_404_NOT_FOUND)
+
+ survey = self.get_object()
+
+ cache_key = f'summarize_survey_responses_{self.team.pk}_{self.kwargs["pk"]}'
+ # Check if the response is cached
+ cached_response = cache.get(cache_key)
+ if cached_response is not None:
+ return Response(cached_response)
+
+ environment_is_allowed = settings.DEBUG or is_cloud()
+ has_openai_api_key = bool(os.environ.get("OPENAI_API_KEY"))
+ if not environment_is_allowed or not has_openai_api_key:
+ raise exceptions.ValidationError("survey response summary is only supported in PostHog Cloud")
+
+ if not posthoganalytics.feature_enabled("ai-survey-response-summary", str(user.distinct_id)):
+ raise exceptions.ValidationError("survey response summary is not enabled for this user")
+
+ end_date: datetime = (survey.end_date or datetime.now()).replace(
+ hour=0, minute=0, second=0, microsecond=0
+ ) + timedelta(days=1)
+
+ try:
+ question_index_param = request.query_params.get("question_index", None)
+ question_index = int(question_index_param) if question_index_param else None
+ except (ValueError, TypeError):
+ question_index = None
+
+ summary = summarize_survey_responses(
+ survey_id=survey_id,
+ question_index=question_index,
+ survey_start=(survey.start_date or survey.created_at).replace(hour=0, minute=0, second=0, microsecond=0),
+ survey_end=end_date,
+ team=self.team,
+ user=user,
+ )
+ timings = summary.pop("timings", None)
+ cache.set(cache_key, summary, timeout=30)
+
+ posthoganalytics.capture(
+ event="survey response summarized", distinct_id=str(user.distinct_id), properties=summary
+ )
+
+ # let the browser cache for half the time we cache on the server
+ r = Response(summary, headers={"Cache-Control": "max-age=15"})
+ if timings:
+ r.headers["Server-Timing"] = ", ".join(
+ f"{key};dur={round(duration, ndigits=2)}" for key, duration in timings.items()
+ )
+ return r
+
class SurveyConfigSerializer(serializers.ModelSerializer):
class Meta:
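Aside (not part of the patch): summarize_responses above uses a small double-cache pattern: serve a cached summary if one exists, otherwise compute it, keep it server-side for 30 seconds, and tell the browser to cache it for half that. A stripped-down sketch of just that pattern, with compute_summary standing in for summarize_survey_responses:

from collections.abc import Callable

from django.core.cache import cache
from rest_framework.response import Response

def cached_summary_response(cache_key: str, compute_summary: Callable[[], dict]) -> Response:
    cached = cache.get(cache_key)
    if cached is not None:
        # The same summary was requested within the last 30 seconds: skip the LLM call.
        return Response(cached)
    summary = compute_summary()
    cache.set(cache_key, summary, timeout=30)  # server-side cache: 30 seconds
    # Let the browser cache for half the time we cache on the server.
    return Response(summary, headers={"Cache-Control": "max-age=15"})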
diff --git a/posthog/api/team.py b/posthog/api/team.py
index 257a10f459d38..70e0bd4199380 100644
--- a/posthog/api/team.py
+++ b/posthog/api/team.py
@@ -64,13 +64,31 @@ def has_permission(self, request: request.Request, view) -> bool:
return False
if not request.data.get("is_demo"):
- # if we're not requesting to make a demo project
- # and if the org already has more than 1 non-demo project (need to be able to make the initial project)
- # and the org isn't allowed to make multiple projects
- if organization.teams.exclude(is_demo=True).count() >= 1 and not organization.is_feature_available(
+ has_organization_projects_feature = organization.is_feature_available(
AvailableFeature.ORGANIZATIONS_PROJECTS
- ):
- return False
+ )
+ current_non_demo_project_count = organization.teams.exclude(is_demo=True).count()
+
+ allowed_project_count = next(
+ (
+ feature.get("limit")
+ for feature in organization.available_product_features or []
+ if feature.get("key") == AvailableFeature.ORGANIZATIONS_PROJECTS
+ ),
+ None,
+ )
+
+ if has_organization_projects_feature:
+ # If allowed_project_count is None then the user is allowed unlimited projects
+ if allowed_project_count is None:
+ return True
+ # Check the current non-demo project count against the allowed limit
+ if current_non_demo_project_count >= allowed_project_count:
+ return False
+ else:
+ # If the org doesn't have the feature, they can only have one non-demo project
+ if current_non_demo_project_count >= 1:
+ return False
else:
# if we ARE requesting to make a demo project
# but the org already has a demo project
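Aside (not part of the patch): the limit lookup in has_permission above amounts to scanning available_product_features for the projects feature and reading its "limit" key, where None means no explicit cap. A standalone sketch; the string key in the example call is illustrative, the production code compares against AvailableFeature.ORGANIZATIONS_PROJECTS:

def project_limit(available_product_features: list[dict] | None, feature_key: str) -> int | None:
    # Return the "limit" of the matching feature entry, or None when the feature is
    # missing or has no limit. The caller only treats None as unlimited when the
    # feature itself is available; otherwise the org is capped at one non-demo project.
    return next(
        (
            feature.get("limit")
            for feature in available_product_features or []
            if feature.get("key") == feature_key
        ),
        None,
    )

# e.g. project_limit([{"key": "organizations_projects", "limit": 5}], "organizations_projects") == 5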
diff --git a/posthog/api/test/test_team.py b/posthog/api/test/test_team.py
index 6992c1822ef97..0040ddd257e2d 100644
--- a/posthog/api/test/test_team.py
+++ b/posthog/api/test/test_team.py
@@ -1284,3 +1284,63 @@ def test_teams_outside_personal_api_key_scoped_organizations_not_listed(self):
{team_in_other_org.id},
"Only the team belonging to the scoped organization should be listed, the other one should be excluded",
)
+
+ def test_can_create_team_with_valid_project_limit(self):
+ self.organization_membership.level = OrganizationMembership.Level.ADMIN
+ self.organization_membership.save()
+ self.organization.available_product_features = [
+ {
+ "key": AvailableFeature.ORGANIZATIONS_PROJECTS,
+ "name": "Organizations Projects",
+ "limit": 5,
+ }
+ ]
+ self.organization.save()
+ self.assertEqual(Team.objects.count(), 1)
+
+ response = self.client.post("/api/projects/@current/environments/", {"name": "New Project"})
+ self.assertEqual(response.status_code, 201)
+ self.assertEqual(Team.objects.count(), 2)
+
+ def test_cant_create_team_when_at_project_limit(self):
+ self.organization_membership.level = OrganizationMembership.Level.ADMIN
+ self.organization_membership.save()
+ self.organization.available_product_features = [
+ {
+ "key": AvailableFeature.ORGANIZATIONS_PROJECTS,
+ "name": "Organizations Projects",
+ "limit": 1,
+ }
+ ]
+ self.organization.save()
+ self.assertEqual(Team.objects.count(), 1)
+
+ response = self.client.post("/api/projects/@current/environments/", {"name": "New Project"})
+ self.assertEqual(response.status_code, 403)
+ response_data = response.json()
+ self.assertDictContainsSubset(
+ {
+ "type": "authentication_error",
+ "code": "permission_denied",
+ "detail": "You must upgrade your PostHog plan to be able to create and manage multiple projects or environments.",
+ },
+ response_data,
+ )
+ self.assertEqual(Team.objects.count(), 1)
+
+ def test_can_create_team_with_unlimited_projects_feature(self):
+ self.organization_membership.level = OrganizationMembership.Level.ADMIN
+ self.organization_membership.save()
+ self.organization.available_product_features = [
+ {"key": AvailableFeature.ORGANIZATIONS_PROJECTS, "name": "Organizations Projects", "limit": None}
+ ]
+ self.organization.save()
+ self.assertEqual(Team.objects.count(), 1)
+
+ response = self.client.post("/api/projects/@current/environments/", {"name": "New Project"})
+ self.assertEqual(response.status_code, 201)
+ self.assertEqual(Team.objects.count(), 2)
+
+ response = self.client.post("/api/projects/@current/environments/", {"name": "New Project 2"})
+ self.assertEqual(response.status_code, 201)
+ self.assertEqual(Team.objects.count(), 3)
diff --git a/posthog/api/test/test_user.py b/posthog/api/test/test_user.py
index 4ee05190e8158..1c9d3b96aec04 100644
--- a/posthog/api/test/test_user.py
+++ b/posthog/api/test/test_user.py
@@ -864,11 +864,11 @@ def test_redirect_user_to_site_with_toolbar(self, patched_token):
)
self.assertEqual(response.status_code, status.HTTP_302_FOUND)
locationHeader = response.headers.get("location", "not found")
- self.assertIn("%22jsURL%22%3A%20%22http%3A%2F%2Flocalhost%3A8234%22", locationHeader)
+ self.assertIn("%22apiURL%22%3A%20%22http%3A%2F%2Ftestserver%22", locationHeader)
self.maxDiff = None
self.assertEqual(
unquote(locationHeader),
- 'http://127.0.0.1:8000#__posthog={"action": "ph_authorize", "token": "token123", "temporaryToken": "tokenvalue", "actionId": null, "experimentId": null, "userIntent": "add-action", "toolbarVersion": "toolbar", "apiURL": "http://testserver", "dataAttributes": ["data-attr"], "jsURL": "http://localhost:8234"}',
+ 'http://127.0.0.1:8000#__posthog={"action": "ph_authorize", "token": "token123", "temporaryToken": "tokenvalue", "actionId": null, "experimentId": null, "userIntent": "add-action", "toolbarVersion": "toolbar", "apiURL": "http://testserver", "dataAttributes": ["data-attr"]}',
)
@patch("posthog.api.user.secrets.token_urlsafe")
@@ -883,11 +883,11 @@ def test_redirect_user_to_site_with_experiments_toolbar(self, patched_token):
)
self.assertEqual(response.status_code, status.HTTP_302_FOUND)
locationHeader = response.headers.get("location", "not found")
- self.assertIn("%22jsURL%22%3A%20%22http%3A%2F%2Flocalhost%3A8234%22", locationHeader)
+ self.assertIn("%22apiURL%22%3A%20%22http%3A%2F%2Ftestserver%22", locationHeader)
self.maxDiff = None
self.assertEqual(
unquote(locationHeader),
- 'http://127.0.0.1:8000#__posthog={"action": "ph_authorize", "token": "token123", "temporaryToken": "tokenvalue", "actionId": null, "experimentId": "12", "userIntent": "edit-experiment", "toolbarVersion": "toolbar", "apiURL": "http://testserver", "dataAttributes": ["data-attr"], "jsURL": "http://localhost:8234"}',
+ 'http://127.0.0.1:8000#__posthog={"action": "ph_authorize", "token": "token123", "temporaryToken": "tokenvalue", "actionId": null, "experimentId": "12", "userIntent": "edit-experiment", "toolbarVersion": "toolbar", "apiURL": "http://testserver", "dataAttributes": ["data-attr"]}',
)
@patch("posthog.api.user.secrets.token_urlsafe")
diff --git a/posthog/api/user.py b/posthog/api/user.py
index 92cfa08e01895..451c0b3470e01 100644
--- a/posthog/api/user.py
+++ b/posthog/api/user.py
@@ -63,7 +63,6 @@
from posthog.tasks import user_identify
from posthog.tasks.email import send_email_change_emails
from posthog.user_permissions import UserPermissions
-from posthog.utils import get_js_url
REDIRECT_TO_SITE_COUNTER = Counter("posthog_redirect_to_site", "Redirect to site")
REDIRECT_TO_SITE_FAILED_COUNTER = Counter("posthog_redirect_to_site_failed", "Redirect to site failed")
@@ -518,9 +517,6 @@ def redirect_to_site(request):
"dataAttributes": team.data_attributes,
}
- if get_js_url(request):
- params["jsURL"] = get_js_url(request)
-
if not settings.TEST and not os.environ.get("OPT_OUT_CAPTURE"):
params["instrument"] = True
params["userEmail"] = request.user.email
diff --git a/posthog/cdp/templates/google_ads/template_google_ads.py b/posthog/cdp/templates/google_ads/template_google_ads.py
index ff577988ce025..3743ca93db541 100644
--- a/posthog/cdp/templates/google_ads/template_google_ads.py
+++ b/posthog/cdp/templates/google_ads/template_google_ads.py
@@ -1,5 +1,7 @@
from posthog.cdp.templates.hog_function_template import HogFunctionTemplate
+# Based on https://developers.google.com/google-ads/api/reference/rpc/v17/ClickConversion
+
template: HogFunctionTemplate = HogFunctionTemplate(
status="alpha",
type="destination",
@@ -14,6 +16,25 @@
return
}
+let body := {
+ 'conversions': [
+ {
+ 'gclid': inputs.gclid,
+ 'conversion_action': f'customers/{replaceAll(inputs.customerId, '-', '')}/conversionActions/{replaceAll(inputs.conversionActionId, 'AW-', '')}',
+ 'conversion_date_time': inputs.conversionDateTime
+ }
+ ],
+ 'partialFailure': true,
+ 'validateOnly': true
+}
+
+if (not empty(inputs.conversionValue)) {
+ body.conversions[1].conversion_value := inputs.conversionValue
+}
+if (not empty(inputs.currencyCode)) {
+ body.conversions[1].currency_code := inputs.currencyCode
+}
+
let res := fetch(f'https://googleads.googleapis.com/v17/customers/{replaceAll(inputs.customerId, '-', '')}:uploadClickConversions', {
'method': 'POST',
'headers': {
@@ -21,23 +42,12 @@
'Content-Type': 'application/json',
'developer-token': inputs.developerToken
},
- 'body': {
- 'conversions': [
- {
- 'gclid': inputs.gclid,
- 'conversionAction': f'customers/{replaceAll(inputs.customerId, '-', '')}/conversionActions/{replaceAll(inputs.conversionActionId, 'AW-', '')}',
- 'conversionDateTime': inputs.conversionDateTime
- }
- ],
- 'partialFailure': true,
- 'validateOnly': true
- }
+ 'body': body
})
if (res.status >= 400) {
throw Error(f'Error from googleads.googleapis.com (status {res.status}): {res.body}')
}
-
""".strip(),
inputs_schema=[
{
@@ -90,6 +100,24 @@
"secret": False,
"required": True,
},
+ {
+ "key": "conversionValue",
+ "type": "string",
+ "label": "Conversion value",
+ "description": "The value of the conversion for the advertiser.",
+ "default": "",
+ "secret": False,
+ "required": False,
+ },
+ {
+ "key": "currencyCode",
+ "type": "string",
+ "label": "Currency code",
+ "description": "Currency associated with the conversion value. This is the ISO 4217 3-character currency code. For example: USD, EUR.",
+ "default": "",
+ "secret": False,
+ "required": False,
+ },
],
filters={
"events": [],
diff --git a/posthog/cdp/templates/google_ads/test_template_google_ads.py b/posthog/cdp/templates/google_ads/test_template_google_ads.py
index 7e40cb4fb9f20..0c5ef98abb143 100644
--- a/posthog/cdp/templates/google_ads/test_template_google_ads.py
+++ b/posthog/cdp/templates/google_ads/test_template_google_ads.py
@@ -18,6 +18,7 @@ def _inputs(self, **kwargs):
"conversionActionId": "AW-123456789",
"gclid": "89y4thuergnjkd34oihroh3uhg39uwhgt9",
"conversionDateTime": "2024-10-10 16:32:45+02:00",
+ "currencyCode": "USD",
}
inputs.update(kwargs)
return inputs
@@ -29,23 +30,24 @@ def test_function_works(self):
(
"https://googleads.googleapis.com/v17/customers/1231231234:uploadClickConversions",
{
+ "method": "POST",
+ "headers": {
+ "Authorization": "Bearer oauth-1234",
+ "Content-Type": "application/json",
+ "developer-token": "developer-token1234",
+ },
"body": {
"conversions": [
{
"gclid": "89y4thuergnjkd34oihroh3uhg39uwhgt9",
- "conversionAction": f"customers/1231231234/conversionActions/123456789",
- "conversionDateTime": "2024-10-10 16:32:45+02:00",
+ "conversion_action": f"customers/1231231234/conversionActions/123456789",
+ "conversion_date_time": "2024-10-10 16:32:45+02:00",
+ "currency_code": "USD",
}
],
"partialFailure": True,
"validateOnly": True,
},
- "method": "POST",
- "headers": {
- "Authorization": "Bearer oauth-1234",
- "Content-Type": "application/json",
- "developer-token": "developer-token1234",
- },
},
)
)
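Aside (not part of the patch): the Hog code above now builds the ClickConversion body before the fetch and only attaches the optional fields when they are provided; Hog arrays are 1-indexed, so conversions[1] is the first (and only) entry. Roughly the payload it produces with the test fixture values (the conversion_value of 12.5 is just an illustrative number):

body = {
    "conversions": [
        {
            "gclid": "89y4thuergnjkd34oihroh3uhg39uwhgt9",
            "conversion_action": "customers/1231231234/conversionActions/123456789",
            "conversion_date_time": "2024-10-10 16:32:45+02:00",
            "conversion_value": 12.5,  # only added when inputs.conversionValue is set
            "currency_code": "USD",    # only added when inputs.currencyCode is set
        }
    ],
    "partialFailure": True,
    "validateOnly": True,
}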
diff --git a/posthog/cdp/templates/hubspot/template_hubspot.py b/posthog/cdp/templates/hubspot/template_hubspot.py
index cf70dcd8cf3d6..19cb2bff1e37f 100644
--- a/posthog/cdp/templates/hubspot/template_hubspot.py
+++ b/posthog/cdp/templates/hubspot/template_hubspot.py
@@ -110,8 +110,8 @@
return
}
-if (not match(event.event, '^([a-z])([a-z0-9_-])+$')) {
- throw Error(f'Event name must start with a letter and can only contain lowercase letters, numbers, underscores, and hyphens. Not sending event: {event.event}')
+if (not match(inputs.eventName, '^([a-z])([a-z0-9_-])+$')) {
+ throw Error(f'Event name must start with a letter and can only contain lowercase letters, numbers, underscores, and hyphens. Not sending event: {inputs.eventName}')
return
}
@@ -139,7 +139,7 @@
}
}
-let eventSchema := fetch(f'https://api.hubapi.com/events/v3/event-definitions/{event.event}/?includeProperties=true', {
+let eventSchema := fetch(f'https://api.hubapi.com/events/v3/event-definitions/{inputs.eventName}/?includeProperties=true', {
'method': 'GET',
'headers': {
'Authorization': f'Bearer {inputs.oauth.access_token}',
@@ -213,9 +213,9 @@
if (eventSchema.status >= 400) {
let body := {
- 'label': event.event,
- 'name': event.event,
- 'description': f'{event.event} - (created by PostHog)',
+ 'label': inputs.eventName,
+ 'name': inputs.eventName,
+ 'description': f'{inputs.eventName} - (created by PostHog)',
'primaryObject': 'CONTACT',
'propertyDefinitions': []
}
@@ -252,7 +252,7 @@
if (not empty(missingProperties)) {
for (let i, obj in missingProperties) {
- let res := fetch(f'https://api.hubapi.com/events/v3/event-definitions/{event.event}/property', {
+ let res := fetch(f'https://api.hubapi.com/events/v3/event-definitions/{inputs.eventName}/property', {
'method': 'POST',
'headers': {
'Authorization': f'Bearer {inputs.oauth.access_token}',
@@ -299,6 +299,15 @@
"secret": False,
"required": True,
},
+ {
+ "key": "eventName",
+ "type": "string",
+ "label": "Event Name",
+ "description": "Hubspot only allows events that start with a letter and can only contain lowercase letters, numbers, underscores, and hyphens.",
+ "default": "{replaceAll(replaceAll(trim(lower(event.event)), '$', ''), ' ', '_')}",
+ "secret": False,
+ "required": True,
+ },
{
"key": "email",
"type": "string",
diff --git a/posthog/cdp/templates/hubspot/test_template_hubspot.py b/posthog/cdp/templates/hubspot/test_template_hubspot.py
index c1192a89813fb..d92005f0ce399 100644
--- a/posthog/cdp/templates/hubspot/test_template_hubspot.py
+++ b/posthog/cdp/templates/hubspot/test_template_hubspot.py
@@ -91,6 +91,7 @@ class TestTemplateHubspotEvent(BaseHogFunctionTemplateTest):
def _inputs(self, **kwargs):
inputs = {
"oauth": {"access_token": "TOKEN"},
+ "eventName": "purchase",
"email": "example@posthog.com",
"include_all_properties": False,
"properties": {
@@ -126,9 +127,9 @@ def test_body_includes_all_properties_if_set(self):
self.mock_fetch_response = lambda *args: EVENT_DEFINITION_RESPONSE # type: ignore
self.run_function(
- inputs=self._inputs(include_all_properties=False),
+ inputs=self._inputs(include_all_properties=False, eventName="purchase"),
globals={
- "event": {"event": "purchase", "properties": {"product": "CDP"}},
+ "event": {"properties": {"product": "CDP"}},
},
)
@@ -158,10 +159,9 @@ def test_new_event_creation(self):
}
self.run_function(
- inputs=self._inputs(include_all_properties=True),
+ inputs=self._inputs(include_all_properties=True, eventName="sign_up"),
globals={
"event": {
- "event": "sign_up",
"properties": {"price": 50, "currency": "USD", "expressDelivery": True},
},
},
@@ -246,10 +246,9 @@ def test_new_property_creation(self):
}
self.run_function(
- inputs=self._inputs(include_all_properties=True),
+ inputs=self._inputs(include_all_properties=True, eventName="purchase"),
globals={
"event": {
- "event": "purchase",
"properties": {"price": 50, "currency": "USD", "expressDelivery": True, "location": "Planet Earth"},
},
},
@@ -333,10 +332,9 @@ def test_requires_correct_property_types(self):
}
with pytest.raises(UncaughtHogVMException) as e:
self.run_function(
- inputs=self._inputs(include_all_properties=True),
+ inputs=self._inputs(include_all_properties=True, eventName="purchase"),
globals={
"event": {
- "event": "purchase",
"properties": {"price": "50 coins"},
},
},
@@ -361,10 +359,9 @@ def test_allowed_event_names(self):
]:
if allowed:
self.run_function(
- inputs=self._inputs(),
+ inputs=self._inputs(eventName=event_name),
globals={
"event": {
- "event": event_name,
"properties": {"url": "https://example.com", "$browser": "Chrome"},
},
},
@@ -376,7 +373,7 @@ def test_allowed_event_names(self):
else:
with pytest.raises(UncaughtHogVMException) as e:
self.run_function(
- inputs=self._inputs(),
+ inputs=self._inputs(eventName=event_name),
globals={
"event": {
"event": event_name,
diff --git a/posthog/hogql/bytecode.py b/posthog/hogql/bytecode.py
index 27bdd54c6c295..70360b069d4d4 100644
--- a/posthog/hogql/bytecode.py
+++ b/posthog/hogql/bytecode.py
@@ -1,5 +1,6 @@
import dataclasses
from datetime import timedelta
+from enum import StrEnum
from typing import Any, Optional, cast, TYPE_CHECKING
from collections.abc import Callable
@@ -827,6 +828,45 @@ def visit_tuple(self, node: ast.Tuple):
response.append(len(node.exprs))
return response
+ def visit_hogqlx_tag(self, node: ast.HogQLXTag):
+ response = []
+ response.extend(self._visit_hogqlx_value("__hx_tag"))
+ response.extend(self._visit_hogqlx_value(node.kind))
+ for attribute in node.attributes:
+ response.extend(self._visit_hogqlx_value(attribute.name))
+ response.extend(self._visit_hogqlx_value(attribute.value))
+ response.append(Operation.DICT)
+ response.append(len(node.attributes) + 1)
+ return response
+
+ def _visit_hogqlx_value(self, value: Any) -> list[Any]:
+ if isinstance(value, AST):
+ return self.visit(value)
+ if isinstance(value, list):
+ elems = []
+ for v in value:
+ elems.extend(self._visit_hogqlx_value(v))
+ return [*elems, Operation.ARRAY, len(value)]
+ if isinstance(value, dict):
+ elems = []
+ for k, v in value.items():
+ elems.extend(self._visit_hogqlx_value(k))
+ elems.extend(self._visit_hogqlx_value(v))
+ return [*elems, Operation.DICT, len(value.items())]
+ if isinstance(value, StrEnum):
+ return [Operation.STRING, value.value]
+ # Booleans must be checked before ints: isinstance(True, int) is True in Python,
+ # so the TRUE/FALSE opcodes would otherwise never be emitted.
+ if value is True:
+ return [Operation.TRUE]
+ if value is False:
+ return [Operation.FALSE]
+ if isinstance(value, int):
+ return [Operation.INTEGER, value]
+ if isinstance(value, float):
+ return [Operation.FLOAT, value]
+ if isinstance(value, str):
+ return [Operation.STRING, value]
+ return [Operation.NULL]
+
def execute_hog(
source_code: str,
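Aside (not part of the patch): visit_hogqlx_tag lowers a tag to an ordinary dict on the VM stack: the implicit __hx_tag entry first, then one key/value pair per attribute, finished by DICT with len(attributes) + 1 entries. A hand-worked sketch of the sequence for a tag like <Sparkline data={[1, 2, 3]} />, with opcode names written as strings (the real visitor emits Operation members, and the array layout assumes the usual constant and array visitors):

expected_layout = [
    "STRING", "__hx_tag", "STRING", "Sparkline",          # implicit tag-name entry
    "STRING", "data",                                      # attribute key
    "INTEGER", 1, "INTEGER", 2, "INTEGER", 3, "ARRAY", 3,  # attribute value [1, 2, 3]
    "DICT", 2,                                             # len(attributes) + 1 entries
]
# Executing this leaves {"__hx_tag": "Sparkline", "data": [1, 2, 3]} on the stack,
# which is what the new test_bytecode_hogqlx below asserts.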
diff --git a/posthog/hogql/test/test_bytecode.py b/posthog/hogql/test/test_bytecode.py
index eb41205140366..860acb7cdec1f 100644
--- a/posthog/hogql/test/test_bytecode.py
+++ b/posthog/hogql/test/test_bytecode.py
@@ -263,3 +263,9 @@ def test_bytecode_in_repl(self):
create_bytecode(parse_program("let a:=1"), in_repl=True).bytecode,
[_H, HOGQL_BYTECODE_VERSION, op.INTEGER, 1],
)
+
+ def test_bytecode_hogqlx(self):
+ self.assertEqual(
+ execute_hog("<Sparkline data={[1, 2, 3]} />", team=self.team).result,
+ {"__hx_tag": "Sparkline", "data": [1, 2, 3]},
+ )
diff --git a/posthog/hogql_queries/insights/funnels/base.py b/posthog/hogql_queries/insights/funnels/base.py
index 87d7c2a4b0597..bc70b527e84f7 100644
--- a/posthog/hogql_queries/insights/funnels/base.py
+++ b/posthog/hogql_queries/insights/funnels/base.py
@@ -634,6 +634,7 @@ def _breakdown_other_subquery(self) -> ast.SelectQuery:
],
select_from=ast.JoinExpr(table=select_query),
group_by=[ast.Field(chain=["final_prop"])],
+ limit=ast.Constant(value=self.get_breakdown_limit() + 1),
)
def _get_steps_conditions(self, length: int) -> ast.Expr:
diff --git a/posthog/hogql_queries/insights/funnels/funnel_udf.py b/posthog/hogql_queries/insights/funnels/funnel_udf.py
index 3d55d89aa05ff..ac4fda03069d3 100644
--- a/posthog/hogql_queries/insights/funnels/funnel_udf.py
+++ b/posthog/hogql_queries/insights/funnels/funnel_udf.py
@@ -1,6 +1,7 @@
from typing import cast, Optional
from posthog.hogql import ast
+from posthog.hogql.constants import DEFAULT_RETURNED_ROWS
from posthog.hogql.parser import parse_select, parse_expr
from posthog.hogql_queries.insights.funnels.base import FunnelBase
from posthog.schema import BreakdownType, BreakdownAttributionType
@@ -144,7 +145,7 @@ def get_query(self) -> ast.SelectQuery:
SELECT
{step_results},
{conversion_time_arrays},
- rowNumberInBlock() as row_number,
+ rowNumberInAllBlocks() as row_number,
{final_prop} as final_prop
FROM
{{inner_select}}
@@ -179,6 +180,7 @@ def get_query(self) -> ast.SelectQuery:
FROM
{{s}}
GROUP BY final_prop
+ LIMIT {self.get_breakdown_limit() + 1 if use_breakdown_limit else DEFAULT_RETURNED_ROWS}
""",
{"s": s},
)
@@ -211,8 +213,8 @@ def _get_funnel_person_step_condition(self) -> ast.Expr:
raise ValueError("Missing both funnelStep and funnelCustomSteps")
if funnelStepBreakdown is not None:
- if isinstance(funnelStepBreakdown, int) and breakdownType != "cohort":
- funnelStepBreakdown = str(funnelStepBreakdown)
+ if isinstance(funnelStepBreakdown, int | float) and breakdownType != "cohort":
+ funnelStepBreakdown = str(int(funnelStepBreakdown))
conditions.append(
parse_expr(
diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr
index c16a172389dee..2315f2b51ebf6 100644
--- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr
+++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr
@@ -1085,14 +1085,14 @@
and isNull(max(max_steps))))
GROUP BY prop)
GROUP BY final_prop
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=23622320128,
- allow_experimental_analyzer=1
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=23622320128,
+ allow_experimental_analyzer=1
'''
# ---
# name: TestFunnelBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step
@@ -1189,14 +1189,14 @@
and isNull(max(max_steps))))
GROUP BY prop)
GROUP BY final_prop
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=23622320128,
- allow_experimental_analyzer=1
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=23622320128,
+ allow_experimental_analyzer=1
'''
# ---
# name: TestFunnelBreakdown.test_funnel_step_multiple_breakdown_snapshot
@@ -1286,14 +1286,14 @@
and isNull(max(max_steps))))
GROUP BY prop)
GROUP BY final_prop
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=23622320128,
- allow_experimental_analyzer=1
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=23622320128,
+ allow_experimental_analyzer=1
'''
# ---
# name: TestFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events
@@ -1424,14 +1424,14 @@
and isNull(max(max_steps))))
GROUP BY prop)
GROUP BY final_prop
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=23622320128,
- allow_experimental_analyzer=1
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=23622320128,
+ allow_experimental_analyzer=1
'''
# ---
# name: TestFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events_poe_v2
@@ -1562,14 +1562,14 @@
and isNull(max(max_steps))))
GROUP BY prop)
GROUP BY final_prop
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=23622320128,
- allow_experimental_analyzer=1
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=23622320128,
+ allow_experimental_analyzer=1
'''
# ---
# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group
@@ -1707,585 +1707,577 @@
and isNull(max(max_steps))))
GROUP BY prop)
GROUP BY final_prop
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=23622320128,
- allow_experimental_analyzer=1
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=23622320128,
+ allow_experimental_analyzer=1
'''
# ---
# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.1
'''
-
- SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value,
- count(*) as count
- FROM events e
- LEFT JOIN
- (SELECT group_key,
- argMax(group_properties, _timestamp) AS group_properties_0
- FROM groups
- WHERE team_id = 99999
- AND group_type_index = 0
- GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key
- WHERE team_id = 99999
- AND event IN ['buy', 'play movie', 'sign up']
- AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC')
- AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC')
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 26
- OFFSET 0
- '''
-# ---
-# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.2
- '''
-
- SELECT aggregation_target AS actor_id
+ SELECT persons.id,
+ persons.id AS id
FROM
- (SELECT aggregation_target,
- steps,
- avg(step_1_conversion_time) step_1_average_conversion_time_inner,
- avg(step_2_conversion_time) step_2_average_conversion_time_inner,
- median(step_1_conversion_time) step_1_median_conversion_time_inner,
- median(step_2_conversion_time) step_2_median_conversion_time_inner ,
- prop
+ (SELECT aggregation_target AS actor_id
FROM
- (SELECT aggregation_target,
- steps,
- max(steps) over (PARTITION BY aggregation_target,
- prop) as max_steps,
- step_1_conversion_time,
- step_2_conversion_time ,
- prop
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ prop AS prop,
+ prop AS prop,
+ min(step_1_conversion_time) AS step_1_conversion_time,
+ min(step_2_conversion_time) AS step_2_conversion_time
FROM
- (SELECT *,
- if(latest_0 <= latest_1
- AND latest_1 <= latest_0 + INTERVAL 7 DAY
- AND latest_1 <= latest_2
- AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1
- AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps ,
- if(isNotNull(latest_1)
- AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time,
- if(isNotNull(latest_2)
- AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time,
- prop
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ prop AS prop,
+ max(steps) OVER (PARTITION BY aggregation_target,
+ prop) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ step_2_conversion_time AS step_2_conversion_time,
+ prop AS prop
FROM
- (SELECT aggregation_target, timestamp, step_0,
- latest_0,
- step_1,
- latest_1,
- step_2,
- min(latest_2) over (PARTITION by aggregation_target,
- prop
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 ,
- prop
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ step_2 AS step_2,
+ latest_2 AS latest_2,
+ prop AS prop,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1)) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_1, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time,
+ prop AS prop
FROM
- (SELECT aggregation_target, timestamp, step_0,
- latest_0,
- step_1,
- latest_1,
- step_2,
- if(latest_2 < latest_1, NULL, latest_2) as latest_2 ,
- prop
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ step_2 AS step_2,
+ min(latest_2) OVER (PARTITION BY aggregation_target,
+ prop
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2,
+ prop AS prop
FROM
- (SELECT aggregation_target, timestamp, step_0,
- latest_0,
- step_1,
- min(latest_1) over (PARTITION by aggregation_target,
- prop
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1,
- step_2,
- min(latest_2) over (PARTITION by aggregation_target,
- prop
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 ,
- if(has(['technology', 'finance'], prop), prop, 'Other') as prop
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ step_2 AS step_2,
+ if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2,
+ prop AS prop
FROM
- (SELECT *,
- prop_vals as prop
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target,
+ prop
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ step_2 AS step_2,
+ min(latest_2) OVER (PARTITION BY aggregation_target,
+ prop
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2,
+ prop AS prop
FROM
- (SELECT e.timestamp as timestamp,
- if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target,
- if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id,
- if(event = 'sign up', 1, 0) as step_0,
- if(step_0 = 1, timestamp, null) as latest_0,
- if(event = 'play movie', 1, 0) as step_1,
- if(step_1 = 1, timestamp, null) as latest_1,
- if(event = 'buy', 1, 0) as step_2,
- if(step_2 = 1, timestamp, null) as latest_2,
- replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic,
- prop_basic as prop,
- argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals
- FROM events e
- LEFT OUTER JOIN
- (SELECT distinct_id,
- argMax(person_id, version) as person_id
- FROM person_distinct_id2
- WHERE team_id = 99999
- AND distinct_id IN
- (SELECT distinct_id
- FROM events
- WHERE team_id = 99999
- AND event IN ['buy', 'play movie', 'sign up']
- AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC')
- AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') )
- GROUP BY distinct_id
- HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id
- LEFT JOIN
- (SELECT group_key,
- argMax(group_properties, _timestamp) AS group_properties_0
- FROM groups
- WHERE team_id = 99999
- AND group_type_index = 0
- GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key
- WHERE team_id = 99999
- AND event IN ['buy', 'play movie', 'sign up']
- AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC')
- AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC')
- AND (step_0 = 1
- OR step_1 = 1
- OR step_2 = 1) )))))
- WHERE step_0 = 1 ))
- GROUP BY aggregation_target,
- steps,
- prop
- HAVING steps = max(max_steps))
- WHERE steps IN [1, 2, 3]
- AND arrayFlatten(array(prop)) = arrayFlatten(array('finance'))
- ORDER BY aggregation_target
- LIMIT 100
- OFFSET 0 SETTINGS max_ast_elements=1000000,
- max_expanded_ast_elements=1000000
- '''
-# ---
-# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.3
- '''
-
- SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value,
- count(*) as count
- FROM events e
- LEFT JOIN
- (SELECT group_key,
- argMax(group_properties, _timestamp) AS group_properties_0
- FROM groups
- WHERE team_id = 99999
- AND group_type_index = 0
- GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key
- WHERE team_id = 99999
- AND event IN ['buy', 'play movie', 'sign up']
- AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC')
- AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC')
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 26
- OFFSET 0
+ (SELECT timestamp AS timestamp,
+ aggregation_target AS aggregation_target,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ step_2 AS step_2,
+ latest_2 AS latest_2,
+ prop_basic AS prop_basic,
+ prop,
+ prop_vals AS prop_vals,
+ prop_vals AS prop
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target,
+ if(equals(e.event, 'sign up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'play movie'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(equals(e.event, 'buy'), 1, 0) AS step_2,
+ if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2,
+ ifNull(toString(e__group_0.properties___industry), '') AS prop_basic,
+ prop_basic AS prop,
+ argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals
+ FROM events AS e
+ LEFT OUTER JOIN
+ (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id,
+ person_distinct_id_overrides.distinct_id AS distinct_id
+ FROM person_distinct_id_overrides
+ WHERE equals(person_distinct_id_overrides.team_id, 99999)
+ GROUP BY person_distinct_id_overrides.distinct_id
+ HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id)
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 99999), equals(index, 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 99999), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps,
+ prop
+ HAVING ifNull(equals(steps, max(max_steps)), isNull(steps)
+ and isNull(max(max_steps))))
+ WHERE and(ifNull(in(steps, [1, 2, 3]), 0), ifNull(equals(arrayFlatten(array(prop)), arrayFlatten(array('finance'))), isNull(arrayFlatten(array(prop)))
+ and isNull(arrayFlatten(array('finance')))))
+ ORDER BY aggregation_target ASC) AS source
+ INNER JOIN
+ (SELECT person.id AS id
+ FROM person
+ WHERE equals(person.team_id, 99999)
+ GROUP BY person.id
+ HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id)
+ ORDER BY persons.id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=0,
+ allow_experimental_analyzer=1
'''
# ---
-# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.4
+# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.2
'''
-
- SELECT aggregation_target AS actor_id
+ SELECT persons.id,
+ persons.id AS id
FROM
- (SELECT aggregation_target,
- steps,
- avg(step_1_conversion_time) step_1_average_conversion_time_inner,
- avg(step_2_conversion_time) step_2_average_conversion_time_inner,
- median(step_1_conversion_time) step_1_median_conversion_time_inner,
- median(step_2_conversion_time) step_2_median_conversion_time_inner ,
- prop
+ (SELECT aggregation_target AS actor_id
FROM
- (SELECT aggregation_target,
- steps,
- max(steps) over (PARTITION BY aggregation_target,
- prop) as max_steps,
- step_1_conversion_time,
- step_2_conversion_time ,
- prop
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ prop AS prop,
+ prop AS prop,
+ min(step_1_conversion_time) AS step_1_conversion_time,
+ min(step_2_conversion_time) AS step_2_conversion_time
FROM
- (SELECT *,
- if(latest_0 <= latest_1
- AND latest_1 <= latest_0 + INTERVAL 7 DAY
- AND latest_1 <= latest_2
- AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1
- AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps ,
- if(isNotNull(latest_1)
- AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time,
- if(isNotNull(latest_2)
- AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time,
- prop
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ prop AS prop,
+ max(steps) OVER (PARTITION BY aggregation_target,
+ prop) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ step_2_conversion_time AS step_2_conversion_time,
+ prop AS prop
FROM
- (SELECT aggregation_target, timestamp, step_0,
- latest_0,
- step_1,
- latest_1,
- step_2,
- min(latest_2) over (PARTITION by aggregation_target,
- prop
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 ,
- prop
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ step_2 AS step_2,
+ latest_2 AS latest_2,
+ prop AS prop,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1)) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_1, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time,
+ prop AS prop
FROM
- (SELECT aggregation_target, timestamp, step_0,
- latest_0,
- step_1,
- latest_1,
- step_2,
- if(latest_2 < latest_1, NULL, latest_2) as latest_2 ,
- prop
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ step_2 AS step_2,
+ min(latest_2) OVER (PARTITION BY aggregation_target,
+ prop
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2,
+ prop AS prop
FROM
- (SELECT aggregation_target, timestamp, step_0,
- latest_0,
- step_1,
- min(latest_1) over (PARTITION by aggregation_target,
- prop
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1,
- step_2,
- min(latest_2) over (PARTITION by aggregation_target,
- prop
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 ,
- if(has(['technology', 'finance'], prop), prop, 'Other') as prop
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ step_2 AS step_2,
+ if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2,
+ prop AS prop
FROM
- (SELECT *,
- prop_vals as prop
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target,
+ prop
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ step_2 AS step_2,
+ min(latest_2) OVER (PARTITION BY aggregation_target,
+ prop
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2,
+ prop AS prop
FROM
- (SELECT e.timestamp as timestamp,
- if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target,
- if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id,
- if(event = 'sign up', 1, 0) as step_0,
- if(step_0 = 1, timestamp, null) as latest_0,
- if(event = 'play movie', 1, 0) as step_1,
- if(step_1 = 1, timestamp, null) as latest_1,
- if(event = 'buy', 1, 0) as step_2,
- if(step_2 = 1, timestamp, null) as latest_2,
- replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic,
- prop_basic as prop,
- argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals
- FROM events e
- LEFT OUTER JOIN
- (SELECT distinct_id,
- argMax(person_id, version) as person_id
- FROM person_distinct_id2
- WHERE team_id = 99999
- AND distinct_id IN
- (SELECT distinct_id
- FROM events
- WHERE team_id = 99999
- AND event IN ['buy', 'play movie', 'sign up']
- AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC')
- AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') )
- GROUP BY distinct_id
- HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id
- LEFT JOIN
- (SELECT group_key,
- argMax(group_properties, _timestamp) AS group_properties_0
- FROM groups
- WHERE team_id = 99999
- AND group_type_index = 0
- GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key
- WHERE team_id = 99999
- AND event IN ['buy', 'play movie', 'sign up']
- AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC')
- AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC')
- AND (step_0 = 1
- OR step_1 = 1
- OR step_2 = 1) )))))
- WHERE step_0 = 1 ))
- GROUP BY aggregation_target,
- steps,
- prop
- HAVING steps = max(max_steps))
- WHERE steps IN [2, 3]
- AND arrayFlatten(array(prop)) = arrayFlatten(array('finance'))
- ORDER BY aggregation_target
- LIMIT 100
- OFFSET 0 SETTINGS max_ast_elements=1000000,
- max_expanded_ast_elements=1000000
- '''
-# ---
-# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.5
- '''
-
- SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value,
- count(*) as count
- FROM events e
- LEFT JOIN
- (SELECT group_key,
- argMax(group_properties, _timestamp) AS group_properties_0
- FROM groups
- WHERE team_id = 99999
- AND group_type_index = 0
- GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key
- WHERE team_id = 99999
- AND event IN ['buy', 'play movie', 'sign up']
- AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC')
- AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC')
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 26
- OFFSET 0
+ (SELECT timestamp AS timestamp,
+ aggregation_target AS aggregation_target,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ step_2 AS step_2,
+ latest_2 AS latest_2,
+ prop_basic AS prop_basic,
+ prop,
+ prop_vals AS prop_vals,
+ prop_vals AS prop
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target,
+ if(equals(e.event, 'sign up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'play movie'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(equals(e.event, 'buy'), 1, 0) AS step_2,
+ if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2,
+ ifNull(toString(e__group_0.properties___industry), '') AS prop_basic,
+ prop_basic AS prop,
+ argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals
+ FROM events AS e
+ LEFT OUTER JOIN
+ (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id,
+ person_distinct_id_overrides.distinct_id AS distinct_id
+ FROM person_distinct_id_overrides
+ WHERE equals(person_distinct_id_overrides.team_id, 99999)
+ GROUP BY person_distinct_id_overrides.distinct_id
+ HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id)
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 99999), equals(index, 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 99999), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps,
+ prop
+ HAVING ifNull(equals(steps, max(max_steps)), isNull(steps)
+ and isNull(max(max_steps))))
+ WHERE and(ifNull(in(steps, [2, 3]), 0), ifNull(equals(arrayFlatten(array(prop)), arrayFlatten(array('finance'))), isNull(arrayFlatten(array(prop)))
+ and isNull(arrayFlatten(array('finance')))))
+ ORDER BY aggregation_target ASC) AS source
+ INNER JOIN
+ (SELECT person.id AS id
+ FROM person
+ WHERE equals(person.team_id, 99999)
+ GROUP BY person.id
+ HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id)
+ ORDER BY persons.id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=0,
+ allow_experimental_analyzer=1
'''
# ---
-# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.6
+# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.3
'''
-
- SELECT aggregation_target AS actor_id
+ SELECT persons.id,
+ persons.id AS id
FROM
- (SELECT aggregation_target,
- steps,
- avg(step_1_conversion_time) step_1_average_conversion_time_inner,
- avg(step_2_conversion_time) step_2_average_conversion_time_inner,
- median(step_1_conversion_time) step_1_median_conversion_time_inner,
- median(step_2_conversion_time) step_2_median_conversion_time_inner ,
- prop
+ (SELECT aggregation_target AS actor_id
FROM
- (SELECT aggregation_target,
- steps,
- max(steps) over (PARTITION BY aggregation_target,
- prop) as max_steps,
- step_1_conversion_time,
- step_2_conversion_time ,
- prop
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ prop AS prop,
+ prop AS prop,
+ min(step_1_conversion_time) AS step_1_conversion_time,
+ min(step_2_conversion_time) AS step_2_conversion_time
FROM
- (SELECT *,
- if(latest_0 <= latest_1
- AND latest_1 <= latest_0 + INTERVAL 7 DAY
- AND latest_1 <= latest_2
- AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1
- AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps ,
- if(isNotNull(latest_1)
- AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time,
- if(isNotNull(latest_2)
- AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time,
- prop
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ prop AS prop,
+ max(steps) OVER (PARTITION BY aggregation_target,
+ prop) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ step_2_conversion_time AS step_2_conversion_time,
+ prop AS prop
FROM
- (SELECT aggregation_target, timestamp, step_0,
- latest_0,
- step_1,
- latest_1,
- step_2,
- min(latest_2) over (PARTITION by aggregation_target,
- prop
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 ,
- prop
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ step_2 AS step_2,
+ latest_2 AS latest_2,
+ prop AS prop,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1)) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_1, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time,
+ prop AS prop
FROM
- (SELECT aggregation_target, timestamp, step_0,
- latest_0,
- step_1,
- latest_1,
- step_2,
- if(latest_2 < latest_1, NULL, latest_2) as latest_2 ,
- prop
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ step_2 AS step_2,
+ min(latest_2) OVER (PARTITION BY aggregation_target,
+ prop
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2,
+ prop AS prop
FROM
- (SELECT aggregation_target, timestamp, step_0,
- latest_0,
- step_1,
- min(latest_1) over (PARTITION by aggregation_target,
- prop
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1,
- step_2,
- min(latest_2) over (PARTITION by aggregation_target,
- prop
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 ,
- if(has(['technology', 'finance'], prop), prop, 'Other') as prop
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ step_2 AS step_2,
+ if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2,
+ prop AS prop
FROM
- (SELECT *,
- prop_vals as prop
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target,
+ prop
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ step_2 AS step_2,
+ min(latest_2) OVER (PARTITION BY aggregation_target,
+ prop
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2,
+ prop AS prop
FROM
- (SELECT e.timestamp as timestamp,
- if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target,
- if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id,
- if(event = 'sign up', 1, 0) as step_0,
- if(step_0 = 1, timestamp, null) as latest_0,
- if(event = 'play movie', 1, 0) as step_1,
- if(step_1 = 1, timestamp, null) as latest_1,
- if(event = 'buy', 1, 0) as step_2,
- if(step_2 = 1, timestamp, null) as latest_2,
- replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic,
- prop_basic as prop,
- argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals
- FROM events e
- LEFT OUTER JOIN
- (SELECT distinct_id,
- argMax(person_id, version) as person_id
- FROM person_distinct_id2
- WHERE team_id = 99999
- AND distinct_id IN
- (SELECT distinct_id
- FROM events
- WHERE team_id = 99999
- AND event IN ['buy', 'play movie', 'sign up']
- AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC')
- AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') )
- GROUP BY distinct_id
- HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id
- LEFT JOIN
- (SELECT group_key,
- argMax(group_properties, _timestamp) AS group_properties_0
- FROM groups
- WHERE team_id = 99999
- AND group_type_index = 0
- GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key
- WHERE team_id = 99999
- AND event IN ['buy', 'play movie', 'sign up']
- AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC')
- AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC')
- AND (step_0 = 1
- OR step_1 = 1
- OR step_2 = 1) )))))
- WHERE step_0 = 1 ))
- GROUP BY aggregation_target,
- steps,
- prop
- HAVING steps = max(max_steps))
- WHERE steps IN [1, 2, 3]
- AND arrayFlatten(array(prop)) = arrayFlatten(array('technology'))
- ORDER BY aggregation_target
- LIMIT 100
- OFFSET 0 SETTINGS max_ast_elements=1000000,
- max_expanded_ast_elements=1000000
- '''
-# ---
-# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.7
- '''
-
- SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value,
- count(*) as count
- FROM events e
- LEFT JOIN
- (SELECT group_key,
- argMax(group_properties, _timestamp) AS group_properties_0
- FROM groups
- WHERE team_id = 99999
- AND group_type_index = 0
- GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key
- WHERE team_id = 99999
- AND event IN ['buy', 'play movie', 'sign up']
- AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC')
- AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC')
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 26
- OFFSET 0
+ (SELECT timestamp AS timestamp,
+ aggregation_target AS aggregation_target,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ step_2 AS step_2,
+ latest_2 AS latest_2,
+ prop_basic AS prop_basic,
+ prop,
+ prop_vals AS prop_vals,
+ prop_vals AS prop
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target,
+ if(equals(e.event, 'sign up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'play movie'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(equals(e.event, 'buy'), 1, 0) AS step_2,
+ if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2,
+ ifNull(toString(e__group_0.properties___industry), '') AS prop_basic,
+ prop_basic AS prop,
+ argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals
+ FROM events AS e
+ LEFT OUTER JOIN
+ (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id,
+ person_distinct_id_overrides.distinct_id AS distinct_id
+ FROM person_distinct_id_overrides
+ WHERE equals(person_distinct_id_overrides.team_id, 99999)
+ GROUP BY person_distinct_id_overrides.distinct_id
+ HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id)
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 99999), equals(index, 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 99999), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps,
+ prop
+ HAVING ifNull(equals(steps, max(max_steps)), isNull(steps)
+ and isNull(max(max_steps))))
+ WHERE and(ifNull(in(steps, [1, 2, 3]), 0), ifNull(equals(arrayFlatten(array(prop)), arrayFlatten(array('technology'))), isNull(arrayFlatten(array(prop)))
+ and isNull(arrayFlatten(array('technology')))))
+ ORDER BY aggregation_target ASC) AS source
+ INNER JOIN
+ (SELECT person.id AS id
+ FROM person
+ WHERE equals(person.team_id, 99999)
+ GROUP BY person.id
+ HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id)
+ ORDER BY persons.id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=0,
+ allow_experimental_analyzer=1
'''
# ---
-# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.8
+# name: TestFunnelGroupBreakdown.test_funnel_breakdown_group.4
'''
-
- SELECT aggregation_target AS actor_id
+ SELECT persons.id,
+ persons.id AS id
FROM
- (SELECT aggregation_target,
- steps,
- avg(step_1_conversion_time) step_1_average_conversion_time_inner,
- avg(step_2_conversion_time) step_2_average_conversion_time_inner,
- median(step_1_conversion_time) step_1_median_conversion_time_inner,
- median(step_2_conversion_time) step_2_median_conversion_time_inner ,
- prop
+ (SELECT aggregation_target AS actor_id
FROM
- (SELECT aggregation_target,
- steps,
- max(steps) over (PARTITION BY aggregation_target,
- prop) as max_steps,
- step_1_conversion_time,
- step_2_conversion_time ,
- prop
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ prop AS prop,
+ prop AS prop,
+ min(step_1_conversion_time) AS step_1_conversion_time,
+ min(step_2_conversion_time) AS step_2_conversion_time
FROM
- (SELECT *,
- if(latest_0 <= latest_1
- AND latest_1 <= latest_0 + INTERVAL 7 DAY
- AND latest_1 <= latest_2
- AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1
- AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps ,
- if(isNotNull(latest_1)
- AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time,
- if(isNotNull(latest_2)
- AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time,
- prop
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ prop AS prop,
+ max(steps) OVER (PARTITION BY aggregation_target,
+ prop) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ step_2_conversion_time AS step_2_conversion_time,
+ prop AS prop
FROM
- (SELECT aggregation_target, timestamp, step_0,
- latest_0,
- step_1,
- latest_1,
- step_2,
- min(latest_2) over (PARTITION by aggregation_target,
- prop
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 ,
- prop
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ step_2 AS step_2,
+ latest_2 AS latest_2,
+ prop AS prop,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1)) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_1, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time,
+ prop AS prop
FROM
- (SELECT aggregation_target, timestamp, step_0,
- latest_0,
- step_1,
- latest_1,
- step_2,
- if(latest_2 < latest_1, NULL, latest_2) as latest_2 ,
- prop
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ step_2 AS step_2,
+ min(latest_2) OVER (PARTITION BY aggregation_target,
+ prop
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2,
+ prop AS prop
FROM
- (SELECT aggregation_target, timestamp, step_0,
- latest_0,
- step_1,
- min(latest_1) over (PARTITION by aggregation_target,
- prop
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1,
- step_2,
- min(latest_2) over (PARTITION by aggregation_target,
- prop
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 ,
- if(has(['technology', 'finance'], prop), prop, 'Other') as prop
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ step_2 AS step_2,
+ if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2,
+ prop AS prop
FROM
- (SELECT *,
- prop_vals as prop
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target,
+ prop
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ step_2 AS step_2,
+ min(latest_2) OVER (PARTITION BY aggregation_target,
+ prop
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2,
+ prop AS prop
FROM
- (SELECT e.timestamp as timestamp,
- if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target,
- if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id,
- if(event = 'sign up', 1, 0) as step_0,
- if(step_0 = 1, timestamp, null) as latest_0,
- if(event = 'play movie', 1, 0) as step_1,
- if(step_1 = 1, timestamp, null) as latest_1,
- if(event = 'buy', 1, 0) as step_2,
- if(step_2 = 1, timestamp, null) as latest_2,
- replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic,
- prop_basic as prop,
- argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals
- FROM events e
- LEFT OUTER JOIN
- (SELECT distinct_id,
- argMax(person_id, version) as person_id
- FROM person_distinct_id2
- WHERE team_id = 99999
- AND distinct_id IN
- (SELECT distinct_id
- FROM events
- WHERE team_id = 99999
- AND event IN ['buy', 'play movie', 'sign up']
- AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC')
- AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') )
- GROUP BY distinct_id
- HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id
- LEFT JOIN
- (SELECT group_key,
- argMax(group_properties, _timestamp) AS group_properties_0
- FROM groups
- WHERE team_id = 99999
- AND group_type_index = 0
- GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key
- WHERE team_id = 99999
- AND event IN ['buy', 'play movie', 'sign up']
- AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC')
- AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC')
- AND (step_0 = 1
- OR step_1 = 1
- OR step_2 = 1) )))))
- WHERE step_0 = 1 ))
- GROUP BY aggregation_target,
- steps,
- prop
- HAVING steps = max(max_steps))
- WHERE steps IN [2, 3]
- AND arrayFlatten(array(prop)) = arrayFlatten(array('technology'))
- ORDER BY aggregation_target
- LIMIT 100
- OFFSET 0 SETTINGS max_ast_elements=1000000,
- max_expanded_ast_elements=1000000
+ (SELECT timestamp AS timestamp,
+ aggregation_target AS aggregation_target,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ step_2 AS step_2,
+ latest_2 AS latest_2,
+ prop_basic AS prop_basic,
+ prop,
+ prop_vals AS prop_vals,
+ prop_vals AS prop
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target,
+ if(equals(e.event, 'sign up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'play movie'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(equals(e.event, 'buy'), 1, 0) AS step_2,
+ if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2,
+ ifNull(toString(e__group_0.properties___industry), '') AS prop_basic,
+ prop_basic AS prop,
+ argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals
+ FROM events AS e
+ LEFT OUTER JOIN
+ (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id,
+ person_distinct_id_overrides.distinct_id AS distinct_id
+ FROM person_distinct_id_overrides
+ WHERE equals(person_distinct_id_overrides.team_id, 99999)
+ GROUP BY person_distinct_id_overrides.distinct_id
+ HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id)
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 99999), equals(index, 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 99999), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps,
+ prop
+ HAVING ifNull(equals(steps, max(max_steps)), isNull(steps)
+ and isNull(max(max_steps))))
+ WHERE and(ifNull(in(steps, [2, 3]), 0), ifNull(equals(arrayFlatten(array(prop)), arrayFlatten(array('technology'))), isNull(arrayFlatten(array(prop)))
+ and isNull(arrayFlatten(array('technology')))))
+ ORDER BY aggregation_target ASC) AS source
+ INNER JOIN
+ (SELECT person.id AS id
+ FROM person
+ WHERE equals(person.team_id, 99999)
+ GROUP BY person.id
+ HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id)
+ ORDER BY persons.id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=0,
+ allow_experimental_analyzer=1
'''
# ---
diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_breakdowns_by_current_url.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_breakdowns_by_current_url.ambr
index 5bb342e37abed..e01e48e4c1f7f 100644
--- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_breakdowns_by_current_url.ambr
+++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_breakdowns_by_current_url.ambr
@@ -86,7 +86,7 @@
and isNull(max(max_steps))))
GROUP BY prop)
GROUP BY final_prop
- LIMIT 100 SETTINGS readonly=2,
+ LIMIT 101 SETTINGS readonly=2,
max_execution_time=60,
allow_experimental_object_type=1,
format_csv_allow_double_quotes=0,
@@ -183,7 +183,7 @@
and isNull(max(max_steps))))
GROUP BY prop)
GROUP BY final_prop
- LIMIT 100 SETTINGS readonly=2,
+ LIMIT 101 SETTINGS readonly=2,
max_execution_time=60,
allow_experimental_object_type=1,
format_csv_allow_double_quotes=0,
diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation_actors.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation_actors.ambr
index 044feee2b0a90..ca6d26d135828 100644
--- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation_actors.ambr
+++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation_actors.ambr
@@ -642,385 +642,3 @@
max_bytes_before_external_group_by=0
'''
# ---
-# name: TestFunnelCorrelationActors.test_strict_funnel_correlation_with_recordings
- '''
- SELECT persons.id,
- persons.id AS id,
- source.matching_events AS matching_events
- FROM
- (SELECT funnel_actors.actor_id AS actor_id,
- any(funnel_actors.matching_events) AS matching_events
- FROM
- (SELECT aggregation_target AS actor_id,
- final_matching_events AS matching_events,
- timestamp AS timestamp,
- steps AS steps,
- final_timestamp AS final_timestamp,
- first_timestamp AS first_timestamp
- FROM
- (SELECT groupArray(10)(step_0_matching_event) AS step_0_matching_events,
- groupArray(10)(step_1_matching_event) AS step_1_matching_events,
- groupArray(10)(final_matching_event) AS final_matching_events,
- aggregation_target AS aggregation_target,
- steps AS steps,
- argMax(latest_0, steps) AS timestamp,
- argMax(latest_1, steps) AS final_timestamp,
- argMax(latest_0, steps) AS first_timestamp,
- avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
- median(step_1_conversion_time) AS step_1_median_conversion_time_inner
- FROM
- (SELECT tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
- tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
- if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
- aggregation_target AS aggregation_target,
- steps AS steps,
- max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
- step_1_conversion_time AS step_1_conversion_time,
- latest_0 AS latest_0,
- latest_1 AS latest_1,
- latest_0 AS latest_0
- FROM
- (SELECT aggregation_target AS aggregation_target,
- timestamp AS timestamp,
- step_0 AS step_0,
- latest_0 AS latest_0,
- uuid_0 AS uuid_0,
- `$session_id_0` AS `$session_id_0`,
- `$window_id_0` AS `$window_id_0`,
- step_1 AS step_1,
- latest_1 AS latest_1,
- uuid_1 AS uuid_1,
- `$session_id_1` AS `$session_id_1`,
- `$window_id_1` AS `$window_id_1`,
- if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1) AS steps,
- if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
- tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
- tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
- if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
- FROM
- (SELECT aggregation_target AS aggregation_target,
- timestamp AS timestamp,
- step_0 AS step_0,
- latest_0 AS latest_0,
- uuid_0 AS uuid_0,
- `$session_id_0` AS `$session_id_0`,
- `$window_id_0` AS `$window_id_0`,
- step_1 AS step_1,
- min(latest_1) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS latest_1,
- min(uuid_1) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS uuid_1,
- min(`$session_id_1`) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS `$session_id_1`,
- min(`$window_id_1`) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS `$window_id_1`
- FROM
- (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
- if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target,
- e.uuid AS uuid,
- if(equals(e.event, '$pageview'), 1, 0) AS step_0,
- if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
- if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
- if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
- if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
- if(equals(e.event, 'insight analyzed'), 1, 0) AS step_1,
- if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
- if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
- if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
- if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
- FROM events AS e
- LEFT OUTER JOIN
- (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id,
- person_distinct_id_overrides.distinct_id AS distinct_id
- FROM person_distinct_id_overrides
- WHERE equals(person_distinct_id_overrides.team_id, 2)
- GROUP BY person_distinct_id_overrides.distinct_id
- HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id)
- LEFT JOIN
- (SELECT person.id AS id,
- replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'foo'), ''), 'null'), '^"|"$', '') AS properties___foo
- FROM person
- WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version),
- (SELECT person.id AS id, max(person.version) AS version
- FROM person
- WHERE equals(person.team_id, 2)
- GROUP BY person.id
- HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__person ON equals(if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id), e__person.id)
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), ifNull(equals(e__person.properties___foo, 'bar'), 0))))
- WHERE ifNull(equals(step_0, 1), 0)))
- GROUP BY aggregation_target,
- steps
- HAVING ifNull(equals(steps, max(max_steps)), isNull(steps)
- and isNull(max(max_steps))))
- WHERE ifNull(in(steps, [1, 2]), 0)
- ORDER BY aggregation_target ASC) AS funnel_actors
- WHERE ifNull(equals(funnel_actors.steps, 2), 0)
- GROUP BY funnel_actors.actor_id
- ORDER BY funnel_actors.actor_id ASC) AS source
- INNER JOIN
- (SELECT person.id AS id
- FROM person
- WHERE and(equals(person.team_id, 2), in(id,
- (SELECT source.actor_id AS actor_id
- FROM
- (SELECT funnel_actors.actor_id AS actor_id, any(funnel_actors.matching_events) AS matching_events
- FROM
- (SELECT aggregation_target AS actor_id, final_matching_events AS matching_events, timestamp AS timestamp, steps AS steps, final_timestamp AS final_timestamp, first_timestamp AS first_timestamp
- FROM
- (SELECT groupArray(10)(step_0_matching_event) AS step_0_matching_events, groupArray(10)(step_1_matching_event) AS step_1_matching_events, groupArray(10)(final_matching_event) AS final_matching_events, aggregation_target AS aggregation_target, steps AS steps, argMax(latest_0, steps) AS timestamp, argMax(latest_1, steps) AS final_timestamp, argMax(latest_0, steps) AS first_timestamp, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner
- FROM
- (SELECT tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, aggregation_target AS aggregation_target, steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, latest_0 AS latest_0, latest_1 AS latest_1, latest_0 AS latest_0
- FROM
- (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
- FROM
- (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS latest_1, min(uuid_1) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS uuid_1, min(`$session_id_1`) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS `$session_id_1`, min(`$window_id_1`) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS `$window_id_1`
- FROM
- (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, e.uuid AS uuid, if(equals(e.event, '$pageview'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'insight analyzed'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
- FROM events AS e
- LEFT OUTER JOIN
- (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, person_distinct_id_overrides.distinct_id AS distinct_id
- FROM person_distinct_id_overrides
- WHERE equals(person_distinct_id_overrides.team_id, 2)
- GROUP BY person_distinct_id_overrides.distinct_id
- HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id)
- LEFT JOIN
- (SELECT person.id AS id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'foo'), ''), 'null'), '^"|"$', '') AS properties___foo
- FROM person
- WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version),
- (SELECT person.id AS id, max(person.version) AS version
- FROM person
- WHERE equals(person.team_id, 2)
- GROUP BY person.id
- HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__person ON equals(if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id), e__person.id)
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), ifNull(equals(e__person.properties___foo, 'bar'), 0))))
- WHERE ifNull(equals(step_0, 1), 0)))
- GROUP BY aggregation_target, steps
- HAVING ifNull(equals(steps, max(max_steps)), isNull(steps)
- and isNull(max(max_steps))))
- WHERE ifNull(in(steps, [1, 2]), 0)
- ORDER BY aggregation_target ASC) AS funnel_actors
- WHERE ifNull(equals(funnel_actors.steps, 2), 0)
- GROUP BY funnel_actors.actor_id
- ORDER BY funnel_actors.actor_id ASC) AS source)))
- GROUP BY person.id
- HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id)
- ORDER BY persons.id ASC
- LIMIT 101
- OFFSET 0 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=0
- '''
-# ---
-# name: TestFunnelCorrelationActors.test_strict_funnel_correlation_with_recordings.1
- '''
- SELECT DISTINCT session_replay_events.session_id AS session_id
- FROM session_replay_events
- WHERE and(equals(session_replay_events.team_id, 2), ifNull(greaterOrEquals(toTimeZone(session_replay_events.min_first_timestamp, 'UTC'), minus(toDateTime64('2021-01-02 00:00:00.000000', 6, 'UTC'), toIntervalDay(21))), 0), in(session_replay_events.session_id, ['s2']))
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=0
- '''
-# ---
-# name: TestFunnelCorrelationActors.test_strict_funnel_correlation_with_recordings.2
- '''
- SELECT persons.id,
- persons.id AS id,
- source.matching_events AS matching_events
- FROM
- (SELECT funnel_actors.actor_id AS actor_id,
- any(funnel_actors.matching_events) AS matching_events
- FROM
- (SELECT aggregation_target AS actor_id,
- final_matching_events AS matching_events,
- timestamp AS timestamp,
- steps AS steps,
- final_timestamp AS final_timestamp,
- first_timestamp AS first_timestamp
- FROM
- (SELECT groupArray(10)(step_0_matching_event) AS step_0_matching_events,
- groupArray(10)(step_1_matching_event) AS step_1_matching_events,
- groupArray(10)(final_matching_event) AS final_matching_events,
- aggregation_target AS aggregation_target,
- steps AS steps,
- argMax(latest_0, steps) AS timestamp,
- argMax(latest_1, steps) AS final_timestamp,
- argMax(latest_0, steps) AS first_timestamp,
- avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
- median(step_1_conversion_time) AS step_1_median_conversion_time_inner
- FROM
- (SELECT tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
- tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
- if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
- aggregation_target AS aggregation_target,
- steps AS steps,
- max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
- step_1_conversion_time AS step_1_conversion_time,
- latest_0 AS latest_0,
- latest_1 AS latest_1,
- latest_0 AS latest_0
- FROM
- (SELECT aggregation_target AS aggregation_target,
- timestamp AS timestamp,
- step_0 AS step_0,
- latest_0 AS latest_0,
- uuid_0 AS uuid_0,
- `$session_id_0` AS `$session_id_0`,
- `$window_id_0` AS `$window_id_0`,
- step_1 AS step_1,
- latest_1 AS latest_1,
- uuid_1 AS uuid_1,
- `$session_id_1` AS `$session_id_1`,
- `$window_id_1` AS `$window_id_1`,
- if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1) AS steps,
- if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
- tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
- tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
- if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
- FROM
- (SELECT aggregation_target AS aggregation_target,
- timestamp AS timestamp,
- step_0 AS step_0,
- latest_0 AS latest_0,
- uuid_0 AS uuid_0,
- `$session_id_0` AS `$session_id_0`,
- `$window_id_0` AS `$window_id_0`,
- step_1 AS step_1,
- min(latest_1) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS latest_1,
- min(uuid_1) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS uuid_1,
- min(`$session_id_1`) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS `$session_id_1`,
- min(`$window_id_1`) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS `$window_id_1`
- FROM
- (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
- if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target,
- e.uuid AS uuid,
- if(equals(e.event, '$pageview'), 1, 0) AS step_0,
- if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
- if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
- if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
- if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
- if(equals(e.event, 'insight analyzed'), 1, 0) AS step_1,
- if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
- if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
- if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
- if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
- FROM events AS e
- LEFT OUTER JOIN
- (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id,
- person_distinct_id_overrides.distinct_id AS distinct_id
- FROM person_distinct_id_overrides
- WHERE equals(person_distinct_id_overrides.team_id, 2)
- GROUP BY person_distinct_id_overrides.distinct_id
- HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id)
- LEFT JOIN
- (SELECT person.id AS id,
- replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'foo'), ''), 'null'), '^"|"$', '') AS properties___foo
- FROM person
- WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version),
- (SELECT person.id AS id, max(person.version) AS version
- FROM person
- WHERE equals(person.team_id, 2)
- GROUP BY person.id
- HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__person ON equals(if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id), e__person.id)
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), ifNull(equals(e__person.properties___foo, 'bar'), 0))))
- WHERE ifNull(equals(step_0, 1), 0)))
- GROUP BY aggregation_target,
- steps
- HAVING ifNull(equals(steps, max(max_steps)), isNull(steps)
- and isNull(max(max_steps))))
- WHERE ifNull(in(steps, [1, 2]), 0)
- ORDER BY aggregation_target ASC) AS funnel_actors
- WHERE ifNull(notEquals(funnel_actors.steps, 2), 1)
- GROUP BY funnel_actors.actor_id
- ORDER BY funnel_actors.actor_id ASC) AS source
- INNER JOIN
- (SELECT person.id AS id
- FROM person
- WHERE and(equals(person.team_id, 2), in(id,
- (SELECT source.actor_id AS actor_id
- FROM
- (SELECT funnel_actors.actor_id AS actor_id, any(funnel_actors.matching_events) AS matching_events
- FROM
- (SELECT aggregation_target AS actor_id, final_matching_events AS matching_events, timestamp AS timestamp, steps AS steps, final_timestamp AS final_timestamp, first_timestamp AS first_timestamp
- FROM
- (SELECT groupArray(10)(step_0_matching_event) AS step_0_matching_events, groupArray(10)(step_1_matching_event) AS step_1_matching_events, groupArray(10)(final_matching_event) AS final_matching_events, aggregation_target AS aggregation_target, steps AS steps, argMax(latest_0, steps) AS timestamp, argMax(latest_1, steps) AS final_timestamp, argMax(latest_0, steps) AS first_timestamp, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner
- FROM
- (SELECT tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event, aggregation_target AS aggregation_target, steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time, latest_0 AS latest_0, latest_1 AS latest_1, latest_0 AS latest_0
- FROM
- (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, latest_1 AS latest_1, uuid_1 AS uuid_1, `$session_id_1` AS `$session_id_1`, `$window_id_1` AS `$window_id_1`, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time, tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event, tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event, if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
- FROM
- (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, uuid_0 AS uuid_0, `$session_id_0` AS `$session_id_0`, `$window_id_0` AS `$window_id_0`, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS latest_1, min(uuid_1) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS uuid_1, min(`$session_id_1`) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS `$session_id_1`, min(`$window_id_1`) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS `$window_id_1`
- FROM
- (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, e.uuid AS uuid, if(equals(e.event, '$pageview'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0, if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`, if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`, if(equals(e.event, 'insight analyzed'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1, if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`, if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
- FROM events AS e
- LEFT OUTER JOIN
- (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, person_distinct_id_overrides.distinct_id AS distinct_id
- FROM person_distinct_id_overrides
- WHERE equals(person_distinct_id_overrides.team_id, 2)
- GROUP BY person_distinct_id_overrides.distinct_id
- HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id)
- LEFT JOIN
- (SELECT person.id AS id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'foo'), ''), 'null'), '^"|"$', '') AS properties___foo
- FROM person
- WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version),
- (SELECT person.id AS id, max(person.version) AS version
- FROM person
- WHERE equals(person.team_id, 2)
- GROUP BY person.id
- HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__person ON equals(if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id), e__person.id)
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), ifNull(equals(e__person.properties___foo, 'bar'), 0))))
- WHERE ifNull(equals(step_0, 1), 0)))
- GROUP BY aggregation_target, steps
- HAVING ifNull(equals(steps, max(max_steps)), isNull(steps)
- and isNull(max(max_steps))))
- WHERE ifNull(in(steps, [1, 2]), 0)
- ORDER BY aggregation_target ASC) AS funnel_actors
- WHERE ifNull(notEquals(funnel_actors.steps, 2), 1)
- GROUP BY funnel_actors.actor_id
- ORDER BY funnel_actors.actor_id ASC) AS source)))
- GROUP BY person.id
- HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id)
- ORDER BY persons.id ASC
- LIMIT 101
- OFFSET 0 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=0
- '''
-# ---
-# name: TestFunnelCorrelationActors.test_strict_funnel_correlation_with_recordings.3
- '''
- SELECT DISTINCT session_replay_events.session_id AS session_id
- FROM session_replay_events
- WHERE and(equals(session_replay_events.team_id, 2), ifNull(greaterOrEquals(toTimeZone(session_replay_events.min_first_timestamp, 'UTC'), minus(toDateTime64('2021-01-02 00:00:00.000000', 6, 'UTC'), toIntervalDay(21))), 0), in(session_replay_events.session_id, ['s3']))
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=0
- '''
-# ---
diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation_actors_udf.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation_actors_udf.ambr
index 8c6788fe66107..3f3fd82910546 100644
--- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation_actors_udf.ambr
+++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation_actors_udf.ambr
@@ -397,205 +397,3 @@
max_bytes_before_external_group_by=0
'''
# ---
-# name: TestFunnelCorrelationsActorsUDF.test_strict_funnel_correlation_with_recordings
- '''
- SELECT sum(step_1) AS step_1,
- sum(step_2) AS step_2,
- arrayMap(x -> if(isNaN(x), NULL, x), [avgArrayOrNull(step_1_conversion_times)])[1] AS step_1_average_conversion_time,
- arrayMap(x -> if(isNaN(x), NULL, x), [medianArrayOrNull(step_1_conversion_times)])[1] AS step_1_median_conversion_time,
- groupArray(row_number) AS row_number,
- final_prop AS final_prop
- FROM
- (SELECT countIf(ifNull(ifNull(equals(step_reached, 0), 0), 0)) AS step_1,
- countIf(ifNull(ifNull(equals(step_reached, 1), 0), 0)) AS step_2,
- groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times,
- rowNumberInBlock() AS row_number,
- breakdown AS final_prop
- FROM
- (SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, [], arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1)])))) AS events_array,
- arrayJoin(aggregate_funnel_array(2, 1209600, 'first_touch', 'strict', [[]], arrayFilter((x, x2) -> not(and(empty(x.4), empty(x2.4), ifNull(equals(x.3, x2.3), isNull(x.3)
- and isNull(x2.3)), ifNull(greater(x.1, x2.1), 0))), events_array, arrayRotateRight(events_array, 1)))) AS af_tuple,
- af_tuple.1 AS step_reached,
- plus(af_tuple.1, 1) AS steps,
- af_tuple.2 AS breakdown,
- af_tuple.3 AS timings,
- aggregation_target AS aggregation_target
- FROM
- (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
- if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target,
- e.uuid AS uuid,
- e.`$session_id` AS `$session_id`,
- e.`$window_id` AS `$window_id`,
- if(equals(e.event, '$pageview'), 1, 0) AS step_0,
- if(equals(e.event, 'insight analyzed'), 1, 0) AS step_1
- FROM events AS e
- LEFT OUTER JOIN
- (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id,
- person_distinct_id_overrides.distinct_id AS distinct_id
- FROM person_distinct_id_overrides
- WHERE equals(person_distinct_id_overrides.team_id, 2)
- GROUP BY person_distinct_id_overrides.distinct_id
- HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id)
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC')))))
- GROUP BY aggregation_target
- HAVING ifNull(greaterOrEquals(step_reached, 0), 0))
- GROUP BY breakdown
- ORDER BY step_2 DESC, step_1 DESC)
- GROUP BY final_prop
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=23622320128,
- allow_experimental_analyzer=1
- '''
-# ---
-# name: TestFunnelCorrelationsActorsUDF.test_strict_funnel_correlation_with_recordings.1
- '''
- SELECT DISTINCT session_replay_events.session_id AS session_id
- FROM session_replay_events
- WHERE and(equals(session_replay_events.team_id, 2), ifNull(greaterOrEquals(toTimeZone(session_replay_events.min_first_timestamp, 'UTC'), minus(toDateTime64('2021-01-02 00:00:00.000000', 6, 'UTC'), toIntervalDay(21))), 0), in(session_replay_events.session_id, ['s2']))
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=0
- '''
-# ---
-# name: TestFunnelCorrelationsActorsUDF.test_strict_funnel_correlation_with_recordings.2
- '''
- SELECT persons.id,
- persons.id AS id,
- source.matching_events AS matching_events
- FROM
- (SELECT funnel_actors.actor_id AS actor_id,
- any(funnel_actors.matching_events) AS matching_events
- FROM
- (SELECT aggregation_target AS actor_id,
- matched_events_array[plus(step_reached, 1)] AS matching_events,
- (matched_events_array[1][1]).1 AS timestamp,
- nullIf((matched_events_array[2][1]).1, 0) AS final_timestamp,
- (matched_events_array[1][1]).1 AS first_timestamp,
- steps AS steps,
- final_timestamp,
- first_timestamp
- FROM
- (SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, [], arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1)])))) AS events_array,
- arrayJoin(aggregate_funnel_array(2, 1209600, 'first_touch', 'strict', [[]], arrayFilter((x, x2) -> not(and(empty(x.4), empty(x2.4), ifNull(equals(x.3, x2.3), isNull(x.3)
- and isNull(x2.3)), ifNull(greater(x.1, x2.1), 0))), events_array, arrayRotateRight(events_array, 1)))) AS af_tuple,
- af_tuple.1 AS step_reached,
- plus(af_tuple.1, 1) AS steps,
- af_tuple.2 AS breakdown,
- af_tuple.3 AS timings,
- af_tuple.4 AS matched_event_uuids_array_array,
- groupArray(tuple(timestamp, uuid, `$session_id`, `$window_id`)) AS user_events,
- mapFromArrays(arrayMap(x -> x.2, user_events), user_events) AS user_events_map,
- arrayMap(matched_event_uuids_array -> arrayMap(event_uuid -> user_events_map[event_uuid], arrayDistinct(matched_event_uuids_array)), matched_event_uuids_array_array) AS matched_events_array,
- aggregation_target AS aggregation_target
- FROM
- (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
- if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target,
- e.uuid AS uuid,
- e.`$session_id` AS `$session_id`,
- e.`$window_id` AS `$window_id`,
- if(equals(e.event, '$pageview'), 1, 0) AS step_0,
- if(equals(e.event, 'insight analyzed'), 1, 0) AS step_1
- FROM events AS e
- LEFT OUTER JOIN
- (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id,
- person_distinct_id_overrides.distinct_id AS distinct_id
- FROM person_distinct_id_overrides
- WHERE equals(person_distinct_id_overrides.team_id, 2)
- GROUP BY person_distinct_id_overrides.distinct_id
- HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id)
- LEFT JOIN
- (SELECT person.id AS id,
- replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'foo'), ''), 'null'), '^"|"$', '') AS properties___foo
- FROM person
- WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version),
- (SELECT person.id AS id, max(person.version) AS version
- FROM person
- WHERE equals(person.team_id, 2)
- GROUP BY person.id
- HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__person ON equals(if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id), e__person.id)
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), ifNull(equals(e__person.properties___foo, 'bar'), 0)))
- GROUP BY aggregation_target
- HAVING ifNull(greaterOrEquals(step_reached, 0), 0))
- WHERE ifNull(greaterOrEquals(step_reached, 0), 0)
- ORDER BY aggregation_target ASC) AS funnel_actors
- WHERE ifNull(notEquals(funnel_actors.steps, 2), 1)
- GROUP BY funnel_actors.actor_id
- ORDER BY funnel_actors.actor_id ASC) AS source
- INNER JOIN
- (SELECT person.id AS id
- FROM person
- WHERE and(equals(person.team_id, 2), in(id,
- (SELECT source.actor_id AS actor_id
- FROM
- (SELECT funnel_actors.actor_id AS actor_id, any(funnel_actors.matching_events) AS matching_events
- FROM
- (SELECT aggregation_target AS actor_id, matched_events_array[plus(step_reached, 1)] AS matching_events, (matched_events_array[1][1]).1 AS timestamp, nullIf((matched_events_array[2][1]).1, 0) AS final_timestamp, (matched_events_array[1][1]).1 AS first_timestamp, steps AS steps, final_timestamp, first_timestamp
- FROM
- (SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, [], arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1)])))) AS events_array, arrayJoin(aggregate_funnel_array(2, 1209600, 'first_touch', 'strict', [[]], arrayFilter((x, x2) -> not(and(empty(x.4), empty(x2.4), ifNull(equals(x.3, x2.3), isNull(x.3)
- and isNull(x2.3)), ifNull(greater(x.1, x2.1), 0))), events_array, arrayRotateRight(events_array, 1)))) AS af_tuple, af_tuple.1 AS step_reached, plus(af_tuple.1, 1) AS steps, af_tuple.2 AS breakdown, af_tuple.3 AS timings, af_tuple.4 AS matched_event_uuids_array_array, groupArray(tuple(timestamp, uuid, `$session_id`, `$window_id`)) AS user_events, mapFromArrays(arrayMap(x -> x.2, user_events), user_events) AS user_events_map, arrayMap(matched_event_uuids_array -> arrayMap(event_uuid -> user_events_map[event_uuid], arrayDistinct(matched_event_uuids_array)), matched_event_uuids_array_array) AS matched_events_array, aggregation_target AS aggregation_target
- FROM
- (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp, if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, e.uuid AS uuid, e.`$session_id` AS `$session_id`, e.`$window_id` AS `$window_id`, if(equals(e.event, '$pageview'), 1, 0) AS step_0, if(equals(e.event, 'insight analyzed'), 1, 0) AS step_1
- FROM events AS e
- LEFT OUTER JOIN
- (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id, person_distinct_id_overrides.distinct_id AS distinct_id
- FROM person_distinct_id_overrides
- WHERE equals(person_distinct_id_overrides.team_id, 2)
- GROUP BY person_distinct_id_overrides.distinct_id
- HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id)
- LEFT JOIN
- (SELECT person.id AS id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'foo'), ''), 'null'), '^"|"$', '') AS properties___foo
- FROM person
- WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version),
- (SELECT person.id AS id, max(person.version) AS version
- FROM person
- WHERE equals(person.team_id, 2)
- GROUP BY person.id
- HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__person ON equals(if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id), e__person.id)
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), ifNull(equals(e__person.properties___foo, 'bar'), 0)))
- GROUP BY aggregation_target
- HAVING ifNull(greaterOrEquals(step_reached, 0), 0))
- WHERE ifNull(greaterOrEquals(step_reached, 0), 0)
- ORDER BY aggregation_target ASC) AS funnel_actors
- WHERE ifNull(notEquals(funnel_actors.steps, 2), 1)
- GROUP BY funnel_actors.actor_id
- ORDER BY funnel_actors.actor_id ASC) AS source)))
- GROUP BY person.id
- HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id)
- ORDER BY persons.id ASC
- LIMIT 101
- OFFSET 0 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=0
- '''
-# ---
-# name: TestFunnelCorrelationsActorsUDF.test_strict_funnel_correlation_with_recordings.3
- '''
- SELECT DISTINCT session_replay_events.session_id AS session_id
- FROM session_replay_events
- WHERE and(equals(session_replay_events.team_id, 2), ifNull(greaterOrEquals(toTimeZone(session_replay_events.min_first_timestamp, 'UTC'), minus(toDateTime64('2021-01-02 00:00:00.000000', 6, 'UTC'), toIntervalDay(21))), 0), in(session_replay_events.session_id, ['s3']))
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=0
- '''
-# ---
diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict.ambr
index 6cd3cbbd8132e..ff107d7eeb376 100644
--- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict.ambr
+++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict.ambr
@@ -85,14 +85,14 @@
and isNull(max(max_steps))))
GROUP BY prop)
GROUP BY final_prop
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=23622320128,
- allow_experimental_analyzer=1
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=23622320128,
+ allow_experimental_analyzer=1
'''
# ---
# name: TestFunnelStrictStepsBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step
@@ -188,14 +188,14 @@
and isNull(max(max_steps))))
GROUP BY prop)
GROUP BY final_prop
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=23622320128,
- allow_experimental_analyzer=1
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=23622320128,
+ allow_experimental_analyzer=1
'''
# ---
# name: TestFunnelStrictStepsBreakdown.test_funnel_step_multiple_breakdown_snapshot
@@ -284,14 +284,14 @@
and isNull(max(max_steps))))
GROUP BY prop)
GROUP BY final_prop
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=23622320128,
- allow_experimental_analyzer=1
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=23622320128,
+ allow_experimental_analyzer=1
'''
# ---
# name: TestStrictFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events
@@ -399,14 +399,14 @@
and isNull(max(max_steps))))
GROUP BY prop)
GROUP BY final_prop
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=23622320128,
- allow_experimental_analyzer=1
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=23622320128,
+ allow_experimental_analyzer=1
'''
# ---
# name: TestStrictFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events_poe_v2
@@ -514,14 +514,14 @@
and isNull(max(max_steps))))
GROUP BY prop)
GROUP BY final_prop
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=23622320128,
- allow_experimental_analyzer=1
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=23622320128,
+ allow_experimental_analyzer=1
'''
# ---
# name: TestStrictFunnelGroupBreakdown.test_funnel_breakdown_group
@@ -636,14 +636,14 @@
and isNull(max(max_steps))))
GROUP BY prop)
GROUP BY final_prop
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=23622320128,
- allow_experimental_analyzer=1
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=23622320128,
+ allow_experimental_analyzer=1
'''
# ---
# name: TestStrictFunnelGroupBreakdown.test_funnel_breakdown_group.1
diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict_udf.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict_udf.ambr
index a45664788b8e3..8aeadef465f63 100644
--- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict_udf.ambr
+++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_strict_udf.ambr
@@ -11,7 +11,7 @@
(SELECT countIf(ifNull(ifNull(equals(step_reached, 0), 0), 0)) AS step_1,
countIf(ifNull(ifNull(equals(step_reached, 1), 0), 0)) AS step_2,
groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times,
- rowNumberInBlock() AS row_number,
+ rowNumberInAllBlocks() AS row_number,
if(ifNull(less(row_number, 25), 0), breakdown, ['Other']) AS final_prop
FROM
(SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, prop, arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1)])))) AS events_array,
@@ -62,14 +62,14 @@
GROUP BY breakdown
ORDER BY step_2 DESC, step_1 DESC)
GROUP BY final_prop
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=23622320128,
- allow_experimental_analyzer=1
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=23622320128,
+ allow_experimental_analyzer=1
'''
# ---
# name: TestFunnelStrictStepsBreakdownUDF.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step
@@ -84,7 +84,7 @@
(SELECT countIf(ifNull(ifNull(equals(step_reached, 0), 0), 0)) AS step_1,
countIf(ifNull(ifNull(equals(step_reached, 1), 0), 0)) AS step_2,
groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times,
- rowNumberInBlock() AS row_number,
+ rowNumberInAllBlocks() AS row_number,
if(ifNull(less(row_number, 25), 0), breakdown, ['Other']) AS final_prop
FROM
(SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, prop, arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1)])))) AS events_array,
@@ -142,14 +142,14 @@
GROUP BY breakdown
ORDER BY step_2 DESC, step_1 DESC)
GROUP BY final_prop
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=23622320128,
- allow_experimental_analyzer=1
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=23622320128,
+ allow_experimental_analyzer=1
'''
# ---
# name: TestFunnelStrictStepsBreakdownUDF.test_funnel_step_multiple_breakdown_snapshot
@@ -164,7 +164,7 @@
(SELECT countIf(ifNull(ifNull(equals(step_reached, 0), 0), 0)) AS step_1,
countIf(ifNull(ifNull(equals(step_reached, 1), 0), 0)) AS step_2,
groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times,
- rowNumberInBlock() AS row_number,
+ rowNumberInAllBlocks() AS row_number,
if(ifNull(less(row_number, 25), 0), breakdown, ['Other']) AS final_prop
FROM
(SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, prop, arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1)])))) AS events_array,
@@ -215,14 +215,14 @@
GROUP BY breakdown
ORDER BY step_2 DESC, step_1 DESC)
GROUP BY final_prop
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=23622320128,
- allow_experimental_analyzer=1
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=23622320128,
+ allow_experimental_analyzer=1
'''
# ---
# name: TestStrictFunnelGroupBreakdownUDF.test_funnel_aggregate_by_groups_breakdown_group_person_on_events
@@ -242,7 +242,7 @@
countIf(ifNull(ifNull(equals(step_reached, 2), 0), 0)) AS step_3,
groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times,
groupArrayIf(timings[2], ifNull(greater(timings[2], 0), 0)) AS step_2_conversion_times,
- rowNumberInBlock() AS row_number,
+ rowNumberInAllBlocks() AS row_number,
if(ifNull(less(row_number, 25), 0), breakdown, 'Other') AS final_prop
FROM
(SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, prop, arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1), multiply(3, step_2)])))) AS events_array,
@@ -296,14 +296,14 @@
GROUP BY breakdown
ORDER BY step_3 DESC, step_2 DESC, step_1 DESC)
GROUP BY final_prop
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=23622320128,
- allow_experimental_analyzer=1
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=23622320128,
+ allow_experimental_analyzer=1
'''
# ---
# name: TestStrictFunnelGroupBreakdownUDF.test_funnel_aggregate_by_groups_breakdown_group_person_on_events_poe_v2
@@ -323,7 +323,7 @@
countIf(ifNull(ifNull(equals(step_reached, 2), 0), 0)) AS step_3,
groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times,
groupArrayIf(timings[2], ifNull(greater(timings[2], 0), 0)) AS step_2_conversion_times,
- rowNumberInBlock() AS row_number,
+ rowNumberInAllBlocks() AS row_number,
if(ifNull(less(row_number, 25), 0), breakdown, 'Other') AS final_prop
FROM
(SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, prop, arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1), multiply(3, step_2)])))) AS events_array,
@@ -377,14 +377,14 @@
GROUP BY breakdown
ORDER BY step_3 DESC, step_2 DESC, step_1 DESC)
GROUP BY final_prop
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=23622320128,
- allow_experimental_analyzer=1
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=23622320128,
+ allow_experimental_analyzer=1
'''
# ---
# name: TestStrictFunnelGroupBreakdownUDF.test_funnel_breakdown_group
@@ -404,7 +404,7 @@
countIf(ifNull(ifNull(equals(step_reached, 2), 0), 0)) AS step_3,
groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times,
groupArrayIf(timings[2], ifNull(greater(timings[2], 0), 0)) AS step_2_conversion_times,
- rowNumberInBlock() AS row_number,
+ rowNumberInAllBlocks() AS row_number,
if(ifNull(less(row_number, 25), 0), breakdown, 'Other') AS final_prop
FROM
(SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, prop, arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1), multiply(3, step_2)])))) AS events_array,
@@ -465,14 +465,14 @@
GROUP BY breakdown
ORDER BY step_3 DESC, step_2 DESC, step_1 DESC)
GROUP BY final_prop
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=23622320128,
- allow_experimental_analyzer=1
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=23622320128,
+ allow_experimental_analyzer=1
'''
# ---
# name: TestStrictFunnelGroupBreakdownUDF.test_funnel_breakdown_group.1
diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_trends_persons.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_trends_persons.ambr
deleted file mode 100644
index cdfb24412bf92..0000000000000
--- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_trends_persons.ambr
+++ /dev/null
@@ -1,520 +0,0 @@
-# serializer version: 1
-# name: TestFunnelTrendsPersons.test_funnel_trend_persons_returns_recordings
- '''
- SELECT persons.id,
- persons.id AS id,
- source.matching_events AS matching_events
- FROM
- (SELECT aggregation_target AS actor_id,
- step_1_matching_events AS matching_events
- FROM
- (SELECT aggregation_target AS aggregation_target,
- toStartOfDay(timestamp) AS entrance_period_start,
- max(steps) AS steps_completed,
- groupArray(10)(step_0_matching_event) AS step_0_matching_events,
- groupArray(10)(step_1_matching_event) AS step_1_matching_events,
- groupArray(10)(step_2_matching_event) AS step_2_matching_events,
- groupArray(10)(final_matching_event) AS final_matching_events
- FROM
- (SELECT aggregation_target AS aggregation_target,
- timestamp AS timestamp,
- step_0 AS step_0,
- latest_0 AS latest_0,
- uuid_0 AS uuid_0,
- `$session_id_0` AS `$session_id_0`,
- `$window_id_0` AS `$window_id_0`,
- step_1 AS step_1,
- latest_1 AS latest_1,
- uuid_1 AS uuid_1,
- `$session_id_1` AS `$session_id_1`,
- `$window_id_1` AS `$window_id_1`,
- step_2 AS step_2,
- latest_2 AS latest_2,
- uuid_2 AS uuid_2,
- `$session_id_2` AS `$session_id_2`,
- `$window_id_2` AS `$window_id_2`,
- if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1)) AS steps,
- if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
- if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_1, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time,
- tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
- tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
- tuple(latest_2, uuid_2, `$session_id_2`, `$window_id_2`) AS step_2_matching_event,
- if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, if(isNull(latest_2), step_1_matching_event, step_2_matching_event))) AS final_matching_event
- FROM
- (SELECT aggregation_target AS aggregation_target,
- timestamp AS timestamp,
- step_0 AS step_0,
- latest_0 AS latest_0,
- uuid_0 AS uuid_0,
- `$session_id_0` AS `$session_id_0`,
- `$window_id_0` AS `$window_id_0`,
- step_1 AS step_1,
- latest_1 AS latest_1,
- uuid_1 AS uuid_1,
- `$session_id_1` AS `$session_id_1`,
- `$window_id_1` AS `$window_id_1`,
- step_2 AS step_2,
- min(latest_2) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2,
- last_value(uuid_2) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_2,
- last_value(`$session_id_2`) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_2`,
- last_value(`$window_id_2`) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_2`
- FROM
- (SELECT aggregation_target AS aggregation_target,
- timestamp AS timestamp,
- step_0 AS step_0,
- latest_0 AS latest_0,
- uuid_0 AS uuid_0,
- `$session_id_0` AS `$session_id_0`,
- `$window_id_0` AS `$window_id_0`,
- step_1 AS step_1,
- latest_1 AS latest_1,
- uuid_1 AS uuid_1,
- `$session_id_1` AS `$session_id_1`,
- `$window_id_1` AS `$window_id_1`,
- step_2 AS step_2,
- if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2,
- if(ifNull(less(latest_2, latest_1), 0), NULL, uuid_2) AS uuid_2,
- if(ifNull(less(latest_2, latest_1), 0), NULL, `$session_id_2`) AS `$session_id_2`,
- if(ifNull(less(latest_2, latest_1), 0), NULL, `$window_id_2`) AS `$window_id_2`
- FROM
- (SELECT aggregation_target AS aggregation_target,
- timestamp AS timestamp,
- step_0 AS step_0,
- latest_0 AS latest_0,
- uuid_0 AS uuid_0,
- `$session_id_0` AS `$session_id_0`,
- `$window_id_0` AS `$window_id_0`,
- step_1 AS step_1,
- min(latest_1) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
- last_value(uuid_1) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
- last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
- last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`,
- step_2 AS step_2,
- min(latest_2) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2,
- last_value(uuid_2) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_2,
- last_value(`$session_id_2`) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_2`,
- last_value(`$window_id_2`) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_2`
- FROM
- (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
- if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target,
- e.uuid AS uuid,
- if(equals(e.event, 'step one'), 1, 0) AS step_0,
- if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
- if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
- if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
- if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
- if(equals(e.event, 'step two'), 1, 0) AS step_1,
- if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
- if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
- if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
- if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`,
- if(equals(e.event, 'step three'), 1, 0) AS step_2,
- if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2,
- if(ifNull(equals(step_2, 1), 0), uuid, NULL) AS uuid_2,
- if(ifNull(equals(step_2, 1), 0), e.`$session_id`, NULL) AS `$session_id_2`,
- if(ifNull(equals(step_2, 1), 0), e.`$window_id`, NULL) AS `$window_id_2`
- FROM events AS e
- LEFT OUTER JOIN
- (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id,
- person_distinct_id_overrides.distinct_id AS distinct_id
- FROM person_distinct_id_overrides
- WHERE equals(person_distinct_id_overrides.team_id, 2)
- GROUP BY person_distinct_id_overrides.distinct_id
- HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id)
- WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-07 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('step one', 'step three', 'step two'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))))
- WHERE ifNull(equals(step_0, 1), 0))
- WHERE ifNull(equals(entrance_period_start, toDateTime64('2021-05-01 00:00:00.000000', 6, 'UTC')), 0)
- GROUP BY aggregation_target,
- entrance_period_start)
- WHERE ifNull(greaterOrEquals(steps_completed, 2), 0)
- ORDER BY aggregation_target ASC) AS source
- INNER JOIN
- (SELECT person.id AS id
- FROM person
- WHERE equals(person.team_id, 2)
- GROUP BY person.id
- HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id)
- ORDER BY persons.id ASC
- LIMIT 101
- OFFSET 0 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=0,
- allow_experimental_analyzer=1
- '''
-# ---
-# name: TestFunnelTrendsPersons.test_funnel_trend_persons_returns_recordings.1
- '''
- SELECT DISTINCT session_replay_events.session_id AS session_id
- FROM session_replay_events
- WHERE and(equals(session_replay_events.team_id, 2), ifNull(greaterOrEquals(toTimeZone(session_replay_events.min_first_timestamp, 'UTC'), minus(toDateTime64('2021-05-01 00:00:00.000000', 6, 'UTC'), toIntervalDay(21))), 0), in(session_replay_events.session_id, ['s1b']))
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=0
- '''
-# ---
-# name: TestFunnelTrendsPersons.test_funnel_trend_persons_with_drop_off
- '''
- SELECT persons.id,
- persons.id AS id,
- source.matching_events AS matching_events
- FROM
- (SELECT aggregation_target AS actor_id,
- final_matching_events AS matching_events
- FROM
- (SELECT aggregation_target AS aggregation_target,
- toStartOfDay(timestamp) AS entrance_period_start,
- max(steps) AS steps_completed,
- groupArray(10)(step_0_matching_event) AS step_0_matching_events,
- groupArray(10)(step_1_matching_event) AS step_1_matching_events,
- groupArray(10)(step_2_matching_event) AS step_2_matching_events,
- groupArray(10)(final_matching_event) AS final_matching_events
- FROM
- (SELECT aggregation_target AS aggregation_target,
- timestamp AS timestamp,
- step_0 AS step_0,
- latest_0 AS latest_0,
- uuid_0 AS uuid_0,
- `$session_id_0` AS `$session_id_0`,
- `$window_id_0` AS `$window_id_0`,
- step_1 AS step_1,
- latest_1 AS latest_1,
- uuid_1 AS uuid_1,
- `$session_id_1` AS `$session_id_1`,
- `$window_id_1` AS `$window_id_1`,
- step_2 AS step_2,
- latest_2 AS latest_2,
- uuid_2 AS uuid_2,
- `$session_id_2` AS `$session_id_2`,
- `$window_id_2` AS `$window_id_2`,
- if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1)) AS steps,
- if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
- if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_1, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time,
- tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
- tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
- tuple(latest_2, uuid_2, `$session_id_2`, `$window_id_2`) AS step_2_matching_event,
- if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, if(isNull(latest_2), step_1_matching_event, step_2_matching_event))) AS final_matching_event
- FROM
- (SELECT aggregation_target AS aggregation_target,
- timestamp AS timestamp,
- step_0 AS step_0,
- latest_0 AS latest_0,
- uuid_0 AS uuid_0,
- `$session_id_0` AS `$session_id_0`,
- `$window_id_0` AS `$window_id_0`,
- step_1 AS step_1,
- latest_1 AS latest_1,
- uuid_1 AS uuid_1,
- `$session_id_1` AS `$session_id_1`,
- `$window_id_1` AS `$window_id_1`,
- step_2 AS step_2,
- min(latest_2) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2,
- last_value(uuid_2) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_2,
- last_value(`$session_id_2`) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_2`,
- last_value(`$window_id_2`) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_2`
- FROM
- (SELECT aggregation_target AS aggregation_target,
- timestamp AS timestamp,
- step_0 AS step_0,
- latest_0 AS latest_0,
- uuid_0 AS uuid_0,
- `$session_id_0` AS `$session_id_0`,
- `$window_id_0` AS `$window_id_0`,
- step_1 AS step_1,
- latest_1 AS latest_1,
- uuid_1 AS uuid_1,
- `$session_id_1` AS `$session_id_1`,
- `$window_id_1` AS `$window_id_1`,
- step_2 AS step_2,
- if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2,
- if(ifNull(less(latest_2, latest_1), 0), NULL, uuid_2) AS uuid_2,
- if(ifNull(less(latest_2, latest_1), 0), NULL, `$session_id_2`) AS `$session_id_2`,
- if(ifNull(less(latest_2, latest_1), 0), NULL, `$window_id_2`) AS `$window_id_2`
- FROM
- (SELECT aggregation_target AS aggregation_target,
- timestamp AS timestamp,
- step_0 AS step_0,
- latest_0 AS latest_0,
- uuid_0 AS uuid_0,
- `$session_id_0` AS `$session_id_0`,
- `$window_id_0` AS `$window_id_0`,
- step_1 AS step_1,
- min(latest_1) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
- last_value(uuid_1) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
- last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
- last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`,
- step_2 AS step_2,
- min(latest_2) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2,
- last_value(uuid_2) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_2,
- last_value(`$session_id_2`) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_2`,
- last_value(`$window_id_2`) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_2`
- FROM
- (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
- if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target,
- e.uuid AS uuid,
- if(equals(e.event, 'step one'), 1, 0) AS step_0,
- if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
- if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
- if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
- if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
- if(equals(e.event, 'step two'), 1, 0) AS step_1,
- if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
- if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
- if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
- if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`,
- if(equals(e.event, 'step three'), 1, 0) AS step_2,
- if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2,
- if(ifNull(equals(step_2, 1), 0), uuid, NULL) AS uuid_2,
- if(ifNull(equals(step_2, 1), 0), e.`$session_id`, NULL) AS `$session_id_2`,
- if(ifNull(equals(step_2, 1), 0), e.`$window_id`, NULL) AS `$window_id_2`
- FROM events AS e
- LEFT OUTER JOIN
- (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id,
- person_distinct_id_overrides.distinct_id AS distinct_id
- FROM person_distinct_id_overrides
- WHERE equals(person_distinct_id_overrides.team_id, 2)
- GROUP BY person_distinct_id_overrides.distinct_id
- HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id)
- WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-07 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('step one', 'step three', 'step two'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))))
- WHERE ifNull(equals(step_0, 1), 0))
- WHERE ifNull(equals(entrance_period_start, toDateTime64('2021-05-01 00:00:00.000000', 6, 'UTC')), 0)
- GROUP BY aggregation_target,
- entrance_period_start)
- WHERE and(ifNull(greaterOrEquals(steps_completed, 1), 0), ifNull(less(steps_completed, 3), 0))
- ORDER BY aggregation_target ASC) AS source
- INNER JOIN
- (SELECT person.id AS id
- FROM person
- WHERE equals(person.team_id, 2)
- GROUP BY person.id
- HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id)
- ORDER BY persons.id ASC
- LIMIT 101
- OFFSET 0 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=0,
- allow_experimental_analyzer=1
- '''
-# ---
-# name: TestFunnelTrendsPersons.test_funnel_trend_persons_with_drop_off.1
- '''
- SELECT DISTINCT session_replay_events.session_id AS session_id
- FROM session_replay_events
- WHERE and(equals(session_replay_events.team_id, 2), ifNull(greaterOrEquals(toTimeZone(session_replay_events.min_first_timestamp, 'UTC'), minus(toDateTime64('2021-05-01 00:00:00.000000', 6, 'UTC'), toIntervalDay(21))), 0), in(session_replay_events.session_id, ['s1a']))
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=0
- '''
-# ---
-# name: TestFunnelTrendsPersons.test_funnel_trend_persons_with_no_to_step
- '''
- SELECT persons.id,
- persons.id AS id,
- source.matching_events AS matching_events
- FROM
- (SELECT aggregation_target AS actor_id,
- final_matching_events AS matching_events
- FROM
- (SELECT aggregation_target AS aggregation_target,
- toStartOfDay(timestamp) AS entrance_period_start,
- max(steps) AS steps_completed,
- groupArray(10)(step_0_matching_event) AS step_0_matching_events,
- groupArray(10)(step_1_matching_event) AS step_1_matching_events,
- groupArray(10)(step_2_matching_event) AS step_2_matching_events,
- groupArray(10)(final_matching_event) AS final_matching_events
- FROM
- (SELECT aggregation_target AS aggregation_target,
- timestamp AS timestamp,
- step_0 AS step_0,
- latest_0 AS latest_0,
- uuid_0 AS uuid_0,
- `$session_id_0` AS `$session_id_0`,
- `$window_id_0` AS `$window_id_0`,
- step_1 AS step_1,
- latest_1 AS latest_1,
- uuid_1 AS uuid_1,
- `$session_id_1` AS `$session_id_1`,
- `$window_id_1` AS `$window_id_1`,
- step_2 AS step_2,
- latest_2 AS latest_2,
- uuid_2 AS uuid_2,
- `$session_id_2` AS `$session_id_2`,
- `$window_id_2` AS `$window_id_2`,
- if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1)) AS steps,
- if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
- if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(toTimeZone(latest_1, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time,
- tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
- tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
- tuple(latest_2, uuid_2, `$session_id_2`, `$window_id_2`) AS step_2_matching_event,
- if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, if(isNull(latest_2), step_1_matching_event, step_2_matching_event))) AS final_matching_event
- FROM
- (SELECT aggregation_target AS aggregation_target,
- timestamp AS timestamp,
- step_0 AS step_0,
- latest_0 AS latest_0,
- uuid_0 AS uuid_0,
- `$session_id_0` AS `$session_id_0`,
- `$window_id_0` AS `$window_id_0`,
- step_1 AS step_1,
- latest_1 AS latest_1,
- uuid_1 AS uuid_1,
- `$session_id_1` AS `$session_id_1`,
- `$window_id_1` AS `$window_id_1`,
- step_2 AS step_2,
- min(latest_2) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2,
- last_value(uuid_2) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_2,
- last_value(`$session_id_2`) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_2`,
- last_value(`$window_id_2`) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_2`
- FROM
- (SELECT aggregation_target AS aggregation_target,
- timestamp AS timestamp,
- step_0 AS step_0,
- latest_0 AS latest_0,
- uuid_0 AS uuid_0,
- `$session_id_0` AS `$session_id_0`,
- `$window_id_0` AS `$window_id_0`,
- step_1 AS step_1,
- latest_1 AS latest_1,
- uuid_1 AS uuid_1,
- `$session_id_1` AS `$session_id_1`,
- `$window_id_1` AS `$window_id_1`,
- step_2 AS step_2,
- if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2,
- if(ifNull(less(latest_2, latest_1), 0), NULL, uuid_2) AS uuid_2,
- if(ifNull(less(latest_2, latest_1), 0), NULL, `$session_id_2`) AS `$session_id_2`,
- if(ifNull(less(latest_2, latest_1), 0), NULL, `$window_id_2`) AS `$window_id_2`
- FROM
- (SELECT aggregation_target AS aggregation_target,
- timestamp AS timestamp,
- step_0 AS step_0,
- latest_0 AS latest_0,
- uuid_0 AS uuid_0,
- `$session_id_0` AS `$session_id_0`,
- `$window_id_0` AS `$window_id_0`,
- step_1 AS step_1,
- min(latest_1) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
- last_value(uuid_1) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
- last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
- last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`,
- step_2 AS step_2,
- min(latest_2) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2,
- last_value(uuid_2) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_2,
- last_value(`$session_id_2`) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_2`,
- last_value(`$window_id_2`) OVER (PARTITION BY aggregation_target
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_2`
- FROM
- (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
- if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target,
- e.uuid AS uuid,
- if(equals(e.event, 'step one'), 1, 0) AS step_0,
- if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
- if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
- if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
- if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
- if(equals(e.event, 'step two'), 1, 0) AS step_1,
- if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
- if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
- if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
- if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`,
- if(equals(e.event, 'step three'), 1, 0) AS step_2,
- if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2,
- if(ifNull(equals(step_2, 1), 0), uuid, NULL) AS uuid_2,
- if(ifNull(equals(step_2, 1), 0), e.`$session_id`, NULL) AS `$session_id_2`,
- if(ifNull(equals(step_2, 1), 0), e.`$window_id`, NULL) AS `$window_id_2`
- FROM events AS e
- LEFT OUTER JOIN
- (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id,
- person_distinct_id_overrides.distinct_id AS distinct_id
- FROM person_distinct_id_overrides
- WHERE equals(person_distinct_id_overrides.team_id, 2)
- GROUP BY person_distinct_id_overrides.distinct_id
- HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id)
- WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-05-07 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('step one', 'step three', 'step two'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))))
- WHERE ifNull(equals(step_0, 1), 0))
- WHERE ifNull(equals(entrance_period_start, toDateTime64('2021-05-01 00:00:00.000000', 6, 'UTC')), 0)
- GROUP BY aggregation_target,
- entrance_period_start)
- WHERE ifNull(greaterOrEquals(steps_completed, 3), 0)
- ORDER BY aggregation_target ASC) AS source
- INNER JOIN
- (SELECT person.id AS id
- FROM person
- WHERE equals(person.team_id, 2)
- GROUP BY person.id
- HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id)
- ORDER BY persons.id ASC
- LIMIT 101
- OFFSET 0 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=0,
- allow_experimental_analyzer=1
- '''
-# ---
-# name: TestFunnelTrendsPersons.test_funnel_trend_persons_with_no_to_step.1
- '''
- SELECT DISTINCT session_replay_events.session_id AS session_id
- FROM session_replay_events
- WHERE and(equals(session_replay_events.team_id, 2), ifNull(greaterOrEquals(toTimeZone(session_replay_events.min_first_timestamp, 'UTC'), minus(toDateTime64('2021-05-01 00:00:00.000000', 6, 'UTC'), toIntervalDay(21))), 0), in(session_replay_events.session_id, ['s1c']))
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=0
- '''
-# ---
diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_udf.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_udf.ambr
index db1d9211dad93..76914332b11c4 100644
--- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_udf.ambr
+++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_udf.ambr
@@ -16,7 +16,7 @@
countIf(ifNull(ifNull(equals(step_reached, 2), 0), 0)) AS step_3,
groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times,
groupArrayIf(timings[2], ifNull(greater(timings[2], 0), 0)) AS step_2_conversion_times,
- rowNumberInBlock() AS row_number,
+ rowNumberInAllBlocks() AS row_number,
breakdown AS final_prop
FROM
(SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, [], arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1), multiply(3, step_2)])))) AS events_array,
@@ -150,7 +150,7 @@
countIf(ifNull(ifNull(equals(step_reached, 2), 0), 0)) AS step_3,
groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times,
groupArrayIf(timings[2], ifNull(greater(timings[2], 0), 0)) AS step_2_conversion_times,
- rowNumberInBlock() AS row_number,
+ rowNumberInAllBlocks() AS row_number,
breakdown AS final_prop
FROM
(SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, [], arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1), multiply(3, step_2)])))) AS events_array,
@@ -229,7 +229,7 @@
(SELECT countIf(ifNull(ifNull(equals(step_reached, 0), 0), 0)) AS step_1,
countIf(ifNull(ifNull(equals(step_reached, 1), 0), 0)) AS step_2,
groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times,
- rowNumberInBlock() AS row_number,
+ rowNumberInAllBlocks() AS row_number,
breakdown AS final_prop
FROM
(SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, [], arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1)])))) AS events_array,
@@ -295,7 +295,7 @@
countIf(ifNull(ifNull(equals(step_reached, 2), 0), 0)) AS step_3,
groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times,
groupArrayIf(timings[2], ifNull(greater(timings[2], 0), 0)) AS step_2_conversion_times,
- rowNumberInBlock() AS row_number,
+ rowNumberInAllBlocks() AS row_number,
breakdown AS final_prop
FROM
(SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, [], arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1), multiply(3, step_2)])))) AS events_array,
@@ -590,7 +590,7 @@
(SELECT countIf(ifNull(ifNull(equals(step_reached, 0), 0), 0)) AS step_1,
countIf(ifNull(ifNull(equals(step_reached, 1), 0), 0)) AS step_2,
groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times,
- rowNumberInBlock() AS row_number,
+ rowNumberInAllBlocks() AS row_number,
breakdown AS final_prop
FROM
(SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, [], arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1)])))) AS events_array,
@@ -651,7 +651,7 @@
(SELECT countIf(ifNull(ifNull(equals(step_reached, 0), 0), 0)) AS step_1,
countIf(ifNull(ifNull(equals(step_reached, 1), 0), 0)) AS step_2,
groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times,
- rowNumberInBlock() AS row_number,
+ rowNumberInAllBlocks() AS row_number,
breakdown AS final_prop
FROM
(SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, [], arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1)])))) AS events_array,
@@ -709,7 +709,7 @@
(SELECT countIf(ifNull(ifNull(equals(step_reached, 0), 0), 0)) AS step_1,
countIf(ifNull(ifNull(equals(step_reached, 1), 0), 0)) AS step_2,
groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times,
- rowNumberInBlock() AS row_number,
+ rowNumberInAllBlocks() AS row_number,
if(ifNull(less(row_number, 25), 0), breakdown, ['Other']) AS final_prop
FROM
(SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, prop, arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1)])))) AS events_array,
@@ -760,14 +760,14 @@
GROUP BY breakdown
ORDER BY step_2 DESC, step_1 DESC)
GROUP BY final_prop
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=23622320128,
- allow_experimental_analyzer=1
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=23622320128,
+ allow_experimental_analyzer=1
'''
# ---
# name: TestFunnelBreakdownUDF.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step
@@ -782,7 +782,7 @@
(SELECT countIf(ifNull(ifNull(equals(step_reached, 0), 0), 0)) AS step_1,
countIf(ifNull(ifNull(equals(step_reached, 1), 0), 0)) AS step_2,
groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times,
- rowNumberInBlock() AS row_number,
+ rowNumberInAllBlocks() AS row_number,
if(ifNull(less(row_number, 25), 0), breakdown, ['Other']) AS final_prop
FROM
(SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, prop, arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1)])))) AS events_array,
@@ -840,14 +840,14 @@
GROUP BY breakdown
ORDER BY step_2 DESC, step_1 DESC)
GROUP BY final_prop
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=23622320128,
- allow_experimental_analyzer=1
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=23622320128,
+ allow_experimental_analyzer=1
'''
# ---
# name: TestFunnelBreakdownUDF.test_funnel_step_multiple_breakdown_snapshot
@@ -862,7 +862,7 @@
(SELECT countIf(ifNull(ifNull(equals(step_reached, 0), 0), 0)) AS step_1,
countIf(ifNull(ifNull(equals(step_reached, 1), 0), 0)) AS step_2,
groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times,
- rowNumberInBlock() AS row_number,
+ rowNumberInAllBlocks() AS row_number,
if(ifNull(less(row_number, 25), 0), breakdown, ['Other']) AS final_prop
FROM
(SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, prop, arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1)])))) AS events_array,
@@ -913,14 +913,14 @@
GROUP BY breakdown
ORDER BY step_2 DESC, step_1 DESC)
GROUP BY final_prop
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=23622320128,
- allow_experimental_analyzer=1
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=23622320128,
+ allow_experimental_analyzer=1
'''
# ---
# name: TestFunnelGroupBreakdownUDF.test_funnel_aggregate_by_groups_breakdown_group_person_on_events
@@ -940,7 +940,7 @@
countIf(ifNull(ifNull(equals(step_reached, 2), 0), 0)) AS step_3,
groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times,
groupArrayIf(timings[2], ifNull(greater(timings[2], 0), 0)) AS step_2_conversion_times,
- rowNumberInBlock() AS row_number,
+ rowNumberInAllBlocks() AS row_number,
if(ifNull(less(row_number, 25), 0), breakdown, 'Other') AS final_prop
FROM
(SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, prop, arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1), multiply(3, step_2)])))) AS events_array,
@@ -994,14 +994,14 @@
GROUP BY breakdown
ORDER BY step_3 DESC, step_2 DESC, step_1 DESC)
GROUP BY final_prop
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=23622320128,
- allow_experimental_analyzer=1
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=23622320128,
+ allow_experimental_analyzer=1
'''
# ---
# name: TestFunnelGroupBreakdownUDF.test_funnel_aggregate_by_groups_breakdown_group_person_on_events_poe_v2
@@ -1021,7 +1021,7 @@
countIf(ifNull(ifNull(equals(step_reached, 2), 0), 0)) AS step_3,
groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times,
groupArrayIf(timings[2], ifNull(greater(timings[2], 0), 0)) AS step_2_conversion_times,
- rowNumberInBlock() AS row_number,
+ rowNumberInAllBlocks() AS row_number,
if(ifNull(less(row_number, 25), 0), breakdown, 'Other') AS final_prop
FROM
(SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, prop, arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1), multiply(3, step_2)])))) AS events_array,
@@ -1075,14 +1075,14 @@
GROUP BY breakdown
ORDER BY step_3 DESC, step_2 DESC, step_1 DESC)
GROUP BY final_prop
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=23622320128,
- allow_experimental_analyzer=1
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=23622320128,
+ allow_experimental_analyzer=1
'''
# ---
# name: TestFunnelGroupBreakdownUDF.test_funnel_breakdown_group
@@ -1102,7 +1102,7 @@
countIf(ifNull(ifNull(equals(step_reached, 2), 0), 0)) AS step_3,
groupArrayIf(timings[1], ifNull(greater(timings[1], 0), 0)) AS step_1_conversion_times,
groupArrayIf(timings[2], ifNull(greater(timings[2], 0), 0)) AS step_2_conversion_times,
- rowNumberInBlock() AS row_number,
+ rowNumberInAllBlocks() AS row_number,
if(ifNull(less(row_number, 25), 0), breakdown, 'Other') AS final_prop
FROM
(SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, prop, arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1), multiply(3, step_2)])))) AS events_array,
@@ -1163,585 +1163,345 @@
GROUP BY breakdown
ORDER BY step_3 DESC, step_2 DESC, step_1 DESC)
GROUP BY final_prop
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=23622320128,
- allow_experimental_analyzer=1
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=23622320128,
+ allow_experimental_analyzer=1
'''
# ---
# name: TestFunnelGroupBreakdownUDF.test_funnel_breakdown_group.1
'''
-
- SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value,
- count(*) as count
- FROM events e
- LEFT JOIN
- (SELECT group_key,
- argMax(group_properties, _timestamp) AS group_properties_0
- FROM groups
- WHERE team_id = 99999
- AND group_type_index = 0
- GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key
- WHERE team_id = 99999
- AND event IN ['buy', 'play movie', 'sign up']
- AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC')
- AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC')
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 26
- OFFSET 0
- '''
-# ---
-# name: TestFunnelGroupBreakdownUDF.test_funnel_breakdown_group.2
- '''
-
- SELECT aggregation_target AS actor_id
+ SELECT persons.id,
+ persons.id AS id
FROM
- (SELECT aggregation_target,
- steps,
- avg(step_1_conversion_time) step_1_average_conversion_time_inner,
- avg(step_2_conversion_time) step_2_average_conversion_time_inner,
- median(step_1_conversion_time) step_1_median_conversion_time_inner,
- median(step_2_conversion_time) step_2_median_conversion_time_inner ,
- prop
+ (SELECT aggregation_target AS actor_id
FROM
- (SELECT aggregation_target,
- steps,
- max(steps) over (PARTITION BY aggregation_target,
- prop) as max_steps,
- step_1_conversion_time,
- step_2_conversion_time ,
- prop
+ (SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, prop, arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1), multiply(3, step_2)])))) AS events_array,
+ arrayJoin(aggregate_funnel_v3(3, 1209600, 'first_touch', 'ordered', groupUniqArray(prop), arrayFilter((x, x_before, x_after) -> not(and(ifNull(lessOrEquals(length(x.4), 1), 0), ifNull(equals(x.4, x_before.4), isNull(x.4)
+ and isNull(x_before.4)), ifNull(equals(x.4, x_after.4), isNull(x.4)
+ and isNull(x_after.4)), ifNull(equals(x.3, x_before.3), isNull(x.3)
+ and isNull(x_before.3)), ifNull(equals(x.3, x_after.3), isNull(x.3)
+ and isNull(x_after.3)), ifNull(greater(x.1, x_before.1), 0), ifNull(less(x.1, x_after.1), 0))), events_array, arrayRotateRight(events_array, 1), arrayRotateLeft(events_array, 1)))) AS af_tuple,
+ af_tuple.1 AS step_reached,
+ plus(af_tuple.1, 1) AS steps,
+ af_tuple.2 AS breakdown,
+ af_tuple.3 AS timings,
+ aggregation_target AS aggregation_target
FROM
- (SELECT *,
- if(latest_0 <= latest_1
- AND latest_1 <= latest_0 + INTERVAL 7 DAY
- AND latest_1 <= latest_2
- AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1
- AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps ,
- if(isNotNull(latest_1)
- AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time,
- if(isNotNull(latest_2)
- AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time,
- prop
+ (SELECT timestamp AS timestamp,
+ aggregation_target AS aggregation_target,
+ uuid AS uuid,
+ `$session_id` AS `$session_id`,
+ `$window_id` AS `$window_id`,
+ step_0 AS step_0,
+ step_1 AS step_1,
+ step_2 AS step_2,
+ prop_basic AS prop_basic,
+ prop,
+ prop_vals AS prop_vals,
+ prop_vals AS prop
FROM
- (SELECT aggregation_target, timestamp, step_0,
- latest_0,
- step_1,
- latest_1,
- step_2,
- min(latest_2) over (PARTITION by aggregation_target,
- prop
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 ,
- prop
- FROM
- (SELECT aggregation_target, timestamp, step_0,
- latest_0,
- step_1,
- latest_1,
- step_2,
- if(latest_2 < latest_1, NULL, latest_2) as latest_2 ,
- prop
- FROM
- (SELECT aggregation_target, timestamp, step_0,
- latest_0,
- step_1,
- min(latest_1) over (PARTITION by aggregation_target,
- prop
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1,
- step_2,
- min(latest_2) over (PARTITION by aggregation_target,
- prop
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 ,
- if(has(['technology', 'finance'], prop), prop, 'Other') as prop
- FROM
- (SELECT *,
- prop_vals as prop
- FROM
- (SELECT e.timestamp as timestamp,
- if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target,
- if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id,
- if(event = 'sign up', 1, 0) as step_0,
- if(step_0 = 1, timestamp, null) as latest_0,
- if(event = 'play movie', 1, 0) as step_1,
- if(step_1 = 1, timestamp, null) as latest_1,
- if(event = 'buy', 1, 0) as step_2,
- if(step_2 = 1, timestamp, null) as latest_2,
- replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic,
- prop_basic as prop,
- argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals
- FROM events e
- LEFT OUTER JOIN
- (SELECT distinct_id,
- argMax(person_id, version) as person_id
- FROM person_distinct_id2
- WHERE team_id = 99999
- AND distinct_id IN
- (SELECT distinct_id
- FROM events
- WHERE team_id = 99999
- AND event IN ['buy', 'play movie', 'sign up']
- AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC')
- AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') )
- GROUP BY distinct_id
- HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id
- LEFT JOIN
- (SELECT group_key,
- argMax(group_properties, _timestamp) AS group_properties_0
- FROM groups
- WHERE team_id = 99999
- AND group_type_index = 0
- GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key
- WHERE team_id = 99999
- AND event IN ['buy', 'play movie', 'sign up']
- AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC')
- AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC')
- AND (step_0 = 1
- OR step_1 = 1
- OR step_2 = 1) )))))
- WHERE step_0 = 1 ))
- GROUP BY aggregation_target,
- steps,
- prop
- HAVING steps = max(max_steps))
- WHERE steps IN [1, 2, 3]
- AND arrayFlatten(array(prop)) = arrayFlatten(array('finance'))
- ORDER BY aggregation_target
- LIMIT 100
- OFFSET 0 SETTINGS max_ast_elements=1000000,
- max_expanded_ast_elements=1000000
- '''
-# ---
-# name: TestFunnelGroupBreakdownUDF.test_funnel_breakdown_group.3
- '''
-
- SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value,
- count(*) as count
- FROM events e
- LEFT JOIN
- (SELECT group_key,
- argMax(group_properties, _timestamp) AS group_properties_0
- FROM groups
- WHERE team_id = 99999
- AND group_type_index = 0
- GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key
- WHERE team_id = 99999
- AND event IN ['buy', 'play movie', 'sign up']
- AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC')
- AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC')
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 26
- OFFSET 0
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target,
+ e.uuid AS uuid,
+ e.`$session_id` AS `$session_id`,
+ e.`$window_id` AS `$window_id`,
+ if(equals(e.event, 'sign up'), 1, 0) AS step_0,
+ if(equals(e.event, 'play movie'), 1, 0) AS step_1,
+ if(equals(e.event, 'buy'), 1, 0) AS step_2,
+ ifNull(toString(e__group_0.properties___industry), '') AS prop_basic,
+ prop_basic AS prop,
+ argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals
+ FROM events AS e
+ LEFT OUTER JOIN
+ (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id,
+ person_distinct_id_overrides.distinct_id AS distinct_id
+ FROM person_distinct_id_overrides
+ WHERE equals(person_distinct_id_overrides.team_id, 99999)
+ GROUP BY person_distinct_id_overrides.distinct_id
+ HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id)
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 99999), equals(index, 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 99999), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))
+ GROUP BY aggregation_target
+ HAVING ifNull(greaterOrEquals(step_reached, 0), 0))
+ WHERE and(ifNull(greaterOrEquals(step_reached, 0), 0), ifNull(equals(arrayFlatten(array(breakdown)), arrayFlatten(array('finance'))), isNull(arrayFlatten(array(breakdown)))
+ and isNull(arrayFlatten(array('finance')))))
+ ORDER BY aggregation_target ASC) AS source
+ INNER JOIN
+ (SELECT person.id AS id
+ FROM person
+ WHERE equals(person.team_id, 99999)
+ GROUP BY person.id
+ HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id)
+ ORDER BY persons.id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=0,
+ allow_experimental_analyzer=1
'''
# ---
-# name: TestFunnelGroupBreakdownUDF.test_funnel_breakdown_group.4
+# name: TestFunnelGroupBreakdownUDF.test_funnel_breakdown_group.2
'''
-
- SELECT aggregation_target AS actor_id
+ SELECT persons.id,
+ persons.id AS id
FROM
- (SELECT aggregation_target,
- steps,
- avg(step_1_conversion_time) step_1_average_conversion_time_inner,
- avg(step_2_conversion_time) step_2_average_conversion_time_inner,
- median(step_1_conversion_time) step_1_median_conversion_time_inner,
- median(step_2_conversion_time) step_2_median_conversion_time_inner ,
- prop
+ (SELECT aggregation_target AS actor_id
FROM
- (SELECT aggregation_target,
- steps,
- max(steps) over (PARTITION BY aggregation_target,
- prop) as max_steps,
- step_1_conversion_time,
- step_2_conversion_time ,
- prop
+ (SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, prop, arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1), multiply(3, step_2)])))) AS events_array,
+ arrayJoin(aggregate_funnel_v3(3, 1209600, 'first_touch', 'ordered', groupUniqArray(prop), arrayFilter((x, x_before, x_after) -> not(and(ifNull(lessOrEquals(length(x.4), 1), 0), ifNull(equals(x.4, x_before.4), isNull(x.4)
+ and isNull(x_before.4)), ifNull(equals(x.4, x_after.4), isNull(x.4)
+ and isNull(x_after.4)), ifNull(equals(x.3, x_before.3), isNull(x.3)
+ and isNull(x_before.3)), ifNull(equals(x.3, x_after.3), isNull(x.3)
+ and isNull(x_after.3)), ifNull(greater(x.1, x_before.1), 0), ifNull(less(x.1, x_after.1), 0))), events_array, arrayRotateRight(events_array, 1), arrayRotateLeft(events_array, 1)))) AS af_tuple,
+ af_tuple.1 AS step_reached,
+ plus(af_tuple.1, 1) AS steps,
+ af_tuple.2 AS breakdown,
+ af_tuple.3 AS timings,
+ aggregation_target AS aggregation_target
FROM
- (SELECT *,
- if(latest_0 <= latest_1
- AND latest_1 <= latest_0 + INTERVAL 7 DAY
- AND latest_1 <= latest_2
- AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1
- AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps ,
- if(isNotNull(latest_1)
- AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time,
- if(isNotNull(latest_2)
- AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time,
- prop
+ (SELECT timestamp AS timestamp,
+ aggregation_target AS aggregation_target,
+ uuid AS uuid,
+ `$session_id` AS `$session_id`,
+ `$window_id` AS `$window_id`,
+ step_0 AS step_0,
+ step_1 AS step_1,
+ step_2 AS step_2,
+ prop_basic AS prop_basic,
+ prop,
+ prop_vals AS prop_vals,
+ prop_vals AS prop
FROM
- (SELECT aggregation_target, timestamp, step_0,
- latest_0,
- step_1,
- latest_1,
- step_2,
- min(latest_2) over (PARTITION by aggregation_target,
- prop
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 ,
- prop
- FROM
- (SELECT aggregation_target, timestamp, step_0,
- latest_0,
- step_1,
- latest_1,
- step_2,
- if(latest_2 < latest_1, NULL, latest_2) as latest_2 ,
- prop
- FROM
- (SELECT aggregation_target, timestamp, step_0,
- latest_0,
- step_1,
- min(latest_1) over (PARTITION by aggregation_target,
- prop
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1,
- step_2,
- min(latest_2) over (PARTITION by aggregation_target,
- prop
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 ,
- if(has(['technology', 'finance'], prop), prop, 'Other') as prop
- FROM
- (SELECT *,
- prop_vals as prop
- FROM
- (SELECT e.timestamp as timestamp,
- if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target,
- if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id,
- if(event = 'sign up', 1, 0) as step_0,
- if(step_0 = 1, timestamp, null) as latest_0,
- if(event = 'play movie', 1, 0) as step_1,
- if(step_1 = 1, timestamp, null) as latest_1,
- if(event = 'buy', 1, 0) as step_2,
- if(step_2 = 1, timestamp, null) as latest_2,
- replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic,
- prop_basic as prop,
- argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals
- FROM events e
- LEFT OUTER JOIN
- (SELECT distinct_id,
- argMax(person_id, version) as person_id
- FROM person_distinct_id2
- WHERE team_id = 99999
- AND distinct_id IN
- (SELECT distinct_id
- FROM events
- WHERE team_id = 99999
- AND event IN ['buy', 'play movie', 'sign up']
- AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC')
- AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') )
- GROUP BY distinct_id
- HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id
- LEFT JOIN
- (SELECT group_key,
- argMax(group_properties, _timestamp) AS group_properties_0
- FROM groups
- WHERE team_id = 99999
- AND group_type_index = 0
- GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key
- WHERE team_id = 99999
- AND event IN ['buy', 'play movie', 'sign up']
- AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC')
- AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC')
- AND (step_0 = 1
- OR step_1 = 1
- OR step_2 = 1) )))))
- WHERE step_0 = 1 ))
- GROUP BY aggregation_target,
- steps,
- prop
- HAVING steps = max(max_steps))
- WHERE steps IN [2, 3]
- AND arrayFlatten(array(prop)) = arrayFlatten(array('finance'))
- ORDER BY aggregation_target
- LIMIT 100
- OFFSET 0 SETTINGS max_ast_elements=1000000,
- max_expanded_ast_elements=1000000
- '''
-# ---
-# name: TestFunnelGroupBreakdownUDF.test_funnel_breakdown_group.5
- '''
-
- SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value,
- count(*) as count
- FROM events e
- LEFT JOIN
- (SELECT group_key,
- argMax(group_properties, _timestamp) AS group_properties_0
- FROM groups
- WHERE team_id = 99999
- AND group_type_index = 0
- GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key
- WHERE team_id = 99999
- AND event IN ['buy', 'play movie', 'sign up']
- AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC')
- AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC')
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 26
- OFFSET 0
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target,
+ e.uuid AS uuid,
+ e.`$session_id` AS `$session_id`,
+ e.`$window_id` AS `$window_id`,
+ if(equals(e.event, 'sign up'), 1, 0) AS step_0,
+ if(equals(e.event, 'play movie'), 1, 0) AS step_1,
+ if(equals(e.event, 'buy'), 1, 0) AS step_2,
+ ifNull(toString(e__group_0.properties___industry), '') AS prop_basic,
+ prop_basic AS prop,
+ argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals
+ FROM events AS e
+ LEFT OUTER JOIN
+ (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id,
+ person_distinct_id_overrides.distinct_id AS distinct_id
+ FROM person_distinct_id_overrides
+ WHERE equals(person_distinct_id_overrides.team_id, 99999)
+ GROUP BY person_distinct_id_overrides.distinct_id
+ HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id)
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 99999), equals(index, 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 99999), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))
+ GROUP BY aggregation_target
+ HAVING ifNull(greaterOrEquals(step_reached, 0), 0))
+ WHERE and(ifNull(greaterOrEquals(step_reached, 1), 0), ifNull(equals(arrayFlatten(array(breakdown)), arrayFlatten(array('finance'))), isNull(arrayFlatten(array(breakdown)))
+ and isNull(arrayFlatten(array('finance')))))
+ ORDER BY aggregation_target ASC) AS source
+ INNER JOIN
+ (SELECT person.id AS id
+ FROM person
+ WHERE equals(person.team_id, 99999)
+ GROUP BY person.id
+ HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id)
+ ORDER BY persons.id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=0,
+ allow_experimental_analyzer=1
'''
# ---
-# name: TestFunnelGroupBreakdownUDF.test_funnel_breakdown_group.6
+# name: TestFunnelGroupBreakdownUDF.test_funnel_breakdown_group.3
'''
-
- SELECT aggregation_target AS actor_id
+ SELECT persons.id,
+ persons.id AS id
FROM
- (SELECT aggregation_target,
- steps,
- avg(step_1_conversion_time) step_1_average_conversion_time_inner,
- avg(step_2_conversion_time) step_2_average_conversion_time_inner,
- median(step_1_conversion_time) step_1_median_conversion_time_inner,
- median(step_2_conversion_time) step_2_median_conversion_time_inner ,
- prop
+ (SELECT aggregation_target AS actor_id
FROM
- (SELECT aggregation_target,
- steps,
- max(steps) over (PARTITION BY aggregation_target,
- prop) as max_steps,
- step_1_conversion_time,
- step_2_conversion_time ,
- prop
+ (SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, prop, arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1), multiply(3, step_2)])))) AS events_array,
+ arrayJoin(aggregate_funnel_v3(3, 1209600, 'first_touch', 'ordered', groupUniqArray(prop), arrayFilter((x, x_before, x_after) -> not(and(ifNull(lessOrEquals(length(x.4), 1), 0), ifNull(equals(x.4, x_before.4), isNull(x.4)
+ and isNull(x_before.4)), ifNull(equals(x.4, x_after.4), isNull(x.4)
+ and isNull(x_after.4)), ifNull(equals(x.3, x_before.3), isNull(x.3)
+ and isNull(x_before.3)), ifNull(equals(x.3, x_after.3), isNull(x.3)
+ and isNull(x_after.3)), ifNull(greater(x.1, x_before.1), 0), ifNull(less(x.1, x_after.1), 0))), events_array, arrayRotateRight(events_array, 1), arrayRotateLeft(events_array, 1)))) AS af_tuple,
+ af_tuple.1 AS step_reached,
+ plus(af_tuple.1, 1) AS steps,
+ af_tuple.2 AS breakdown,
+ af_tuple.3 AS timings,
+ aggregation_target AS aggregation_target
FROM
- (SELECT *,
- if(latest_0 <= latest_1
- AND latest_1 <= latest_0 + INTERVAL 7 DAY
- AND latest_1 <= latest_2
- AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1
- AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps ,
- if(isNotNull(latest_1)
- AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time,
- if(isNotNull(latest_2)
- AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time,
- prop
+ (SELECT timestamp AS timestamp,
+ aggregation_target AS aggregation_target,
+ uuid AS uuid,
+ `$session_id` AS `$session_id`,
+ `$window_id` AS `$window_id`,
+ step_0 AS step_0,
+ step_1 AS step_1,
+ step_2 AS step_2,
+ prop_basic AS prop_basic,
+ prop,
+ prop_vals AS prop_vals,
+ prop_vals AS prop
FROM
- (SELECT aggregation_target, timestamp, step_0,
- latest_0,
- step_1,
- latest_1,
- step_2,
- min(latest_2) over (PARTITION by aggregation_target,
- prop
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 ,
- prop
- FROM
- (SELECT aggregation_target, timestamp, step_0,
- latest_0,
- step_1,
- latest_1,
- step_2,
- if(latest_2 < latest_1, NULL, latest_2) as latest_2 ,
- prop
- FROM
- (SELECT aggregation_target, timestamp, step_0,
- latest_0,
- step_1,
- min(latest_1) over (PARTITION by aggregation_target,
- prop
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1,
- step_2,
- min(latest_2) over (PARTITION by aggregation_target,
- prop
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 ,
- if(has(['technology', 'finance'], prop), prop, 'Other') as prop
- FROM
- (SELECT *,
- prop_vals as prop
- FROM
- (SELECT e.timestamp as timestamp,
- if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target,
- if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id,
- if(event = 'sign up', 1, 0) as step_0,
- if(step_0 = 1, timestamp, null) as latest_0,
- if(event = 'play movie', 1, 0) as step_1,
- if(step_1 = 1, timestamp, null) as latest_1,
- if(event = 'buy', 1, 0) as step_2,
- if(step_2 = 1, timestamp, null) as latest_2,
- replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic,
- prop_basic as prop,
- argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals
- FROM events e
- LEFT OUTER JOIN
- (SELECT distinct_id,
- argMax(person_id, version) as person_id
- FROM person_distinct_id2
- WHERE team_id = 99999
- AND distinct_id IN
- (SELECT distinct_id
- FROM events
- WHERE team_id = 99999
- AND event IN ['buy', 'play movie', 'sign up']
- AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC')
- AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') )
- GROUP BY distinct_id
- HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id
- LEFT JOIN
- (SELECT group_key,
- argMax(group_properties, _timestamp) AS group_properties_0
- FROM groups
- WHERE team_id = 99999
- AND group_type_index = 0
- GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key
- WHERE team_id = 99999
- AND event IN ['buy', 'play movie', 'sign up']
- AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC')
- AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC')
- AND (step_0 = 1
- OR step_1 = 1
- OR step_2 = 1) )))))
- WHERE step_0 = 1 ))
- GROUP BY aggregation_target,
- steps,
- prop
- HAVING steps = max(max_steps))
- WHERE steps IN [1, 2, 3]
- AND arrayFlatten(array(prop)) = arrayFlatten(array('technology'))
- ORDER BY aggregation_target
- LIMIT 100
- OFFSET 0 SETTINGS max_ast_elements=1000000,
- max_expanded_ast_elements=1000000
- '''
-# ---
-# name: TestFunnelGroupBreakdownUDF.test_funnel_breakdown_group.7
- '''
-
- SELECT replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS value,
- count(*) as count
- FROM events e
- LEFT JOIN
- (SELECT group_key,
- argMax(group_properties, _timestamp) AS group_properties_0
- FROM groups
- WHERE team_id = 99999
- AND group_type_index = 0
- GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key
- WHERE team_id = 99999
- AND event IN ['buy', 'play movie', 'sign up']
- AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC')
- AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC')
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 26
- OFFSET 0
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target,
+ e.uuid AS uuid,
+ e.`$session_id` AS `$session_id`,
+ e.`$window_id` AS `$window_id`,
+ if(equals(e.event, 'sign up'), 1, 0) AS step_0,
+ if(equals(e.event, 'play movie'), 1, 0) AS step_1,
+ if(equals(e.event, 'buy'), 1, 0) AS step_2,
+ ifNull(toString(e__group_0.properties___industry), '') AS prop_basic,
+ prop_basic AS prop,
+ argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals
+ FROM events AS e
+ LEFT OUTER JOIN
+ (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id,
+ person_distinct_id_overrides.distinct_id AS distinct_id
+ FROM person_distinct_id_overrides
+ WHERE equals(person_distinct_id_overrides.team_id, 99999)
+ GROUP BY person_distinct_id_overrides.distinct_id
+ HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id)
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 99999), equals(index, 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 99999), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))
+ GROUP BY aggregation_target
+ HAVING ifNull(greaterOrEquals(step_reached, 0), 0))
+ WHERE and(ifNull(greaterOrEquals(step_reached, 0), 0), ifNull(equals(arrayFlatten(array(breakdown)), arrayFlatten(array('technology'))), isNull(arrayFlatten(array(breakdown)))
+ and isNull(arrayFlatten(array('technology')))))
+ ORDER BY aggregation_target ASC) AS source
+ INNER JOIN
+ (SELECT person.id AS id
+ FROM person
+ WHERE equals(person.team_id, 99999)
+ GROUP BY person.id
+ HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id)
+ ORDER BY persons.id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=0,
+ allow_experimental_analyzer=1
'''
# ---
-# name: TestFunnelGroupBreakdownUDF.test_funnel_breakdown_group.8
+# name: TestFunnelGroupBreakdownUDF.test_funnel_breakdown_group.4
'''
-
- SELECT aggregation_target AS actor_id
+ SELECT persons.id,
+ persons.id AS id
FROM
- (SELECT aggregation_target,
- steps,
- avg(step_1_conversion_time) step_1_average_conversion_time_inner,
- avg(step_2_conversion_time) step_2_average_conversion_time_inner,
- median(step_1_conversion_time) step_1_median_conversion_time_inner,
- median(step_2_conversion_time) step_2_median_conversion_time_inner ,
- prop
+ (SELECT aggregation_target AS actor_id
FROM
- (SELECT aggregation_target,
- steps,
- max(steps) over (PARTITION BY aggregation_target,
- prop) as max_steps,
- step_1_conversion_time,
- step_2_conversion_time ,
- prop
+ (SELECT arraySort(t -> t.1, groupArray(tuple(accurateCastOrNull(timestamp, 'Float64'), uuid, prop, arrayFilter(x -> ifNull(notEquals(x, 0), 1), [multiply(1, step_0), multiply(2, step_1), multiply(3, step_2)])))) AS events_array,
+ arrayJoin(aggregate_funnel_v3(3, 1209600, 'first_touch', 'ordered', groupUniqArray(prop), arrayFilter((x, x_before, x_after) -> not(and(ifNull(lessOrEquals(length(x.4), 1), 0), ifNull(equals(x.4, x_before.4), isNull(x.4)
+ and isNull(x_before.4)), ifNull(equals(x.4, x_after.4), isNull(x.4)
+ and isNull(x_after.4)), ifNull(equals(x.3, x_before.3), isNull(x.3)
+ and isNull(x_before.3)), ifNull(equals(x.3, x_after.3), isNull(x.3)
+ and isNull(x_after.3)), ifNull(greater(x.1, x_before.1), 0), ifNull(less(x.1, x_after.1), 0))), events_array, arrayRotateRight(events_array, 1), arrayRotateLeft(events_array, 1)))) AS af_tuple,
+ af_tuple.1 AS step_reached,
+ plus(af_tuple.1, 1) AS steps,
+ af_tuple.2 AS breakdown,
+ af_tuple.3 AS timings,
+ aggregation_target AS aggregation_target
FROM
- (SELECT *,
- if(latest_0 <= latest_1
- AND latest_1 <= latest_0 + INTERVAL 7 DAY
- AND latest_1 <= latest_2
- AND latest_2 <= latest_0 + INTERVAL 7 DAY, 3, if(latest_0 <= latest_1
- AND latest_1 <= latest_0 + INTERVAL 7 DAY, 2, 1)) AS steps ,
- if(isNotNull(latest_1)
- AND latest_1 <= latest_0 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time,
- if(isNotNull(latest_2)
- AND latest_2 <= latest_1 + INTERVAL 7 DAY, dateDiff('second', toDateTime(latest_1), toDateTime(latest_2)), NULL) step_2_conversion_time,
- prop
+ (SELECT timestamp AS timestamp,
+ aggregation_target AS aggregation_target,
+ uuid AS uuid,
+ `$session_id` AS `$session_id`,
+ `$window_id` AS `$window_id`,
+ step_0 AS step_0,
+ step_1 AS step_1,
+ step_2 AS step_2,
+ prop_basic AS prop_basic,
+ prop,
+ prop_vals AS prop_vals,
+ prop_vals AS prop
FROM
- (SELECT aggregation_target, timestamp, step_0,
- latest_0,
- step_1,
- latest_1,
- step_2,
- min(latest_2) over (PARTITION by aggregation_target,
- prop
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 ,
- prop
- FROM
- (SELECT aggregation_target, timestamp, step_0,
- latest_0,
- step_1,
- latest_1,
- step_2,
- if(latest_2 < latest_1, NULL, latest_2) as latest_2 ,
- prop
- FROM
- (SELECT aggregation_target, timestamp, step_0,
- latest_0,
- step_1,
- min(latest_1) over (PARTITION by aggregation_target,
- prop
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1,
- step_2,
- min(latest_2) over (PARTITION by aggregation_target,
- prop
- ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_2 ,
- if(has(['technology', 'finance'], prop), prop, 'Other') as prop
- FROM
- (SELECT *,
- prop_vals as prop
- FROM
- (SELECT e.timestamp as timestamp,
- if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as aggregation_target,
- if(notEmpty(pdi.distinct_id), pdi.person_id, e.person_id) as person_id,
- if(event = 'sign up', 1, 0) as step_0,
- if(step_0 = 1, timestamp, null) as latest_0,
- if(event = 'play movie', 1, 0) as step_1,
- if(step_1 = 1, timestamp, null) as latest_1,
- if(event = 'buy', 1, 0) as step_2,
- if(step_2 = 1, timestamp, null) as latest_2,
- replaceRegexpAll(JSONExtractRaw(group_properties_0, 'industry'), '^"|"$', '') AS prop_basic,
- prop_basic as prop,
- argMinIf(prop, timestamp, isNotNull(prop)) over (PARTITION by aggregation_target) as prop_vals
- FROM events e
- LEFT OUTER JOIN
- (SELECT distinct_id,
- argMax(person_id, version) as person_id
- FROM person_distinct_id2
- WHERE team_id = 99999
- AND distinct_id IN
- (SELECT distinct_id
- FROM events
- WHERE team_id = 99999
- AND event IN ['buy', 'play movie', 'sign up']
- AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC')
- AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC') )
- GROUP BY distinct_id
- HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id
- LEFT JOIN
- (SELECT group_key,
- argMax(group_properties, _timestamp) AS group_properties_0
- FROM groups
- WHERE team_id = 99999
- AND group_type_index = 0
- GROUP BY group_key) groups_0 ON "$group_0" == groups_0.group_key
- WHERE team_id = 99999
- AND event IN ['buy', 'play movie', 'sign up']
- AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC')
- AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-08 23:59:59', 'UTC')
- AND (step_0 = 1
- OR step_1 = 1
- OR step_2 = 1) )))))
- WHERE step_0 = 1 ))
- GROUP BY aggregation_target,
- steps,
- prop
- HAVING steps = max(max_steps))
- WHERE steps IN [2, 3]
- AND arrayFlatten(array(prop)) = arrayFlatten(array('technology'))
- ORDER BY aggregation_target
- LIMIT 100
- OFFSET 0 SETTINGS max_ast_elements=1000000,
- max_expanded_ast_elements=1000000
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target,
+ e.uuid AS uuid,
+ e.`$session_id` AS `$session_id`,
+ e.`$window_id` AS `$window_id`,
+ if(equals(e.event, 'sign up'), 1, 0) AS step_0,
+ if(equals(e.event, 'play movie'), 1, 0) AS step_1,
+ if(equals(e.event, 'buy'), 1, 0) AS step_2,
+ ifNull(toString(e__group_0.properties___industry), '') AS prop_basic,
+ prop_basic AS prop,
+ argMinIf(prop, timestamp, isNotNull(prop)) OVER (PARTITION BY aggregation_target) AS prop_vals
+ FROM events AS e
+ LEFT OUTER JOIN
+ (SELECT argMax(person_distinct_id_overrides.person_id, person_distinct_id_overrides.version) AS person_id,
+ person_distinct_id_overrides.distinct_id AS distinct_id
+ FROM person_distinct_id_overrides
+ WHERE equals(person_distinct_id_overrides.team_id, 99999)
+ GROUP BY person_distinct_id_overrides.distinct_id
+ HAVING ifNull(equals(argMax(person_distinct_id_overrides.is_deleted, person_distinct_id_overrides.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__override ON equals(e.distinct_id, e__override.distinct_id)
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 99999), equals(index, 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 99999), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('buy', 'play movie', 'sign up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))
+ GROUP BY aggregation_target
+ HAVING ifNull(greaterOrEquals(step_reached, 0), 0))
+ WHERE and(ifNull(greaterOrEquals(step_reached, 1), 0), ifNull(equals(arrayFlatten(array(breakdown)), arrayFlatten(array('technology'))), isNull(arrayFlatten(array(breakdown)))
+ and isNull(arrayFlatten(array('technology')))))
+ ORDER BY aggregation_target ASC) AS source
+ INNER JOIN
+ (SELECT person.id AS id
+ FROM person
+ WHERE equals(person.team_id, 99999)
+ GROUP BY person.id
+ HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons ON equals(persons.id, source.actor_id)
+ ORDER BY persons.id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=0,
+ allow_experimental_analyzer=1
'''
# ---
diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_unordered.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_unordered.ambr
index 7982e95c56bed..2c4f5e6564765 100644
--- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_unordered.ambr
+++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_unordered.ambr
@@ -139,14 +139,14 @@
and isNull(max(max_steps))))
GROUP BY prop)
GROUP BY final_prop
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=23622320128,
- allow_experimental_analyzer=1
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=23622320128,
+ allow_experimental_analyzer=1
'''
# ---
# name: TestFunnelUnorderedStepsBreakdown.test_funnel_breakdown_correct_breakdown_props_are_chosen_for_step
@@ -303,14 +303,14 @@
and isNull(max(max_steps))))
GROUP BY prop)
GROUP BY final_prop
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=23622320128,
- allow_experimental_analyzer=1
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=23622320128,
+ allow_experimental_analyzer=1
'''
# ---
# name: TestFunnelUnorderedStepsBreakdown.test_funnel_step_multiple_breakdown_snapshot
@@ -453,14 +453,14 @@
and isNull(max(max_steps))))
GROUP BY prop)
GROUP BY final_prop
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=23622320128,
- allow_experimental_analyzer=1
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=23622320128,
+ allow_experimental_analyzer=1
'''
# ---
# name: TestUnorderedFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events
@@ -698,14 +698,14 @@
and isNull(max(max_steps))))
GROUP BY prop)
GROUP BY final_prop
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=23622320128,
- allow_experimental_analyzer=1
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=23622320128,
+ allow_experimental_analyzer=1
'''
# ---
# name: TestUnorderedFunnelGroupBreakdown.test_funnel_aggregate_by_groups_breakdown_group_person_on_events_poe_v2
@@ -943,14 +943,14 @@
and isNull(max(max_steps))))
GROUP BY prop)
GROUP BY final_prop
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=23622320128,
- allow_experimental_analyzer=1
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=23622320128,
+ allow_experimental_analyzer=1
'''
# ---
# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group
@@ -1209,14 +1209,14 @@
and isNull(max(max_steps))))
GROUP BY prop)
GROUP BY final_prop
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1,
- format_csv_allow_double_quotes=0,
- max_ast_elements=4000000,
- max_expanded_ast_elements=4000000,
- max_bytes_before_external_group_by=23622320128,
- allow_experimental_analyzer=1
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1,
+ format_csv_allow_double_quotes=0,
+ max_ast_elements=4000000,
+ max_expanded_ast_elements=4000000,
+ max_bytes_before_external_group_by=23622320128,
+ allow_experimental_analyzer=1
'''
# ---
# name: TestUnorderedFunnelGroupBreakdown.test_funnel_breakdown_group.1
diff --git a/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py b/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py
index ddef2c9567d6b..ee1122013788b 100644
--- a/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py
+++ b/posthog/hogql_queries/insights/funnels/test/breakdown_cases.py
@@ -1,8 +1,10 @@
+import ast
from collections.abc import Callable
from dataclasses import dataclass
from datetime import datetime
from string import ascii_lowercase
from typing import Any, Literal, Optional, Union, cast
+from unittest import skip
from posthog.constants import INSIGHT_FUNNELS, FunnelOrderType
from posthog.hogql_queries.insights.funnels.funnels_query_runner import FunnelsQueryRunner
@@ -460,6 +462,7 @@ def test_funnel_step_breakdown_event(self):
)
@also_test_with_materialized_columns(["$browser"])
+ @skip('Using "Other" as a breakdown is not yet implemented in HogQL Actors Queries')
def test_funnel_step_breakdown_event_with_other(self):
filters = {
"insight": INSIGHT_FUNNELS,
@@ -533,7 +536,8 @@ def test_funnel_step_breakdown_event_with_other(self):
people = journeys_for(events_by_person, self.team)
query = cast(FunnelsQuery, filter_to_query(filters))
- results = FunnelsQueryRunner(query=query, team=self.team).calculate().results
+ query_runner = FunnelsQueryRunner(query=query, team=self.team)
+ results = query_runner.calculate().results
results = sort_breakdown_funnel_results(results)
self._assert_funnel_breakdown_result_is_correct(
@@ -597,6 +601,7 @@ def test_funnel_step_breakdown_event_with_other(self):
self._get_actor_ids_at_step(filters, 2, "Other"),
[people["person1"].uuid],
)
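+        # Presumably the funnel query limit is breakdown_limit + 1, so the query can detect whether an "Other" bucket is needed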
+ self.assertEqual(2, cast(ast.Constant, query_runner.to_query().limit).value)
@also_test_with_materialized_columns(["$browser"])
def test_funnel_step_breakdown_event_no_type(self):
@@ -847,6 +852,7 @@ def test_funnel_step_breakdown_limit(self):
self.assertEqual([["5"], ["6"], ["7"], ["8"], ["9"], ["Other"]], breakdown_vals)
@also_test_with_materialized_columns(["some_breakdown_val"])
+ @skip('Using "Other" as a breakdown is not yet implemented in HogQL Actors Queries')
def test_funnel_step_custom_breakdown_limit_with_nulls(self):
filters = {
"insight": INSIGHT_FUNNELS,
diff --git a/posthog/hogql_queries/insights/funnels/test/test_funnel.py b/posthog/hogql_queries/insights/funnels/test/test_funnel.py
index ceaa344041d58..b92891822cb87 100644
--- a/posthog/hogql_queries/insights/funnels/test/test_funnel.py
+++ b/posthog/hogql_queries/insights/funnels/test/test_funnel.py
@@ -1,6 +1,6 @@
import uuid
from datetime import datetime
-from typing import cast
+from typing import cast, Any
from unittest.mock import Mock, patch
from django.test import override_settings
@@ -25,12 +25,11 @@
funnel_conversion_time_test_factory,
)
from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query
-from posthog.models import Action, Element
+from posthog.models import Action, Element, Team
from posthog.models.cohort.cohort import Cohort
from posthog.models.group.util import create_group
from posthog.models.group_type_mapping import GroupTypeMapping
from posthog.models.property_definition import PropertyDefinition
-from posthog.queries.funnels import ClickhouseFunnelActors
from posthog.schema import (
ActionsNode,
ActorsQuery,
@@ -60,6 +59,27 @@
snapshot_clickhouse_queries,
)
from posthog.test.test_journeys import journeys_for
+from posthog.hogql_queries.insights.funnels.test.test_funnel_persons import get_actors
+
+
+class PseudoFunnelActors:
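+    # Test-only stand-in for the legacy ClickhouseFunnelActors class (its import is removed above):
+    # it keeps the (person_filter, team) constructor and the 3-tuple get_actors() result shape the
+    # shared funnel test factories read, while delegating to the HogQL-based get_actors helper
+    # from test_funnel_persons.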
+ def __init__(self, person_filter: Any, team: Team):
+ self.filters = person_filter._data
+ self.team = team
+
+ def get_actors(self):
+ actors = get_actors(
+ self.filters,
+ self.team,
+ funnel_step=self.filters.get("funnel_step"),
+ funnel_step_breakdown=self.filters.get("funnel_step_breakdown"),
+ )
+
+ return (
+ None,
+ [{"id": x[0]} for x in actors],
+ None,
+ )
def _create_action(**kwargs):
@@ -75,7 +95,7 @@ class TestFunnelBreakdown(
ClickhouseTestMixin,
funnel_breakdown_test_factory( # type: ignore
FunnelOrderType.ORDERED,
- ClickhouseFunnelActors,
+ PseudoFunnelActors,
_create_action,
_create_person,
),
@@ -89,7 +109,7 @@ class TestFunnelGroupBreakdown(
ClickhouseTestMixin,
funnel_breakdown_group_test_factory( # type: ignore
FunnelOrderType.ORDERED,
- ClickhouseFunnelActors,
+ PseudoFunnelActors,
),
):
pass
@@ -98,7 +118,7 @@ class TestFunnelGroupBreakdown(
@patch("posthoganalytics.feature_enabled", new=Mock(return_value=False))
class TestFunnelConversionTime(
ClickhouseTestMixin,
- funnel_conversion_time_test_factory(FunnelOrderType.ORDERED, ClickhouseFunnelActors), # type: ignore
+ funnel_conversion_time_test_factory(FunnelOrderType.ORDERED, PseudoFunnelActors), # type: ignore
):
maxDiff = None
pass
diff --git a/posthog/hogql_queries/insights/funnels/test/test_funnel_udf.py b/posthog/hogql_queries/insights/funnels/test/test_funnel_udf.py
index 285dd9d127fac..48faf02ddc06f 100644
--- a/posthog/hogql_queries/insights/funnels/test/test_funnel_udf.py
+++ b/posthog/hogql_queries/insights/funnels/test/test_funnel_udf.py
@@ -13,7 +13,6 @@
)
from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query
from posthog.models import Action
-from posthog.queries.funnels import ClickhouseFunnelActors
from posthog.schema import FunnelsQuery, FunnelsQueryResponse
from posthog.test.base import (
ClickhouseTestMixin,
@@ -21,7 +20,7 @@
_create_person,
)
from posthog.test.test_journeys import journeys_for
-from test_funnel import funnel_test_factory
+from test_funnel import funnel_test_factory, PseudoFunnelActors
from posthog.hogql_queries.insights.funnels.test.conversion_time_cases import (
funnel_conversion_time_test_factory,
)
@@ -43,7 +42,7 @@ class TestFunnelBreakdownUDF(
ClickhouseTestMixin,
funnel_breakdown_test_factory( # type: ignore
FunnelOrderType.ORDERED,
- ClickhouseFunnelActors,
+ PseudoFunnelActors,
_create_action,
_create_person,
),
@@ -57,7 +56,7 @@ class TestFunnelGroupBreakdownUDF(
ClickhouseTestMixin,
funnel_breakdown_group_test_factory( # type: ignore
FunnelOrderType.ORDERED,
- ClickhouseFunnelActors,
+ PseudoFunnelActors,
),
):
pass
@@ -199,7 +198,7 @@ def test_excluded_after_time_expires(self):
@patch("posthoganalytics.feature_enabled", new=Mock(side_effect=use_udf_funnel_flag_side_effect))
class TestFunnelConversionTimeUDF(
ClickhouseTestMixin,
- funnel_conversion_time_test_factory(FunnelOrderType.ORDERED, ClickhouseFunnelActors), # type: ignore
+ funnel_conversion_time_test_factory(FunnelOrderType.ORDERED, PseudoFunnelActors), # type: ignore
):
maxDiff = None
pass
diff --git a/posthog/schema.py b/posthog/schema.py
index 2c7b546c928a7..8f3a71cf4ea99 100644
--- a/posthog/schema.py
+++ b/posthog/schema.py
@@ -6058,7 +6058,7 @@ class FunnelsActorsQuery(BaseModel):
" negative for dropped of persons."
),
)
- funnelStepBreakdown: Optional[Union[str, float, list[Union[str, float]]]] = Field(
+ funnelStepBreakdown: Optional[Union[int, str, float, list[Union[int, str, float]]]] = Field(
default=None,
description=(
"The breakdown value for which to get persons for. This is an array for person and event properties, a"
diff --git a/posthog/temporal/common/heartbeat_sync.py b/posthog/temporal/common/heartbeat_sync.py
index 35ac79515b9f4..cf775c3bf5cb0 100644
--- a/posthog/temporal/common/heartbeat_sync.py
+++ b/posthog/temporal/common/heartbeat_sync.py
@@ -11,6 +11,8 @@ def __init__(self, details: tuple[Any, ...] = (), factor: int = 12, logger: Opti
self.details: tuple[Any, ...] = details
self.factor = factor
self.logger = logger
+ self.stop_event: Optional[threading.Event] = None
+ self.heartbeat_thread: Optional[threading.Thread] = None
def log_debug(self, message: str, exc_info: Optional[Any] = None) -> None:
if self.logger:
diff --git a/posthog/temporal/data_imports/__init__.py b/posthog/temporal/data_imports/__init__.py
index cabeaf433d4e1..c59f20b05d8cf 100644
--- a/posthog/temporal/data_imports/__init__.py
+++ b/posthog/temporal/data_imports/__init__.py
@@ -2,10 +2,8 @@
ExternalDataJobWorkflow,
create_external_data_job_model_activity,
create_source_templates,
- import_data_activity,
import_data_activity_sync,
update_external_data_job_model,
- check_schedule_activity,
check_billing_limits_activity,
sync_new_schemas_activity,
)
@@ -15,10 +13,8 @@
ACTIVITIES = [
create_external_data_job_model_activity,
update_external_data_job_model,
- import_data_activity,
import_data_activity_sync,
create_source_templates,
- check_schedule_activity,
check_billing_limits_activity,
sync_new_schemas_activity,
]
diff --git a/posthog/temporal/data_imports/external_data_job.py b/posthog/temporal/data_imports/external_data_job.py
index 1820f462093ca..0bccbf9b95fa9 100644
--- a/posthog/temporal/data_imports/external_data_job.py
+++ b/posthog/temporal/data_imports/external_data_job.py
@@ -8,7 +8,6 @@
# TODO: remove dependency
from posthog.temporal.batch_exports.base import PostHogWorkflow
-from posthog.temporal.data_imports.util import is_posthog_team
from posthog.temporal.data_imports.workflow_activities.check_billing_limits import (
CheckBillingLimitsActivityInputs,
check_billing_limits_activity,
@@ -23,28 +22,19 @@
CreateExternalDataJobModelActivityInputs,
create_external_data_job_model_activity,
)
-from posthog.temporal.data_imports.workflow_activities.import_data import ImportDataActivityInputs, import_data_activity
+from posthog.temporal.data_imports.workflow_activities.import_data_sync import ImportDataActivityInputs
from posthog.utils import get_machine_id
-from posthog.warehouse.data_load.service import (
- a_delete_external_data_schedule,
- a_external_data_workflow_exists,
- a_sync_external_data_job_workflow,
- a_trigger_external_data_workflow,
-)
from posthog.warehouse.data_load.source_templates import create_warehouse_templates_for_source
from posthog.warehouse.external_data_source.jobs import (
- aget_running_job_for_schema,
- aupdate_external_job_status,
+ update_external_job_status,
)
from posthog.warehouse.models import (
ExternalDataJob,
- get_active_schemas_for_source_id,
ExternalDataSource,
- get_external_data_source,
)
-from posthog.temporal.common.logger import bind_temporal_worker_logger
-from posthog.warehouse.models.external_data_schema import aupdate_should_sync
+from posthog.temporal.common.logger import bind_temporal_worker_logger_sync
+from posthog.warehouse.models.external_data_schema import update_should_sync
Non_Retryable_Schema_Errors: dict[ExternalDataSource.Type, list[str]] = {
@@ -76,11 +66,15 @@ class UpdateExternalDataJobStatusInputs:
@activity.defn
-async def update_external_data_job_model(inputs: UpdateExternalDataJobStatusInputs) -> None:
- logger = await bind_temporal_worker_logger(team_id=inputs.team_id)
+def update_external_data_job_model(inputs: UpdateExternalDataJobStatusInputs) -> None:
+ logger = bind_temporal_worker_logger_sync(team_id=inputs.team_id)
if inputs.job_id is None:
- job: ExternalDataJob | None = await aget_running_job_for_schema(inputs.schema_id)
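+        # Inline synchronous ORM lookup replacing the removed aget_running_job_for_schema helper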
+ job: ExternalDataJob | None = (
+ ExternalDataJob.objects.filter(schema_id=inputs.schema_id, status=ExternalDataJob.Status.RUNNING)
+ .order_by("-created_at")
+ .first()
+ )
if job is None:
logger.info("No job to update status on")
return
@@ -94,7 +88,7 @@ async def update_external_data_job_model(inputs: UpdateExternalDataJobStatusInpu
f"External data job failed for external data schema {inputs.schema_id} with error: {inputs.internal_error}"
)
- source: ExternalDataSource = await get_external_data_source(inputs.source_id)
+ source: ExternalDataSource = ExternalDataSource.objects.get(pk=inputs.source_id)
non_retryable_errors = Non_Retryable_Schema_Errors.get(ExternalDataSource.Type(source.source_type))
if non_retryable_errors is not None:
@@ -113,9 +107,9 @@ async def update_external_data_job_model(inputs: UpdateExternalDataJobStatusInpu
"error": inputs.internal_error,
},
)
- await aupdate_should_sync(schema_id=inputs.schema_id, team_id=inputs.team_id, should_sync=False)
+ update_should_sync(schema_id=inputs.schema_id, team_id=inputs.team_id, should_sync=False)
- await aupdate_external_job_status(
+ update_external_job_status(
job_id=job_id,
status=inputs.status,
latest_error=inputs.latest_error,
@@ -134,34 +128,8 @@ class CreateSourceTemplateInputs:
@activity.defn
-async def create_source_templates(inputs: CreateSourceTemplateInputs) -> None:
- await create_warehouse_templates_for_source(team_id=inputs.team_id, run_id=inputs.run_id)
-
-
-@activity.defn
-async def check_schedule_activity(inputs: ExternalDataWorkflowInputs) -> bool:
- logger = await bind_temporal_worker_logger(team_id=inputs.team_id)
-
- # Creates schedules for all schemas if they don't exist yet, and then remove itself as a source schedule
- if inputs.external_data_schema_id is None:
- logger.info("Schema ID is none, creating schedules for schemas...")
- schemas = await get_active_schemas_for_source_id(
- team_id=inputs.team_id, source_id=inputs.external_data_source_id
- )
- for schema in schemas:
- if await a_external_data_workflow_exists(schema.id):
- await a_trigger_external_data_workflow(schema)
- logger.info(f"Schedule exists for schema {schema.id}. Triggered schedule")
- else:
- await a_sync_external_data_job_workflow(schema, create=True)
- logger.info(f"Created schedule for schema {schema.id}")
- # Delete the source schedule in favour of the schema schedules
- await a_delete_external_data_schedule(ExternalDataSource(id=inputs.external_data_source_id))
- logger.info(f"Deleted schedule for source {inputs.external_data_source_id}")
- return True
-
- logger.info("Schema ID is set. Continuing...")
- return False
+def create_source_templates(inputs: CreateSourceTemplateInputs) -> None:
+ create_warehouse_templates_for_source(team_id=inputs.team_id, run_id=inputs.run_id)
# TODO: update retry policies
@@ -174,21 +142,6 @@ def parse_inputs(inputs: list[str]) -> ExternalDataWorkflowInputs:
@workflow.run
async def run(self, inputs: ExternalDataWorkflowInputs):
- should_exit = await workflow.execute_activity(
- check_schedule_activity,
- inputs,
- start_to_close_timeout=dt.timedelta(minutes=1),
- retry_policy=RetryPolicy(
- initial_interval=dt.timedelta(seconds=10),
- maximum_interval=dt.timedelta(seconds=60),
- maximum_attempts=0,
- non_retryable_error_types=["NotNullViolation", "IntegrityError"],
- ),
- )
-
- if should_exit:
- return
-
assert inputs.external_data_schema_id is not None
update_inputs = UpdateExternalDataJobStatusInputs(
@@ -262,24 +215,12 @@ async def run(self, inputs: ExternalDataWorkflowInputs):
else {"start_to_close_timeout": dt.timedelta(hours=12), "retry_policy": RetryPolicy(maximum_attempts=3)}
)
- if is_posthog_team(inputs.team_id) and (
- source_type == ExternalDataSource.Type.POSTGRES or source_type == ExternalDataSource.Type.BIGQUERY
- ):
- # Sync activity for testing
- await workflow.execute_activity(
- import_data_activity_sync,
- job_inputs,
- heartbeat_timeout=dt.timedelta(minutes=5),
- **timeout_params,
- ) # type: ignore
- else:
- # Async activity for everyone else
- await workflow.execute_activity(
- import_data_activity,
- job_inputs,
- heartbeat_timeout=dt.timedelta(minutes=5),
- **timeout_params,
- ) # type: ignore
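+        # All source types now run the synchronous import activity; the async
+        # import_data_activity path (and its PostHog-team-only branch) is removed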
+ await workflow.execute_activity(
+ import_data_activity_sync,
+ job_inputs,
+ heartbeat_timeout=dt.timedelta(minutes=5),
+ **timeout_params,
+ ) # type: ignore
# Create source templates
await workflow.execute_activity(
diff --git a/posthog/temporal/data_imports/pipelines/pipeline.py b/posthog/temporal/data_imports/pipelines/pipeline.py
deleted file mode 100644
index 24099e698fb7c..0000000000000
--- a/posthog/temporal/data_imports/pipelines/pipeline.py
+++ /dev/null
@@ -1,266 +0,0 @@
-from concurrent.futures import ThreadPoolExecutor
-from dataclasses import dataclass
-from typing import Literal
-from uuid import UUID
-
-import dlt
-from django.conf import settings
-from dlt.pipeline.exceptions import PipelineStepFailed
-
-from asgiref.sync import async_to_sync
-import asyncio
-from posthog.settings.base_variables import TEST
-from structlog.typing import FilteringBoundLogger
-from dlt.common.libs.deltalake import get_delta_tables
-from dlt.load.exceptions import LoadClientJobRetry
-from dlt.sources import DltSource
-from deltalake.exceptions import DeltaError
-from collections import Counter
-
-from posthog.warehouse.data_load.validate_schema import update_last_synced_at, validate_schema_and_update_table
-from posthog.warehouse.models.external_data_job import ExternalDataJob, get_external_data_job
-from posthog.warehouse.models.external_data_schema import ExternalDataSchema, aget_schema_by_id
-from posthog.warehouse.models.external_data_source import ExternalDataSource
-from posthog.warehouse.models.table import DataWarehouseTable
-from posthog.temporal.data_imports.util import prepare_s3_files_for_querying
-
-
-@dataclass
-class PipelineInputs:
- source_id: UUID
- run_id: str
- schema_id: UUID
- dataset_name: str
- job_type: ExternalDataSource.Type
- team_id: int
-
-
-class DataImportPipeline:
- loader_file_format: Literal["parquet"] = "parquet"
-
- def __init__(
- self,
- inputs: PipelineInputs,
- source: DltSource,
- logger: FilteringBoundLogger,
- reset_pipeline: bool,
- incremental: bool = False,
- ):
- self.inputs = inputs
- self.logger = logger
-
- self._incremental = incremental
- self.refresh_dlt = reset_pipeline
- self.should_chunk_pipeline = (
- incremental
- and inputs.job_type != ExternalDataSource.Type.POSTGRES
- and inputs.job_type != ExternalDataSource.Type.MYSQL
- and inputs.job_type != ExternalDataSource.Type.MSSQL
- and inputs.job_type != ExternalDataSource.Type.SNOWFLAKE
- and inputs.job_type != ExternalDataSource.Type.BIGQUERY
- )
-
- if self.should_chunk_pipeline:
- # Incremental syncs: Assuming each page is 100 items for now so bound each run at 50_000 items
- self.source = source.add_limit(500)
- else:
- self.source = source
-
- def _get_pipeline_name(self):
- return f"{self.inputs.job_type}_pipeline_{self.inputs.team_id}_run_{self.inputs.schema_id}"
-
- def _get_destination(self):
- if TEST:
- credentials = {
- "aws_access_key_id": settings.AIRBYTE_BUCKET_KEY,
- "aws_secret_access_key": settings.AIRBYTE_BUCKET_SECRET,
- "endpoint_url": settings.OBJECT_STORAGE_ENDPOINT,
- "region_name": settings.AIRBYTE_BUCKET_REGION,
- "AWS_ALLOW_HTTP": "true",
- "AWS_S3_ALLOW_UNSAFE_RENAME": "true",
- }
- else:
- credentials = {
- "aws_access_key_id": settings.AIRBYTE_BUCKET_KEY,
- "aws_secret_access_key": settings.AIRBYTE_BUCKET_SECRET,
- "region_name": settings.AIRBYTE_BUCKET_REGION,
- "AWS_S3_ALLOW_UNSAFE_RENAME": "true",
- }
-
- return dlt.destinations.filesystem(
- credentials=credentials,
- bucket_url=settings.BUCKET_URL, # type: ignore
- )
-
- def _create_pipeline(self):
- pipeline_name = self._get_pipeline_name()
- destination = self._get_destination()
-
- dlt.config["normalize.parquet_normalizer.add_dlt_load_id"] = True
- dlt.config["normalize.parquet_normalizer.add_dlt_id"] = True
-
- return dlt.pipeline(
- pipeline_name=pipeline_name, destination=destination, dataset_name=self.inputs.dataset_name, progress="log"
- )
-
- async def _prepare_s3_files_for_querying(self, file_uris: list[str]):
- job: ExternalDataJob = await get_external_data_job(job_id=self.inputs.run_id)
- schema: ExternalDataSchema = await aget_schema_by_id(self.inputs.schema_id, self.inputs.team_id)
-
- prepare_s3_files_for_querying(job.folder_path(), schema.name, file_uris)
-
- def _run(self) -> dict[str, int]:
- if self.refresh_dlt:
- self.logger.info("Pipeline getting a full refresh due to reset_pipeline being set")
-
- pipeline = self._create_pipeline()
-
- total_counts: Counter[str] = Counter({})
-
- # Do chunking for incremental syncing on API based endpoints (e.g. not sql databases)
- if self.should_chunk_pipeline:
- # will get overwritten
- counts: Counter[str] = Counter({"start": 1})
- pipeline_runs = 0
-
- while counts:
- self.logger.info(f"Running incremental (non-sql) pipeline, run ${pipeline_runs}")
-
- try:
- pipeline.run(
- self.source,
- loader_file_format=self.loader_file_format,
- refresh="drop_sources" if self.refresh_dlt and pipeline_runs == 0 else None,
- )
- except PipelineStepFailed as e:
- # Remove once DLT support writing empty Delta files
- if isinstance(e.exception, LoadClientJobRetry):
- if "Generic S3 error" not in e.exception.retry_message:
- raise
- elif isinstance(e.exception, DeltaError):
- if e.exception.args[0] != "Generic error: No data source supplied to write command.":
- raise
- else:
- raise
-
- if pipeline.last_trace.last_normalize_info is not None:
- row_counts = pipeline.last_trace.last_normalize_info.row_counts
- else:
- row_counts = {}
- # Remove any DLT tables from the counts
- filtered_rows = dict(filter(lambda pair: not pair[0].startswith("_dlt"), row_counts.items()))
- counts = Counter(filtered_rows)
- total_counts = counts + total_counts
-
- if total_counts.total() > 0:
- delta_tables = get_delta_tables(pipeline)
-
- table_format = DataWarehouseTable.TableFormat.DeltaS3Wrapper
-
- # Workaround while we fix msising table_format on DLT resource
- if len(delta_tables.values()) == 0:
- table_format = DataWarehouseTable.TableFormat.Delta
-
- # There should only ever be one table here
- for table in delta_tables.values():
- self.logger.info("Compacting delta table")
- table.optimize.compact()
- table.vacuum(retention_hours=24, enforce_retention_duration=False, dry_run=False)
-
- file_uris = table.file_uris()
- self.logger.info(f"Preparing S3 files - total parquet files: {len(file_uris)}")
- async_to_sync(self._prepare_s3_files_for_querying)(file_uris)
-
- self.logger.info(f"Table format: {table_format}")
-
- async_to_sync(validate_schema_and_update_table)(
- run_id=self.inputs.run_id,
- team_id=self.inputs.team_id,
- schema_id=self.inputs.schema_id,
- table_schema=self.source.schema.tables,
- row_count=total_counts.total(),
- table_format=table_format,
- )
- else:
- self.logger.info("No table_counts, skipping validate_schema_and_update_table")
-
- pipeline_runs = pipeline_runs + 1
- else:
- self.logger.info("Running standard pipeline")
- try:
- pipeline.run(
- self.source,
- loader_file_format=self.loader_file_format,
- refresh="drop_sources" if self.refresh_dlt else None,
- )
- except PipelineStepFailed as e:
- # Remove once DLT support writing empty Delta files
- if isinstance(e.exception, LoadClientJobRetry):
- if "Generic S3 error" not in e.exception.retry_message:
- raise
- elif isinstance(e.exception, DeltaError):
- if e.exception.args[0] != "Generic error: No data source supplied to write command.":
- raise
- else:
- raise
-
- if pipeline.last_trace.last_normalize_info is not None:
- row_counts = pipeline.last_trace.last_normalize_info.row_counts
- else:
- row_counts = {}
-
- filtered_rows = dict(filter(lambda pair: not pair[0].startswith("_dlt"), row_counts.items()))
- counts = Counter(filtered_rows)
- total_counts = total_counts + counts
-
- if total_counts.total() > 0:
- delta_tables = get_delta_tables(pipeline)
-
- table_format = DataWarehouseTable.TableFormat.DeltaS3Wrapper
-
- # Workaround while we fix msising table_format on DLT resource
- if len(delta_tables.values()) == 0:
- table_format = DataWarehouseTable.TableFormat.Delta
-
- # There should only ever be one table here
- for table in delta_tables.values():
- self.logger.info("Compacting delta table")
- table.optimize.compact()
- table.vacuum(retention_hours=24, enforce_retention_duration=False, dry_run=False)
-
- file_uris = table.file_uris()
- self.logger.info(f"Preparing S3 files - total parquet files: {len(file_uris)}")
- async_to_sync(self._prepare_s3_files_for_querying)(file_uris)
-
- self.logger.info(f"Table format: {table_format}")
-
- async_to_sync(validate_schema_and_update_table)(
- run_id=self.inputs.run_id,
- team_id=self.inputs.team_id,
- schema_id=self.inputs.schema_id,
- table_schema=self.source.schema.tables,
- row_count=total_counts.total(),
- table_format=table_format,
- )
- else:
- self.logger.info("No table_counts, skipping validate_schema_and_update_table")
-
- # Update last_synced_at on schema
- async_to_sync(update_last_synced_at)(
- job_id=self.inputs.run_id, schema_id=str(self.inputs.schema_id), team_id=self.inputs.team_id
- )
-
- # Cleanup: delete local state from the file system
- pipeline.drop()
-
- return dict(total_counts)
-
- async def run(self) -> dict[str, int]:
- try:
- # Use a dedicated thread pool to not interfere with the heartbeater thread
- with ThreadPoolExecutor(max_workers=5) as pipeline_executor:
- loop = asyncio.get_event_loop()
- return await loop.run_in_executor(pipeline_executor, self._run)
- except PipelineStepFailed as e:
- self.logger.exception(f"Data import failed for endpoint with exception {e}", exc_info=e)
- raise
diff --git a/posthog/temporal/data_imports/pipelines/pipeline_sync.py b/posthog/temporal/data_imports/pipelines/pipeline_sync.py
index ac6d31433a808..e3ca8a4ecbdaa 100644
--- a/posthog/temporal/data_imports/pipelines/pipeline_sync.py
+++ b/posthog/temporal/data_imports/pipelines/pipeline_sync.py
@@ -1,3 +1,4 @@
+from dataclasses import dataclass
from typing import Any, Literal, Optional
from collections.abc import Iterator, Sequence
import uuid
@@ -34,7 +35,6 @@
from clickhouse_driver.errors import ServerException
from posthog.temporal.common.logger import bind_temporal_worker_logger_sync
-from posthog.temporal.data_imports.pipelines.pipeline import PipelineInputs
from posthog.warehouse.data_load.validate_schema import dlt_to_hogql_type
from posthog.warehouse.models.credential import get_or_create_datawarehouse_credential
from posthog.warehouse.models.external_data_job import ExternalDataJob
@@ -44,6 +44,16 @@
from posthog.temporal.data_imports.util import prepare_s3_files_for_querying
+@dataclass
+class PipelineInputs:
+ source_id: uuid.UUID
+ run_id: str
+ schema_id: uuid.UUID
+ dataset_name: str
+ job_type: ExternalDataSource.Type
+ team_id: int
+
+
class DataImportPipelineSync:
loader_file_format: Literal["parquet"] = "parquet"
@@ -141,16 +151,19 @@ def _iter_chunks(self, lst: list[Any], n: int) -> Iterator[list[Any]]:
yield lst[i : i + n]
# Monkey patch to fix large memory consumption until https://github.com/dlt-hub/dlt/pull/2031 gets merged in
- FilesystemDestinationClientConfiguration.delta_jobs_per_write = 1
- FilesystemClient.create_table_chain_completed_followup_jobs = create_table_chain_completed_followup_jobs # type: ignore
- FilesystemClient._iter_chunks = _iter_chunks # type: ignore
+        # For now, only apply this monkey patch on incremental syncs
+ if self._incremental:
+ FilesystemDestinationClientConfiguration.delta_jobs_per_write = 1
+ FilesystemClient.create_table_chain_completed_followup_jobs = create_table_chain_completed_followup_jobs # type: ignore
+ FilesystemClient._iter_chunks = _iter_chunks # type: ignore
+
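+        # The data-writer/loader settings below now apply to every sync; only the
+        # monkey patch above is restricted to incremental syncs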
+ dlt.config["data_writer.file_max_items"] = 500_000
+ dlt.config["data_writer.file_max_bytes"] = 500_000_000 # 500 MB
+ dlt.config["loader_parallelism_strategy"] = "table-sequential"
+ dlt.config["delta_jobs_per_write"] = 1
dlt.config["normalize.parquet_normalizer.add_dlt_load_id"] = True
dlt.config["normalize.parquet_normalizer.add_dlt_id"] = True
- dlt.config["data_writer.file_max_items"] = 500_000
- dlt.config["data_writer.file_max_bytes"] = 500_000_000 # 500 MB
- dlt.config["loader_parallelism_strategy"] = "table-sequential"
- dlt.config["delta_jobs_per_write"] = 1
return dlt.pipeline(
pipeline_name=pipeline_name, destination=destination, dataset_name=self.inputs.dataset_name, progress="log"
diff --git a/posthog/temporal/data_imports/pipelines/test/test_pipeline.py b/posthog/temporal/data_imports/pipelines/test/test_pipeline_sync.py
similarity index 73%
rename from posthog/temporal/data_imports/pipelines/test/test_pipeline.py
rename to posthog/temporal/data_imports/pipelines/test/test_pipeline_sync.py
index 965b77ca5f9ae..3b265f54a352a 100644
--- a/posthog/temporal/data_imports/pipelines/test/test_pipeline.py
+++ b/posthog/temporal/data_imports/pipelines/test/test_pipeline_sync.py
@@ -4,8 +4,7 @@
import pytest
import structlog
-from asgiref.sync import sync_to_async
-from posthog.temporal.data_imports.pipelines.pipeline import DataImportPipeline, PipelineInputs
+from posthog.temporal.data_imports.pipelines.pipeline_sync import DataImportPipelineSync, PipelineInputs
from posthog.temporal.data_imports.pipelines.stripe import stripe_source
from posthog.test.base import APIBaseTest
from posthog.warehouse.models.external_data_job import ExternalDataJob
@@ -14,8 +13,8 @@
class TestDataImportPipeline(APIBaseTest):
- async def _create_pipeline(self, schema_name: str, incremental: bool):
- source = await sync_to_async(ExternalDataSource.objects.create)(
+ def _create_pipeline(self, schema_name: str, incremental: bool):
+ source = ExternalDataSource.objects.create(
source_id=str(uuid.uuid4()),
connection_id=str(uuid.uuid4()),
destination_id=str(uuid.uuid4()),
@@ -23,13 +22,13 @@ async def _create_pipeline(self, schema_name: str, incremental: bool):
status="running",
source_type="Stripe",
)
- schema = await sync_to_async(ExternalDataSchema.objects.create)(
+ schema = ExternalDataSchema.objects.create(
name=schema_name,
team_id=self.team.pk,
source_id=source.pk,
source=source,
)
- job = await sync_to_async(ExternalDataJob.objects.create)(
+ job = ExternalDataJob.objects.create(
team_id=self.team.pk,
pipeline_id=source.pk,
pipeline=source,
@@ -40,7 +39,7 @@ async def _create_pipeline(self, schema_name: str, incremental: bool):
workflow_id=str(uuid.uuid4()),
)
- pipeline = DataImportPipeline(
+ pipeline = DataImportPipelineSync(
inputs=PipelineInputs(
source_id=source.pk,
run_id=str(job.pk),
@@ -65,45 +64,43 @@ async def _create_pipeline(self, schema_name: str, incremental: bool):
return pipeline
@pytest.mark.django_db(transaction=True)
- @pytest.mark.asyncio
- async def test_pipeline_non_incremental(self):
+ def test_pipeline_non_incremental(self):
def mock_create_pipeline(local_self: Any):
mock = MagicMock()
mock.last_trace.last_normalize_info.row_counts = {"customer": 1}
return mock
with (
- patch.object(DataImportPipeline, "_create_pipeline", mock_create_pipeline),
+ patch.object(DataImportPipelineSync, "_create_pipeline", mock_create_pipeline),
patch(
- "posthog.temporal.data_imports.pipelines.pipeline.validate_schema_and_update_table"
+ "posthog.temporal.data_imports.pipelines.pipeline_sync.validate_schema_and_update_table_sync"
) as mock_validate_schema_and_update_table,
- patch("posthog.temporal.data_imports.pipelines.pipeline.get_delta_tables"),
- patch("posthog.temporal.data_imports.pipelines.pipeline.update_last_synced_at"),
+ patch("posthog.temporal.data_imports.pipelines.pipeline_sync.get_delta_tables"),
+ patch("posthog.temporal.data_imports.pipelines.pipeline_sync.update_last_synced_at_sync"),
):
- pipeline = await self._create_pipeline("Customer", False)
- res = await pipeline.run()
+ pipeline = self._create_pipeline("Customer", False)
+ res = pipeline.run()
assert res.get("customer") == 1
assert mock_validate_schema_and_update_table.call_count == 1
@pytest.mark.django_db(transaction=True)
- @pytest.mark.asyncio
- async def test_pipeline_incremental(self):
+ def test_pipeline_incremental(self):
def mock_create_pipeline(local_self: Any):
mock = MagicMock()
type(mock.last_trace.last_normalize_info).row_counts = PropertyMock(side_effect=[{"customer": 1}, {}])
return mock
with (
- patch.object(DataImportPipeline, "_create_pipeline", mock_create_pipeline),
+ patch.object(DataImportPipelineSync, "_create_pipeline", mock_create_pipeline),
patch(
- "posthog.temporal.data_imports.pipelines.pipeline.validate_schema_and_update_table"
+ "posthog.temporal.data_imports.pipelines.pipeline_sync.validate_schema_and_update_table_sync"
) as mock_validate_schema_and_update_table,
- patch("posthog.temporal.data_imports.pipelines.pipeline.get_delta_tables"),
- patch("posthog.temporal.data_imports.pipelines.pipeline.update_last_synced_at"),
+ patch("posthog.temporal.data_imports.pipelines.pipeline_sync.get_delta_tables"),
+ patch("posthog.temporal.data_imports.pipelines.pipeline_sync.update_last_synced_at_sync"),
):
- pipeline = await self._create_pipeline("Customer", True)
- res = await pipeline.run()
+ pipeline = self._create_pipeline("Customer", True)
+ res = pipeline.run()
assert res.get("customer") == 1
assert mock_validate_schema_and_update_table.call_count == 2
diff --git a/posthog/temporal/data_imports/workflow_activities/create_job_model.py b/posthog/temporal/data_imports/workflow_activities/create_job_model.py
index 8d3577cf1ff23..02eb6aee7d52a 100644
--- a/posthog/temporal/data_imports/workflow_activities/create_job_model.py
+++ b/posthog/temporal/data_imports/workflow_activities/create_job_model.py
@@ -1,19 +1,15 @@
import dataclasses
import uuid
-from asgiref.sync import sync_to_async
from temporalio import activity
# TODO: remove dependency
-from posthog.warehouse.external_data_source.jobs import (
- acreate_external_data_job,
-)
-from posthog.warehouse.models import ExternalDataSource
+from posthog.warehouse.models import ExternalDataJob, ExternalDataSource
from posthog.warehouse.models.external_data_schema import (
ExternalDataSchema,
)
-from posthog.temporal.common.logger import bind_temporal_worker_logger
+from posthog.temporal.common.logger import bind_temporal_worker_logger_sync
@dataclasses.dataclass
@@ -24,25 +20,27 @@ class CreateExternalDataJobModelActivityInputs:
@activity.defn
-async def create_external_data_job_model_activity(
+def create_external_data_job_model_activity(
inputs: CreateExternalDataJobModelActivityInputs,
) -> tuple[str, bool, str]:
- logger = await bind_temporal_worker_logger(team_id=inputs.team_id)
+ logger = bind_temporal_worker_logger_sync(team_id=inputs.team_id)
try:
- job = await acreate_external_data_job(
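+        # Inline synchronous creation replacing the removed acreate_external_data_job helper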
+ job = ExternalDataJob.objects.create(
team_id=inputs.team_id,
- external_data_source_id=inputs.source_id,
- external_data_schema_id=inputs.schema_id,
+ pipeline_id=inputs.source_id,
+ schema_id=inputs.schema_id,
+ status=ExternalDataJob.Status.RUNNING,
+ rows_synced=0,
workflow_id=activity.info().workflow_id,
workflow_run_id=activity.info().workflow_run_id,
)
- schema = await sync_to_async(ExternalDataSchema.objects.get)(team_id=inputs.team_id, id=inputs.schema_id)
+ schema = ExternalDataSchema.objects.get(team_id=inputs.team_id, id=inputs.schema_id)
schema.status = ExternalDataSchema.Status.RUNNING
- await sync_to_async(schema.save)()
+ schema.save()
- source = await sync_to_async(ExternalDataSource.objects.get)(team_id=inputs.team_id, id=schema.source_id)
+ source: ExternalDataSource = schema.source
logger.info(
f"Created external data job for external data source {inputs.source_id}",
diff --git a/posthog/temporal/data_imports/workflow_activities/import_data.py b/posthog/temporal/data_imports/workflow_activities/import_data.py
deleted file mode 100644
index 26ce621f99a3d..0000000000000
--- a/posthog/temporal/data_imports/workflow_activities/import_data.py
+++ /dev/null
@@ -1,434 +0,0 @@
-import dataclasses
-import uuid
-from datetime import datetime
-from typing import Any
-
-from structlog.typing import FilteringBoundLogger
-from temporalio import activity
-
-from posthog.temporal.common.heartbeat import Heartbeater
-from posthog.temporal.common.logger import bind_temporal_worker_logger
-from posthog.temporal.data_imports.pipelines.bigquery import delete_table
-from posthog.temporal.data_imports.pipelines.helpers import aremove_reset_pipeline, aupdate_job_count
-
-from posthog.temporal.data_imports.pipelines.pipeline import DataImportPipeline, PipelineInputs
-from posthog.temporal.data_imports.util import is_posthog_team
-from posthog.warehouse.models import (
- ExternalDataJob,
- ExternalDataSource,
- get_external_data_job,
-)
-from posthog.warehouse.models.external_data_schema import (
- ExternalDataSchema,
- aget_schema_by_id,
-)
-from posthog.warehouse.models.ssh_tunnel import SSHTunnel
-
-
-@dataclasses.dataclass
-class ImportDataActivityInputs:
- team_id: int
- schema_id: uuid.UUID
- source_id: uuid.UUID
- run_id: str
-
-
-@activity.defn
-async def import_data_activity(inputs: ImportDataActivityInputs):
- async with Heartbeater(factor=30): # Every 10 secs
- model: ExternalDataJob = await get_external_data_job(
- job_id=inputs.run_id,
- )
-
- logger = await bind_temporal_worker_logger(team_id=inputs.team_id)
-
- logger.debug("Running *ASYNC* import_data")
-
- job_inputs = PipelineInputs(
- source_id=inputs.source_id,
- schema_id=inputs.schema_id,
- run_id=inputs.run_id,
- team_id=inputs.team_id,
- job_type=model.pipeline.source_type,
- dataset_name=model.folder_path(),
- )
-
- reset_pipeline = model.pipeline.job_inputs.get("reset_pipeline", "False") == "True"
-
- schema: ExternalDataSchema = await aget_schema_by_id(inputs.schema_id, inputs.team_id)
-
- endpoints = [schema.name]
-
- source = None
- if model.pipeline.source_type == ExternalDataSource.Type.STRIPE:
- from posthog.temporal.data_imports.pipelines.stripe import stripe_source
-
- stripe_secret_key = model.pipeline.job_inputs.get("stripe_secret_key", None)
- account_id = model.pipeline.job_inputs.get("stripe_account_id", None)
- if not stripe_secret_key:
- raise ValueError(f"Stripe secret key not found for job {model.id}")
-
- source = stripe_source(
- api_key=stripe_secret_key,
- account_id=account_id,
- endpoint=schema.name,
- team_id=inputs.team_id,
- job_id=inputs.run_id,
- is_incremental=schema.is_incremental,
- )
-
- return await _run(
- job_inputs=job_inputs,
- source=source,
- logger=logger,
- inputs=inputs,
- schema=schema,
- reset_pipeline=reset_pipeline,
- )
- elif model.pipeline.source_type == ExternalDataSource.Type.HUBSPOT:
- from posthog.temporal.data_imports.pipelines.hubspot import hubspot
- from posthog.temporal.data_imports.pipelines.hubspot.auth import (
- hubspot_refresh_access_token,
- )
-
- hubspot_access_code = model.pipeline.job_inputs.get("hubspot_secret_key", None)
- refresh_token = model.pipeline.job_inputs.get("hubspot_refresh_token", None)
- if not refresh_token:
- raise ValueError(f"Hubspot refresh token not found for job {model.id}")
-
- if not hubspot_access_code:
- hubspot_access_code = hubspot_refresh_access_token(refresh_token)
-
- source = hubspot(
- api_key=hubspot_access_code,
- refresh_token=refresh_token,
- endpoints=tuple(endpoints),
- )
-
- return await _run(
- job_inputs=job_inputs,
- source=source,
- logger=logger,
- inputs=inputs,
- schema=schema,
- reset_pipeline=reset_pipeline,
- )
- elif model.pipeline.source_type in [
- ExternalDataSource.Type.POSTGRES,
- ExternalDataSource.Type.MYSQL,
- ExternalDataSource.Type.MSSQL,
- ]:
- if is_posthog_team(inputs.team_id):
- from posthog.temporal.data_imports.pipelines.sql_database_v2 import (
- sql_source_for_type,
- )
- else:
- from posthog.temporal.data_imports.pipelines.sql_database import (
- sql_source_for_type,
- )
-
- host = model.pipeline.job_inputs.get("host")
- port = model.pipeline.job_inputs.get("port")
- user = model.pipeline.job_inputs.get("user")
- password = model.pipeline.job_inputs.get("password")
- database = model.pipeline.job_inputs.get("database")
- pg_schema = model.pipeline.job_inputs.get("schema")
-
- using_ssh_tunnel = str(model.pipeline.job_inputs.get("ssh_tunnel_enabled", False)) == "True"
- ssh_tunnel_host = model.pipeline.job_inputs.get("ssh_tunnel_host")
- ssh_tunnel_port = model.pipeline.job_inputs.get("ssh_tunnel_port")
- ssh_tunnel_auth_type = model.pipeline.job_inputs.get("ssh_tunnel_auth_type")
- ssh_tunnel_auth_type_username = model.pipeline.job_inputs.get("ssh_tunnel_auth_type_username")
- ssh_tunnel_auth_type_password = model.pipeline.job_inputs.get("ssh_tunnel_auth_type_password")
- ssh_tunnel_auth_type_passphrase = model.pipeline.job_inputs.get("ssh_tunnel_auth_type_passphrase")
- ssh_tunnel_auth_type_private_key = model.pipeline.job_inputs.get("ssh_tunnel_auth_type_private_key")
-
- ssh_tunnel = SSHTunnel(
- enabled=using_ssh_tunnel,
- host=ssh_tunnel_host,
- port=ssh_tunnel_port,
- auth_type=ssh_tunnel_auth_type,
- username=ssh_tunnel_auth_type_username,
- password=ssh_tunnel_auth_type_password,
- passphrase=ssh_tunnel_auth_type_passphrase,
- private_key=ssh_tunnel_auth_type_private_key,
- )
-
- if ssh_tunnel.enabled:
- with ssh_tunnel.get_tunnel(host, int(port)) as tunnel:
- if tunnel is None:
- raise Exception("Can't open tunnel to SSH server")
-
- source = sql_source_for_type(
- source_type=ExternalDataSource.Type(model.pipeline.source_type),
- host=tunnel.local_bind_host,
- port=tunnel.local_bind_port,
- user=user,
- password=password,
- database=database,
- sslmode="prefer",
- schema=pg_schema,
- table_names=endpoints,
- incremental_field=schema.sync_type_config.get("incremental_field")
- if schema.is_incremental
- else None,
- incremental_field_type=schema.sync_type_config.get("incremental_field_type")
- if schema.is_incremental
- else None,
- team_id=inputs.team_id,
- )
-
- return await _run(
- job_inputs=job_inputs,
- source=source,
- logger=logger,
- inputs=inputs,
- schema=schema,
- reset_pipeline=reset_pipeline,
- )
-
- source = sql_source_for_type(
- source_type=ExternalDataSource.Type(model.pipeline.source_type),
- host=host,
- port=port,
- user=user,
- password=password,
- database=database,
- sslmode="prefer",
- schema=pg_schema,
- table_names=endpoints,
- incremental_field=schema.sync_type_config.get("incremental_field") if schema.is_incremental else None,
- incremental_field_type=schema.sync_type_config.get("incremental_field_type")
- if schema.is_incremental
- else None,
- team_id=inputs.team_id,
- )
-
- return await _run(
- job_inputs=job_inputs,
- source=source,
- logger=logger,
- inputs=inputs,
- schema=schema,
- reset_pipeline=reset_pipeline,
- )
- elif model.pipeline.source_type == ExternalDataSource.Type.SNOWFLAKE:
- if is_posthog_team(inputs.team_id):
- from posthog.temporal.data_imports.pipelines.sql_database_v2 import (
- snowflake_source,
- )
- else:
- from posthog.temporal.data_imports.pipelines.sql_database import (
- snowflake_source,
- )
-
- account_id = model.pipeline.job_inputs.get("account_id")
- user = model.pipeline.job_inputs.get("user")
- password = model.pipeline.job_inputs.get("password")
- database = model.pipeline.job_inputs.get("database")
- warehouse = model.pipeline.job_inputs.get("warehouse")
- sf_schema = model.pipeline.job_inputs.get("schema")
- role = model.pipeline.job_inputs.get("role")
-
- source = snowflake_source(
- account_id=account_id,
- user=user,
- password=password,
- database=database,
- schema=sf_schema,
- warehouse=warehouse,
- role=role,
- table_names=endpoints,
- incremental_field=schema.sync_type_config.get("incremental_field") if schema.is_incremental else None,
- incremental_field_type=schema.sync_type_config.get("incremental_field_type")
- if schema.is_incremental
- else None,
- )
-
- return await _run(
- job_inputs=job_inputs,
- source=source,
- logger=logger,
- inputs=inputs,
- schema=schema,
- reset_pipeline=reset_pipeline,
- )
- elif model.pipeline.source_type == ExternalDataSource.Type.SALESFORCE:
- from posthog.models.integration import aget_integration_by_id
- from posthog.temporal.data_imports.pipelines.salesforce import (
- salesforce_source,
- )
- from posthog.temporal.data_imports.pipelines.salesforce.auth import (
- salesforce_refresh_access_token,
- )
-
- salesforce_integration_id = model.pipeline.job_inputs.get("salesforce_integration_id", None)
-
- if not salesforce_integration_id:
- raise ValueError(f"Salesforce integration not found for job {model.id}")
-
- integration = await aget_integration_by_id(integration_id=salesforce_integration_id, team_id=inputs.team_id)
- salesforce_refresh_token = integration.refresh_token
-
- if not salesforce_refresh_token:
- raise ValueError(f"Salesforce refresh token not found for job {model.id}")
-
- salesforce_access_token = integration.access_token
-
- if not salesforce_access_token:
- salesforce_access_token = salesforce_refresh_access_token(salesforce_refresh_token)
-
- salesforce_instance_url = integration.config.get("instance_url")
-
- source = salesforce_source(
- instance_url=salesforce_instance_url,
- access_token=salesforce_access_token,
- refresh_token=salesforce_refresh_token,
- endpoint=schema.name,
- team_id=inputs.team_id,
- job_id=inputs.run_id,
- is_incremental=schema.is_incremental,
- )
-
- return await _run(
- job_inputs=job_inputs,
- source=source,
- logger=logger,
- inputs=inputs,
- schema=schema,
- reset_pipeline=reset_pipeline,
- )
-
- elif model.pipeline.source_type == ExternalDataSource.Type.ZENDESK:
- from posthog.temporal.data_imports.pipelines.zendesk import zendesk_source
-
- source = zendesk_source(
- subdomain=model.pipeline.job_inputs.get("zendesk_subdomain"),
- api_key=model.pipeline.job_inputs.get("zendesk_api_key"),
- email_address=model.pipeline.job_inputs.get("zendesk_email_address"),
- endpoint=schema.name,
- team_id=inputs.team_id,
- job_id=inputs.run_id,
- is_incremental=schema.is_incremental,
- )
-
- return await _run(
- job_inputs=job_inputs,
- source=source,
- logger=logger,
- inputs=inputs,
- schema=schema,
- reset_pipeline=reset_pipeline,
- )
- elif model.pipeline.source_type == ExternalDataSource.Type.VITALLY:
- from posthog.temporal.data_imports.pipelines.vitally import vitally_source
-
- source = vitally_source(
- secret_token=model.pipeline.job_inputs.get("secret_token"),
- region=model.pipeline.job_inputs.get("region"),
- subdomain=model.pipeline.job_inputs.get("subdomain"),
- endpoint=schema.name,
- team_id=inputs.team_id,
- job_id=inputs.run_id,
- is_incremental=schema.is_incremental,
- )
-
- return await _run(
- job_inputs=job_inputs,
- source=source,
- logger=logger,
- inputs=inputs,
- schema=schema,
- reset_pipeline=reset_pipeline,
- )
- elif model.pipeline.source_type == ExternalDataSource.Type.BIGQUERY:
- from posthog.temporal.data_imports.pipelines.sql_database_v2 import (
- bigquery_source,
- )
-
- dataset_id = model.pipeline.job_inputs.get("dataset_id")
- project_id = model.pipeline.job_inputs.get("project_id")
- private_key = model.pipeline.job_inputs.get("private_key")
- private_key_id = model.pipeline.job_inputs.get("private_key_id")
- client_email = model.pipeline.job_inputs.get("client_email")
- token_uri = model.pipeline.job_inputs.get("token_uri")
-
- destination_table = f"{project_id}.{dataset_id}.__posthog_import_{inputs.run_id}_{str(datetime.now().timestamp()).replace('.', '')}"
- try:
- source = bigquery_source(
- dataset_id=dataset_id,
- project_id=project_id,
- private_key=private_key,
- private_key_id=private_key_id,
- client_email=client_email,
- token_uri=token_uri,
- table_name=schema.name,
- bq_destination_table_id=destination_table,
- incremental_field=schema.sync_type_config.get("incremental_field")
- if schema.is_incremental
- else None,
- incremental_field_type=schema.sync_type_config.get("incremental_field_type")
- if schema.is_incremental
- else None,
- )
-
- await _run(
- job_inputs=job_inputs,
- source=source,
- logger=logger,
- inputs=inputs,
- schema=schema,
- reset_pipeline=reset_pipeline,
- )
- except:
- raise
- finally:
- # Delete the destination table (if it exists) after we're done with it
- delete_table(
- table_id=destination_table,
- project_id=project_id,
- private_key=private_key,
- private_key_id=private_key_id,
- client_email=client_email,
- token_uri=token_uri,
- )
- logger.info(f"Deleting bigquery temp destination table: {destination_table}")
- elif model.pipeline.source_type == ExternalDataSource.Type.CHARGEBEE:
- from posthog.temporal.data_imports.pipelines.chargebee import (
- chargebee_source,
- )
-
- source = chargebee_source(
- api_key=model.pipeline.job_inputs.get("api_key"),
- site_name=model.pipeline.job_inputs.get("site_name"),
- endpoint=schema.name,
- team_id=inputs.team_id,
- job_id=inputs.run_id,
- is_incremental=schema.is_incremental,
- )
-
- return await _run(
- job_inputs=job_inputs,
- source=source,
- logger=logger,
- inputs=inputs,
- schema=schema,
- reset_pipeline=reset_pipeline,
- )
- else:
- raise ValueError(f"Source type {model.pipeline.source_type} not supported")
-
-
-async def _run(
- job_inputs: PipelineInputs,
- source: Any,
- logger: FilteringBoundLogger,
- inputs: ImportDataActivityInputs,
- schema: ExternalDataSchema,
- reset_pipeline: bool,
-):
- table_row_counts = await DataImportPipeline(job_inputs, source, logger, reset_pipeline, schema.is_incremental).run()
- total_rows_synced = sum(table_row_counts.values())
-
- await aupdate_job_count(inputs.run_id, inputs.team_id, total_rows_synced)
- await aremove_reset_pipeline(inputs.source_id)
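
Note on the removed `_run` helper above: both the old async version and its synchronous replacement reduce the pipeline's per-table row counts to a single total before recording it on the job. A minimal, self-contained sketch of that aggregation step (illustrative only, not PostHog code):

def total_rows_synced(table_row_counts: dict[str, int]) -> int:
    # The pipeline returns a mapping of table name -> rows loaded;
    # the activity stores the sum on the job record.
    return sum(table_row_counts.values())


if __name__ == "__main__":
    assert total_rows_synced({"customer": 1200, "charge": 340}) == 1540
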
diff --git a/posthog/temporal/data_imports/workflow_activities/import_data_sync.py b/posthog/temporal/data_imports/workflow_activities/import_data_sync.py
index 9fc9489fabc94..ddb242483ab31 100644
--- a/posthog/temporal/data_imports/workflow_activities/import_data_sync.py
+++ b/posthog/temporal/data_imports/workflow_activities/import_data_sync.py
@@ -1,3 +1,5 @@
+import dataclasses
+import uuid
from datetime import datetime
from typing import Any
@@ -5,13 +7,12 @@
from temporalio import activity
+from posthog.models.integration import Integration
from posthog.temporal.common.heartbeat_sync import HeartbeaterSync
from posthog.temporal.data_imports.pipelines.bigquery import delete_table
-from posthog.temporal.data_imports.pipelines.pipeline import PipelineInputs
-from posthog.temporal.data_imports.pipelines.pipeline_sync import DataImportPipelineSync
+from posthog.temporal.data_imports.pipelines.pipeline_sync import DataImportPipelineSync, PipelineInputs
from posthog.temporal.data_imports.util import is_posthog_team
-from posthog.temporal.data_imports.workflow_activities.import_data import ImportDataActivityInputs
from posthog.warehouse.models import (
ExternalDataJob,
ExternalDataSource,
@@ -22,6 +23,14 @@
from posthog.warehouse.models.ssh_tunnel import SSHTunnel
+@dataclasses.dataclass
+class ImportDataActivityInputs:
+ team_id: int
+ schema_id: uuid.UUID
+ source_id: uuid.UUID
+ run_id: str
+
+
@activity.defn
def import_data_activity_sync(inputs: ImportDataActivityInputs):
logger = bind_temporal_worker_logger_sync(team_id=inputs.team_id)
@@ -53,7 +62,60 @@ def import_data_activity_sync(inputs: ImportDataActivityInputs):
endpoints = [schema.name]
source = None
- if model.pipeline.source_type in [
+ if model.pipeline.source_type == ExternalDataSource.Type.STRIPE:
+ from posthog.temporal.data_imports.pipelines.stripe import stripe_source
+
+ stripe_secret_key = model.pipeline.job_inputs.get("stripe_secret_key", None)
+ account_id = model.pipeline.job_inputs.get("stripe_account_id", None)
+ if not stripe_secret_key:
+ raise ValueError(f"Stripe secret key not found for job {model.id}")
+
+ source = stripe_source(
+ api_key=stripe_secret_key,
+ account_id=account_id,
+ endpoint=schema.name,
+ team_id=inputs.team_id,
+ job_id=inputs.run_id,
+ is_incremental=schema.is_incremental,
+ )
+
+ return _run(
+ job_inputs=job_inputs,
+ source=source,
+ logger=logger,
+ inputs=inputs,
+ schema=schema,
+ reset_pipeline=reset_pipeline,
+ )
+ elif model.pipeline.source_type == ExternalDataSource.Type.HUBSPOT:
+ from posthog.temporal.data_imports.pipelines.hubspot import hubspot
+ from posthog.temporal.data_imports.pipelines.hubspot.auth import (
+ hubspot_refresh_access_token,
+ )
+
+ hubspot_access_code = model.pipeline.job_inputs.get("hubspot_secret_key", None)
+ refresh_token = model.pipeline.job_inputs.get("hubspot_refresh_token", None)
+ if not refresh_token:
+ raise ValueError(f"Hubspot refresh token not found for job {model.id}")
+
+ if not hubspot_access_code:
+ hubspot_access_code = hubspot_refresh_access_token(refresh_token)
+
+ source = hubspot(
+ api_key=hubspot_access_code,
+ refresh_token=refresh_token,
+ endpoints=tuple(endpoints),
+ )
+
+ return _run(
+ job_inputs=job_inputs,
+ source=source,
+ logger=logger,
+ inputs=inputs,
+ schema=schema,
+ reset_pipeline=reset_pipeline,
+ )
+ elif model.pipeline.source_type in [
ExternalDataSource.Type.POSTGRES,
ExternalDataSource.Type.MYSQL,
ExternalDataSource.Type.MSSQL,
@@ -140,6 +202,134 @@ def import_data_activity_sync(inputs: ImportDataActivityInputs):
team_id=inputs.team_id,
)
+ return _run(
+ job_inputs=job_inputs,
+ source=source,
+ logger=logger,
+ inputs=inputs,
+ schema=schema,
+ reset_pipeline=reset_pipeline,
+ )
+ elif model.pipeline.source_type == ExternalDataSource.Type.SNOWFLAKE:
+ if is_posthog_team(inputs.team_id):
+ from posthog.temporal.data_imports.pipelines.sql_database_v2 import (
+ snowflake_source,
+ )
+ else:
+ from posthog.temporal.data_imports.pipelines.sql_database import (
+ snowflake_source,
+ )
+
+ account_id = model.pipeline.job_inputs.get("account_id")
+ user = model.pipeline.job_inputs.get("user")
+ password = model.pipeline.job_inputs.get("password")
+ database = model.pipeline.job_inputs.get("database")
+ warehouse = model.pipeline.job_inputs.get("warehouse")
+ sf_schema = model.pipeline.job_inputs.get("schema")
+ role = model.pipeline.job_inputs.get("role")
+
+ source = snowflake_source(
+ account_id=account_id,
+ user=user,
+ password=password,
+ database=database,
+ schema=sf_schema,
+ warehouse=warehouse,
+ role=role,
+ table_names=endpoints,
+ incremental_field=schema.sync_type_config.get("incremental_field") if schema.is_incremental else None,
+ incremental_field_type=schema.sync_type_config.get("incremental_field_type")
+ if schema.is_incremental
+ else None,
+ )
+
+ return _run(
+ job_inputs=job_inputs,
+ source=source,
+ logger=logger,
+ inputs=inputs,
+ schema=schema,
+ reset_pipeline=reset_pipeline,
+ )
+ elif model.pipeline.source_type == ExternalDataSource.Type.SALESFORCE:
+ from posthog.temporal.data_imports.pipelines.salesforce import (
+ salesforce_source,
+ )
+ from posthog.temporal.data_imports.pipelines.salesforce.auth import (
+ salesforce_refresh_access_token,
+ )
+
+ salesforce_integration_id = model.pipeline.job_inputs.get("salesforce_integration_id", None)
+
+ if not salesforce_integration_id:
+ raise ValueError(f"Salesforce integration not found for job {model.id}")
+
+ integration = Integration.objects.get(id=salesforce_integration_id, team_id=inputs.team_id)
+ salesforce_refresh_token = integration.refresh_token
+
+ if not salesforce_refresh_token:
+ raise ValueError(f"Salesforce refresh token not found for job {model.id}")
+
+ salesforce_access_token = integration.access_token
+
+ if not salesforce_access_token:
+ salesforce_access_token = salesforce_refresh_access_token(salesforce_refresh_token)
+
+ salesforce_instance_url = integration.config.get("instance_url")
+
+ source = salesforce_source(
+ instance_url=salesforce_instance_url,
+ access_token=salesforce_access_token,
+ refresh_token=salesforce_refresh_token,
+ endpoint=schema.name,
+ team_id=inputs.team_id,
+ job_id=inputs.run_id,
+ is_incremental=schema.is_incremental,
+ )
+
+ return _run(
+ job_inputs=job_inputs,
+ source=source,
+ logger=logger,
+ inputs=inputs,
+ schema=schema,
+ reset_pipeline=reset_pipeline,
+ )
+
+ elif model.pipeline.source_type == ExternalDataSource.Type.ZENDESK:
+ from posthog.temporal.data_imports.pipelines.zendesk import zendesk_source
+
+ source = zendesk_source(
+ subdomain=model.pipeline.job_inputs.get("zendesk_subdomain"),
+ api_key=model.pipeline.job_inputs.get("zendesk_api_key"),
+ email_address=model.pipeline.job_inputs.get("zendesk_email_address"),
+ endpoint=schema.name,
+ team_id=inputs.team_id,
+ job_id=inputs.run_id,
+ is_incremental=schema.is_incremental,
+ )
+
+ return _run(
+ job_inputs=job_inputs,
+ source=source,
+ logger=logger,
+ inputs=inputs,
+ schema=schema,
+ reset_pipeline=reset_pipeline,
+ )
+ elif model.pipeline.source_type == ExternalDataSource.Type.VITALLY:
+ from posthog.temporal.data_imports.pipelines.vitally import vitally_source
+
+ source = vitally_source(
+ secret_token=model.pipeline.job_inputs.get("secret_token"),
+ region=model.pipeline.job_inputs.get("region"),
+ subdomain=model.pipeline.job_inputs.get("subdomain"),
+ endpoint=schema.name,
+ team_id=inputs.team_id,
+ job_id=inputs.run_id,
+ is_incremental=schema.is_incremental,
+ )
+
return _run(
job_inputs=job_inputs,
source=source,
@@ -198,6 +388,28 @@ def import_data_activity_sync(inputs: ImportDataActivityInputs):
token_uri=token_uri,
)
logger.info(f"Deleting bigquery temp destination table: {destination_table}")
+ elif model.pipeline.source_type == ExternalDataSource.Type.CHARGEBEE:
+ from posthog.temporal.data_imports.pipelines.chargebee import (
+ chargebee_source,
+ )
+
+ source = chargebee_source(
+ api_key=model.pipeline.job_inputs.get("api_key"),
+ site_name=model.pipeline.job_inputs.get("site_name"),
+ endpoint=schema.name,
+ team_id=inputs.team_id,
+ job_id=inputs.run_id,
+ is_incremental=schema.is_incremental,
+ )
+
+ return _run(
+ job_inputs=job_inputs,
+ source=source,
+ logger=logger,
+ inputs=inputs,
+ schema=schema,
+ reset_pipeline=reset_pipeline,
+ )
else:
raise ValueError(f"Source type {model.pipeline.source_type} not supported")
diff --git a/posthog/temporal/data_imports/workflow_activities/sync_new_schemas.py b/posthog/temporal/data_imports/workflow_activities/sync_new_schemas.py
index 34e27b0cd49ff..2bc916d3ec9d4 100644
--- a/posthog/temporal/data_imports/workflow_activities/sync_new_schemas.py
+++ b/posthog/temporal/data_imports/workflow_activities/sync_new_schemas.py
@@ -1,9 +1,8 @@
import dataclasses
-from asgiref.sync import sync_to_async
from temporalio import activity
-from posthog.temporal.common.logger import bind_temporal_worker_logger
+from posthog.temporal.common.logger import bind_temporal_worker_logger_sync
from posthog.temporal.data_imports.pipelines.schemas import PIPELINE_TYPE_SCHEMA_DEFAULT_MAPPING
from posthog.warehouse.models import sync_old_schemas_with_new_schemas, ExternalDataSource
@@ -21,12 +20,12 @@ class SyncNewSchemasActivityInputs:
@activity.defn
-async def sync_new_schemas_activity(inputs: SyncNewSchemasActivityInputs) -> None:
- logger = await bind_temporal_worker_logger(team_id=inputs.team_id)
+def sync_new_schemas_activity(inputs: SyncNewSchemasActivityInputs) -> None:
+ logger = bind_temporal_worker_logger_sync(team_id=inputs.team_id)
logger.info("Syncing new -> old schemas")
- source = await sync_to_async(ExternalDataSource.objects.get)(team_id=inputs.team_id, id=inputs.source_id)
+ source = ExternalDataSource.objects.get(team_id=inputs.team_id, id=inputs.source_id)
schemas_to_sync: list[str] = []
@@ -65,8 +64,8 @@ async def sync_new_schemas_activity(inputs: SyncNewSchemasActivityInputs) -> Non
private_key=ssh_tunnel_auth_type_private_key,
)
- sql_schemas = await sync_to_async(get_sql_schemas_for_source_type)(
- source.source_type, host, port, database, user, password, db_schema, ssh_tunnel
+ sql_schemas = get_sql_schemas_for_source_type(
+ ExternalDataSource.Type(source.source_type), host, port, database, user, password, db_schema, ssh_tunnel
)
schemas_to_sync = list(sql_schemas.keys())
@@ -82,9 +81,7 @@ async def sync_new_schemas_activity(inputs: SyncNewSchemasActivityInputs) -> Non
sf_schema = source.job_inputs.get("schema")
role = source.job_inputs.get("role")
- sql_schemas = await sync_to_async(get_snowflake_schemas)(
- account_id, database, warehouse, user, password, sf_schema, role
- )
+ sql_schemas = get_snowflake_schemas(account_id, database, warehouse, user, password, sf_schema, role)
schemas_to_sync = list(sql_schemas.keys())
else:
@@ -92,7 +89,7 @@ async def sync_new_schemas_activity(inputs: SyncNewSchemasActivityInputs) -> Non
# TODO: this could cause a race condition where each schema worker creates the missing schema
- schemas_created = await sync_to_async(sync_old_schemas_with_new_schemas)(
+ schemas_created = sync_old_schemas_with_new_schemas(
schemas_to_sync,
source_id=inputs.source_id,
team_id=inputs.team_id,
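
The sync_new_schemas change follows the same async-to-sync pattern as the rest of the PR: drop the `sync_to_async` wrappers and call the ORM and schema helpers directly from a plain function. A small, self-contained sketch of the set difference the activity ultimately computes; the helper name and behaviour here are an assumption, not PostHog's implementation:

def schemas_to_create(reported: list[str], existing: set[str]) -> list[str]:
    # Assumed gist of syncing new -> old schemas: anything the source reports
    # that we aren't tracking yet is a candidate for creation.
    return [name for name in reported if name not in existing]

if __name__ == "__main__":
    assert schemas_to_create(["orders", "customers", "refunds"], {"customers"}) == ["orders", "refunds"]
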
diff --git a/posthog/temporal/tests/batch_exports/test_import_data.py b/posthog/temporal/tests/batch_exports/test_import_data.py
index 229f063cc9b43..93d20fbd44b23 100644
--- a/posthog/temporal/tests/batch_exports/test_import_data.py
+++ b/posthog/temporal/tests/batch_exports/test_import_data.py
@@ -1,9 +1,9 @@
from typing import Any
from unittest import mock
import pytest
-from asgiref.sync import sync_to_async
from posthog.models.team.team import Team
-from posthog.temporal.data_imports.workflow_activities.import_data import ImportDataActivityInputs, import_data_activity
+from posthog.temporal.data_imports import import_data_activity_sync
+from posthog.temporal.data_imports.workflow_activities.import_data_sync import ImportDataActivityInputs
from posthog.warehouse.models.credential import DataWarehouseCredential
from posthog.warehouse.models.external_data_job import ExternalDataJob
from posthog.warehouse.models.external_data_schema import ExternalDataSchema
@@ -12,8 +12,8 @@
from posthog.warehouse.models.table import DataWarehouseTable
-async def _setup(team: Team, job_inputs: dict[Any, Any]) -> ImportDataActivityInputs:
- source = await sync_to_async(ExternalDataSource.objects.create)(
+def _setup(team: Team, job_inputs: dict[Any, Any]) -> ImportDataActivityInputs:
+ source = ExternalDataSource.objects.create(
team=team,
source_id="source_id",
connection_id="connection_id",
@@ -21,10 +21,8 @@ async def _setup(team: Team, job_inputs: dict[Any, Any]) -> ImportDataActivityIn
source_type=ExternalDataSource.Type.POSTGRES,
job_inputs=job_inputs,
)
- credentials = await sync_to_async(DataWarehouseCredential.objects.create)(
- access_key="blah", access_secret="blah", team=team
- )
- warehouse_table = await sync_to_async(DataWarehouseTable.objects.create)(
+ credentials = DataWarehouseCredential.objects.create(access_key="blah", access_secret="blah", team=team)
+ warehouse_table = DataWarehouseTable.objects.create(
name="table_1",
format="Parquet",
team=team,
@@ -34,7 +32,7 @@ async def _setup(team: Team, job_inputs: dict[Any, Any]) -> ImportDataActivityIn
url_pattern="https://bucket.s3/data/*",
columns={"id": {"hogql": "StringDatabaseField", "clickhouse": "Nullable(String)", "schema_valid": True}},
)
- schema = await sync_to_async(ExternalDataSchema.objects.create)(
+ schema = ExternalDataSchema.objects.create(
team=team,
name="table_1",
source=source,
@@ -43,7 +41,7 @@ async def _setup(team: Team, job_inputs: dict[Any, Any]) -> ImportDataActivityIn
status=ExternalDataSchema.Status.COMPLETED,
last_synced_at="2024-01-01",
)
- job = await sync_to_async(ExternalDataJob.objects.create)(
+ job = ExternalDataJob.objects.create(
team=team,
pipeline=source,
schema=schema,
@@ -56,8 +54,7 @@ async def _setup(team: Team, job_inputs: dict[Any, Any]) -> ImportDataActivityIn
@pytest.mark.django_db(transaction=True)
-@pytest.mark.asyncio
-async def test_postgres_source_without_ssh_tunnel(activity_environment, team, **kwargs):
+def test_postgres_source_without_ssh_tunnel(activity_environment, team, **kwargs):
job_inputs = {
"host": "host.com",
"port": 5432,
@@ -67,15 +64,15 @@ async def test_postgres_source_without_ssh_tunnel(activity_environment, team, **
"schema": "schema",
}
- activity_inputs = await _setup(team, job_inputs)
+ activity_inputs = _setup(team, job_inputs)
with (
mock.patch(
"posthog.temporal.data_imports.pipelines.sql_database_v2.sql_source_for_type"
) as sql_source_for_type,
- mock.patch("posthog.temporal.data_imports.workflow_activities.import_data._run"),
+ mock.patch("posthog.temporal.data_imports.workflow_activities.import_data_sync._run"),
):
- await activity_environment.run(import_data_activity, activity_inputs)
+ activity_environment.run(import_data_activity_sync, activity_inputs)
sql_source_for_type.assert_called_once_with(
source_type=ExternalDataSource.Type.POSTGRES,
@@ -94,8 +91,7 @@ async def test_postgres_source_without_ssh_tunnel(activity_environment, team, **
@pytest.mark.django_db(transaction=True)
-@pytest.mark.asyncio
-async def test_postgres_source_with_ssh_tunnel_disabled(activity_environment, team, **kwargs):
+def test_postgres_source_with_ssh_tunnel_disabled(activity_environment, team, **kwargs):
job_inputs = {
"host": "host.com",
"port": "5432",
@@ -108,15 +104,15 @@ async def test_postgres_source_with_ssh_tunnel_disabled(activity_environment, te
"ssh_tunnel_port": "",
}
- activity_inputs = await _setup(team, job_inputs)
+ activity_inputs = _setup(team, job_inputs)
with (
mock.patch(
"posthog.temporal.data_imports.pipelines.sql_database_v2.sql_source_for_type"
) as sql_source_for_type,
- mock.patch("posthog.temporal.data_imports.workflow_activities.import_data._run"),
+ mock.patch("posthog.temporal.data_imports.workflow_activities.import_data_sync._run"),
):
- await activity_environment.run(import_data_activity, activity_inputs)
+ activity_environment.run(import_data_activity_sync, activity_inputs)
sql_source_for_type.assert_called_once_with(
source_type=ExternalDataSource.Type.POSTGRES,
@@ -136,7 +132,7 @@ async def test_postgres_source_with_ssh_tunnel_disabled(activity_environment, te
@pytest.mark.django_db(transaction=True)
@pytest.mark.asyncio
-async def test_postgres_source_with_ssh_tunnel_enabled(activity_environment, team, **kwargs):
+def test_postgres_source_with_ssh_tunnel_enabled(activity_environment, team, **kwargs):
job_inputs = {
"host": "host.com",
"port": "5432",
@@ -152,7 +148,7 @@ async def test_postgres_source_with_ssh_tunnel_enabled(activity_environment, tea
"ssh_tunnel_auth_type_password": "password",
}
- activity_inputs = await _setup(team, job_inputs)
+ activity_inputs = _setup(team, job_inputs)
def mock_get_tunnel(self_class, host, port):
class MockedTunnel:
@@ -171,10 +167,10 @@ def __exit__(self, exc_type, exc_value, exc_traceback):
mock.patch(
"posthog.temporal.data_imports.pipelines.sql_database_v2.sql_source_for_type"
) as sql_source_for_type_v2,
- mock.patch("posthog.temporal.data_imports.workflow_activities.import_data._run"),
+ mock.patch("posthog.temporal.data_imports.workflow_activities.import_data_sync._run"),
mock.patch.object(SSHTunnel, "get_tunnel", mock_get_tunnel),
):
- await activity_environment.run(import_data_activity, activity_inputs)
+ activity_environment.run(import_data_activity_sync, activity_inputs)
sql_source_for_type_v2.assert_called_once_with(
source_type=ExternalDataSource.Type.POSTGRES,
diff --git a/posthog/temporal/tests/data_imports/test_end_to_end.py b/posthog/temporal/tests/data_imports/test_end_to_end.py
index 786d6fdd56596..cb29cbafa5d78 100644
--- a/posthog/temporal/tests/data_imports/test_end_to_end.py
+++ b/posthog/temporal/tests/data_imports/test_end_to_end.py
@@ -870,10 +870,11 @@ def get_jobs():
return list(jobs)
- with mock.patch(
- "posthog.temporal.data_imports.workflow_activities.create_job_model.acreate_external_data_job",
- ) as acreate_external_data_job:
- acreate_external_data_job.side_effect = Exception("Ruhoh!")
+ with mock.patch.object(
+ ExternalDataJob.objects,
+ "create",
+ ) as create_external_data_job:
+ create_external_data_job.side_effect = Exception("Ruhoh!")
with pytest.raises(Exception):
await _execute_run(workflow_id, inputs, stripe_customer["data"])
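
The test change above swaps a patched module-level helper for `mock.patch.object` on the model manager's `create` method. A self-contained, stdlib-only sketch of that pattern (the `Manager` class here is a stand-in, not Django's manager):

from unittest import mock

class Manager:
    def create(self, **kwargs):
        return {"created": kwargs}

manager = Manager()

with mock.patch.object(manager, "create") as create_mock:
    # Force the patched method to fail, exactly as the test does with "Ruhoh!".
    create_mock.side_effect = Exception("Ruhoh!")
    try:
        manager.create(team_id=1)
    except Exception as err:
        assert str(err) == "Ruhoh!"
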
diff --git a/posthog/temporal/tests/external_data/test_external_data_job.py b/posthog/temporal/tests/external_data/test_external_data_job.py
index d554fe81fc5e1..f931c97f93943 100644
--- a/posthog/temporal/tests/external_data/test_external_data_job.py
+++ b/posthog/temporal/tests/external_data/test_external_data_job.py
@@ -6,9 +6,9 @@
from asgiref.sync import sync_to_async
from django.test import override_settings
+from posthog.temporal.data_imports import import_data_activity_sync
from posthog.temporal.data_imports.external_data_job import (
UpdateExternalDataJobStatusInputs,
- check_schedule_activity,
create_source_templates,
update_external_data_job_model,
)
@@ -16,58 +16,55 @@
ExternalDataJobWorkflow,
ExternalDataWorkflowInputs,
)
+from posthog.temporal.data_imports.pipelines.pipeline_sync import DataImportPipelineSync
from posthog.temporal.data_imports.workflow_activities.check_billing_limits import check_billing_limits_activity
from posthog.temporal.data_imports.workflow_activities.create_job_model import (
CreateExternalDataJobModelActivityInputs,
create_external_data_job_model_activity,
)
-from posthog.temporal.data_imports.workflow_activities.import_data import ImportDataActivityInputs, import_data_activity
+from posthog.temporal.data_imports.workflow_activities.import_data_sync import ImportDataActivityInputs
from posthog.temporal.data_imports.workflow_activities.sync_new_schemas import (
SyncNewSchemasActivityInputs,
sync_new_schemas_activity,
)
-from posthog.warehouse.external_data_source.jobs import acreate_external_data_job
from posthog.warehouse.models import (
get_latest_run_if_exists,
ExternalDataJob,
ExternalDataSource,
ExternalDataSchema,
- get_external_data_job,
)
from posthog.temporal.data_imports.pipelines.schemas import (
PIPELINE_TYPE_SCHEMA_DEFAULT_MAPPING,
)
from posthog.models import Team
-from posthog.temporal.data_imports.pipelines.pipeline import DataImportPipeline
from temporalio.testing import WorkflowEnvironment
from temporalio.common import RetryPolicy
from temporalio.worker import UnsandboxedWorkflowRunner, Worker
from posthog.constants import DATA_WAREHOUSE_TASK_QUEUE
import pytest_asyncio
-import aioboto3
+import boto3
import functools
from django.conf import settings
from dlt.sources.helpers.rest_client.client import RESTClient
from dlt.common.configuration.specs.aws_credentials import AwsCredentials
-import asyncio
import psycopg
from posthog.warehouse.models.external_data_schema import get_all_schemas_for_source_id
BUCKET_NAME = "test-pipeline"
-SESSION = aioboto3.Session()
+SESSION = boto3.Session()
create_test_client = functools.partial(SESSION.client, endpoint_url=settings.OBJECT_STORAGE_ENDPOINT)
-async def delete_all_from_s3(minio_client, bucket_name: str, key_prefix: str):
+def delete_all_from_s3(minio_client, bucket_name: str, key_prefix: str):
"""Delete all objects in bucket_name under key_prefix."""
- response = await minio_client.list_objects_v2(Bucket=bucket_name, Prefix=key_prefix)
+ response = minio_client.list_objects_v2(Bucket=bucket_name, Prefix=key_prefix)
if "Contents" in response:
for obj in response["Contents"]:
if "Key" in obj:
- await minio_client.delete_object(Bucket=bucket_name, Key=obj["Key"])
+ minio_client.delete_object(Bucket=bucket_name, Key=obj["Key"])
@pytest.fixture
@@ -76,28 +73,29 @@ def bucket_name(request) -> str:
return BUCKET_NAME
-@pytest_asyncio.fixture
-async def minio_client(bucket_name):
+@pytest.fixture
+def minio_client(bucket_name):
"""Manage an S3 client to interact with a MinIO bucket.
Yields the client after creating a bucket. Upon resuming, we delete
the contents and the bucket itself.
"""
- async with create_test_client(
+ minio_client = create_test_client(
"s3",
aws_access_key_id=settings.OBJECT_STORAGE_ACCESS_KEY_ID,
aws_secret_access_key=settings.OBJECT_STORAGE_SECRET_ACCESS_KEY,
- ) as minio_client:
- try:
- await minio_client.head_bucket(Bucket=bucket_name)
- except:
- await minio_client.create_bucket(Bucket=bucket_name)
+ )
- yield minio_client
+ try:
+ minio_client.head_bucket(Bucket=bucket_name)
+ except:
+ minio_client.create_bucket(Bucket=bucket_name)
- await delete_all_from_s3(minio_client, bucket_name, key_prefix="/")
+ yield minio_client
- await minio_client.delete_bucket(Bucket=bucket_name)
+ delete_all_from_s3(minio_client, bucket_name, key_prefix="/")
+
+ minio_client.delete_bucket(Bucket=bucket_name)
@pytest.fixture
@@ -127,8 +125,8 @@ async def postgres_connection(postgres_config, setup_postgres_test_db):
await connection.close()
-async def _create_schema(schema_name: str, source: ExternalDataSource, team: Team, table_id: Optional[str] = None):
- return await sync_to_async(ExternalDataSchema.objects.create)(
+def _create_schema(schema_name: str, source: ExternalDataSource, team: Team, table_id: Optional[str] = None):
+ return ExternalDataSchema.objects.create(
name=schema_name,
team_id=team.pk,
source_id=source.pk,
@@ -136,46 +134,64 @@ async def _create_schema(schema_name: str, source: ExternalDataSource, team: Tea
)
+def _create_external_data_job(
+ external_data_source_id: uuid.UUID,
+ external_data_schema_id: uuid.UUID,
+ workflow_id: str,
+ workflow_run_id: str,
+ team_id: int,
+) -> ExternalDataJob:
+ job = ExternalDataJob.objects.create(
+ team_id=team_id,
+ pipeline_id=external_data_source_id,
+ schema_id=external_data_schema_id,
+ status=ExternalDataJob.Status.RUNNING,
+ rows_synced=0,
+ workflow_id=workflow_id,
+ workflow_run_id=workflow_run_id,
+ )
+
+ return job
+
+
@pytest.mark.django_db(transaction=True)
-@pytest.mark.asyncio
-async def test_create_external_job_activity(activity_environment, team, **kwargs):
+def test_create_external_job_activity(activity_environment, team, **kwargs):
"""
Test that the create external job activity creates a new job
"""
- new_source = await sync_to_async(ExternalDataSource.objects.create)(
- source_id=uuid.uuid4(),
- connection_id=uuid.uuid4(),
- destination_id=uuid.uuid4(),
+ new_source = ExternalDataSource.objects.create(
+ source_id=str(uuid.uuid4()),
+ connection_id=str(uuid.uuid4()),
+ destination_id=str(uuid.uuid4()),
team=team,
status="running",
source_type="Stripe",
)
- test_1_schema = await _create_schema("test-1", new_source, team)
+ test_1_schema = _create_schema("test-1", new_source, team)
inputs = CreateExternalDataJobModelActivityInputs(
team_id=team.id, source_id=new_source.pk, schema_id=test_1_schema.id
)
- run_id, _, __ = await activity_environment.run(create_external_data_job_model_activity, inputs)
+ run_id, _, __ = activity_environment.run(create_external_data_job_model_activity, inputs)
runs = ExternalDataJob.objects.filter(id=run_id)
- assert await sync_to_async(runs.exists)()
+ assert runs.exists()
@pytest.mark.django_db(transaction=True)
-@pytest.mark.asyncio
-async def test_create_external_job_activity_schemas_exist(activity_environment, team, **kwargs):
- new_source = await sync_to_async(ExternalDataSource.objects.create)(
- source_id=uuid.uuid4(),
- connection_id=uuid.uuid4(),
- destination_id=uuid.uuid4(),
+def test_create_external_job_activity_schemas_exist(activity_environment, team, **kwargs):
+ new_source = ExternalDataSource.objects.create(
+ source_id=str(uuid.uuid4()),
+ connection_id=str(uuid.uuid4()),
+ destination_id=str(uuid.uuid4()),
team=team,
status="running",
source_type="Stripe",
)
- schema = await sync_to_async(ExternalDataSchema.objects.create)(
+ schema = ExternalDataSchema.objects.create(
name=PIPELINE_TYPE_SCHEMA_DEFAULT_MAPPING[new_source.source_type][0],
team_id=team.id,
source_id=new_source.pk,
@@ -183,25 +199,24 @@ async def test_create_external_job_activity_schemas_exist(activity_environment,
inputs = CreateExternalDataJobModelActivityInputs(team_id=team.id, source_id=new_source.pk, schema_id=schema.id)
- run_id, _, __ = await activity_environment.run(create_external_data_job_model_activity, inputs)
+ run_id, _, __ = activity_environment.run(create_external_data_job_model_activity, inputs)
runs = ExternalDataJob.objects.filter(id=run_id)
- assert await sync_to_async(runs.exists)()
+ assert runs.exists()
@pytest.mark.django_db(transaction=True)
-@pytest.mark.asyncio
-async def test_create_external_job_activity_update_schemas(activity_environment, team, **kwargs):
- new_source = await sync_to_async(ExternalDataSource.objects.create)(
- source_id=uuid.uuid4(),
- connection_id=uuid.uuid4(),
- destination_id=uuid.uuid4(),
+def test_create_external_job_activity_update_schemas(activity_environment, team, **kwargs):
+ new_source = ExternalDataSource.objects.create(
+ source_id=str(uuid.uuid4()),
+ connection_id=str(uuid.uuid4()),
+ destination_id=str(uuid.uuid4()),
team=team,
status="running",
source_type="Stripe",
)
- await sync_to_async(ExternalDataSchema.objects.create)(
+ ExternalDataSchema.objects.create(
name=PIPELINE_TYPE_SCHEMA_DEFAULT_MAPPING[new_source.source_type][0],
team_id=team.id,
source_id=new_source.pk,
@@ -210,36 +225,35 @@ async def test_create_external_job_activity_update_schemas(activity_environment,
inputs = SyncNewSchemasActivityInputs(source_id=str(new_source.pk), team_id=team.id)
- await activity_environment.run(sync_new_schemas_activity, inputs)
+ activity_environment.run(sync_new_schemas_activity, inputs)
- all_schemas = await sync_to_async(get_all_schemas_for_source_id)(new_source.pk, team.id)
+ all_schemas = get_all_schemas_for_source_id(new_source.pk, team.id)
assert len(all_schemas) == len(PIPELINE_TYPE_SCHEMA_DEFAULT_MAPPING[ExternalDataSource.Type.STRIPE])
@pytest.mark.django_db(transaction=True)
-@pytest.mark.asyncio
-async def test_update_external_job_activity(activity_environment, team, **kwargs):
+def test_update_external_job_activity(activity_environment, team, **kwargs):
"""
Test that the update external job activity updates the job status
"""
- new_source = await sync_to_async(ExternalDataSource.objects.create)(
- source_id=uuid.uuid4(),
- connection_id=uuid.uuid4(),
- destination_id=uuid.uuid4(),
+ new_source = ExternalDataSource.objects.create(
+ source_id=str(uuid.uuid4()),
+ connection_id=str(uuid.uuid4()),
+ destination_id=str(uuid.uuid4()),
team=team,
status="running",
source_type="Stripe",
)
- schema = await sync_to_async(ExternalDataSchema.objects.create)(
+ schema = ExternalDataSchema.objects.create(
name=PIPELINE_TYPE_SCHEMA_DEFAULT_MAPPING[new_source.source_type][0],
team_id=team.id,
source_id=new_source.pk,
should_sync=True,
)
- new_job = await acreate_external_data_job(
+ new_job = _create_external_data_job(
team_id=team.id,
external_data_source_id=new_source.pk,
workflow_id=activity_environment.info.workflow_id,
@@ -257,34 +271,33 @@ async def test_update_external_job_activity(activity_environment, team, **kwargs
team_id=team.id,
)
- await activity_environment.run(update_external_data_job_model, inputs)
- await sync_to_async(new_job.refresh_from_db)()
- await sync_to_async(schema.refresh_from_db)()
+ activity_environment.run(update_external_data_job_model, inputs)
+ new_job.refresh_from_db()
+ schema.refresh_from_db()
assert new_job.status == ExternalDataJob.Status.COMPLETED
assert schema.status == ExternalDataJob.Status.COMPLETED
@pytest.mark.django_db(transaction=True)
-@pytest.mark.asyncio
-async def test_update_external_job_activity_with_retryable_error(activity_environment, team, **kwargs):
- new_source = await sync_to_async(ExternalDataSource.objects.create)(
- source_id=uuid.uuid4(),
- connection_id=uuid.uuid4(),
- destination_id=uuid.uuid4(),
+def test_update_external_job_activity_with_retryable_error(activity_environment, team, **kwargs):
+ new_source = ExternalDataSource.objects.create(
+ source_id=str(uuid.uuid4()),
+ connection_id=str(uuid.uuid4()),
+ destination_id=str(uuid.uuid4()),
team=team,
status="running",
source_type="Stripe",
)
- schema = await sync_to_async(ExternalDataSchema.objects.create)(
+ schema = ExternalDataSchema.objects.create(
name=PIPELINE_TYPE_SCHEMA_DEFAULT_MAPPING[new_source.source_type][0],
team_id=team.id,
source_id=new_source.pk,
should_sync=True,
)
- new_job = await acreate_external_data_job(
+ new_job = _create_external_data_job(
team_id=team.id,
external_data_source_id=new_source.pk,
workflow_id=activity_environment.info.workflow_id,
@@ -302,9 +315,9 @@ async def test_update_external_job_activity_with_retryable_error(activity_enviro
team_id=team.id,
)
- await activity_environment.run(update_external_data_job_model, inputs)
- await sync_to_async(new_job.refresh_from_db)()
- await sync_to_async(schema.refresh_from_db)()
+ activity_environment.run(update_external_data_job_model, inputs)
+ new_job.refresh_from_db()
+ schema.refresh_from_db()
assert new_job.status == ExternalDataJob.Status.COMPLETED
assert schema.status == ExternalDataJob.Status.COMPLETED
@@ -312,25 +325,24 @@ async def test_update_external_job_activity_with_retryable_error(activity_enviro
@pytest.mark.django_db(transaction=True)
-@pytest.mark.asyncio
-async def test_update_external_job_activity_with_non_retryable_error(activity_environment, team, **kwargs):
- new_source = await sync_to_async(ExternalDataSource.objects.create)(
- source_id=uuid.uuid4(),
- connection_id=uuid.uuid4(),
- destination_id=uuid.uuid4(),
+def test_update_external_job_activity_with_non_retryable_error(activity_environment, team, **kwargs):
+ new_source = ExternalDataSource.objects.create(
+ source_id=str(uuid.uuid4()),
+ connection_id=str(uuid.uuid4()),
+ destination_id=str(uuid.uuid4()),
team=team,
status="running",
source_type="Postgres",
)
- schema = await sync_to_async(ExternalDataSchema.objects.create)(
+ schema = ExternalDataSchema.objects.create(
name="test_123",
team_id=team.id,
source_id=new_source.pk,
should_sync=True,
)
- new_job = await acreate_external_data_job(
+ new_job = _create_external_data_job(
team_id=team.id,
external_data_source_id=new_source.pk,
workflow_id=activity_environment.info.workflow_id,
@@ -348,10 +360,10 @@ async def test_update_external_job_activity_with_non_retryable_error(activity_en
team_id=team.id,
)
with mock.patch("posthog.warehouse.models.external_data_schema.external_data_workflow_exists", return_value=False):
- await activity_environment.run(update_external_data_job_model, inputs)
+ activity_environment.run(update_external_data_job_model, inputs)
- await sync_to_async(new_job.refresh_from_db)()
- await sync_to_async(schema.refresh_from_db)()
+ new_job.refresh_from_db()
+ schema.refresh_from_db()
assert new_job.status == ExternalDataJob.Status.COMPLETED
assert schema.status == ExternalDataJob.Status.COMPLETED
@@ -359,22 +371,21 @@ async def test_update_external_job_activity_with_non_retryable_error(activity_en
@pytest.mark.django_db(transaction=True)
-@pytest.mark.asyncio
-async def test_run_stripe_job(activity_environment, team, minio_client, **kwargs):
- async def setup_job_1():
- new_source = await sync_to_async(ExternalDataSource.objects.create)(
- source_id=uuid.uuid4(),
- connection_id=uuid.uuid4(),
- destination_id=uuid.uuid4(),
+def test_run_stripe_job(activity_environment, team, minio_client, **kwargs):
+ def setup_job_1():
+ new_source = ExternalDataSource.objects.create(
+ source_id=str(uuid.uuid4()),
+ connection_id=str(uuid.uuid4()),
+ destination_id=str(uuid.uuid4()),
team=team,
status="running",
source_type="Stripe",
job_inputs={"stripe_secret_key": "test-key", "stripe_account_id": "acct_id"},
)
- customer_schema = await _create_schema("Customer", new_source, team)
+ customer_schema = _create_schema("Customer", new_source, team)
- new_job: ExternalDataJob = await sync_to_async(ExternalDataJob.objects.create)(
+ new_job: ExternalDataJob = ExternalDataJob.objects.create(
team_id=team.id,
pipeline_id=new_source.pk,
status=ExternalDataJob.Status.RUNNING,
@@ -382,7 +393,7 @@ async def setup_job_1():
schema=customer_schema,
)
- new_job = await get_external_data_job(new_job.id)
+ new_job = ExternalDataJob.objects.get(id=new_job.id)
inputs = ImportDataActivityInputs(
team_id=team.id,
@@ -393,20 +404,20 @@ async def setup_job_1():
return new_job, inputs
- async def setup_job_2():
- new_source = await sync_to_async(ExternalDataSource.objects.create)(
- source_id=uuid.uuid4(),
- connection_id=uuid.uuid4(),
- destination_id=uuid.uuid4(),
+ def setup_job_2():
+ new_source = ExternalDataSource.objects.create(
+ source_id=str(uuid.uuid4()),
+ connection_id=str(uuid.uuid4()),
+ destination_id=str(uuid.uuid4()),
team=team,
status="running",
source_type="Stripe",
job_inputs={"stripe_secret_key": "test-key", "stripe_account_id": "acct_id"},
)
- charge_schema = await _create_schema("Charge", new_source, team)
+ charge_schema = _create_schema("Charge", new_source, team)
- new_job: ExternalDataJob = await sync_to_async(ExternalDataJob.objects.create)(
+ new_job: ExternalDataJob = ExternalDataJob.objects.create(
team_id=team.id,
pipeline_id=new_source.pk,
status=ExternalDataJob.Status.RUNNING,
@@ -414,7 +425,7 @@ async def setup_job_2():
schema=charge_schema,
)
- new_job = await get_external_data_job(new_job.id)
+ new_job = ExternalDataJob.objects.get(id=new_job.id)
inputs = ImportDataActivityInputs(
team_id=team.id,
@@ -425,8 +436,8 @@ async def setup_job_2():
return new_job, inputs
- job_1, job_1_inputs = await setup_job_1()
- job_2, job_2_inputs = await setup_job_2()
+ job_1, job_1_inputs = setup_job_1()
+ job_2, job_2_inputs = setup_job_2()
def mock_customers_paginate(
class_self,
@@ -504,14 +515,10 @@ def mock_to_object_store_rs_credentials(class_self):
mock.patch.object(AwsCredentials, "to_session_credentials", mock_to_session_credentials),
mock.patch.object(AwsCredentials, "to_object_store_rs_credentials", mock_to_object_store_rs_credentials),
):
- await asyncio.gather(
- activity_environment.run(import_data_activity, job_1_inputs),
- )
+ activity_environment.run(import_data_activity_sync, job_1_inputs)
- folder_path = await sync_to_async(job_1.folder_path)()
- job_1_customer_objects = await minio_client.list_objects_v2(
- Bucket=BUCKET_NAME, Prefix=f"{folder_path}/customer/"
- )
+ folder_path = job_1.folder_path()
+ job_1_customer_objects = minio_client.list_objects_v2(Bucket=BUCKET_NAME, Prefix=f"{folder_path}/customer/")
assert len(job_1_customer_objects["Contents"]) == 2
@@ -531,33 +538,28 @@ def mock_to_object_store_rs_credentials(class_self):
mock.patch.object(AwsCredentials, "to_session_credentials", mock_to_session_credentials),
mock.patch.object(AwsCredentials, "to_object_store_rs_credentials", mock_to_object_store_rs_credentials),
):
- await asyncio.gather(
- activity_environment.run(import_data_activity, job_2_inputs),
- )
+ activity_environment.run(import_data_activity_sync, job_2_inputs)
- job_2_charge_objects = await minio_client.list_objects_v2(
- Bucket=BUCKET_NAME, Prefix=f"{job_2.folder_path()}/charge/"
- )
+ job_2_charge_objects = minio_client.list_objects_v2(Bucket=BUCKET_NAME, Prefix=f"{job_2.folder_path()}/charge/")
assert len(job_2_charge_objects["Contents"]) == 2
@pytest.mark.django_db(transaction=True)
-@pytest.mark.asyncio
-async def test_run_stripe_job_row_count_update(activity_environment, team, minio_client, **kwargs):
- async def setup_job_1():
- new_source = await sync_to_async(ExternalDataSource.objects.create)(
- source_id=uuid.uuid4(),
- connection_id=uuid.uuid4(),
- destination_id=uuid.uuid4(),
+def test_run_stripe_job_row_count_update(activity_environment, team, minio_client, **kwargs):
+ def setup_job_1():
+ new_source = ExternalDataSource.objects.create(
+ source_id=str(uuid.uuid4()),
+ connection_id=str(uuid.uuid4()),
+ destination_id=str(uuid.uuid4()),
team=team,
status="running",
source_type="Stripe",
job_inputs={"stripe_secret_key": "test-key", "stripe_account_id": "acct_id"},
)
- customer_schema = await _create_schema("Customer", new_source, team)
+ customer_schema = _create_schema("Customer", new_source, team)
- new_job: ExternalDataJob = await sync_to_async(ExternalDataJob.objects.create)(
+ new_job: ExternalDataJob = ExternalDataJob.objects.create(
team_id=team.id,
pipeline_id=new_source.pk,
status=ExternalDataJob.Status.RUNNING,
@@ -565,9 +567,9 @@ async def setup_job_1():
schema=customer_schema,
)
- new_job = await sync_to_async(
- ExternalDataJob.objects.filter(id=new_job.id).prefetch_related("pipeline").prefetch_related("schema").get
- )()
+ new_job = (
+ ExternalDataJob.objects.filter(id=new_job.id).prefetch_related("pipeline").prefetch_related("schema").get()
+ )
inputs = ImportDataActivityInputs(
team_id=team.id,
@@ -578,7 +580,7 @@ async def setup_job_1():
return new_job, inputs
- job_1, job_1_inputs = await setup_job_1()
+ job_1, job_1_inputs = setup_job_1()
def mock_customers_paginate(
class_self,
@@ -636,18 +638,14 @@ def mock_to_object_store_rs_credentials(class_self):
mock.patch.object(AwsCredentials, "to_session_credentials", mock_to_session_credentials),
mock.patch.object(AwsCredentials, "to_object_store_rs_credentials", mock_to_object_store_rs_credentials),
):
- await asyncio.gather(
- activity_environment.run(import_data_activity, job_1_inputs),
- )
+ activity_environment.run(import_data_activity_sync, job_1_inputs)
- folder_path = await sync_to_async(job_1.folder_path)()
- job_1_customer_objects = await minio_client.list_objects_v2(
- Bucket=BUCKET_NAME, Prefix=f"{folder_path}/customer/"
- )
+ folder_path = job_1.folder_path()
+ job_1_customer_objects = minio_client.list_objects_v2(Bucket=BUCKET_NAME, Prefix=f"{folder_path}/customer/")
assert len(job_1_customer_objects["Contents"]) == 2
- await sync_to_async(job_1.refresh_from_db)()
+ job_1.refresh_from_db()
assert job_1.rows_synced == 1
@@ -680,24 +678,30 @@ async def test_external_data_job_workflow_with_schema(team, **kwargs):
external_data_schema_id=schema.id,
)
- async def mock_async_func(inputs):
+ def mock_func(inputs):
return {}
with (
mock.patch("posthog.warehouse.models.table.DataWarehouseTable.get_columns", return_value={"id": "string"}),
- mock.patch.object(DataImportPipeline, "run", mock_async_func),
+ mock.patch.object(DataImportPipelineSync, "run", mock_func),
):
- with override_settings(AIRBYTE_BUCKET_KEY="test-key", AIRBYTE_BUCKET_SECRET="test-secret"):
+ with override_settings(
+ BUCKET_URL=f"s3://{BUCKET_NAME}",
+ AIRBYTE_BUCKET_KEY=settings.OBJECT_STORAGE_ACCESS_KEY_ID,
+ AIRBYTE_BUCKET_SECRET=settings.OBJECT_STORAGE_SECRET_ACCESS_KEY,
+ AIRBYTE_BUCKET_REGION="us-east-1",
+ AIRBYTE_BUCKET_DOMAIN="objectstorage:19000",
+ BUCKET_NAME=BUCKET_NAME,
+ ):
async with await WorkflowEnvironment.start_time_skipping() as activity_environment:
async with Worker(
activity_environment.client,
task_queue=DATA_WAREHOUSE_TASK_QUEUE,
workflows=[ExternalDataJobWorkflow],
activities=[
- check_schedule_activity,
create_external_data_job_model_activity,
update_external_data_job_model,
- import_data_activity,
+ import_data_activity_sync,
create_source_templates,
check_billing_limits_activity,
sync_new_schemas_activity,
@@ -752,7 +756,7 @@ async def setup_job_1():
},
)
- posthog_test_schema = await _create_schema("posthog_test", new_source, team)
+ posthog_test_schema = await sync_to_async(_create_schema)("posthog_test", new_source, team)
new_job: ExternalDataJob = await sync_to_async(ExternalDataJob.objects.create)(
team_id=team.id,
@@ -806,127 +810,8 @@ def mock_to_object_store_rs_credentials(class_self):
mock.patch.object(AwsCredentials, "to_session_credentials", mock_to_session_credentials),
mock.patch.object(AwsCredentials, "to_object_store_rs_credentials", mock_to_object_store_rs_credentials),
):
- await asyncio.gather(
- activity_environment.run(import_data_activity, job_1_inputs),
- )
+ await sync_to_async(activity_environment.run)(import_data_activity_sync, job_1_inputs)
folder_path = await sync_to_async(job_1.folder_path)()
- job_1_team_objects = await minio_client.list_objects_v2(
- Bucket=BUCKET_NAME, Prefix=f"{folder_path}/posthog_test/"
- )
+ job_1_team_objects = minio_client.list_objects_v2(Bucket=BUCKET_NAME, Prefix=f"{folder_path}/posthog_test/")
assert len(job_1_team_objects["Contents"]) == 2
-
-
-@pytest.mark.django_db(transaction=True)
-@pytest.mark.asyncio
-async def test_check_schedule_activity_with_schema_id(activity_environment, team, **kwargs):
- new_source = await sync_to_async(ExternalDataSource.objects.create)(
- source_id=uuid.uuid4(),
- connection_id=uuid.uuid4(),
- destination_id=uuid.uuid4(),
- team=team,
- status="running",
- source_type="Stripe",
- job_inputs={"stripe_secret_key": "test-key", "stripe_account_id": "acct_id"},
- )
-
- test_1_schema = await _create_schema("test-1", new_source, team)
-
- should_exit = await activity_environment.run(
- check_schedule_activity,
- ExternalDataWorkflowInputs(
- team_id=team.id,
- external_data_source_id=new_source.id,
- external_data_schema_id=test_1_schema.id,
- ),
- )
-
- assert should_exit is False
-
-
-@pytest.mark.django_db(transaction=True)
-@pytest.mark.asyncio
-async def test_check_schedule_activity_with_missing_schema_id_but_with_schedule(activity_environment, team, **kwargs):
- new_source = await sync_to_async(ExternalDataSource.objects.create)(
- source_id=uuid.uuid4(),
- connection_id=uuid.uuid4(),
- destination_id=uuid.uuid4(),
- team=team,
- status="running",
- source_type="Stripe",
- job_inputs={"stripe_secret_key": "test-key", "stripe_account_id": "acct_id"},
- )
-
- await sync_to_async(ExternalDataSchema.objects.create)(
- name="test-1",
- team_id=team.id,
- source_id=new_source.pk,
- should_sync=True,
- )
-
- with (
- mock.patch(
- "posthog.temporal.data_imports.external_data_job.a_external_data_workflow_exists", return_value=True
- ),
- mock.patch(
- "posthog.temporal.data_imports.external_data_job.a_delete_external_data_schedule", return_value=True
- ),
- mock.patch(
- "posthog.temporal.data_imports.external_data_job.a_trigger_external_data_workflow"
- ) as mock_a_trigger_external_data_workflow,
- ):
- should_exit = await activity_environment.run(
- check_schedule_activity,
- ExternalDataWorkflowInputs(
- team_id=team.id,
- external_data_source_id=new_source.id,
- external_data_schema_id=None,
- ),
- )
-
- assert should_exit is True
- assert mock_a_trigger_external_data_workflow.call_count == 1
-
-
-@pytest.mark.django_db(transaction=True)
-@pytest.mark.asyncio
-async def test_check_schedule_activity_with_missing_schema_id_and_no_schedule(activity_environment, team, **kwargs):
- new_source = await sync_to_async(ExternalDataSource.objects.create)(
- source_id=uuid.uuid4(),
- connection_id=uuid.uuid4(),
- destination_id=uuid.uuid4(),
- team=team,
- status="running",
- source_type="Stripe",
- job_inputs={"stripe_secret_key": "test-key", "stripe_account_id": "acct_id"},
- )
-
- await sync_to_async(ExternalDataSchema.objects.create)(
- name="test-1",
- team_id=team.id,
- source_id=new_source.pk,
- should_sync=True,
- )
-
- with (
- mock.patch(
- "posthog.temporal.data_imports.external_data_job.a_external_data_workflow_exists", return_value=False
- ),
- mock.patch(
- "posthog.temporal.data_imports.external_data_job.a_delete_external_data_schedule", return_value=True
- ),
- mock.patch(
- "posthog.temporal.data_imports.external_data_job.a_sync_external_data_job_workflow"
- ) as mock_a_sync_external_data_job_workflow,
- ):
- should_exit = await activity_environment.run(
- check_schedule_activity,
- ExternalDataWorkflowInputs(
- team_id=team.id,
- external_data_source_id=new_source.id,
- external_data_schema_id=None,
- ),
- )
-
- assert should_exit is True
- assert mock_a_sync_external_data_job_workflow.call_count == 1
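
With aioboto3 replaced by a plain boto3 session, the MinIO fixture in the test module above becomes ordinary synchronous code. A minimal pytest fixture in the same shape; the endpoint URL and credentials below are illustrative placeholders, not the project's settings:

import boto3
import pytest

BUCKET = "test-pipeline"

@pytest.fixture
def s3_client():
    client = boto3.Session().client(
        "s3",
        endpoint_url="http://localhost:19000",  # assumed MinIO endpoint
        aws_access_key_id="object_storage_root_user",
        aws_secret_access_key="object_storage_root_password",
    )
    # Create the bucket on first use.
    try:
        client.head_bucket(Bucket=BUCKET)
    except Exception:
        client.create_bucket(Bucket=BUCKET)

    yield client

    # Clean up: delete all objects, then the bucket itself.
    response = client.list_objects_v2(Bucket=BUCKET, Prefix="/")
    for obj in response.get("Contents", []):
        client.delete_object(Bucket=BUCKET, Key=obj["Key"])
    client.delete_bucket(Bucket=BUCKET)
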
diff --git a/posthog/warehouse/data_load/source_templates.py b/posthog/warehouse/data_load/source_templates.py
index 5a7d515bc8536..6b993e00d3d97 100644
--- a/posthog/warehouse/data_load/source_templates.py
+++ b/posthog/warehouse/data_load/source_templates.py
@@ -1,11 +1,9 @@
-from posthog.temporal.common.logger import bind_temporal_worker_logger
-from posthog.warehouse.models.external_data_job import ExternalDataJob, get_external_data_job, get_latest_run_if_exists
+from posthog.temporal.common.logger import bind_temporal_worker_logger_sync
+from posthog.warehouse.models.external_data_job import ExternalDataJob
from posthog.warehouse.models.external_data_source import ExternalDataSource
from posthog.warehouse.models.join import DataWarehouseJoin
-from posthog.warehouse.util import database_sync_to_async
-@database_sync_to_async
def database_operations(team_id: int, table_prefix: str) -> None:
customer_join_exists = (
DataWarehouseJoin.objects.filter(
@@ -54,11 +52,18 @@ def database_operations(team_id: int, table_prefix: str) -> None:
)
-async def create_warehouse_templates_for_source(team_id: int, run_id: str) -> None:
- logger = await bind_temporal_worker_logger(team_id=team_id)
+def create_warehouse_templates_for_source(team_id: int, run_id: str) -> None:
+ logger = bind_temporal_worker_logger_sync(team_id=team_id)
- job: ExternalDataJob = await get_external_data_job(job_id=run_id)
- last_successful_job: ExternalDataJob | None = await get_latest_run_if_exists(job.team_id, job.pipeline_id)
+ job: ExternalDataJob = ExternalDataJob.objects.get(pk=run_id)
+ last_successful_job: ExternalDataJob | None = (
+ ExternalDataJob.objects.filter(
+ team_id=job.team_id, pipeline_id=job.pipeline_id, status=ExternalDataJob.Status.COMPLETED
+ )
+ .prefetch_related("pipeline")
+ .order_by("-created_at")
+ .first()
+ )
source: ExternalDataSource.Type = job.pipeline.source_type
@@ -71,7 +76,7 @@ async def create_warehouse_templates_for_source(team_id: int, run_id: str) -> No
table_prefix = job.pipeline.prefix or ""
- await database_operations(team_id, table_prefix)
+ database_operations(team_id, table_prefix)
logger.info(
f"Created warehouse template for job {run_id}",
diff --git a/posthog/warehouse/external_data_source/jobs.py b/posthog/warehouse/external_data_source/jobs.py
index d21210f2ec097..b7d37eb746270 100644
--- a/posthog/warehouse/external_data_source/jobs.py
+++ b/posthog/warehouse/external_data_source/jobs.py
@@ -1,4 +1,3 @@
-from uuid import UUID
from posthog.warehouse.util import database_sync_to_async
from posthog.warehouse.models.external_data_job import ExternalDataJob
from posthog.warehouse.models.external_data_schema import ExternalDataSchema
@@ -9,27 +8,6 @@ def get_external_data_source(team_id: str, external_data_source_id: str) -> Exte
return ExternalDataSource.objects.get(team_id=team_id, id=external_data_source_id)
-@database_sync_to_async
-def acreate_external_data_job(
- external_data_source_id: UUID,
- external_data_schema_id: UUID,
- workflow_id: str,
- workflow_run_id: str,
- team_id: int,
-) -> ExternalDataJob:
- job = ExternalDataJob.objects.create(
- team_id=team_id,
- pipeline_id=external_data_source_id,
- schema_id=external_data_schema_id,
- status=ExternalDataJob.Status.RUNNING,
- rows_synced=0,
- workflow_id=workflow_id,
- workflow_run_id=workflow_run_id,
- )
-
- return job
-
-
@database_sync_to_async
def aget_running_job_for_schema(schema_id: str) -> ExternalDataJob | None:
return (
@@ -39,8 +17,7 @@ def aget_running_job_for_schema(schema_id: str) -> ExternalDataJob | None:
)
-@database_sync_to_async
-def aupdate_external_job_status(
+def update_external_job_status(
job_id: str, team_id: int, status: ExternalDataJob.Status, latest_error: str | None
) -> ExternalDataJob:
model = ExternalDataJob.objects.get(id=job_id, team_id=team_id)
diff --git a/posthog/warehouse/models/external_data_schema.py b/posthog/warehouse/models/external_data_schema.py
index c90a5c2e472bb..3bcbc6c658f7f 100644
--- a/posthog/warehouse/models/external_data_schema.py
+++ b/posthog/warehouse/models/external_data_schema.py
@@ -99,8 +99,7 @@ def aget_schema_by_id(schema_id: str, team_id: int) -> ExternalDataSchema | None
)
-@database_sync_to_async
-def aupdate_should_sync(schema_id: str, team_id: int, should_sync: bool) -> ExternalDataSchema | None:
+def update_should_sync(schema_id: str, team_id: int, should_sync: bool) -> ExternalDataSchema | None:
schema = ExternalDataSchema.objects.get(id=schema_id, team_id=team_id)
schema.should_sync = should_sync
schema.save()
@@ -119,15 +118,6 @@ def aupdate_should_sync(schema_id: str, team_id: int, should_sync: bool) -> Exte
return schema
-@database_sync_to_async
-def get_active_schemas_for_source_id(source_id: uuid.UUID, team_id: int):
- return list(
- ExternalDataSchema.objects.exclude(deleted=True)
- .filter(team_id=team_id, source_id=source_id, should_sync=True)
- .all()
- )
-
-
def get_all_schemas_for_source_id(source_id: uuid.UUID, team_id: int):
return list(ExternalDataSchema.objects.exclude(deleted=True).filter(team_id=team_id, source_id=source_id).all())
diff --git a/rust/cymbal/src/frames/mod.rs b/rust/cymbal/src/frames/mod.rs
index 09b12ff625b89..01ba7d13e4e97 100644
--- a/rust/cymbal/src/frames/mod.rs
+++ b/rust/cymbal/src/frames/mod.rs
@@ -32,14 +32,9 @@ impl RawFrame {
res
}
- pub fn needs_symbols(&self) -> bool {
- // For now, we only support JS, so this is always true
- true
- }
-
- pub fn symbol_set_ref(&self) -> String {
+ pub fn symbol_set_ref(&self) -> Option<String> {
let RawFrame::JavaScript(raw) = self;
- raw.source_url().map(String::from).unwrap_or_default()
+ raw.source_url().map(String::from).ok()
}
pub fn frame_id(&self) -> String {
diff --git a/rust/cymbal/src/frames/resolver.rs b/rust/cymbal/src/frames/resolver.rs
index 6a10c68c67208..b93da78b6b8ed 100644
--- a/rust/cymbal/src/frames/resolver.rs
+++ b/rust/cymbal/src/frames/resolver.rs
@@ -35,10 +35,6 @@ impl Resolver {
return Ok(result.contents);
}
- if !frame.needs_symbols() {
- return frame.resolve(team_id, catalog).await;
- }
-
if let Some(result) =
ErrorTrackingStackFrame::load(pool, team_id, &frame.frame_id()).await?
{
@@ -48,7 +44,11 @@ impl Resolver {
let resolved = frame.resolve(team_id, catalog).await?;
- let set = SymbolSetRecord::load(pool, team_id, &frame.symbol_set_ref()).await?;
+ let set = if let Some(set_ref) = frame.symbol_set_ref() {
+ SymbolSetRecord::load(pool, team_id, &set_ref).await?
+ } else {
+ None
+ };
let record = ErrorTrackingStackFrame::new(
frame.frame_id(),
@@ -212,7 +212,7 @@ mod test {
// get the symbol set
let set_ref = frame.symbol_set_ref();
- let set = SymbolSetRecord::load(&pool, 0, &set_ref)
+ let set = SymbolSetRecord::load(&pool, 0, &set_ref.unwrap())
.await
.unwrap()
.unwrap();
diff --git a/rust/cymbal/src/main.rs b/rust/cymbal/src/main.rs
index 8fca47a17f34b..fc18cfbf946b2 100644
--- a/rust/cymbal/src/main.rs
+++ b/rust/cymbal/src/main.rs
@@ -14,7 +14,7 @@ use cymbal::{
};
use envconfig::Envconfig;
use tokio::task::JoinHandle;
-use tracing::{error, info};
+use tracing::{error, info, warn};
use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter, Layer};
common_alloc::used!();
@@ -83,7 +83,7 @@ async fn main() -> Result<(), Error> {
offset.store().unwrap();
if event.event != "$exception" {
- error!("event of type {}", event.event);
+ warn!("event of type {}", event.event);
continue;
}
@@ -96,13 +96,17 @@ async fn main() -> Result<(), Error> {
Ok(r) => r,
Err(err) => {
metrics::counter!(ERRORS, "cause" => "invalid_exception_properties").increment(1);
- error!("Error parsing properties: {:?}", err);
+ error!(
+ "Error parsing properties: {:?} from properties {:?}",
+ err, properties
+ );
continue;
}
};
let Some(mut exception_list) = properties.exception_list else {
// Known issue that $exception_list didn't exist on old clients
+ metrics::counter!(ERRORS, "cause" => "no_exception_list").increment(1);
continue;
};
@@ -155,6 +159,6 @@ async fn main() -> Result<(), Error> {
let _fingerprint = fingerprinting::generate_fingerprint(&exception_list);
metrics::counter!(STACK_PROCESSED).increment(1);
- whole_loop.label("had_frame", "true").fin();
+ whole_loop.label("finished", "true").fin();
}
}
diff --git a/rust/cymbal/src/types/mod.rs b/rust/cymbal/src/types/mod.rs
index 6a329c75572d2..317262c52aff0 100644
--- a/rust/cymbal/src/types/mod.rs
+++ b/rust/cymbal/src/types/mod.rs
@@ -48,17 +48,6 @@ pub struct Exception {
pub struct ErrProps {
#[serde(rename = "$exception_list")]
pub exception_list: Option<Vec<Exception>>, // Required from exception producers - we will not process events without this. Optional to support older clients, should eventually be removed
- #[serde(skip_serializing_if = "Option::is_none")]
- #[serde(rename = "$exception_type")]
- pub exception_type: Option<String>, // legacy, overridden by exception_list
- #[serde(skip_serializing_if = "Option::is_none")]
- #[serde(rename = "$exception_message")]
- pub exception_message: Option<String>, // legacy, overridden by exception_list
- #[serde(skip_serializing_if = "Option::is_none")]
- #[serde(rename = "$exception_stack_trace_raw")]
- pub exception_stack_trace_raw: Option<String>, // Not all exceptions have a stack trace
- #[serde(rename = "$exception_level")]
- pub exception_level: Option<String>, // We generally don't touch this, but we break it out explicitly for users. Not all exceptions have a level
#[serde(flatten)] // A catch-all for all the properties we don't "care" about
pub other: HashMap<String, Value>,
}
@@ -148,11 +137,6 @@ mod test {
assert!(frame.in_app);
assert_eq!(frame.line, 64);
assert_eq!(frame.column, 15003);
-
- assert_eq!(props.exception_type, None);
- assert_eq!(props.exception_message, None);
- assert_eq!(props.exception_stack_trace_raw, None);
- assert_eq!(props.exception_level, Some("error".to_string()));
}
#[test]