diff --git a/cypress/e2e/featureFlags.cy.ts b/cypress/e2e/featureFlags.cy.ts index 2d6b30aaabb04..becbae301d94f 100644 --- a/cypress/e2e/featureFlags.cy.ts +++ b/cypress/e2e/featureFlags.cy.ts @@ -44,6 +44,9 @@ describe('Feature Flags', () => { cy.get('[data-attr=prop-val]').click() cy.get('[data-attr=prop-val-0]').click({ force: true }) + // set rollout percentage + cy.get('[data-attr=rollout-percentage]').clear().type('0').should('have.value', '0') + // save the feature flag cy.get('[data-attr=save-feature-flag]').first().click() @@ -65,6 +68,7 @@ describe('Feature Flags', () => { .click() .type(`{moveToEnd}-updated`) .should('have.value', name + '-updated') + cy.get('[data-attr=rollout-percentage]').type('{selectall}50').should('have.value', '50') cy.get('[data-attr=save-feature-flag]').first().click() cy.wait(100) cy.clickNavMenu('featureflags') @@ -81,6 +85,7 @@ describe('Feature Flags', () => { cy.get('[data-attr=top-bar-name]').should('contain', 'Feature flags') cy.get('[data-attr=new-feature-flag]').click() cy.get('[data-attr=feature-flag-key]').focus().type(name).should('have.value', name) + cy.get('[data-attr=rollout-percentage]').type('{selectall}50').should('have.value', '50') cy.get('[data-attr=save-feature-flag]').first().click() // after save there should be a delete button diff --git a/ee/clickhouse/models/test/__snapshots__/test_property.ambr b/ee/clickhouse/models/test/__snapshots__/test_property.ambr index 084d684e685af..d9973ee67e29c 100644 --- a/ee/clickhouse/models/test/__snapshots__/test_property.ambr +++ b/ee/clickhouse/models/test/__snapshots__/test_property.ambr @@ -58,7 +58,7 @@ # --- # name: test_parse_groups_persons_edge_case_with_single_filter tuple( - 'AND ( has(%(vglobalperson_0)s, replaceRegexpAll(JSONExtractRaw(person_props, %(kglobalperson_0)s), \'^"|"$\', \'\')))', + 'AND ( has(%(vglobalperson_0)s, "pmat_email"))', dict({ 'kglobalperson_0': 'email', 'vglobalperson_0': list([ diff --git a/ee/clickhouse/queries/experiments/funnel_experiment_result.py b/ee/clickhouse/queries/experiments/funnel_experiment_result.py index 4044bcfd6ac7a..b0c8970d265bd 100644 --- a/ee/clickhouse/queries/experiments/funnel_experiment_result.py +++ b/ee/clickhouse/queries/experiments/funnel_experiment_result.py @@ -85,7 +85,11 @@ def __init__( def get_results(self): funnel_results = self.funnel.run() + + validate_event_variants(funnel_results, self.variants) + filtered_results = [result for result in funnel_results if result[0]["breakdown_value"][0] in self.variants] + control_variant, test_variants = self.get_variants(filtered_results) probabilities = self.calculate_results(control_variant, test_variants) @@ -292,3 +296,25 @@ def calculate_probability_of_winning_for_each(variants: List[Variant]) -> List[P total_test_probabilities = sum(probabilities[1:]) return [max(0, 1 - total_test_probabilities), *probabilities[1:]] + + +def validate_event_variants(funnel_results, variants): + if not funnel_results or not funnel_results[0]: + raise ValidationError("No experiment events have been ingested yet.", code="no-events") + + eventsWithOrderZero = [] + for eventArr in funnel_results: + for event in eventArr: + if event.get("order") == 0: + eventsWithOrderZero.append(event) + + missing_variants = set(variants) + for event in eventsWithOrderZero: + event_variant = event.get("breakdown_value")[0] + if event_variant in missing_variants: + missing_variants.discard(event_variant) + + if not len(missing_variants) == 0: + missing_variants_str = ", ".join(missing_variants) + message = f"No experiment events have been ingested yet for the following variants: {missing_variants_str}" + raise ValidationError(message, code=f"missing-flag-variants::{missing_variants_str}") diff --git a/ee/clickhouse/queries/test/test_experiments.py b/ee/clickhouse/queries/test/test_experiments.py new file mode 100644 index 0000000000000..de551b9a893f9 --- /dev/null +++ b/ee/clickhouse/queries/test/test_experiments.py @@ -0,0 +1,88 @@ +import unittest +from ee.clickhouse.queries.experiments.funnel_experiment_result import validate_event_variants +from rest_framework.exceptions import ValidationError + + +class TestExperiments(unittest.TestCase): + def test_validate_event_variants_no_events(self): + expected_code = "no-events" + with self.assertRaises(ValidationError) as context: + validate_event_variants([], ["test", "control"]) + + self.assertEqual(expected_code, context.exception.detail[0].code) + + def test_validate_event_variants_missing_variants(self): + funnel_results = [ + [ + { + "action_id": "step-a-1", + "name": "step-a-1", + "custom_name": None, + "order": 0, + "people": [], + "count": 1, + "type": "events", + "average_conversion_time": None, + "median_conversion_time": None, + "breakdown": ["test"], + "breakdown_value": ["test"], + }, + { + "action_id": "step-a-2", + "name": "step-a-2", + "custom_name": None, + "order": 1, + "people": [], + "count": 0, + "type": "events", + "average_conversion_time": None, + "median_conversion_time": None, + "breakdown": ["test"], + "breakdown_value": ["test"], + }, + ] + ] + + expected_code = "missing-flag-variants::control" + with self.assertRaises(ValidationError) as context: + validate_event_variants(funnel_results, ["test", "control"]) + + self.assertEqual(expected_code, context.exception.detail[0].code) + + def test_validate_event_variants_ignore_old_variant(self): + funnel_results = [ + [ + { + "action_id": "step-a-1", + "name": "step-a-1", + "custom_name": None, + "order": 0, + "people": [], + "count": 1, + "type": "events", + "average_conversion_time": None, + "median_conversion_time": None, + "breakdown": ["test"], + "breakdown_value": ["test"], + }, + { + "action_id": "step-a-2", + "name": "step-a-2", + "custom_name": None, + "order": 1, + "people": [], + "count": 0, + "type": "events", + "average_conversion_time": None, + "median_conversion_time": None, + "breakdown": ["old-variant"], + "breakdown_value": ["old-variant"], + }, + ] + ] + + expected_code = "missing-flag-variants::control" + with self.assertRaises(ValidationError) as context: + validate_event_variants(funnel_results, ["test", "control"]) + + self.assertEqual(expected_code, context.exception.detail[0].code) diff --git a/ee/clickhouse/views/test/__snapshots__/test_clickhouse_experiment_secondary_results.ambr b/ee/clickhouse/views/test/__snapshots__/test_clickhouse_experiment_secondary_results.ambr index 613bdc0de3a2a..b9255b1dadd48 100644 --- a/ee/clickhouse/views/test/__snapshots__/test_clickhouse_experiment_secondary_results.ambr +++ b/ee/clickhouse/views/test/__snapshots__/test_clickhouse_experiment_secondary_results.ambr @@ -1,7 +1,7 @@ # serializer version: 1 # name: ClickhouseTestExperimentSecondaryResults.test_basic_secondary_metric_results ''' - /* user_id:132 celery:posthog.tasks.tasks.sync_insight_caching_state */ + /* user_id:129 celery:posthog.tasks.tasks.sync_insight_caching_state */ SELECT team_id, date_diff('second', max(timestamp), now()) AS age FROM events diff --git a/ee/frontend/mobile-replay/__snapshots__/transform.test.ts.snap b/ee/frontend/mobile-replay/__snapshots__/transform.test.ts.snap index d8d54f7ee04c2..c7193c7c6dc3e 100644 --- a/ee/frontend/mobile-replay/__snapshots__/transform.test.ts.snap +++ b/ee/frontend/mobile-replay/__snapshots__/transform.test.ts.snap @@ -190,9 +190,49 @@ exports[`replay/transform transform can convert navigation bar 1`] = ` { "attributes": { "data-rrweb-id": 12345, - "style": "border-width: 4px;border-radius: 10px;border-color: #ee3ee4;border-style: solid;color: #ee3ee4;width: 100px;height: 30px;position: fixed;left: 11px;top: 12px;", + "style": "border-width: 4px;border-radius: 10px;border-color: #ee3ee4;border-style: solid;color: #ee3ee4;width: 100px;height: 30px;position: fixed;left: 11px;top: 12px;display:flex;flex-direction:row;align-items:center;justify-content:space-around;color:white;", }, - "childNodes": [], + "childNodes": [ + { + "attributes": {}, + "childNodes": [ + { + "id": 101, + "textContent": "◀", + "type": 3, + }, + ], + "id": 100, + "tagName": "div", + "type": 2, + }, + { + "attributes": {}, + "childNodes": [ + { + "id": 103, + "textContent": "⚪", + "type": 3, + }, + ], + "id": 102, + "tagName": "div", + "type": 2, + }, + { + "attributes": {}, + "childNodes": [ + { + "id": 105, + "textContent": "⬜️", + "type": 3, + }, + ], + "id": 104, + "tagName": "div", + "type": 2, + }, + ], "id": 12345, "tagName": "div", "type": 2, @@ -428,7 +468,7 @@ exports[`replay/transform transform can convert status bar 1`] = ` { "attributes": { "data-rrweb-id": 12, - "style": "width: 100px;height: 0px;position: fixed;left: 13px;top: 17px;display:flex;flex-direction:row;align-items:center;", + "style": "color: black;width: 100px;height: 0px;position: fixed;left: 13px;top: 17px;display:flex;flex-direction:row;align-items:center;", }, "childNodes": [ { diff --git a/ee/frontend/mobile-replay/transformer/colors.ts b/ee/frontend/mobile-replay/transformer/colors.ts new file mode 100644 index 0000000000000..56a54b23d723b --- /dev/null +++ b/ee/frontend/mobile-replay/transformer/colors.ts @@ -0,0 +1,51 @@ +// from https://gist.github.com/t1grok/a0f6d04db569890bcb57 + +interface rgb { + r: number + g: number + b: number +} +interface yuv { + y: number + u: number + v: number +} + +function hexToRgb(hexColor: string): rgb | null { + const shorthandRegex = /^#?([a-f\d])([a-f\d])([a-f\d])$/i + hexColor = hexColor.replace(shorthandRegex, function (_, r, g, b) { + return r + r + g + g + b + b + }) + + const result = /^#?([a-f\d]{2})([a-f\d]{2})([a-f\d]{2})$/i.exec(hexColor) + return result + ? { + r: parseInt(result[1], 16), + g: parseInt(result[2], 16), + b: parseInt(result[3], 16), + } + : null +} + +function rgbToYuv(rgbColor: rgb): yuv { + let y, u, v + + y = rgbColor.r * 0.299 + rgbColor.g * 0.587 + rgbColor.b * 0.114 + u = rgbColor.r * -0.168736 + rgbColor.g * -0.331264 + rgbColor.b * 0.5 + 128 + v = rgbColor.r * 0.5 + rgbColor.g * -0.418688 + rgbColor.b * -0.081312 + 128 + + y = Math.floor(y) + u = Math.floor(u) + v = Math.floor(v) + + return { y: y, u: u, v: v } +} + +export const isLight = (hexColor: string): boolean => { + const rgbColor = hexToRgb(hexColor) + if (!rgbColor) { + return false + } + const yuvColor = rgbToYuv(rgbColor) + return yuvColor.y > 128 +} diff --git a/ee/frontend/mobile-replay/transformer/screen-chrome.ts b/ee/frontend/mobile-replay/transformer/screen-chrome.ts index e553544386461..fd64712e9a224 100644 --- a/ee/frontend/mobile-replay/transformer/screen-chrome.ts +++ b/ee/frontend/mobile-replay/transformer/screen-chrome.ts @@ -1,4 +1,5 @@ import { NodeType, serializedNodeWithId, wireframeNavigationBar, wireframeStatusBar } from '../mobile.types' +import { isLight } from './colors' import { NAVIGATION_BAR_ID, STATUS_BAR_ID } from './transformers' import { ConversionContext, ConversionResult } from './types' import { asStyleString, makeStylesString } from './wireframeStyle' @@ -17,22 +18,50 @@ function spacerDiv(idSequence: Generator): serializedNodeWithId { } } +function makeFakeNavButton(icon: string, context: ConversionContext): serializedNodeWithId { + return { + type: NodeType.Element, + tagName: 'div', + attributes: {}, + id: context.idSequence.next().value, + childNodes: [ + { + type: NodeType.Text, + textContent: icon, + id: context.idSequence.next().value, + }, + ], + } +} + export function makeNavigationBar( wireframe: wireframeNavigationBar, _children: serializedNodeWithId[], context: ConversionContext ): ConversionResult | null { const _id = wireframe.id || NAVIGATION_BAR_ID + + const backArrowTriangle = makeFakeNavButton('◀', context) + const homeCircle = makeFakeNavButton('⚪', context) + const screenButton = makeFakeNavButton('⬜️', context) + return { result: { type: NodeType.Element, tagName: 'div', attributes: { - style: asStyleString([makeStylesString(wireframe)]), + style: asStyleString([ + makeStylesString(wireframe), + 'display:flex', + 'flex-direction:row', + 'align-items:center', + 'justify-content:space-around', + 'color:white', + ]), 'data-rrweb-id': _id, }, id: _id, - childNodes: [], + childNodes: [backArrowTriangle, homeCircle, screenButton], }, context, } @@ -51,6 +80,9 @@ export function makeStatusBar( const clockTime = context.timestamp ? new Date(context.timestamp).toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' }) : '' + + const clockFontColor = isLight(wireframe.style?.backgroundColor || '#ffffff') ? 'black' : 'white' + const clock: serializedNodeWithId = { type: NodeType.Element, tagName: 'div', @@ -73,7 +105,7 @@ export function makeStatusBar( tagName: 'div', attributes: { style: asStyleString([ - makeStylesString(wireframe), + makeStylesString(wireframe, { color: clockFontColor }), 'display:flex', 'flex-direction:row', 'align-items:center', diff --git a/ee/tasks/subscriptions/__init__.py b/ee/tasks/subscriptions/__init__.py index f5bd3c043ff59..2f6e393ab3b2e 100644 --- a/ee/tasks/subscriptions/__init__.py +++ b/ee/tasks/subscriptions/__init__.py @@ -121,7 +121,7 @@ def _deliver_subscription_report( subscription.save() -@shared_task(queue=CeleryQueue.SUBSCRIPTION_DELIVERY) +@shared_task(queue=CeleryQueue.SUBSCRIPTION_DELIVERY.value) def schedule_all_subscriptions() -> None: """ Schedule all past notifications (with a buffer) to be delivered @@ -152,7 +152,7 @@ def schedule_all_subscriptions() -> None: @shared_task( soft_time_limit=report_timeout_seconds, time_limit=report_timeout_seconds + 10, - queue=CeleryQueue.SUBSCRIPTION_DELIVERY, + queue=CeleryQueue.SUBSCRIPTION_DELIVERY.value, ) def deliver_subscription_report(subscription_id: int) -> None: return _deliver_subscription_report(subscription_id) @@ -161,7 +161,7 @@ def deliver_subscription_report(subscription_id: int) -> None: @shared_task( soft_time_limit=report_timeout_seconds, time_limit=report_timeout_seconds + 10, - queue=CeleryQueue.SUBSCRIPTION_DELIVERY, + queue=CeleryQueue.SUBSCRIPTION_DELIVERY.value, ) def handle_subscription_value_change( subscription_id: int, previous_value: str, invite_message: Optional[str] = None diff --git a/frontend/src/lib/components/JSSnippet.tsx b/frontend/src/lib/components/JSSnippet.tsx index 8fb5f32277d31..119d84622b9af 100644 --- a/frontend/src/lib/components/JSSnippet.tsx +++ b/frontend/src/lib/components/JSSnippet.tsx @@ -9,7 +9,7 @@ export function JSSnippet(): JSX.Element { return ( {``} ) } diff --git a/frontend/src/queries/query.ts b/frontend/src/queries/query.ts index 11388a394bd21..d5e0d5d697392 100644 --- a/frontend/src/queries/query.ts +++ b/frontend/src/queries/query.ts @@ -254,6 +254,9 @@ export async function query( } else if (res2.length > 0 && res2[0].people) { res2 = res2[0]?.people.map((n: any) => n.id) res1 = res1.map((n: any) => n[0].id) + // Sort, since the order of the results is not guaranteed + res1.sort() + res2.sort() } const getTimingDiff = (): undefined | { diff: number; legacy: number; hogql: number } => { diff --git a/frontend/src/queries/schema.json b/frontend/src/queries/schema.json index e871939974992..d441257e746e2 100644 --- a/frontend/src/queries/schema.json +++ b/frontend/src/queries/schema.json @@ -2438,9 +2438,11 @@ "type": "integer" }, "pathDropoffKey": { + "description": "Relevant only within actors query", "type": "string" }, "pathEndKey": { + "description": "Relevant only within actors query", "type": "string" }, "pathGroupings": { @@ -2453,6 +2455,7 @@ "type": "boolean" }, "pathStartKey": { + "description": "Relevant only within actors query", "type": "string" }, "pathsHogQLExpression": { diff --git a/frontend/src/queries/schema.ts b/frontend/src/queries/schema.ts index bf369b8a791af..57d39b2201a64 100644 --- a/frontend/src/queries/schema.ts +++ b/frontend/src/queries/schema.ts @@ -663,9 +663,11 @@ export type PathsFilter = { funnelPaths?: PathsFilterLegacy['funnel_paths'] funnelFilter?: PathsFilterLegacy['funnel_filter'] - // persons only + /** Relevant only within actors query */ pathStartKey?: string + /** Relevant only within actors query */ pathEndKey?: string + /** Relevant only within actors query */ pathDropoffKey?: string } diff --git a/frontend/src/scenes/experiments/Experiment.stories.tsx b/frontend/src/scenes/experiments/Experiment.stories.tsx index 041dab5a4ad78..31177ce43ade6 100644 --- a/frontend/src/scenes/experiments/Experiment.stories.tsx +++ b/frontend/src/scenes/experiments/Experiment.stories.tsx @@ -52,7 +52,7 @@ const MOCK_FUNNEL_EXPERIMENT: Experiment = { }, ], variant: null, - rollout_percentage: null, + rollout_percentage: undefined, }, ], payloads: {}, @@ -156,7 +156,7 @@ const MOCK_TREND_EXPERIMENT: Experiment = { }, ], variant: null, - rollout_percentage: null, + rollout_percentage: undefined, }, ], payloads: {}, diff --git a/frontend/src/scenes/experiments/ExperimentResult.tsx b/frontend/src/scenes/experiments/ExperimentResult.tsx index df92c3c9335b7..407f7d1e5f61e 100644 --- a/frontend/src/scenes/experiments/ExperimentResult.tsx +++ b/frontend/src/scenes/experiments/ExperimentResult.tsx @@ -183,9 +183,13 @@ export function ExperimentResult(): JSX.Element {
{!experimentResultsLoading && (
- There are no results for this experiment yet. +
+ There are no results for this experiment yet. +
+ {!!experimentResultCalculationError && ( +
{experimentResultCalculationError}
+ )}
- {!!experimentResultCalculationError && `${experimentResultCalculationError}. `}{' '} Wait a bit longer for your users to be exposed to the experiment. Double check your feature flag implementation if you're still not seeing results.
diff --git a/frontend/src/scenes/feature-flags/FeatureFlagCodeInstructions.stories.tsx b/frontend/src/scenes/feature-flags/FeatureFlagCodeInstructions.stories.tsx index 064b3b7e41dd7..13dea08e0f9f3 100644 --- a/frontend/src/scenes/feature-flags/FeatureFlagCodeInstructions.stories.tsx +++ b/frontend/src/scenes/feature-flags/FeatureFlagCodeInstructions.stories.tsx @@ -14,7 +14,7 @@ const REGULAR_FEATURE_FLAG: FeatureFlagType = { key: 'test', rollout_percentage: 50, filters: { - groups: [{ properties: [], rollout_percentage: null, variant: null }], + groups: [{ properties: [], rollout_percentage: undefined, variant: null }], multivariate: null, payloads: { true: '' }, }, @@ -48,7 +48,7 @@ const MULTIVARIATE_FEATURE_FLAG: FeatureFlagType = { ...REGULAR_FEATURE_FLAG, key: 'multivariate-flag', filters: { - groups: [{ properties: [], rollout_percentage: null, variant: null }], + groups: [{ properties: [], rollout_percentage: undefined, variant: null }], payloads: {}, multivariate: { variants: [ @@ -64,7 +64,7 @@ const MULTIVARIATE_GROUP_WITH_PAYLOADS_FEATURE_FLAG: FeatureFlagType = { key: 'multivariate-group-flag', filters: { aggregation_group_type_index: 1, - groups: [{ properties: [], rollout_percentage: null, variant: null }], + groups: [{ properties: [], rollout_percentage: undefined, variant: null }], payloads: { alpha: 'abcd', beta: 'xyz' }, multivariate: { variants: [ diff --git a/frontend/src/scenes/feature-flags/FeatureFlagReleaseConditions.tsx b/frontend/src/scenes/feature-flags/FeatureFlagReleaseConditions.tsx index b7144c132a488..1b29756ae8af7 100644 --- a/frontend/src/scenes/feature-flags/FeatureFlagReleaseConditions.tsx +++ b/frontend/src/scenes/feature-flags/FeatureFlagReleaseConditions.tsx @@ -2,6 +2,7 @@ import './FeatureFlag.scss' import { LemonInput, LemonSelect, Link } from '@posthog/lemon-ui' import { useActions, useValues } from 'kea' +import { Field, Group } from 'kea-forms' import { router } from 'kea-router' import { allOperatorsToHumanName } from 'lib/components/DefinitionPopover/utils' import { PropertyFilters } from 'lib/components/PropertyFilters/PropertyFilters' @@ -267,7 +268,11 @@ export function FeatureFlagReleaseConditions({
Roll out to{' '} { updateConditionSet(index, value) }} @@ -276,19 +281,23 @@ export function FeatureFlagReleaseConditions({ step={1} className="ml-1.5 w-20" /> - { - updateConditionSet(index, value === undefined ? 0 : value) - }} - value={group.rollout_percentage ?? 100} - min={0} - max={100} - step="any" - suffix={%} - />{' '} + + + { + updateConditionSet(index, value === undefined ? 0 : value) + }} + value={group.rollout_percentage ?? undefined} + min={0} + max={100} + step="any" + suffix={%} + /> + + {' '} of {aggregationTargetName} in this set.{' '}
diff --git a/frontend/src/scenes/feature-flags/featureFlagLogic.test.ts b/frontend/src/scenes/feature-flags/featureFlagLogic.test.ts index e4d21cb4b7c64..f7ebbc84aab49 100644 --- a/frontend/src/scenes/feature-flags/featureFlagLogic.test.ts +++ b/frontend/src/scenes/feature-flags/featureFlagLogic.test.ts @@ -328,7 +328,7 @@ describe('the feature flag logic', () => { await expectLogic(logic, () => { logic.actions.setFeatureFlag( generateFeatureFlag([ - { properties: [], rollout_percentage: null, variant: null }, + { properties: [], rollout_percentage: undefined, variant: null }, { properties: [ { @@ -338,7 +338,7 @@ describe('the feature flag logic', () => { operator: PropertyOperator.Exact, }, ], - rollout_percentage: null, + rollout_percentage: undefined, variant: null, }, { @@ -350,7 +350,7 @@ describe('the feature flag logic', () => { operator: PropertyOperator.Exact, }, ], - rollout_percentage: null, + rollout_percentage: undefined, variant: null, }, ]) diff --git a/frontend/src/scenes/feature-flags/featureFlagLogic.ts b/frontend/src/scenes/feature-flags/featureFlagLogic.ts index 8d8de11f4a888..66b4c926e2833 100644 --- a/frontend/src/scenes/feature-flags/featureFlagLogic.ts +++ b/frontend/src/scenes/feature-flags/featureFlagLogic.ts @@ -1,5 +1,5 @@ import { actions, afterMount, connect, kea, key, listeners, path, props, reducers, selectors } from 'kea' -import { forms } from 'kea-forms' +import { DeepPartialMap, forms, ValidationErrorType } from 'kea-forms' import { loaders } from 'kea-loaders' import { router, urlToAction } from 'kea-router' import api from 'lib/api' @@ -75,7 +75,7 @@ const NEW_FLAG: FeatureFlagType = { key: '', name: '', filters: { - groups: [{ properties: [], rollout_percentage: 0, variant: null }], + groups: [{ properties: [], rollout_percentage: undefined, variant: null }], multivariate: null, payloads: {}, }, @@ -210,7 +210,7 @@ export const featureFlagLogic = kea([ duplicateConditionSet: (index: number) => ({ index }), updateConditionSet: ( index: number, - newRolloutPercentage?: number | null, + newRolloutPercentage?: number, newProperties?: AnyPropertyFilter[], newVariant?: string | null ) => ({ @@ -243,19 +243,24 @@ export const featureFlagLogic = kea([ forms(({ actions, values }) => ({ featureFlag: { defaults: { ...NEW_FLAG } as FeatureFlagType, - errors: ({ key, filters }) => ({ - key: validateFeatureFlagKey(key), - filters: { - multivariate: { - variants: filters?.multivariate?.variants?.map( - ({ key: variantKey }: MultivariateFlagVariant) => ({ - key: validateFeatureFlagKey(variantKey), - }) - ), + errors: ({ key, filters }) => { + return { + key: validateFeatureFlagKey(key), + filters: { + multivariate: { + variants: filters?.multivariate?.variants?.map( + ({ key: variantKey }: MultivariateFlagVariant) => ({ + key: validateFeatureFlagKey(variantKey), + }) + ), + }, + groups: values.propertySelectErrors as DeepPartialMap< + FeatureFlagGroupType, + ValidationErrorType + >[], }, - groups: values.propertySelectErrors, - }, - }), + } + }, submit: (featureFlag) => { actions.saveFeatureFlag(featureFlag) }, @@ -288,7 +293,7 @@ export const featureFlagLogic = kea([ } const groups = [ ...(state?.filters?.groups || []), - { properties: [], rollout_percentage: 0, variant: null }, + { properties: [], rollout_percentage: undefined, variant: null }, ] return { ...state, filters: { ...state.filters, groups } } }, @@ -989,16 +994,22 @@ export const featureFlagLogic = kea([ propertySelectErrors: [ (s) => [s.featureFlag], (featureFlag) => { - return featureFlag?.filters?.groups?.map(({ properties }: FeatureFlagGroupType) => ({ - properties: properties?.map((property: AnyPropertyFilter) => ({ - value: - property.value === null || - property.value === undefined || - (Array.isArray(property.value) && property.value.length === 0) - ? "Property filters can't be empty" + return featureFlag?.filters?.groups?.map( + ({ properties, rollout_percentage }: FeatureFlagGroupType) => ({ + properties: properties?.map((property: AnyPropertyFilter) => ({ + value: + property.value === null || + property.value === undefined || + (Array.isArray(property.value) && property.value.length === 0) + ? "Property filters can't be empty" + : undefined, + })), + rollout_percentage: + rollout_percentage === null || rollout_percentage === undefined + ? 'You need to set a rollout % value' : undefined, - })), - })) + }) + ) }, ], computeBlastRadiusPercentage: [ diff --git a/frontend/src/scenes/onboarding/onboardingLogic.tsx b/frontend/src/scenes/onboarding/onboardingLogic.tsx index 2186d6eff384a..f6d46eee16751 100644 --- a/frontend/src/scenes/onboarding/onboardingLogic.tsx +++ b/frontend/src/scenes/onboarding/onboardingLogic.tsx @@ -6,6 +6,7 @@ import { eventUsageLogic } from 'lib/utils/eventUsageLogic' import { billingLogic } from 'scenes/billing/billingLogic' import { teamLogic } from 'scenes/teamLogic' import { urls } from 'scenes/urls' +import { userLogic } from 'scenes/userLogic' import { BillingProductV2Type, ProductKey } from '~/types' @@ -50,8 +51,17 @@ export const onboardingLogic = kea([ props({} as OnboardingLogicProps), path(['scenes', 'onboarding', 'onboardingLogic']), connect({ - values: [billingLogic, ['billing'], teamLogic, ['currentTeam'], featureFlagLogic, ['featureFlags']], - actions: [billingLogic, ['loadBillingSuccess'], teamLogic, ['updateCurrentTeamSuccess']], + values: [ + billingLogic, + ['billing'], + teamLogic, + ['currentTeam'], + featureFlagLogic, + ['featureFlags'], + userLogic, + ['user'], + ], + actions: [billingLogic, ['loadBillingSuccess'], teamLogic, ['updateCurrentTeam', 'updateCurrentTeamSuccess']], }), actions({ setProduct: (product: BillingProductV2Type | null) => ({ product }), @@ -175,6 +185,28 @@ export const onboardingLogic = kea([ window.location.href = urls.default() } else { actions.resetStepKey() + const includeFirstOnboardingProductOnUserProperties = values.user?.date_joined + ? new Date(values.user?.date_joined) > new Date('2024-01-10T00:00:00Z') + : false + eventUsageLogic.actions.reportOnboardingProductSelected( + product.type, + includeFirstOnboardingProductOnUserProperties + ) + switch (product.type) { + case ProductKey.PRODUCT_ANALYTICS: + return + case ProductKey.SESSION_REPLAY: + actions.updateCurrentTeam({ + session_recording_opt_in: true, + capture_console_log_opt_in: true, + capture_performance_opt_in: true, + }) + return + case ProductKey.FEATURE_FLAGS: + return + default: + return + } } }, setProductKey: ({ productKey }) => { diff --git a/frontend/src/scenes/paths/PathNodeCardButton.tsx b/frontend/src/scenes/paths/PathNodeCardButton.tsx index 1dabbbdf8e453..9acfd9abd7ba2 100644 --- a/frontend/src/scenes/paths/PathNodeCardButton.tsx +++ b/frontend/src/scenes/paths/PathNodeCardButton.tsx @@ -53,10 +53,8 @@ export function PathNodeCardButton({ {pageUrl(node, true)}
- - - {count} - + + {count} ([ path_end_key, path_dropoff_key, } - const personsUrl = buildPeopleUrl({ - date_from: '', - filters, - response: values.insightData, - }) + const modalProps: OpenPersonsModalProps = { + url: buildPeopleUrl({ + date_from: '', + filters, + response: values.insightData, + }), + title: pathsTitle({ + label: path_dropoff_key || path_start_key || path_end_key || 'Pageview', + mode: path_dropoff_key ? 'dropOff' : path_start_key ? 'continue' : 'completion', + }), + orderBy: ['id'], + } if (values.hogQLInsightsPathsFlagEnabled && values.vizQuerySource?.kind === NodeKind.PathsQuery) { - const pathsActorsQuery: InsightActorsQuery = { + modalProps['query'] = { kind: NodeKind.InsightActorsQuery, source: { ...values.vizQuerySource, @@ -141,26 +148,12 @@ export const pathsDataLogic = kea([ }, }, } - openPersonsModal({ - url: personsUrl, - query: pathsActorsQuery, - title: pathsTitle({ - label: path_dropoff_key || path_start_key || path_end_key || 'Pageview', - isDropOff: Boolean(path_dropoff_key), - }), - additionalFields: { - value_at_data_point: 'event_count', - }, - }) - } else if (personsUrl) { - openPersonsModal({ - url: personsUrl, - title: pathsTitle({ - label: path_dropoff_key || path_start_key || path_end_key || 'Pageview', - isDropOff: Boolean(path_dropoff_key), - }), - }) + modalProps['additionalSelect'] = { + value_at_data_point: 'event_count', + matched_recordings: 'matched_recordings', + } } + openPersonsModal(modalProps) }, viewPathToFunnel: ({ pathItemCard }) => { const events: ActionFilter[] = [] diff --git a/frontend/src/scenes/products/Products.tsx b/frontend/src/scenes/products/Products.tsx index 32f3514bcd0f1..d28d43fc80eee 100644 --- a/frontend/src/scenes/products/Products.tsx +++ b/frontend/src/scenes/products/Products.tsx @@ -1,7 +1,7 @@ import * as Icons from '@posthog/icons' import { LemonButton } from '@posthog/lemon-ui' import clsx from 'clsx' -import { useActions, useValues } from 'kea' +import { useValues } from 'kea' import { router } from 'kea-router' import { LemonCard } from 'lib/lemon-ui/LemonCard/LemonCard' import { Spinner } from 'lib/lemon-ui/Spinner' @@ -14,24 +14,18 @@ import { urls } from 'scenes/urls' import { BillingProductV2Type, ProductKey } from '~/types' -import { productsLogic } from './productsLogic' - export const scene: SceneExport = { component: Products, - logic: productsLogic, } function OnboardingCompletedButton({ productUrl, onboardingUrl, - productKey, }: { productUrl: string onboardingUrl: string productKey: ProductKey }): JSX.Element { - const { onSelectProduct } = useActions(productsLogic) - return ( <> @@ -40,7 +34,6 @@ function OnboardingCompletedButton({ { - onSelectProduct(productKey) router.actions.push(onboardingUrl) }} > @@ -52,14 +45,12 @@ function OnboardingCompletedButton({ function OnboardingNotCompletedButton({ url, - productKey, getStartedActionOverride, }: { url: string productKey: ProductKey getStartedActionOverride?: () => void }): JSX.Element { - const { onSelectProduct } = useActions(productsLogic) return ( ([ - path(() => ['scenes', 'products', 'productsLogic']), - connect({ - actions: [teamLogic, ['updateCurrentTeam'], onboardingLogic, ['setProduct']], - values: [userLogic, ['user']], - }), - actions(() => ({ - onSelectProduct: (product: ProductKey) => ({ product }), - })), - listeners(({ actions, values }) => ({ - onSelectProduct: ({ product }) => { - const includeFirstOnboardingProductOnUserProperties = values.user?.date_joined - ? new Date(values.user?.date_joined) > new Date('2024-01-10T00:00:00Z') - : false - eventUsageLogic.actions.reportOnboardingProductSelected( - product, - includeFirstOnboardingProductOnUserProperties - ) - - switch (product) { - case ProductKey.PRODUCT_ANALYTICS: - return - case ProductKey.SESSION_REPLAY: - actions.updateCurrentTeam({ - session_recording_opt_in: true, - capture_console_log_opt_in: true, - capture_performance_opt_in: true, - }) - return - case ProductKey.FEATURE_FLAGS: - return - default: - return - } - }, - })), -]) diff --git a/frontend/src/scenes/sceneLogic.ts b/frontend/src/scenes/sceneLogic.ts index 3fdc78103e7d5..73375a8432cc5 100644 --- a/frontend/src/scenes/sceneLogic.ts +++ b/frontend/src/scenes/sceneLogic.ts @@ -268,17 +268,24 @@ export const sceneLogic = kea([ ) if ( - values.featureFlags[FEATURE_FLAGS.PRODUCT_INTRO_PAGES] === 'test' && productKeyFromUrl && teamLogic.values.currentTeam && !teamLogic.values.currentTeam?.has_completed_onboarding_for?.[productKeyFromUrl] - // TODO: should this only happen when in cloud mode? What is the experience for self-hosted? + // TODO: when removing ff PRODUCT_INTRO_PAGES - should this only happen when in + // cloud mode? What is the experience for self-hosted? ) { - console.warn( - `Onboarding not completed for ${productKeyFromUrl}, redirecting to onboarding intro` - ) - router.actions.replace(urls.onboardingProductIntroduction(productKeyFromUrl)) - return + // TODO: remove after PRODUCT_INTRO_PAGES experiment is complete + posthog.capture('should view onboarding product intro', { + did_view_intro: values.featureFlags[FEATURE_FLAGS.PRODUCT_INTRO_PAGES] === 'test', + product_key: productKeyFromUrl, + }) + if (values.featureFlags[FEATURE_FLAGS.PRODUCT_INTRO_PAGES] === 'test') { + console.warn( + `Onboarding not completed for ${productKeyFromUrl}, redirecting to onboarding intro` + ) + router.actions.replace(urls.onboardingProductIntroduction(productKeyFromUrl)) + return + } } } } diff --git a/frontend/src/scenes/surveys/Surveys.stories.tsx b/frontend/src/scenes/surveys/Surveys.stories.tsx index 560897857a549..c53b81338cd9a 100644 --- a/frontend/src/scenes/surveys/Surveys.stories.tsx +++ b/frontend/src/scenes/surveys/Surveys.stories.tsx @@ -68,7 +68,7 @@ const MOCK_SURVEY_WITH_RELEASE_CONS: Survey = { { variant: null, properties: [], - rollout_percentage: null, + rollout_percentage: undefined, }, ], payloads: {}, diff --git a/frontend/src/scenes/trends/persons-modal/PersonsModal.tsx b/frontend/src/scenes/trends/persons-modal/PersonsModal.tsx index ed7d4e95737b9..05233d66aad46 100644 --- a/frontend/src/scenes/trends/persons-modal/PersonsModal.tsx +++ b/frontend/src/scenes/trends/persons-modal/PersonsModal.tsx @@ -55,7 +55,8 @@ export function PersonsModal({ title, onAfterClose, inline, - additionalFields, + additionalSelect, + orderBy, }: PersonsModalProps): JSX.Element { const [selectedUrlIndex, setSelectedUrlIndex] = useState(urlsIndex || 0) const originalUrl = (urls || [])[selectedUrlIndex]?.value || _url || '' @@ -63,7 +64,8 @@ export function PersonsModal({ const logic = personsModalLogic({ url: originalUrl, query: _query, - additionalFields, + additionalSelect, + orderBy, }) const { diff --git a/frontend/src/scenes/trends/persons-modal/persons-modal-utils.tsx b/frontend/src/scenes/trends/persons-modal/persons-modal-utils.tsx index 3a82c70c9e6b6..b0457f6b19b69 100644 --- a/frontend/src/scenes/trends/persons-modal/persons-modal-utils.tsx +++ b/frontend/src/scenes/trends/persons-modal/persons-modal-utils.tsx @@ -52,10 +52,16 @@ export const funnelTitle = (props: { ) } -export const pathsTitle = (props: { isDropOff: boolean; label: string }): React.ReactNode => { +type pathModes = 'completion' | 'dropOff' | 'continue' +export const pathsTitle = (props: { mode: pathModes; label: string }): React.ReactNode => { + const modeMap: Record = { + completion: 'Completed', + dropOff: 'Dropped off after', + continue: 'Continued after', + } return ( <> - {props.isDropOff ? 'Dropped off after' : 'Completed'} step{' '} + {modeMap[props.mode]} step{' '} ) diff --git a/frontend/src/scenes/trends/persons-modal/personsModalLogic.ts b/frontend/src/scenes/trends/persons-modal/personsModalLogic.ts index e08a7c2163be5..7a51ca349d0ab 100644 --- a/frontend/src/scenes/trends/persons-modal/personsModalLogic.ts +++ b/frontend/src/scenes/trends/persons-modal/personsModalLogic.ts @@ -36,7 +36,8 @@ const RESULTS_PER_PAGE = 100 export interface PersonModalLogicProps { query?: InsightActorsQuery | null url?: string | null - additionalFields?: Partial> + additionalSelect?: Partial> + orderBy?: string[] } export interface ListActorsResponse { @@ -63,19 +64,19 @@ export const personsModalLogic = kea([ query, clear, offset, - additionalFields, + additionalSelect, }: { url?: string | null query?: InsightActorsQuery | null clear?: boolean offset?: number - additionalFields?: PersonModalLogicProps['additionalFields'] + additionalSelect?: PersonModalLogicProps['additionalSelect'] }) => ({ url, query, clear, offset, - additionalFields, + additionalSelect, }), loadNextActors: true, updateActorsQuery: (query: Partial) => ({ query }), @@ -90,7 +91,7 @@ export const personsModalLogic = kea([ actorsResponse: [ null as ListActorsResponse | null, { - loadActors: async ({ url, query, clear, offset, additionalFields }, breakpoint) => { + loadActors: async ({ url, query, clear, offset, additionalSelect }, breakpoint) => { if (url) { url += '&include_recordings=true' @@ -111,7 +112,7 @@ export const personsModalLogic = kea([ const response = await performQuery( { ...values.actorsQuery, - limit: RESULTS_PER_PAGE + 1, + limit: RESULTS_PER_PAGE, offset: offset || 0, } as ActorsQuery, undefined, @@ -122,7 +123,7 @@ export const personsModalLogic = kea([ breakpoint() const assembledSelectFields = values.selectFields - const additionalFieldIndices = Object.values(additionalFields || {}).map((field) => + const additionalFieldIndices = Object.values(additionalSelect || {}).map((field) => assembledSelectFields.indexOf(field) ) const newResponse: ListActorsResponse = { @@ -144,7 +145,7 @@ export const personsModalLogic = kea([ value_at_data_point: null, } - Object.keys(additionalFields || {}).forEach((field, index) => { + Object.keys(additionalSelect || {}).forEach((field, index) => { person[field] = result[additionalFieldIndices[index]] }) @@ -326,15 +327,15 @@ export const personsModalLogic = kea([ }, ], selectFields: [ - () => [(_, p) => p.additionalFields], - (additionalFields: PersonModalLogicProps['additionalFields']): string[] => { - const extra = Object.values(additionalFields || {}) + () => [(_, p) => p.additionalSelect], + (additionalSelect: PersonModalLogicProps['additionalSelect']): string[] => { + const extra = Object.values(additionalSelect || {}) return ['person', 'created_at', ...extra] }, ], actorsQuery: [ - (s) => [(_, p) => p.query, s.searchTerm, s.selectFields], - (query, searchTerm, selectFields): ActorsQuery | null => { + (s) => [(_, p) => p.query, (_, p) => p.orderBy, s.searchTerm, s.selectFields], + (query, orderBy, searchTerm, selectFields): ActorsQuery | null => { if (!query) { return null } @@ -342,7 +343,7 @@ export const personsModalLogic = kea([ kind: NodeKind.ActorsQuery, source: query, select: selectFields, - orderBy: ['created_at DESC'], + orderBy: orderBy || ['created_at DESC'], search: searchTerm, } }, @@ -365,7 +366,7 @@ export const personsModalLogic = kea([ }), afterMount(({ actions, props }) => { - actions.loadActors({ query: props.query, url: props.url, additionalFields: props.additionalFields }) + actions.loadActors({ query: props.query, url: props.url, additionalSelect: props.additionalSelect }) actions.reportPersonsModalViewed({ url: props.url, diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 3770b462743a7..23c519ea139d5 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -2428,7 +2428,7 @@ export enum SurveyQuestionType { export interface FeatureFlagGroupType { properties?: AnyPropertyFilter[] - rollout_percentage: number | null + rollout_percentage?: number | null variant: string | null users_affected?: number } diff --git a/mypy-baseline.txt b/mypy-baseline.txt index 17fd5d040a1ed..4be818b4f9aa6 100644 --- a/mypy-baseline.txt +++ b/mypy-baseline.txt @@ -403,7 +403,6 @@ posthog/hogql_queries/hogql_query_runner.py:0: error: Argument "placeholders" to posthog/hogql_queries/hogql_query_runner.py:0: error: Incompatible types in assignment (expression has type "Expr", variable has type "SelectQuery | SelectUnionQuery") [assignment] posthog/hogql_queries/hogql_query_runner.py:0: error: Incompatible return value type (got "SelectQuery | SelectUnionQuery", expected "SelectQuery") [return-value] posthog/hogql_queries/events_query_runner.py:0: error: Statement is unreachable [unreachable] -posthog/hogql_queries/events_query_runner.py:0: error: Argument "order_by" to "SelectQuery" has incompatible type "list[Expr]"; expected "list[OrderExpr] | None" [arg-type] posthog/hogql/metadata.py:0: error: Argument "metadata_source" to "translate_hogql" has incompatible type "SelectQuery | SelectUnionQuery"; expected "SelectQuery | None" [arg-type] posthog/hogql/metadata.py:0: error: Incompatible types in assignment (expression has type "Expr", variable has type "SelectQuery | SelectUnionQuery") [assignment] posthog/queries/breakdown_props.py:0: error: Argument 1 to "translate_hogql" has incompatible type "str | int"; expected "str" [arg-type] @@ -463,14 +462,6 @@ posthog/hogql_queries/insights/lifecycle_query_runner.py:0: error: Item "None" o posthog/hogql_queries/legacy_compatibility/process_insight.py:0: error: Incompatible types in assignment (expression has type "PathFilter", variable has type "RetentionFilter") [assignment] posthog/hogql_queries/legacy_compatibility/process_insight.py:0: error: Incompatible types in assignment (expression has type "StickinessFilter", variable has type "RetentionFilter") [assignment] posthog/hogql_queries/legacy_compatibility/process_insight.py:0: error: Incompatible types in assignment (expression has type "Filter", variable has type "RetentionFilter") [assignment] -posthog/hogql_queries/actors_query_runner.py:0: error: Incompatible types in assignment (expression has type "Field", variable has type "Constant") [assignment] -posthog/hogql_queries/actors_query_runner.py:0: error: Incompatible types in assignment (expression has type "Expr", variable has type "Constant") [assignment] -posthog/hogql_queries/actors_query_runner.py:0: error: Statement is unreachable [unreachable] -posthog/hogql_queries/actors_query_runner.py:0: error: List comprehension has incompatible type List[Expr]; expected List[OrderExpr] [misc] -posthog/hogql_queries/actors_query_runner.py:0: error: Argument "select" to "SelectQuery" has incompatible type "list[Constant]"; expected "list[Expr]" [arg-type] -posthog/hogql_queries/actors_query_runner.py:0: note: "List" is invariant -- see https://mypy.readthedocs.io/en/stable/common_issues.html#variance -posthog/hogql_queries/actors_query_runner.py:0: note: Consider using "Sequence" instead, which is covariant -posthog/hogql_queries/actors_query_runner.py:0: error: Argument "group_by" to "SelectQuery" has incompatible type "list[Constant] | None"; expected "list[Expr] | None" [arg-type] posthog/api/insight.py:0: error: Argument 1 to "is_insight_with_hogql_support" has incompatible type "Insight | DashboardTile"; expected "Insight" [arg-type] posthog/api/insight.py:0: error: Argument 1 to "process_insight" has incompatible type "Insight | DashboardTile"; expected "Insight" [arg-type] posthog/api/dashboards/dashboard.py:0: error: Metaclass conflict: the metaclass of a derived class must be a (non-strict) subclass of the metaclasses of all its bases [misc] diff --git a/plugin-server/src/config/config.ts b/plugin-server/src/config/config.ts index fa1b290c0793a..e7d5ec8b18872 100644 --- a/plugin-server/src/config/config.ts +++ b/plugin-server/src/config/config.ts @@ -133,6 +133,7 @@ export function getDefaultConfig(): PluginsServerConfig { POE_WRITES_EXCLUDE_TEAMS: '', RELOAD_PLUGIN_JITTER_MAX_MS: 60000, RUSTY_HOOK_FOR_TEAMS: '', + RUSTY_HOOK_ROLLOUT_PERCENTAGE: 0, RUSTY_HOOK_URL: '', STARTUP_PROFILE_DURATION_SECONDS: 300, // 5 minutes diff --git a/plugin-server/src/main/pluginsServer.ts b/plugin-server/src/main/pluginsServer.ts index 42cdee24b3bab..ef3b369902551 100644 --- a/plugin-server/src/main/pluginsServer.ts +++ b/plugin-server/src/main/pluginsServer.ts @@ -361,6 +361,7 @@ export async function startPluginsServer( hub?.rustyHook ?? new RustyHook( buildIntegerMatcher(serverConfig.RUSTY_HOOK_FOR_TEAMS, true), + serverConfig.RUSTY_HOOK_ROLLOUT_PERCENTAGE, serverConfig.RUSTY_HOOK_URL, serverConfig.EXTERNAL_REQUEST_TIMEOUT_MS ) diff --git a/plugin-server/src/types.ts b/plugin-server/src/types.ts index 0031ec514f39a..d6e375fc814eb 100644 --- a/plugin-server/src/types.ts +++ b/plugin-server/src/types.ts @@ -204,6 +204,7 @@ export interface PluginsServerConfig { POE_WRITES_EXCLUDE_TEAMS: string RELOAD_PLUGIN_JITTER_MAX_MS: number RUSTY_HOOK_FOR_TEAMS: string + RUSTY_HOOK_ROLLOUT_PERCENTAGE: number RUSTY_HOOK_URL: string SKIP_UPDATE_EVENT_AND_PROPERTIES_STEP: boolean diff --git a/plugin-server/src/utils/db/hub.ts b/plugin-server/src/utils/db/hub.ts index 0e14d29bf5643..1389b2a954463 100644 --- a/plugin-server/src/utils/db/hub.ts +++ b/plugin-server/src/utils/db/hub.ts @@ -144,6 +144,7 @@ export async function createHub( const rootAccessManager = new RootAccessManager(db) const rustyHook = new RustyHook( buildIntegerMatcher(serverConfig.RUSTY_HOOK_FOR_TEAMS, true), + serverConfig.RUSTY_HOOK_ROLLOUT_PERCENTAGE, serverConfig.RUSTY_HOOK_URL, serverConfig.EXTERNAL_REQUEST_TIMEOUT_MS ) diff --git a/plugin-server/src/worker/rusty-hook.ts b/plugin-server/src/worker/rusty-hook.ts index d71fae955db73..208369932895f 100644 --- a/plugin-server/src/worker/rusty-hook.ts +++ b/plugin-server/src/worker/rusty-hook.ts @@ -24,6 +24,7 @@ interface RustyWebhookPayload { export class RustyHook { constructor( private enabledForTeams: ValueMatcher, + private rolloutPercentage: number, private serviceUrl: string, private requestTimeoutMs: number ) {} @@ -39,7 +40,10 @@ export class RustyHook { pluginId: number pluginConfigId: number }): Promise { - if (!this.enabledForTeams(teamId)) { + // A simple and blunt rollout that just uses the last digits of the Team ID as a stable + // selection against the `rolloutPercentage`. + const enabledByRolloutPercentage = (teamId % 1000) / 1000 < this.rolloutPercentage + if (!enabledByRolloutPercentage && !this.enabledForTeams(teamId)) { return false } diff --git a/posthog/api/test/dashboards/__snapshots__/test_dashboard.ambr b/posthog/api/test/dashboards/__snapshots__/test_dashboard.ambr index bf434e1e14bc0..6e2ee934a235e 100644 --- a/posthog/api/test/dashboards/__snapshots__/test_dashboard.ambr +++ b/posthog/api/test/dashboards/__snapshots__/test_dashboard.ambr @@ -1232,6 +1232,33 @@ 5 /* ... */)) /*controller='project_dashboards-detail',route='api/projects/%28%3FP%3Cparent_lookup_team_id%3E%5B%5E/.%5D%2B%29/dashboards/%28%3FP%3Cpk%3E%5B%5E/.%5D%2B%29/%3F%24'*/ ''' # --- +# name: TestDashboard.test_adding_insights_is_not_nplus1_for_gets.32 + ''' + SELECT "posthog_dashboard"."id", + "posthog_dashboard"."name", + "posthog_dashboard"."description", + "posthog_dashboard"."team_id", + "posthog_dashboard"."pinned", + "posthog_dashboard"."created_at", + "posthog_dashboard"."created_by_id", + "posthog_dashboard"."deleted", + "posthog_dashboard"."last_accessed_at", + "posthog_dashboard"."filters", + "posthog_dashboard"."creation_mode", + "posthog_dashboard"."restriction_level", + "posthog_dashboard"."deprecated_tags", + "posthog_dashboard"."tags", + "posthog_dashboard"."share_token", + "posthog_dashboard"."is_shared" + FROM "posthog_dashboard" + WHERE (NOT ("posthog_dashboard"."deleted") + AND "posthog_dashboard"."id" IN (1, + 2, + 3, + 4, + 5 /* ... */)) /*controller='project_dashboards-detail',route='api/projects/%28%3FP%3Cparent_lookup_team_id%3E%5B%5E/.%5D%2B%29/dashboards/%28%3FP%3Cpk%3E%5B%5E/.%5D%2B%29/%3F%24'*/ + ''' +# --- # name: TestDashboard.test_adding_insights_is_not_nplus1_for_gets.4 ''' SELECT "posthog_dashboard"."id", diff --git a/posthog/email.py b/posthog/email.py index 91badeac58e3d..ccdc2f9ae6d02 100644 --- a/posthog/email.py +++ b/posthog/email.py @@ -43,7 +43,7 @@ def is_email_available(with_absolute_urls: bool = False) -> bool: EMAIL_TASK_KWARGS = dict( - queue=CeleryQueue.EMAIL, + queue=CeleryQueue.EMAIL.value, ignore_result=True, autoretry_for=(Exception,), max_retries=3, diff --git a/posthog/hogql/parser.py b/posthog/hogql/parser.py index 5067eb1cff785..399f6953698cd 100644 --- a/posthog/hogql/parser.py +++ b/posthog/hogql/parser.py @@ -73,7 +73,7 @@ def parse_order_expr( timings: Optional[HogQLTimings] = None, *, backend: Optional[Literal["python", "cpp"]] = None, -) -> ast.Expr: +) -> ast.OrderExpr: if not backend: backend = "cpp" if timings is None: diff --git a/posthog/hogql_queries/actor_strategies.py b/posthog/hogql_queries/actor_strategies.py index 747c7e15da362..246ed8ecec0e1 100644 --- a/posthog/hogql_queries/actor_strategies.py +++ b/posthog/hogql_queries/actor_strategies.py @@ -5,6 +5,7 @@ from posthog.hogql import ast from posthog.hogql.property import property_to_expr from posthog.hogql_queries.insights.paginators import HogQLHasMorePaginator +from posthog.hogql_queries.utils.recordings import RecordingsHelper from posthog.models import Team, Person, Group from posthog.schema import ActorsQuery @@ -22,6 +23,9 @@ def __init__(self, team: Team, query: ActorsQuery, paginator: HogQLHasMorePagina def get_actors(self, actor_ids) -> Dict[str, Dict]: raise NotImplementedError() + def get_recordings(self, matching_events) -> dict[str, list[dict]]: + return {} + def input_columns(self) -> List[str]: raise NotImplementedError() @@ -50,6 +54,9 @@ def get_actors(self, actor_ids) -> Dict[str, Dict]: .iterator(chunk_size=self.paginator.limit) } + def get_recordings(self, matching_events) -> dict[str, list[dict]]: + return RecordingsHelper(self.team).get_recordings(matching_events) + def input_columns(self) -> List[str]: return ["person", "id", "created_at", "person.$delete"] diff --git a/posthog/hogql_queries/actors_query_runner.py b/posthog/hogql_queries/actors_query_runner.py index edfe06248595a..253841a1ac225 100644 --- a/posthog/hogql_queries/actors_query_runner.py +++ b/posthog/hogql_queries/actors_query_runner.py @@ -1,3 +1,4 @@ +import itertools from datetime import timedelta from typing import List, Generator, Sequence, Iterator, Optional from posthog.hogql import ast @@ -38,14 +39,42 @@ def determine_strategy(self) -> ActorStrategy: return GroupStrategy(self.group_type_index, team=self.team, query=self.query, paginator=self.paginator) return PersonStrategy(team=self.team, query=self.query, paginator=self.paginator) - def enrich_with_actors(self, results, actor_column_index, actors_lookup) -> Generator[List, None, None]: + def get_recordings(self, event_results, recordings_lookup) -> Generator[dict, None, None]: + return ( + {"session_id": session_id, "events": recordings_lookup[session_id]} + for session_id in (event[2] for event in event_results) + if session_id in recordings_lookup + ) + + def enrich_with_actors( + self, + results, + actor_column_index, + actors_lookup, + recordings_column_index: Optional[int], + recordings_lookup: Optional[dict[str, list[dict]]], + ) -> Generator[List, None, None]: for result in results: new_row = list(result) actor_id = str(result[actor_column_index]) actor = actors_lookup.get(actor_id) new_row[actor_column_index] = actor if actor else {"id": actor_id} + if recordings_column_index is not None and recordings_lookup is not None: + new_row[recordings_column_index] = ( + self.get_recordings(result[recordings_column_index], recordings_lookup) or None + ) yield new_row + def prepare_recordings(self, column_name, input_columns): + if column_name != "person" or "matched_recordings" not in input_columns: + return None, None + + column_index_events = input_columns.index("matched_recordings") + matching_events_list = itertools.chain.from_iterable( + (row[column_index_events] for row in self.paginator.results) + ) + return column_index_events, self.strategy.get_recordings(matching_events_list) + def calculate(self) -> ActorsQueryResponse: response = self.paginator.execute_hogql_query( query_type="ActorsQuery", @@ -60,10 +89,15 @@ def calculate(self) -> ActorsQueryResponse: enrich_columns = filter(lambda column: column in ("person", "group"), input_columns) for column_name in enrich_columns: - actor_ids = (row[input_columns.index(column_name)] for row in self.paginator.results) + actor_column_index = input_columns.index(column_name) + actor_ids = (row[actor_column_index] for row in self.paginator.results) actors_lookup = self.strategy.get_actors(actor_ids) + recordings_column_index, recordings_lookup = self.prepare_recordings(column_name, input_columns) + missing_actors_count = len(self.paginator.results) - len(actors_lookup) - results = self.enrich_with_actors(results, input_columns.index(column_name), actors_lookup) + results = self.enrich_with_actors( + results, actor_column_index, actors_lookup, recordings_column_index, recordings_lookup + ) return ActorsQueryResponse( results=results, @@ -125,12 +159,15 @@ def to_query(self) -> ast.SelectQuery: group_by = [] aggregations = [] for expr in self.input_columns(): + column: ast.Expr = parse_expr(expr) + if expr == "person.$delete": column = ast.Constant(value=1) elif expr == self.strategy.field: column = ast.Field(chain=[self.strategy.origin_id]) - else: - column = parse_expr(expr) + elif expr == "matched_recordings": + column = ast.Field(chain=["matching_events"]) # TODO: Hmm? + columns.append(column) if has_aggregation(column): aggregations.append(column) @@ -156,13 +193,14 @@ def to_query(self) -> ast.SelectQuery: else: having = ast.And(exprs=having_list) + order_by: list[ast.OrderExpr] with self.timings.measure("order"): if self.query.orderBy is not None: strategy_order_by = self.strategy.order_by() if strategy_order_by is not None: order_by = strategy_order_by else: - order_by = [parse_order_expr(column, timings=self.timings) for column in self.query.orderBy] + order_by = [parse_order_expr(col, timings=self.timings) for col in self.query.orderBy] elif "count()" in self.input_columns(): order_by = [ast.OrderExpr(expr=parse_expr("count()"), order="DESC")] elif len(aggregations) > 0: diff --git a/posthog/hogql_queries/insights/paths_query_runner.py b/posthog/hogql_queries/insights/paths_query_runner.py index 3932d315908e9..c10a5a2320207 100644 --- a/posthog/hogql_queries/insights/paths_query_runner.py +++ b/posthog/hogql_queries/insights/paths_query_runner.py @@ -1,3 +1,5 @@ +import itertools +from collections import defaultdict from datetime import datetime, timedelta from math import ceil from re import escape @@ -58,6 +60,9 @@ def __init__( escape(grouping).replace("\\*", ".*") for grouping in self.query.pathsFilter.pathGroupings ] + self.extra_event_fields: list[str] = [] + self.extra_event_properties: list[str] = [] + @property def group_type_index(self) -> int | None: return self.query.aggregation_group_type_index @@ -96,7 +101,7 @@ def _get_event_query(self) -> list[ast.Expr]: return [] def _should_query_event(self, event: str) -> bool: - if not self.query.pathsFilter.includeEventTypes: # TODO: include_custom_events ? + if not self.query.pathsFilter.includeEventTypes: return event not in (self.query.pathsFilter.excludeEvents or []) return event in (self.query.pathsFilter.includeEventTypes or []) @@ -132,6 +137,17 @@ def paths_events_query(self) -> ast.SelectQuery: ast.Field(chain=["events", "timestamp"]), ast.Field(chain=["events", "person_id"]), event_conditional, + *[ast.Field(chain=["events", field]) for field in self.extra_event_fields], + *[ + ast.Alias( + alias=field, + expr=ast.Call( + name="ifNull", + args=[ast.Field(chain=["events", "properties", f"${field}"]), ast.Constant(value="")], + ), + ) + for field in self.extra_event_properties + ], ] final_path_item_column = "path_item_ungrouped" @@ -238,55 +254,159 @@ def date_filter_expr(self) -> ast.Expr: ] ) + def get_array_compacting_function(self) -> Literal["arrayResize", "arraySlice"]: + if self.query.pathsFilter.endPoint: + return "arrayResize" + + return "arraySlice" + def get_filtered_path_ordering(self) -> list[ast.Expr]: fields = { "compact_path": "path", "timings": "timings", + **{f: f for f in self.extra_event_fields_and_properties}, } - return [ - ast.Alias( - alias=f"filtered_{field}", - expr=ast.Call( - name="if", - args=[ - ast.CompareOperation( - op=ast.CompareOperationOp.Gt, - left=ast.Field(chain=["target_index"]), - right=ast.Constant(value=0), - ), - ast.Call( - name=self.get_array_compacting_function(), - args=[ast.Field(chain=[orig]), ast.Field(chain=["target_index"])], - ), - ast.Field(chain=[orig]), - ], + expressions = ( + [ + ast.Alias( + alias=f"filtered_{field}", + expr=ast.Call( + name="if", + args=[ + ast.CompareOperation( + op=ast.CompareOperationOp.Gt, + left=ast.Field(chain=["target_index"]), + right=ast.Constant(value=0), + ), + ast.Call( + name=self.get_array_compacting_function(), + args=[ast.Field(chain=[orig]), ast.Field(chain=["target_index"])], + ), + ast.Field(chain=[orig]), + ], + ), ), - ) + ast.Alias( + alias=f"limited_{field}", + expr=ast.Call( + name="arraySlice", + args=[ + ast.Field(chain=[f"filtered_{field}"]), + *( + [ast.Constant(value=-1 * self.event_in_session_limit)] + if self.query.pathsFilter.endPoint + else [ + ast.Constant(value=1), + ast.Constant(value=self.event_in_session_limit), + ] + ), + ], + ), + ), + ] for orig, field in fields.items() - ] + ) + return list(itertools.chain.from_iterable(expressions)) - def get_limited_path_ordering(self) -> list[ast.Expr]: - fields_to_include = ["path", "timings"] - return [ - ast.Alias( - alias=f"limited_{field}", - expr=ast.Call( - name="arraySlice", - args=[ - ast.Field(chain=[f"filtered_{field}"]), - ast.Constant(value=1), - ast.Constant(value=self.event_in_session_limit), - ], + def get_start_end_filtered_limited(self) -> list[ast.Expr]: + fields = { + "compact_path": "path", + "timings": "timings", + **{f: f for f in self.extra_event_fields_and_properties}, + } + expressions = ( + [ + ast.Alias( + alias=f"start_filtered_{field}", + expr=ast.Call( + name="if", + args=[ + ast.CompareOperation( + op=ast.CompareOperationOp.Gt, + left=ast.Field(chain=["start_target_index"]), + right=ast.Constant(value=0), + ), + ast.Call( + name="arraySlice", + args=[ast.Field(chain=[orig]), ast.Field(chain=["start_target_index"])], + ), + ast.Field(chain=[orig]), + ], + ), + ), + ast.Alias( + alias=f"filtered_{field}", + expr=ast.Call( + name="if", + args=[ + ast.CompareOperation( + op=ast.CompareOperationOp.Gt, + left=ast.Field(chain=["end_target_index"]), + right=ast.Constant(value=0), + ), + ast.Call( + name="arrayResize", + args=[ + ast.Field(chain=[f"start_filtered_{field}"]), + ast.Field(chain=["end_target_index"]), + ], + ), + ast.Field(chain=[f"start_filtered_{field}"]), + ], + ), + ), + ast.Alias( + alias=f"limited_{field}", + expr=parse_expr( + expr=( + "if(length({field}) > {event_in_session_limit}, arrayConcat(arraySlice({field}, 1, intDiv({event_in_session_limit}, 2)), ['...'], arraySlice({field}, (-1)*intDiv({event_in_session_limit}, 2), intDiv({event_in_session_limit}, 2))), {field})" + if field == "path" + else "if(length({field}) > {event_in_session_limit}, arrayConcat(arraySlice({field}, 1, intDiv({event_in_session_limit}, 2)), [{field}[1+intDiv({event_in_session_limit}, 2)]], arraySlice({field}, (-1)*intDiv({event_in_session_limit}, 2), intDiv({event_in_session_limit}, 2))), {field})" + ), + placeholders={ + "field": ast.Field(chain=[f"filtered_{field}"]), + "event_in_session_limit": ast.Constant(value=self.event_in_session_limit), + }, + ), + ), + ] + for orig, field in fields.items() + ) + return list(itertools.chain.from_iterable(expressions)) + + def get_target_clause(self) -> list[ast.Expr]: + if self.query.pathsFilter.startPoint and self.query.pathsFilter.endPoint: + clauses: list[ast.Expr] = [ + ast.Alias( + alias=f"start_target_index", + expr=ast.Call( + name="indexOf", + args=[ + ast.Field(chain=["compact_path"]), + ast.Constant(value=self.query.pathsFilter.startPoint), + ], + ), + ), + ] + filtered_limited = self.get_start_end_filtered_limited() + # We need a special order of fields due to dependencies + clauses.append(filtered_limited[0]) + clauses.append( + ast.Alias( + alias=f"end_target_index", + expr=ast.Call( + name="indexOf", + args=[ + ast.Field(chain=["start_filtered_path"]), + ast.Constant(value=self.query.pathsFilter.endPoint), + ], + ), ), ) - for field in fields_to_include - ] - - def get_array_compacting_function(self) -> Literal["arrayResize", "arraySlice"]: - if self.query.pathsFilter.endPoint: - return "arrayResize" - - return "arraySlice" + clauses.extend(filtered_limited[1:]) + return clauses + else: + return self.get_filtered_path_ordering() def paths_per_person_query(self) -> ast.SelectQuery: target_point = self.query.pathsFilter.endPoint or self.query.pathsFilter.startPoint @@ -294,22 +414,31 @@ def paths_per_person_query(self) -> ast.SelectQuery: target_point[:-1] if target_point and len(target_point) > 1 and target_point.endswith("/") else target_point ) - filtered_paths = self.get_filtered_path_ordering() - limited_paths = self.get_limited_path_ordering() + path_tuples_expr = ast.Call( + name="arrayZip", + args=[ + ast.Field(chain=["path_list"]), + ast.Field(chain=["timing_list"]), + ast.Call( + name="arrayDifference", + args=[ast.Field(chain=["timing_list"])], + ), + *[ast.Field(chain=[f"{f}_list"]) for f in self.extra_event_fields_and_properties], + ], + ) placeholders = { "path_event_query": self.paths_events_query(), "boundary_event_filter": ast.Constant(value=None), "target_point": ast.Constant(value=target_point), - "target_clause": ast.Constant(value=None), "session_threshold_clause": ast.Constant(value=None), "session_time_threshold": ast.Constant(value=SESSION_TIME_THRESHOLD_DEFAULT_SECONDS), + "path_tuples_expr": path_tuples_expr, # TODO: "extra_final_select_statements": ast.Constant(value=None), "extra_joined_path_tuple_select_statements": ast.Constant(value=None), "extra_array_filter_select_statements": ast.Constant(value=None), "extra_limited_path_tuple_elements": ast.Constant(value=None), "extra_path_time_tuple_select_statements": ast.Constant(value=None), - "extra_paths_tuple_elements": ast.Constant(value=None), "extra_group_array_select_statements": ast.Constant(value=None), } select = cast( @@ -335,6 +464,7 @@ def paths_per_person_query(self) -> ast.SelectQuery: arrayPopFront(arrayPushBack(path_basic, '')) as path_basic_0, arrayMap((x,y) -> if(x=y, 0, 1), path_basic, path_basic_0) as mapping, arrayFilter((x,y) -> y, time, mapping) as timings, + /* more arrayFilter(x) added below if required */ arrayFilter((x,y)->y, path_basic, mapping) as compact_path, indexOf(compact_path, {target_point}) as target_index FROM ( @@ -342,14 +472,16 @@ def paths_per_person_query(self) -> ast.SelectQuery: person_id, path_time_tuple.1 as path_basic, path_time_tuple.2 as time, + /* path_time_tuple.x added below if required */ session_index, - arrayZip(paths, timing, arrayDifference(timing)) as paths_tuple, + {path_tuples_expr} as paths_tuple, arraySplit(x -> if(x.3 < ({session_time_threshold}), 0, 1), paths_tuple) as session_paths FROM ( SELECT person_id, - groupArray(timestamp) as timing, - groupArray(path_item) as paths + groupArray(timestamp) as timing_list, + groupArray(path_item) as path_list + /* groupArray(x) added below if required */ FROM {path_event_query} GROUP BY person_id ) @@ -368,15 +500,98 @@ def paths_per_person_query(self) -> ast.SelectQuery: ) assert select.select_from is not None table = cast(ast.SelectQuery, select.select_from.table) - table.select.extend(filtered_paths + limited_paths) + + select.select.extend( + [ + ast.Alias( + alias=field, + expr=ast.Field(chain=[f"final_{field}"]), + ) + for field in self.extra_event_fields_and_properties + ] + ) + + # Extra joined path tuple select statements + table.select.extend( + [ + ast.Alias( + alias=f"final_{field}", + expr=ast.TupleAccess(tuple=ast.Field(chain=["joined_path_tuple"]), index=i + 4), + ) + for i, field in enumerate(self.extra_event_fields_and_properties) + ] + ) + + # Extra arrayFilter(x) + table.select.extend( + [ + ast.Alias( + alias=field, + expr=ast.Call( + name="arrayFilter", + args=[ + ast.Lambda(args=["x", "y"], expr=ast.Field(chain=["y"])), + ast.Field(chain=[f"{field}_items"]), + ast.Field(chain=["mapping"]), + ], + ), + ) + for field in self.extra_event_fields_and_properties + ] + ) + + table.select.extend(self.get_target_clause()) + + # Extra path_time_tuple.x + table.select_from.table.select.extend( # type: ignore[union-attr] + [ + ast.Alias( + alias=f"{field}_items", + expr=ast.TupleAccess(tuple=ast.Field(chain=["path_time_tuple"]), index=i + 4), + ) + for i, field in enumerate(self.extra_event_fields_and_properties) + ] + ) + # Extra groupArray(x) + table.select_from.table.select_from.table.select.extend( # type: ignore[union-attr] + [ + ast.Alias(alias=f"{field}_list", expr=ast.Call(name="groupArray", args=[ast.Field(chain=[field])])) + for field in self.extra_event_fields_and_properties + ] + ) other_selects = [ "arrayDifference(limited_timings) as timings_diff", - "arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) as limited_path_timings", "concat(toString(length(limited_path)), '_', limited_path[-1]) as path_dropoff_key /* last path item */", ] table.select.extend([parse_expr(s, placeholders) for s in other_selects]) + table.select.append( + ast.Alias( + alias="limited_path_timings", + expr=ast.Call( + name="arrayZip", + args=[ + ast.Field(chain=["limited_path"]), + ast.Field(chain=["timings_diff"]), + ast.Call( + name="arrayPopBack", + args=[ + ast.Call( + name="arrayPushFront", + args=[ + ast.Field(chain=["limited_path"]), + ast.Constant(value=""), + ], + ) + ], + ), + *[ast.Field(chain=[f"limited_{field}"]) for field in self.extra_event_fields_and_properties], + ], + ), + ) + ) + if self.query.pathsFilter.endPoint and self.query.pathsFilter.startPoint: table.where = parse_expr("start_target_index > 0 AND end_target_index > 0") elif self.query.pathsFilter.endPoint or self.query.pathsFilter.startPoint: @@ -470,6 +685,36 @@ def _refresh_frequency(self): return refresh_frequency + def validate_results(self, results): + # Query guarantees results list to be: + # 1. Directed, Acyclic Tree where each node has only 1 child + # 2. All start nodes beginning with 1_ + + seen = set() # source nodes that have been traversed + edges = defaultdict(list) + validated_results = [] + starting_nodes_stack = [] + + for result in results: + edges[result[0]].append(result[1]) + if result[0].startswith("1_"): + # All nodes with 1_ are valid starting nodes + starting_nodes_stack.append(result[0]) + + while starting_nodes_stack: + current_node = starting_nodes_stack.pop() + seen.add(current_node) + + for node in edges[current_node]: + if node not in seen: + starting_nodes_stack.append(node) + + for result in results: + if result[0] in seen: + validated_results.append(result) + + return validated_results + def calculate(self) -> PathsQueryResponse: query = self.to_query() hogql = to_printed_hogql(query, self.team) @@ -482,7 +727,7 @@ def calculate(self) -> PathsQueryResponse: modifiers=self.modifiers, ) - # TODO: Validate results? + response.results = self.validate_results(response.results) assert response.results is not None results = ( @@ -497,7 +742,16 @@ def calculate(self) -> PathsQueryResponse: return PathsQueryResponse(results=results, timings=response.timings, hogql=hogql) + @property + def extra_event_fields_and_properties(self) -> list[str]: + return self.extra_event_fields + self.extra_event_properties + def to_actors_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: + # To include matching_events, we need to add extra fields and properties + # TODO: Make sure going via self is the best way to do this + self.extra_event_fields = ["uuid", "timestamp"] + self.extra_event_properties = ["session_id", "window_id"] + path_per_person_query = self.paths_per_person_query() conditions = [] @@ -527,17 +781,16 @@ def to_actors_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: conditions.append(parse_expr("1=1")) # TODO: Funnel? - # TODO: Include recordings? actors_query = parse_select( """ SELECT person_id as actor_id, + groupUniqArray(100)((timestamp, uuid, session_id, window_id)) as matching_events, COUNT(*) as event_count FROM {paths_per_person_query} WHERE {conditions} GROUP BY person_id - ORDER BY actor_id """, placeholders={ "paths_per_person_query": path_per_person_query, diff --git a/posthog/hogql_queries/insights/stickiness_query_runner.py b/posthog/hogql_queries/insights/stickiness_query_runner.py index 0a95c42f7e85f..3b311789f9267 100644 --- a/posthog/hogql_queries/insights/stickiness_query_runner.py +++ b/posthog/hogql_queries/insights/stickiness_query_runner.py @@ -256,13 +256,20 @@ def where_clause(self, series_with_extra: SeriesWithExtras) -> ast.Expr: ) # Series - if self.series_event(series) is not None: + if isinstance(series, EventsNode) and series.event is not None: filters.append( parse_expr( "event = {event}", - placeholders={"event": ast.Constant(value=self.series_event(series))}, + placeholders={"event": ast.Constant(value=series.event)}, ) ) + elif isinstance(series, ActionsNode): + try: + action = Action.objects.get(pk=int(series.id), team=self.team) + filters.append(action_to_expr(action)) + except Action.DoesNotExist: + # If an action doesn't exist, we want to return no events + filters.append(parse_expr("1 = 2")) # Filter Test Accounts if ( @@ -281,15 +288,6 @@ def where_clause(self, series_with_extra: SeriesWithExtras) -> ast.Expr: if series.properties is not None and series.properties != []: filters.append(property_to_expr(series.properties, self.team)) - # Actions - if isinstance(series, ActionsNode): - try: - action = Action.objects.get(pk=int(series.id), team=self.team) - filters.append(action_to_expr(action)) - except Action.DoesNotExist: - # If an action doesn't exist, we want to return no events - filters.append(parse_expr("1 = 2")) - if len(filters) == 0: return ast.Constant(value=True) elif len(filters) == 1: diff --git a/posthog/hogql_queries/insights/test/__snapshots__/test_paths_query_runner_2.ambr b/posthog/hogql_queries/insights/test/__snapshots__/test_paths_query_runner_2.ambr new file mode 100644 index 0000000000000..b93149873ae22 --- /dev/null +++ b/posthog/hogql_queries/insights/test/__snapshots__/test_paths_query_runner_2.ambr @@ -0,0 +1,5060 @@ +# serializer version: 1 +# name: TestClickhousePaths.test_end + ''' + SELECT last_path_key AS source_event, + path_key AS target_event, + count(*) AS event_count, + avg(conversion_time) AS average_conversion_time + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, '/about') AS target_index, + if(ifNull(greater(target_index, 0), 0), arrayResize(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, -5) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arrayResize(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, -5) AS limited_timings, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list)) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event)), '') AS path_item_ungrouped, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC'))))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index + WHERE ifNull(greater(target_index, 0), 0))) + WHERE isNotNull(source_event) + GROUP BY source_event, + target_event + ORDER BY event_count DESC, + source_event ASC, + target_event ASC + LIMIT 50 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_end.1 + ''' + SELECT last_path_key AS source_event, + path_key AS target_event, + count(*) AS event_count, + avg(conversion_time) AS average_conversion_time + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, '/about') AS target_index, + if(ifNull(greater(target_index, 0), 0), arrayResize(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, -5) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arrayResize(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, -5) AS limited_timings, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list)) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event)), '') AS path_item_ungrouped, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC'))))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index + WHERE ifNull(greater(target_index, 0), 0))) + WHERE isNotNull(source_event) + GROUP BY source_event, + target_event + ORDER BY event_count DESC, + source_event ASC, + target_event ASC + LIMIT 50 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_end_materialized + ''' + SELECT last_path_key AS source_event, + path_key AS target_event, + count(*) AS event_count, + avg(conversion_time) AS average_conversion_time + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, '/about') AS target_index, + if(ifNull(greater(target_index, 0), 0), arrayResize(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, -5) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arrayResize(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, -5) AS limited_timings, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list)) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$screen'), nullIf(nullIf(events.`mat_$screen_name`, ''), 'null'), if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(nullIf(nullIf(events.`mat_$current_url`, ''), 'null'), ''), '(.)/$', '\\1'), events.event)), '') AS path_item_ungrouped, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC'))))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index + WHERE ifNull(greater(target_index, 0), 0))) + WHERE isNotNull(source_event) + GROUP BY source_event, + target_event + ORDER BY event_count DESC, + source_event ASC, + target_event ASC + LIMIT 50 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_end_materialized.1 + ''' + SELECT last_path_key AS source_event, + path_key AS target_event, + count(*) AS event_count, + avg(conversion_time) AS average_conversion_time + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, '/about') AS target_index, + if(ifNull(greater(target_index, 0), 0), arrayResize(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, -5) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arrayResize(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, -5) AS limited_timings, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list)) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$screen'), nullIf(nullIf(events.`mat_$screen_name`, ''), 'null'), if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(nullIf(nullIf(events.`mat_$current_url`, ''), 'null'), ''), '(.)/$', '\\1'), events.event)), '') AS path_item_ungrouped, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC'))))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index + WHERE ifNull(greater(target_index, 0), 0))) + WHERE isNotNull(source_event) + GROUP BY source_event, + target_event + ORDER BY event_count DESC, + source_event ASC, + target_event ASC + LIMIT 50 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_event_exclusion_filters_with_wildcard_groups + ''' + SELECT last_path_key AS source_event, + path_key AS target_event, + count(*) AS event_count, + avg(conversion_time) AS average_conversion_time + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 4) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 4) AS limited_timings, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list)) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event), '') AS path_item_ungrouped, + ['/bar/*/foo'] AS groupings, + multiMatchAnyIndex(path_item_ungrouped, ['/bar/.*/foo']) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-05-23 23:59:59', 6, 'UTC')))), and(equals(events.event, '$pageview'), ifNull(notIn(path_item, ['/bar/*/foo']), 0))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE isNotNull(source_event) + GROUP BY source_event, + target_event + ORDER BY event_count DESC, + source_event ASC, + target_event ASC + LIMIT 50 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_event_exclusion_filters_with_wildcard_groups.1 + ''' + SELECT last_path_key AS source_event, + path_key AS target_event, + count(*) AS event_count, + avg(conversion_time) AS average_conversion_time + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 4) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 4) AS limited_timings, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list)) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event), '') AS path_item_ungrouped, + ['/xxx/invalid/*'] AS groupings, + multiMatchAnyIndex(path_item_ungrouped, ['/xxx/invalid/.*']) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-05-23 23:59:59', 6, 'UTC')))), and(equals(events.event, '$pageview'), ifNull(notIn(path_item, ['/bar/*/foo']), 0))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE isNotNull(source_event) + GROUP BY source_event, + target_event + ORDER BY event_count DESC, + source_event ASC, + target_event ASC + LIMIT 50 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_event_inclusion_exclusion_filters + ''' + SELECT last_path_key AS source_event, + path_key AS target_event, + count(*) AS event_count, + avg(conversion_time) AS average_conversion_time + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 4) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 4) AS limited_timings, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list)) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event), '') AS path_item_ungrouped, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-05-23 23:59:59', 6, 'UTC')))), equals(events.event, '$pageview')) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE isNotNull(source_event) + GROUP BY source_event, + target_event + ORDER BY event_count DESC, + source_event ASC, + target_event ASC + LIMIT 50 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_event_inclusion_exclusion_filters.1 + ''' + SELECT last_path_key AS source_event, + path_key AS target_event, + count(*) AS event_count, + avg(conversion_time) AS average_conversion_time + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 4) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 4) AS limited_timings, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list)) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$screen_name'), ''), 'null'), '^"|"$', ''), events.event), '') AS path_item_ungrouped, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-05-23 23:59:59', 6, 'UTC')))), equals(events.event, '$screen')) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE isNotNull(source_event) + GROUP BY source_event, + target_event + ORDER BY event_count DESC, + source_event ASC, + target_event ASC + LIMIT 50 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_event_inclusion_exclusion_filters.2 + ''' + SELECT last_path_key AS source_event, + path_key AS target_event, + count(*) AS event_count, + avg(conversion_time) AS average_conversion_time + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 4) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 4) AS limited_timings, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list)) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(events.event, '') AS path_item_ungrouped, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-05-23 23:59:59', 6, 'UTC')))), not(startsWith(events.event, '$'))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE isNotNull(source_event) + GROUP BY source_event, + target_event + ORDER BY event_count DESC, + source_event ASC, + target_event ASC + LIMIT 50 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_event_inclusion_exclusion_filters.3 + ''' + SELECT last_path_key AS source_event, + path_key AS target_event, + count(*) AS event_count, + avg(conversion_time) AS average_conversion_time + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 4) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 4) AS limited_timings, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list)) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event)), '') AS path_item_ungrouped, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-05-23 23:59:59', 6, 'UTC')))), and(or(equals(events.event, '$pageview'), equals(events.event, '$screen'), not(startsWith(events.event, '$'))), ifNull(notIn(path_item, ['/custom1', '/1', '/2', '/3']), 0))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE isNotNull(source_event) + GROUP BY source_event, + target_event + ORDER BY event_count DESC, + source_event ASC, + target_event ASC + LIMIT 50 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_event_ordering + ''' + SELECT last_path_key AS source_event, + path_key AS target_event, + count(*) AS event_count, + avg(conversion_time) AS average_conversion_time + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 5) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 5) AS limited_timings, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list)) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(events.event, '') AS path_item_ungrouped, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-03 23:59:59', 6, 'UTC')))), not(startsWith(events.event, '$'))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE isNotNull(source_event) + GROUP BY source_event, + target_event + ORDER BY event_count DESC, + source_event ASC, + target_event ASC + LIMIT 50 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_groups_filtering_person_on_events + ''' + SELECT last_path_key AS source_event, + path_key AS target_event, + count(*) AS event_count, + avg(conversion_time) AS average_conversion_time + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 4) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 4) AS limited_timings, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list)) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event)), '') AS path_item_ungrouped, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS events__group_0 ON equals(events.`$group_0`, events__group_0.key) + WHERE and(equals(events.team_id, 2), ifNull(equals(events__group_0.properties___industry, 'finance'), 0), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-02-01 23:59:59', 6, 'UTC')))), or(equals(events.event, '$pageview'), equals(events.event, '$screen'), not(startsWith(events.event, '$')))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE isNotNull(source_event) + GROUP BY source_event, + target_event + ORDER BY event_count DESC, + source_event ASC, + target_event ASC + LIMIT 50 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_groups_filtering_person_on_events.1 + ''' + SELECT last_path_key AS source_event, + path_key AS target_event, + count(*) AS event_count, + avg(conversion_time) AS average_conversion_time + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 4) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 4) AS limited_timings, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list)) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events.person_id AS person_id, + ifNull(if(equals(events.event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event)), '') AS path_item_ungrouped, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS events__group_0 ON equals(events.`$group_0`, events__group_0.key) + WHERE and(equals(events.team_id, 2), ifNull(equals(events__group_0.properties___industry, 'technology'), 0), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-02-01 23:59:59', 6, 'UTC')))), or(equals(events.event, '$pageview'), equals(events.event, '$screen'), not(startsWith(events.event, '$')))) + ORDER BY events.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE isNotNull(source_event) + GROUP BY source_event, + target_event + ORDER BY event_count DESC, + source_event ASC, + target_event ASC + LIMIT 50 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_groups_filtering_person_on_events.2 + ''' + SELECT last_path_key AS source_event, + path_key AS target_event, + count(*) AS event_count, + avg(conversion_time) AS average_conversion_time + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 4) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 4) AS limited_timings, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list)) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events.person_id AS person_id, + ifNull(if(equals(events.event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event)), '') AS path_item_ungrouped, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + LEFT JOIN + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 1), 0)) + GROUP BY groups.group_type_index, + groups.group_key) AS events__group_1 ON equals(events.`$group_1`, events__group_1.key) + WHERE and(equals(events.team_id, 2), ifNull(equals(events__group_1.properties___industry, 'technology'), 0), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-02-01 23:59:59', 6, 'UTC')))), or(equals(events.event, '$pageview'), equals(events.event, '$screen'), not(startsWith(events.event, '$')))) + ORDER BY events.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE isNotNull(source_event) + GROUP BY source_event, + target_event + ORDER BY event_count DESC, + source_event ASC, + target_event ASC + LIMIT 50 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_person_dropoffs + ''' + SELECT persons.id AS id + FROM + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons + INNER JOIN + (SELECT person_id AS actor_id, + groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, + count(*) AS event_count + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key, + final_uuid AS uuid, + final_timestamp AS timestamp, + final_session_id AS session_id, + final_window_id AS window_id + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + joined_path_tuple.4 AS final_uuid, + joined_path_tuple.5 AS final_timestamp, + joined_path_tuple.6 AS final_session_id, + joined_path_tuple.7 AS final_window_id, + arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, + arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, + arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, + arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 5) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 5) AS limited_timings, + if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, + arraySlice(filtered_uuid, 1, 5) AS limited_uuid, + if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, + arraySlice(filtered_timestamp, 1, 5) AS limited_timestamp, + if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, + arraySlice(filtered_session_id, 1, 5) AS limited_session_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, + arraySlice(filtered_window_id, 1, 5) AS limited_window_id, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, + path_time_tuple.4 AS uuid_items, + path_time_tuple.5 AS timestamp_items, + path_time_tuple.6 AS session_id_items, + path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list, + groupArray(uuid) AS uuid_list, + groupArray(timestamp) AS timestamp_list, + groupArray(session_id) AS session_id_list, + groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(events.event, '') AS path_item_ungrouped, + events.uuid AS uuid, + toTimeZone(events.timestamp, 'UTC') AS timestamp, + ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, + ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC')))), not(startsWith(events.event, '$'))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE and(ifNull(equals(path_dropoff_key, '2_step two'), 0), ifNull(equals(path_dropoff_key, path_key), isNull(path_dropoff_key) + and isNull(path_key))) + GROUP BY person_id) AS source ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_person_dropoffs.1 + ''' + SELECT persons.id AS id + FROM + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons + INNER JOIN + (SELECT person_id AS actor_id, + groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, + count(*) AS event_count + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key, + final_uuid AS uuid, + final_timestamp AS timestamp, + final_session_id AS session_id, + final_window_id AS window_id + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + joined_path_tuple.4 AS final_uuid, + joined_path_tuple.5 AS final_timestamp, + joined_path_tuple.6 AS final_session_id, + joined_path_tuple.7 AS final_window_id, + arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, + arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, + arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, + arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 5) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 5) AS limited_timings, + if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, + arraySlice(filtered_uuid, 1, 5) AS limited_uuid, + if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, + arraySlice(filtered_timestamp, 1, 5) AS limited_timestamp, + if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, + arraySlice(filtered_session_id, 1, 5) AS limited_session_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, + arraySlice(filtered_window_id, 1, 5) AS limited_window_id, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, + path_time_tuple.4 AS uuid_items, + path_time_tuple.5 AS timestamp_items, + path_time_tuple.6 AS session_id_items, + path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list, + groupArray(uuid) AS uuid_list, + groupArray(timestamp) AS timestamp_list, + groupArray(session_id) AS session_id_list, + groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(events.event, '') AS path_item_ungrouped, + events.uuid AS uuid, + toTimeZone(events.timestamp, 'UTC') AS timestamp, + ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, + ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC')))), not(startsWith(events.event, '$'))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE ifNull(equals(path_key, '2_step two'), 0) + GROUP BY person_id) AS source ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_person_dropoffs.2 + ''' + SELECT persons.id AS id + FROM + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons + INNER JOIN + (SELECT person_id AS actor_id, + groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, + count(*) AS event_count + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key, + final_uuid AS uuid, + final_timestamp AS timestamp, + final_session_id AS session_id, + final_window_id AS window_id + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + joined_path_tuple.4 AS final_uuid, + joined_path_tuple.5 AS final_timestamp, + joined_path_tuple.6 AS final_session_id, + joined_path_tuple.7 AS final_window_id, + arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, + arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, + arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, + arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 5) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 5) AS limited_timings, + if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, + arraySlice(filtered_uuid, 1, 5) AS limited_uuid, + if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, + arraySlice(filtered_timestamp, 1, 5) AS limited_timestamp, + if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, + arraySlice(filtered_session_id, 1, 5) AS limited_session_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, + arraySlice(filtered_window_id, 1, 5) AS limited_window_id, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, + path_time_tuple.4 AS uuid_items, + path_time_tuple.5 AS timestamp_items, + path_time_tuple.6 AS session_id_items, + path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list, + groupArray(uuid) AS uuid_list, + groupArray(timestamp) AS timestamp_list, + groupArray(session_id) AS session_id_list, + groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(events.event, '') AS path_item_ungrouped, + events.uuid AS uuid, + toTimeZone(events.timestamp, 'UTC') AS timestamp, + ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, + ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC')))), not(startsWith(events.event, '$'))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE and(ifNull(equals(last_path_key, '2_step two'), 0), 1) + GROUP BY person_id) AS source ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_person_dropoffs.3 + ''' + SELECT persons.id AS id + FROM + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons + INNER JOIN + (SELECT person_id AS actor_id, + groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, + count(*) AS event_count + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key, + final_uuid AS uuid, + final_timestamp AS timestamp, + final_session_id AS session_id, + final_window_id AS window_id + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + joined_path_tuple.4 AS final_uuid, + joined_path_tuple.5 AS final_timestamp, + joined_path_tuple.6 AS final_session_id, + joined_path_tuple.7 AS final_window_id, + arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, + arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, + arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, + arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 5) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 5) AS limited_timings, + if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, + arraySlice(filtered_uuid, 1, 5) AS limited_uuid, + if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, + arraySlice(filtered_timestamp, 1, 5) AS limited_timestamp, + if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, + arraySlice(filtered_session_id, 1, 5) AS limited_session_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, + arraySlice(filtered_window_id, 1, 5) AS limited_window_id, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, + path_time_tuple.4 AS uuid_items, + path_time_tuple.5 AS timestamp_items, + path_time_tuple.6 AS session_id_items, + path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list, + groupArray(uuid) AS uuid_list, + groupArray(timestamp) AS timestamp_list, + groupArray(session_id) AS session_id_list, + groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(events.event, '') AS path_item_ungrouped, + events.uuid AS uuid, + toTimeZone(events.timestamp, 'UTC') AS timestamp, + ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, + ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC')))), not(startsWith(events.event, '$'))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE and(ifNull(equals(path_dropoff_key, '3_step three'), 0), ifNull(equals(path_dropoff_key, path_key), isNull(path_dropoff_key) + and isNull(path_key))) + GROUP BY person_id) AS source ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_person_dropoffs.4 + ''' + SELECT persons.id AS id + FROM + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons + INNER JOIN + (SELECT person_id AS actor_id, + groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, + count(*) AS event_count + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key, + final_uuid AS uuid, + final_timestamp AS timestamp, + final_session_id AS session_id, + final_window_id AS window_id + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + joined_path_tuple.4 AS final_uuid, + joined_path_tuple.5 AS final_timestamp, + joined_path_tuple.6 AS final_session_id, + joined_path_tuple.7 AS final_window_id, + arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, + arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, + arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, + arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 5) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 5) AS limited_timings, + if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, + arraySlice(filtered_uuid, 1, 5) AS limited_uuid, + if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, + arraySlice(filtered_timestamp, 1, 5) AS limited_timestamp, + if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, + arraySlice(filtered_session_id, 1, 5) AS limited_session_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, + arraySlice(filtered_window_id, 1, 5) AS limited_window_id, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, + path_time_tuple.4 AS uuid_items, + path_time_tuple.5 AS timestamp_items, + path_time_tuple.6 AS session_id_items, + path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list, + groupArray(uuid) AS uuid_list, + groupArray(timestamp) AS timestamp_list, + groupArray(session_id) AS session_id_list, + groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(events.event, '') AS path_item_ungrouped, + events.uuid AS uuid, + toTimeZone(events.timestamp, 'UTC') AS timestamp, + ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, + ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC')))), not(startsWith(events.event, '$'))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE ifNull(equals(path_key, '3_step three'), 0) + GROUP BY person_id) AS source ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_person_dropoffs.5 + ''' + SELECT persons.id AS id + FROM + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons + INNER JOIN + (SELECT person_id AS actor_id, + groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, + count(*) AS event_count + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key, + final_uuid AS uuid, + final_timestamp AS timestamp, + final_session_id AS session_id, + final_window_id AS window_id + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + joined_path_tuple.4 AS final_uuid, + joined_path_tuple.5 AS final_timestamp, + joined_path_tuple.6 AS final_session_id, + joined_path_tuple.7 AS final_window_id, + arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, + arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, + arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, + arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 5) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 5) AS limited_timings, + if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, + arraySlice(filtered_uuid, 1, 5) AS limited_uuid, + if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, + arraySlice(filtered_timestamp, 1, 5) AS limited_timestamp, + if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, + arraySlice(filtered_session_id, 1, 5) AS limited_session_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, + arraySlice(filtered_window_id, 1, 5) AS limited_window_id, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, + path_time_tuple.4 AS uuid_items, + path_time_tuple.5 AS timestamp_items, + path_time_tuple.6 AS session_id_items, + path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list, + groupArray(uuid) AS uuid_list, + groupArray(timestamp) AS timestamp_list, + groupArray(session_id) AS session_id_list, + groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(events.event, '') AS path_item_ungrouped, + events.uuid AS uuid, + toTimeZone(events.timestamp, 'UTC') AS timestamp, + ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, + ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC')))), not(startsWith(events.event, '$'))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE and(ifNull(equals(last_path_key, '3_step three'), 0), 1) + GROUP BY person_id) AS source ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_person_dropoffs.6 + ''' + SELECT persons.id AS id + FROM + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons + INNER JOIN + (SELECT person_id AS actor_id, + groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, + count(*) AS event_count + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key, + final_uuid AS uuid, + final_timestamp AS timestamp, + final_session_id AS session_id, + final_window_id AS window_id + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + joined_path_tuple.4 AS final_uuid, + joined_path_tuple.5 AS final_timestamp, + joined_path_tuple.6 AS final_session_id, + joined_path_tuple.7 AS final_window_id, + arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, + arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, + arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, + arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 5) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 5) AS limited_timings, + if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, + arraySlice(filtered_uuid, 1, 5) AS limited_uuid, + if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, + arraySlice(filtered_timestamp, 1, 5) AS limited_timestamp, + if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, + arraySlice(filtered_session_id, 1, 5) AS limited_session_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, + arraySlice(filtered_window_id, 1, 5) AS limited_window_id, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, + path_time_tuple.4 AS uuid_items, + path_time_tuple.5 AS timestamp_items, + path_time_tuple.6 AS session_id_items, + path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list, + groupArray(uuid) AS uuid_list, + groupArray(timestamp) AS timestamp_list, + groupArray(session_id) AS session_id_list, + groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(events.event, '') AS path_item_ungrouped, + events.uuid AS uuid, + toTimeZone(events.timestamp, 'UTC') AS timestamp, + ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, + ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC')))), not(startsWith(events.event, '$'))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE and(ifNull(equals(path_dropoff_key, '4_step four'), 0), ifNull(equals(path_dropoff_key, path_key), isNull(path_dropoff_key) + and isNull(path_key))) + GROUP BY person_id) AS source ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_person_dropoffs.7 + ''' + SELECT persons.id AS id + FROM + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons + INNER JOIN + (SELECT person_id AS actor_id, + groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, + count(*) AS event_count + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key, + final_uuid AS uuid, + final_timestamp AS timestamp, + final_session_id AS session_id, + final_window_id AS window_id + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + joined_path_tuple.4 AS final_uuid, + joined_path_tuple.5 AS final_timestamp, + joined_path_tuple.6 AS final_session_id, + joined_path_tuple.7 AS final_window_id, + arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, + arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, + arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, + arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 5) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 5) AS limited_timings, + if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, + arraySlice(filtered_uuid, 1, 5) AS limited_uuid, + if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, + arraySlice(filtered_timestamp, 1, 5) AS limited_timestamp, + if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, + arraySlice(filtered_session_id, 1, 5) AS limited_session_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, + arraySlice(filtered_window_id, 1, 5) AS limited_window_id, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, + path_time_tuple.4 AS uuid_items, + path_time_tuple.5 AS timestamp_items, + path_time_tuple.6 AS session_id_items, + path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list, + groupArray(uuid) AS uuid_list, + groupArray(timestamp) AS timestamp_list, + groupArray(session_id) AS session_id_list, + groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(events.event, '') AS path_item_ungrouped, + events.uuid AS uuid, + toTimeZone(events.timestamp, 'UTC') AS timestamp, + ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, + ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC')))), not(startsWith(events.event, '$'))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE ifNull(equals(path_key, '4_step four'), 0) + GROUP BY person_id) AS source ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_person_dropoffs.8 + ''' + SELECT persons.id AS id + FROM + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons + INNER JOIN + (SELECT person_id AS actor_id, + groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, + count(*) AS event_count + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key, + final_uuid AS uuid, + final_timestamp AS timestamp, + final_session_id AS session_id, + final_window_id AS window_id + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + joined_path_tuple.4 AS final_uuid, + joined_path_tuple.5 AS final_timestamp, + joined_path_tuple.6 AS final_session_id, + joined_path_tuple.7 AS final_window_id, + arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, + arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, + arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, + arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 5) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 5) AS limited_timings, + if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, + arraySlice(filtered_uuid, 1, 5) AS limited_uuid, + if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, + arraySlice(filtered_timestamp, 1, 5) AS limited_timestamp, + if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, + arraySlice(filtered_session_id, 1, 5) AS limited_session_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, + arraySlice(filtered_window_id, 1, 5) AS limited_window_id, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, + path_time_tuple.4 AS uuid_items, + path_time_tuple.5 AS timestamp_items, + path_time_tuple.6 AS session_id_items, + path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list, + groupArray(uuid) AS uuid_list, + groupArray(timestamp) AS timestamp_list, + groupArray(session_id) AS session_id_list, + groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(events.event, '') AS path_item_ungrouped, + events.uuid AS uuid, + toTimeZone(events.timestamp, 'UTC') AS timestamp, + ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, + ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC')))), not(startsWith(events.event, '$'))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE and(ifNull(equals(last_path_key, '4_step four'), 0), 1) + GROUP BY person_id) AS source ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_person_on_events_v2 + ''' + + SELECT distinct_id, + person_id + FROM events + WHERE team_id = 2 + AND distinct_id IN ('poev2_p1', + 'poev2_p2') + GROUP BY distinct_id, + person_id + ORDER BY if(distinct_id = 'poev2_p1', -1, 0) + ''' +# --- +# name: TestClickhousePaths.test_person_on_events_v2.1 + ''' + SELECT last_path_key AS source_event, + path_key AS target_event, + count(*) AS event_count, + avg(conversion_time) AS average_conversion_time + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 4) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 4) AS limited_timings, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list)) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + ifNull(nullIf(events__override.override_person_id, '00000000-0000-0000-0000-000000000000'), events.person_id) AS person_id, + ifNull(if(equals(events.event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event)), '') AS path_item_ungrouped, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + LEFT OUTER JOIN + (SELECT argMax(person_overrides.override_person_id, person_overrides.version) AS override_person_id, + person_overrides.old_person_id AS old_person_id + FROM person_overrides + WHERE equals(person_overrides.team_id, 2) + GROUP BY person_overrides.old_person_id) AS events__override ON equals(events.person_id, events__override.old_person_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-02-01 23:59:59', 6, 'UTC')))), or(equals(events.event, '$pageview'), equals(events.event, '$screen'), not(startsWith(events.event, '$')))) + ORDER BY ifNull(nullIf(events__override.override_person_id, '00000000-0000-0000-0000-000000000000'), events.person_id) ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE isNotNull(source_event) + GROUP BY source_event, + target_event + ORDER BY event_count DESC, + source_event ASC, + target_event ASC + LIMIT 50 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_recording + ''' + SELECT persons.id AS id, + toTimeZone(persons.created_at, 'UTC') AS created_at, + source.event_count AS event_count, + source.matching_events AS matching_events + FROM + (SELECT argMax(person.created_at, person.version) AS created_at, + person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons + INNER JOIN + (SELECT person_id AS actor_id, + groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, + count(*) AS event_count + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key, + final_uuid AS uuid, + final_timestamp AS timestamp, + final_session_id AS session_id, + final_window_id AS window_id + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + joined_path_tuple.4 AS final_uuid, + joined_path_tuple.5 AS final_timestamp, + joined_path_tuple.6 AS final_session_id, + joined_path_tuple.7 AS final_window_id, + arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, + arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, + arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, + arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 5) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 5) AS limited_timings, + if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, + arraySlice(filtered_uuid, 1, 5) AS limited_uuid, + if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, + arraySlice(filtered_timestamp, 1, 5) AS limited_timestamp, + if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, + arraySlice(filtered_session_id, 1, 5) AS limited_session_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, + arraySlice(filtered_window_id, 1, 5) AS limited_window_id, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, + path_time_tuple.4 AS uuid_items, + path_time_tuple.5 AS timestamp_items, + path_time_tuple.6 AS session_id_items, + path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list, + groupArray(uuid) AS uuid_list, + groupArray(timestamp) AS timestamp_list, + groupArray(session_id) AS session_id_list, + groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event), '') AS path_item_ungrouped, + events.uuid AS uuid, + toTimeZone(events.timestamp, 'UTC') AS timestamp, + ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, + ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-02 23:59:59', 6, 'UTC')))), equals(events.event, '$pageview')) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE ifNull(equals(path_key, '2_/2'), 0) + GROUP BY person_id) AS source ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_recording.1 + ''' + SELECT DISTINCT session_replay_events.session_id AS session_id + FROM + (SELECT session_replay_events.session_id AS session_id + FROM session_replay_events + WHERE equals(session_replay_events.team_id, 2) + GROUP BY session_replay_events.session_id) AS session_replay_events + WHERE ifNull(in(session_replay_events.session_id, ['s3', 's1', 's5']), 0) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_recording_for_dropoff + ''' + SELECT persons.id AS id, + toTimeZone(persons.created_at, 'UTC') AS created_at, + source.event_count AS event_count, + source.matching_events AS matching_events + FROM + (SELECT argMax(person.created_at, person.version) AS created_at, + person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons + INNER JOIN + (SELECT person_id AS actor_id, + groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, + count(*) AS event_count + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key, + final_uuid AS uuid, + final_timestamp AS timestamp, + final_session_id AS session_id, + final_window_id AS window_id + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + joined_path_tuple.4 AS final_uuid, + joined_path_tuple.5 AS final_timestamp, + joined_path_tuple.6 AS final_session_id, + joined_path_tuple.7 AS final_window_id, + arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, + arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, + arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, + arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 5) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 5) AS limited_timings, + if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, + arraySlice(filtered_uuid, 1, 5) AS limited_uuid, + if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, + arraySlice(filtered_timestamp, 1, 5) AS limited_timestamp, + if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, + arraySlice(filtered_session_id, 1, 5) AS limited_session_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, + arraySlice(filtered_window_id, 1, 5) AS limited_window_id, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, + path_time_tuple.4 AS uuid_items, + path_time_tuple.5 AS timestamp_items, + path_time_tuple.6 AS session_id_items, + path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list, + groupArray(uuid) AS uuid_list, + groupArray(timestamp) AS timestamp_list, + groupArray(session_id) AS session_id_list, + groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event), '') AS path_item_ungrouped, + events.uuid AS uuid, + toTimeZone(events.timestamp, 'UTC') AS timestamp, + ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, + ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-02 23:59:59', 6, 'UTC')))), equals(events.event, '$pageview')) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE and(ifNull(equals(path_dropoff_key, '2_/2'), 0), ifNull(equals(path_dropoff_key, path_key), isNull(path_dropoff_key) + and isNull(path_key))) + GROUP BY person_id) AS source ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_recording_for_dropoff.1 + ''' + SELECT DISTINCT session_replay_events.session_id AS session_id + FROM + (SELECT session_replay_events.session_id AS session_id + FROM session_replay_events + WHERE equals(session_replay_events.team_id, 2) + GROUP BY session_replay_events.session_id) AS session_replay_events + WHERE ifNull(in(session_replay_events.session_id, []), 0) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_recording_for_dropoff.2 + ''' + SELECT persons.id AS id, + toTimeZone(persons.created_at, 'UTC') AS created_at, + source.event_count AS event_count, + source.matching_events AS matching_events + FROM + (SELECT argMax(person.created_at, person.version) AS created_at, + person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons + INNER JOIN + (SELECT person_id AS actor_id, + groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, + count(*) AS event_count + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key, + final_uuid AS uuid, + final_timestamp AS timestamp, + final_session_id AS session_id, + final_window_id AS window_id + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + joined_path_tuple.4 AS final_uuid, + joined_path_tuple.5 AS final_timestamp, + joined_path_tuple.6 AS final_session_id, + joined_path_tuple.7 AS final_window_id, + arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, + arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, + arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, + arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 5) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 5) AS limited_timings, + if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, + arraySlice(filtered_uuid, 1, 5) AS limited_uuid, + if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, + arraySlice(filtered_timestamp, 1, 5) AS limited_timestamp, + if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, + arraySlice(filtered_session_id, 1, 5) AS limited_session_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, + arraySlice(filtered_window_id, 1, 5) AS limited_window_id, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, + path_time_tuple.4 AS uuid_items, + path_time_tuple.5 AS timestamp_items, + path_time_tuple.6 AS session_id_items, + path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list, + groupArray(uuid) AS uuid_list, + groupArray(timestamp) AS timestamp_list, + groupArray(session_id) AS session_id_list, + groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event), '') AS path_item_ungrouped, + events.uuid AS uuid, + toTimeZone(events.timestamp, 'UTC') AS timestamp, + ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, + ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-02 23:59:59', 6, 'UTC')))), equals(events.event, '$pageview')) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE and(ifNull(equals(path_dropoff_key, '3_/3'), 0), ifNull(equals(path_dropoff_key, path_key), isNull(path_dropoff_key) + and isNull(path_key))) + GROUP BY person_id) AS source ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_recording_for_dropoff.3 + ''' + SELECT DISTINCT session_replay_events.session_id AS session_id + FROM + (SELECT session_replay_events.session_id AS session_id + FROM session_replay_events + WHERE equals(session_replay_events.team_id, 2) + GROUP BY session_replay_events.session_id) AS session_replay_events + WHERE ifNull(in(session_replay_events.session_id, ['s1']), 0) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_recording_with_no_window_or_session_id + ''' + SELECT persons.id AS id, + toTimeZone(persons.created_at, 'UTC') AS created_at, + source.event_count AS event_count, + source.matching_events AS matching_events + FROM + (SELECT argMax(person.created_at, person.version) AS created_at, + person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons + INNER JOIN + (SELECT person_id AS actor_id, + groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, + count(*) AS event_count + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key, + final_uuid AS uuid, + final_timestamp AS timestamp, + final_session_id AS session_id, + final_window_id AS window_id + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + joined_path_tuple.4 AS final_uuid, + joined_path_tuple.5 AS final_timestamp, + joined_path_tuple.6 AS final_session_id, + joined_path_tuple.7 AS final_window_id, + arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, + arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, + arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, + arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 5) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 5) AS limited_timings, + if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, + arraySlice(filtered_uuid, 1, 5) AS limited_uuid, + if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, + arraySlice(filtered_timestamp, 1, 5) AS limited_timestamp, + if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, + arraySlice(filtered_session_id, 1, 5) AS limited_session_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, + arraySlice(filtered_window_id, 1, 5) AS limited_window_id, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, + path_time_tuple.4 AS uuid_items, + path_time_tuple.5 AS timestamp_items, + path_time_tuple.6 AS session_id_items, + path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list, + groupArray(uuid) AS uuid_list, + groupArray(timestamp) AS timestamp_list, + groupArray(session_id) AS session_id_list, + groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event), '') AS path_item_ungrouped, + events.uuid AS uuid, + toTimeZone(events.timestamp, 'UTC') AS timestamp, + ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, + ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-02 23:59:59', 6, 'UTC')))), equals(events.event, '$pageview')) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE ifNull(equals(path_key, '2_/2'), 0) + GROUP BY person_id) AS source ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_recording_with_no_window_or_session_id.1 + ''' + SELECT DISTINCT session_replay_events.session_id AS session_id + FROM + (SELECT session_replay_events.session_id AS session_id + FROM session_replay_events + WHERE equals(session_replay_events.team_id, 2) + GROUP BY session_replay_events.session_id) AS session_replay_events + WHERE ifNull(in(session_replay_events.session_id, ['']), 0) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_recording_with_start_and_end + ''' + SELECT persons.id AS id, + toTimeZone(persons.created_at, 'UTC') AS created_at, + source.event_count AS event_count, + source.matching_events AS matching_events + FROM + (SELECT argMax(person.created_at, person.version) AS created_at, + person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons + INNER JOIN + (SELECT person_id AS actor_id, + groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, + count(*) AS event_count + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key, + final_uuid AS uuid, + final_timestamp AS timestamp, + final_session_id AS session_id, + final_window_id AS window_id + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, '/3') AS target_index, + joined_path_tuple.4 AS final_uuid, + joined_path_tuple.5 AS final_timestamp, + joined_path_tuple.6 AS final_session_id, + joined_path_tuple.7 AS final_window_id, + arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, + arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, + arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, + arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, + indexOf(compact_path, '/1') AS start_target_index, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(compact_path, start_target_index), compact_path) AS start_filtered_path, + indexOf(start_filtered_path, '/3') AS end_target_index, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_path, end_target_index), start_filtered_path) AS filtered_path, + if(ifNull(greater(length(filtered_path), 5), 0), arrayConcat(arraySlice(filtered_path, 1, intDiv(5, 2)), ['...'], arraySlice(filtered_path, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_path) AS limited_path, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(timings, start_target_index), timings) AS start_filtered_timings, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_timings, end_target_index), start_filtered_timings) AS filtered_timings, + if(ifNull(greater(length(filtered_timings), 5), 0), arrayConcat(arraySlice(filtered_timings, 1, intDiv(5, 2)), [filtered_timings[plus(1, intDiv(5, 2))]], arraySlice(filtered_timings, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_timings) AS limited_timings, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(uuid, start_target_index), uuid) AS start_filtered_uuid, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_uuid, end_target_index), start_filtered_uuid) AS filtered_uuid, + if(ifNull(greater(length(filtered_uuid), 5), 0), arrayConcat(arraySlice(filtered_uuid, 1, intDiv(5, 2)), [filtered_uuid[plus(1, intDiv(5, 2))]], arraySlice(filtered_uuid, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_uuid) AS limited_uuid, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(timestamp, start_target_index), timestamp) AS start_filtered_timestamp, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_timestamp, end_target_index), start_filtered_timestamp) AS filtered_timestamp, + if(ifNull(greater(length(filtered_timestamp), 5), 0), arrayConcat(arraySlice(filtered_timestamp, 1, intDiv(5, 2)), [filtered_timestamp[plus(1, intDiv(5, 2))]], arraySlice(filtered_timestamp, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_timestamp) AS limited_timestamp, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(session_id, start_target_index), session_id) AS start_filtered_session_id, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_session_id, end_target_index), start_filtered_session_id) AS filtered_session_id, + if(ifNull(greater(length(filtered_session_id), 5), 0), arrayConcat(arraySlice(filtered_session_id, 1, intDiv(5, 2)), [filtered_session_id[plus(1, intDiv(5, 2))]], arraySlice(filtered_session_id, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_session_id) AS limited_session_id, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(window_id, start_target_index), window_id) AS start_filtered_window_id, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_window_id, end_target_index), start_filtered_window_id) AS filtered_window_id, + if(ifNull(greater(length(filtered_window_id), 5), 0), arrayConcat(arraySlice(filtered_window_id, 1, intDiv(5, 2)), [filtered_window_id[plus(1, intDiv(5, 2))]], arraySlice(filtered_window_id, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_window_id) AS limited_window_id, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, + path_time_tuple.4 AS uuid_items, + path_time_tuple.5 AS timestamp_items, + path_time_tuple.6 AS session_id_items, + path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list, + groupArray(uuid) AS uuid_list, + groupArray(timestamp) AS timestamp_list, + groupArray(session_id) AS session_id_list, + groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event), '') AS path_item_ungrouped, + events.uuid AS uuid, + toTimeZone(events.timestamp, 'UTC') AS timestamp, + ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, + ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-02 23:59:59', 6, 'UTC')))), equals(events.event, '$pageview')) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index + WHERE and(ifNull(greater(start_target_index, 0), 0), ifNull(greater(end_target_index, 0), 0)))) + WHERE ifNull(equals(path_key, '2_/2'), 0) + GROUP BY person_id) AS source ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_recording_with_start_and_end.1 + ''' + SELECT DISTINCT session_replay_events.session_id AS session_id + FROM + (SELECT session_replay_events.session_id AS session_id + FROM session_replay_events + WHERE equals(session_replay_events.team_id, 2) + GROUP BY session_replay_events.session_id) AS session_replay_events + WHERE ifNull(in(session_replay_events.session_id, ['s1']), 0) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_respect_session_limits + ''' + SELECT last_path_key AS source_event, + path_key AS target_event, + count(*) AS event_count, + avg(conversion_time) AS average_conversion_time + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 5) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 5) AS limited_timings, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list)) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event)), '') AS path_item_ungrouped, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-05-23 23:59:59', 6, 'UTC'))))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE isNotNull(source_event) + GROUP BY source_event, + target_event + ORDER BY event_count DESC, + source_event ASC, + target_event ASC + LIMIT 50 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_start_and_end + ''' + SELECT last_path_key AS source_event, + path_key AS target_event, + count(*) AS event_count, + avg(conversion_time) AS average_conversion_time + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, '/about') AS target_index, + indexOf(compact_path, '/5') AS start_target_index, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(compact_path, start_target_index), compact_path) AS start_filtered_path, + indexOf(start_filtered_path, '/about') AS end_target_index, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_path, end_target_index), start_filtered_path) AS filtered_path, + if(ifNull(greater(length(filtered_path), 5), 0), arrayConcat(arraySlice(filtered_path, 1, intDiv(5, 2)), ['...'], arraySlice(filtered_path, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_path) AS limited_path, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(timings, start_target_index), timings) AS start_filtered_timings, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_timings, end_target_index), start_filtered_timings) AS filtered_timings, + if(ifNull(greater(length(filtered_timings), 5), 0), arrayConcat(arraySlice(filtered_timings, 1, intDiv(5, 2)), [filtered_timings[plus(1, intDiv(5, 2))]], arraySlice(filtered_timings, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_timings) AS limited_timings, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list)) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event), '') AS path_item_ungrouped, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC')))), equals(events.event, '$pageview')) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index + WHERE and(ifNull(greater(start_target_index, 0), 0), ifNull(greater(end_target_index, 0), 0)))) + WHERE isNotNull(source_event) + GROUP BY source_event, + target_event + ORDER BY event_count DESC, + source_event ASC, + target_event ASC + LIMIT 50 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_start_and_end.1 + ''' + SELECT persons.id AS id + FROM + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons + INNER JOIN + (SELECT person_id AS actor_id, + groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, + count(*) AS event_count + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key, + final_uuid AS uuid, + final_timestamp AS timestamp, + final_session_id AS session_id, + final_window_id AS window_id + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, '/about') AS target_index, + joined_path_tuple.4 AS final_uuid, + joined_path_tuple.5 AS final_timestamp, + joined_path_tuple.6 AS final_session_id, + joined_path_tuple.7 AS final_window_id, + arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, + arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, + arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, + arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, + indexOf(compact_path, '/5') AS start_target_index, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(compact_path, start_target_index), compact_path) AS start_filtered_path, + indexOf(start_filtered_path, '/about') AS end_target_index, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_path, end_target_index), start_filtered_path) AS filtered_path, + if(ifNull(greater(length(filtered_path), 5), 0), arrayConcat(arraySlice(filtered_path, 1, intDiv(5, 2)), ['...'], arraySlice(filtered_path, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_path) AS limited_path, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(timings, start_target_index), timings) AS start_filtered_timings, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_timings, end_target_index), start_filtered_timings) AS filtered_timings, + if(ifNull(greater(length(filtered_timings), 5), 0), arrayConcat(arraySlice(filtered_timings, 1, intDiv(5, 2)), [filtered_timings[plus(1, intDiv(5, 2))]], arraySlice(filtered_timings, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_timings) AS limited_timings, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(uuid, start_target_index), uuid) AS start_filtered_uuid, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_uuid, end_target_index), start_filtered_uuid) AS filtered_uuid, + if(ifNull(greater(length(filtered_uuid), 5), 0), arrayConcat(arraySlice(filtered_uuid, 1, intDiv(5, 2)), [filtered_uuid[plus(1, intDiv(5, 2))]], arraySlice(filtered_uuid, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_uuid) AS limited_uuid, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(timestamp, start_target_index), timestamp) AS start_filtered_timestamp, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_timestamp, end_target_index), start_filtered_timestamp) AS filtered_timestamp, + if(ifNull(greater(length(filtered_timestamp), 5), 0), arrayConcat(arraySlice(filtered_timestamp, 1, intDiv(5, 2)), [filtered_timestamp[plus(1, intDiv(5, 2))]], arraySlice(filtered_timestamp, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_timestamp) AS limited_timestamp, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(session_id, start_target_index), session_id) AS start_filtered_session_id, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_session_id, end_target_index), start_filtered_session_id) AS filtered_session_id, + if(ifNull(greater(length(filtered_session_id), 5), 0), arrayConcat(arraySlice(filtered_session_id, 1, intDiv(5, 2)), [filtered_session_id[plus(1, intDiv(5, 2))]], arraySlice(filtered_session_id, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_session_id) AS limited_session_id, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(window_id, start_target_index), window_id) AS start_filtered_window_id, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_window_id, end_target_index), start_filtered_window_id) AS filtered_window_id, + if(ifNull(greater(length(filtered_window_id), 5), 0), arrayConcat(arraySlice(filtered_window_id, 1, intDiv(5, 2)), [filtered_window_id[plus(1, intDiv(5, 2))]], arraySlice(filtered_window_id, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_window_id) AS limited_window_id, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, + path_time_tuple.4 AS uuid_items, + path_time_tuple.5 AS timestamp_items, + path_time_tuple.6 AS session_id_items, + path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list, + groupArray(uuid) AS uuid_list, + groupArray(timestamp) AS timestamp_list, + groupArray(session_id) AS session_id_list, + groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event), '') AS path_item_ungrouped, + events.uuid AS uuid, + toTimeZone(events.timestamp, 'UTC') AS timestamp, + ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, + ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC')))), equals(events.event, '$pageview')) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index + WHERE and(ifNull(greater(start_target_index, 0), 0), ifNull(greater(end_target_index, 0), 0)))) + WHERE and(ifNull(equals(last_path_key, '1_/5'), 0), ifNull(equals(path_key, '2_/about'), 0)) + GROUP BY person_id) AS source ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_start_and_end.2 + ''' + SELECT last_path_key AS source_event, + path_key AS target_event, + count(*) AS event_count, + avg(conversion_time) AS average_conversion_time + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, '/about') AS target_index, + indexOf(compact_path, '/2') AS start_target_index, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(compact_path, start_target_index), compact_path) AS start_filtered_path, + indexOf(start_filtered_path, '/about') AS end_target_index, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_path, end_target_index), start_filtered_path) AS filtered_path, + if(ifNull(greater(length(filtered_path), 4), 0), arrayConcat(arraySlice(filtered_path, 1, intDiv(4, 2)), ['...'], arraySlice(filtered_path, multiply(-1, intDiv(4, 2)), intDiv(4, 2))), filtered_path) AS limited_path, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(timings, start_target_index), timings) AS start_filtered_timings, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_timings, end_target_index), start_filtered_timings) AS filtered_timings, + if(ifNull(greater(length(filtered_timings), 4), 0), arrayConcat(arraySlice(filtered_timings, 1, intDiv(4, 2)), [filtered_timings[plus(1, intDiv(4, 2))]], arraySlice(filtered_timings, multiply(-1, intDiv(4, 2)), intDiv(4, 2))), filtered_timings) AS limited_timings, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list)) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event), '') AS path_item_ungrouped, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC')))), equals(events.event, '$pageview')) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index + WHERE and(ifNull(greater(start_target_index, 0), 0), ifNull(greater(end_target_index, 0), 0)))) + WHERE isNotNull(source_event) + GROUP BY source_event, + target_event + ORDER BY event_count DESC, + source_event ASC, + target_event ASC + LIMIT 50 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_start_and_end.3 + ''' + SELECT persons.id AS id + FROM + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons + INNER JOIN + (SELECT person_id AS actor_id, + groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, + count(*) AS event_count + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key, + final_uuid AS uuid, + final_timestamp AS timestamp, + final_session_id AS session_id, + final_window_id AS window_id + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, '/about') AS target_index, + joined_path_tuple.4 AS final_uuid, + joined_path_tuple.5 AS final_timestamp, + joined_path_tuple.6 AS final_session_id, + joined_path_tuple.7 AS final_window_id, + arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, + arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, + arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, + arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, + indexOf(compact_path, '/2') AS start_target_index, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(compact_path, start_target_index), compact_path) AS start_filtered_path, + indexOf(start_filtered_path, '/about') AS end_target_index, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_path, end_target_index), start_filtered_path) AS filtered_path, + if(ifNull(greater(length(filtered_path), 4), 0), arrayConcat(arraySlice(filtered_path, 1, intDiv(4, 2)), ['...'], arraySlice(filtered_path, multiply(-1, intDiv(4, 2)), intDiv(4, 2))), filtered_path) AS limited_path, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(timings, start_target_index), timings) AS start_filtered_timings, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_timings, end_target_index), start_filtered_timings) AS filtered_timings, + if(ifNull(greater(length(filtered_timings), 4), 0), arrayConcat(arraySlice(filtered_timings, 1, intDiv(4, 2)), [filtered_timings[plus(1, intDiv(4, 2))]], arraySlice(filtered_timings, multiply(-1, intDiv(4, 2)), intDiv(4, 2))), filtered_timings) AS limited_timings, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(uuid, start_target_index), uuid) AS start_filtered_uuid, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_uuid, end_target_index), start_filtered_uuid) AS filtered_uuid, + if(ifNull(greater(length(filtered_uuid), 4), 0), arrayConcat(arraySlice(filtered_uuid, 1, intDiv(4, 2)), [filtered_uuid[plus(1, intDiv(4, 2))]], arraySlice(filtered_uuid, multiply(-1, intDiv(4, 2)), intDiv(4, 2))), filtered_uuid) AS limited_uuid, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(timestamp, start_target_index), timestamp) AS start_filtered_timestamp, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_timestamp, end_target_index), start_filtered_timestamp) AS filtered_timestamp, + if(ifNull(greater(length(filtered_timestamp), 4), 0), arrayConcat(arraySlice(filtered_timestamp, 1, intDiv(4, 2)), [filtered_timestamp[plus(1, intDiv(4, 2))]], arraySlice(filtered_timestamp, multiply(-1, intDiv(4, 2)), intDiv(4, 2))), filtered_timestamp) AS limited_timestamp, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(session_id, start_target_index), session_id) AS start_filtered_session_id, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_session_id, end_target_index), start_filtered_session_id) AS filtered_session_id, + if(ifNull(greater(length(filtered_session_id), 4), 0), arrayConcat(arraySlice(filtered_session_id, 1, intDiv(4, 2)), [filtered_session_id[plus(1, intDiv(4, 2))]], arraySlice(filtered_session_id, multiply(-1, intDiv(4, 2)), intDiv(4, 2))), filtered_session_id) AS limited_session_id, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(window_id, start_target_index), window_id) AS start_filtered_window_id, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_window_id, end_target_index), start_filtered_window_id) AS filtered_window_id, + if(ifNull(greater(length(filtered_window_id), 4), 0), arrayConcat(arraySlice(filtered_window_id, 1, intDiv(4, 2)), [filtered_window_id[plus(1, intDiv(4, 2))]], arraySlice(filtered_window_id, multiply(-1, intDiv(4, 2)), intDiv(4, 2))), filtered_window_id) AS limited_window_id, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, + path_time_tuple.4 AS uuid_items, + path_time_tuple.5 AS timestamp_items, + path_time_tuple.6 AS session_id_items, + path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list, + groupArray(uuid) AS uuid_list, + groupArray(timestamp) AS timestamp_list, + groupArray(session_id) AS session_id_list, + groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event), '') AS path_item_ungrouped, + events.uuid AS uuid, + toTimeZone(events.timestamp, 'UTC') AS timestamp, + ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, + ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC')))), equals(events.event, '$pageview')) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index + WHERE and(ifNull(greater(start_target_index, 0), 0), ifNull(greater(end_target_index, 0), 0)))) + WHERE and(ifNull(equals(last_path_key, '3_...'), 0), ifNull(equals(path_key, '4_/5'), 0)) + GROUP BY person_id) AS source ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_start_and_end_materialized + ''' + SELECT last_path_key AS source_event, + path_key AS target_event, + count(*) AS event_count, + avg(conversion_time) AS average_conversion_time + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, '/about') AS target_index, + indexOf(compact_path, '/5') AS start_target_index, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(compact_path, start_target_index), compact_path) AS start_filtered_path, + indexOf(start_filtered_path, '/about') AS end_target_index, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_path, end_target_index), start_filtered_path) AS filtered_path, + if(ifNull(greater(length(filtered_path), 5), 0), arrayConcat(arraySlice(filtered_path, 1, intDiv(5, 2)), ['...'], arraySlice(filtered_path, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_path) AS limited_path, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(timings, start_target_index), timings) AS start_filtered_timings, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_timings, end_target_index), start_filtered_timings) AS filtered_timings, + if(ifNull(greater(length(filtered_timings), 5), 0), arrayConcat(arraySlice(filtered_timings, 1, intDiv(5, 2)), [filtered_timings[plus(1, intDiv(5, 2))]], arraySlice(filtered_timings, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_timings) AS limited_timings, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list)) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(nullIf(nullIf(events.`mat_$current_url`, ''), 'null'), ''), '(.)/$', '\\1'), events.event), '') AS path_item_ungrouped, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC')))), equals(events.event, '$pageview')) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index + WHERE and(ifNull(greater(start_target_index, 0), 0), ifNull(greater(end_target_index, 0), 0)))) + WHERE isNotNull(source_event) + GROUP BY source_event, + target_event + ORDER BY event_count DESC, + source_event ASC, + target_event ASC + LIMIT 50 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_start_and_end_materialized.1 + ''' + SELECT persons.id AS id + FROM + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons + INNER JOIN + (SELECT person_id AS actor_id, + groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, + count(*) AS event_count + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key, + final_uuid AS uuid, + final_timestamp AS timestamp, + final_session_id AS session_id, + final_window_id AS window_id + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, '/about') AS target_index, + joined_path_tuple.4 AS final_uuid, + joined_path_tuple.5 AS final_timestamp, + joined_path_tuple.6 AS final_session_id, + joined_path_tuple.7 AS final_window_id, + arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, + arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, + arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, + arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, + indexOf(compact_path, '/5') AS start_target_index, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(compact_path, start_target_index), compact_path) AS start_filtered_path, + indexOf(start_filtered_path, '/about') AS end_target_index, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_path, end_target_index), start_filtered_path) AS filtered_path, + if(ifNull(greater(length(filtered_path), 5), 0), arrayConcat(arraySlice(filtered_path, 1, intDiv(5, 2)), ['...'], arraySlice(filtered_path, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_path) AS limited_path, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(timings, start_target_index), timings) AS start_filtered_timings, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_timings, end_target_index), start_filtered_timings) AS filtered_timings, + if(ifNull(greater(length(filtered_timings), 5), 0), arrayConcat(arraySlice(filtered_timings, 1, intDiv(5, 2)), [filtered_timings[plus(1, intDiv(5, 2))]], arraySlice(filtered_timings, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_timings) AS limited_timings, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(uuid, start_target_index), uuid) AS start_filtered_uuid, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_uuid, end_target_index), start_filtered_uuid) AS filtered_uuid, + if(ifNull(greater(length(filtered_uuid), 5), 0), arrayConcat(arraySlice(filtered_uuid, 1, intDiv(5, 2)), [filtered_uuid[plus(1, intDiv(5, 2))]], arraySlice(filtered_uuid, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_uuid) AS limited_uuid, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(timestamp, start_target_index), timestamp) AS start_filtered_timestamp, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_timestamp, end_target_index), start_filtered_timestamp) AS filtered_timestamp, + if(ifNull(greater(length(filtered_timestamp), 5), 0), arrayConcat(arraySlice(filtered_timestamp, 1, intDiv(5, 2)), [filtered_timestamp[plus(1, intDiv(5, 2))]], arraySlice(filtered_timestamp, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_timestamp) AS limited_timestamp, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(session_id, start_target_index), session_id) AS start_filtered_session_id, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_session_id, end_target_index), start_filtered_session_id) AS filtered_session_id, + if(ifNull(greater(length(filtered_session_id), 5), 0), arrayConcat(arraySlice(filtered_session_id, 1, intDiv(5, 2)), [filtered_session_id[plus(1, intDiv(5, 2))]], arraySlice(filtered_session_id, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_session_id) AS limited_session_id, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(window_id, start_target_index), window_id) AS start_filtered_window_id, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_window_id, end_target_index), start_filtered_window_id) AS filtered_window_id, + if(ifNull(greater(length(filtered_window_id), 5), 0), arrayConcat(arraySlice(filtered_window_id, 1, intDiv(5, 2)), [filtered_window_id[plus(1, intDiv(5, 2))]], arraySlice(filtered_window_id, multiply(-1, intDiv(5, 2)), intDiv(5, 2))), filtered_window_id) AS limited_window_id, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, + path_time_tuple.4 AS uuid_items, + path_time_tuple.5 AS timestamp_items, + path_time_tuple.6 AS session_id_items, + path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list, + groupArray(uuid) AS uuid_list, + groupArray(timestamp) AS timestamp_list, + groupArray(session_id) AS session_id_list, + groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(nullIf(nullIf(events.`mat_$current_url`, ''), 'null'), ''), '(.)/$', '\\1'), events.event), '') AS path_item_ungrouped, + events.uuid AS uuid, + toTimeZone(events.timestamp, 'UTC') AS timestamp, + ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, + ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC')))), equals(events.event, '$pageview')) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index + WHERE and(ifNull(greater(start_target_index, 0), 0), ifNull(greater(end_target_index, 0), 0)))) + WHERE and(ifNull(equals(last_path_key, '1_/5'), 0), ifNull(equals(path_key, '2_/about'), 0)) + GROUP BY person_id) AS source ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_start_and_end_materialized.2 + ''' + SELECT last_path_key AS source_event, + path_key AS target_event, + count(*) AS event_count, + avg(conversion_time) AS average_conversion_time + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, '/about') AS target_index, + indexOf(compact_path, '/2') AS start_target_index, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(compact_path, start_target_index), compact_path) AS start_filtered_path, + indexOf(start_filtered_path, '/about') AS end_target_index, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_path, end_target_index), start_filtered_path) AS filtered_path, + if(ifNull(greater(length(filtered_path), 4), 0), arrayConcat(arraySlice(filtered_path, 1, intDiv(4, 2)), ['...'], arraySlice(filtered_path, multiply(-1, intDiv(4, 2)), intDiv(4, 2))), filtered_path) AS limited_path, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(timings, start_target_index), timings) AS start_filtered_timings, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_timings, end_target_index), start_filtered_timings) AS filtered_timings, + if(ifNull(greater(length(filtered_timings), 4), 0), arrayConcat(arraySlice(filtered_timings, 1, intDiv(4, 2)), [filtered_timings[plus(1, intDiv(4, 2))]], arraySlice(filtered_timings, multiply(-1, intDiv(4, 2)), intDiv(4, 2))), filtered_timings) AS limited_timings, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list)) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(nullIf(nullIf(events.`mat_$current_url`, ''), 'null'), ''), '(.)/$', '\\1'), events.event), '') AS path_item_ungrouped, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC')))), equals(events.event, '$pageview')) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index + WHERE and(ifNull(greater(start_target_index, 0), 0), ifNull(greater(end_target_index, 0), 0)))) + WHERE isNotNull(source_event) + GROUP BY source_event, + target_event + ORDER BY event_count DESC, + source_event ASC, + target_event ASC + LIMIT 50 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_start_and_end_materialized.3 + ''' + SELECT persons.id AS id + FROM + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons + INNER JOIN + (SELECT person_id AS actor_id, + groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, + count(*) AS event_count + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key, + final_uuid AS uuid, + final_timestamp AS timestamp, + final_session_id AS session_id, + final_window_id AS window_id + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, '/about') AS target_index, + joined_path_tuple.4 AS final_uuid, + joined_path_tuple.5 AS final_timestamp, + joined_path_tuple.6 AS final_session_id, + joined_path_tuple.7 AS final_window_id, + arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, + arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, + arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, + arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, + indexOf(compact_path, '/2') AS start_target_index, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(compact_path, start_target_index), compact_path) AS start_filtered_path, + indexOf(start_filtered_path, '/about') AS end_target_index, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_path, end_target_index), start_filtered_path) AS filtered_path, + if(ifNull(greater(length(filtered_path), 4), 0), arrayConcat(arraySlice(filtered_path, 1, intDiv(4, 2)), ['...'], arraySlice(filtered_path, multiply(-1, intDiv(4, 2)), intDiv(4, 2))), filtered_path) AS limited_path, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(timings, start_target_index), timings) AS start_filtered_timings, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_timings, end_target_index), start_filtered_timings) AS filtered_timings, + if(ifNull(greater(length(filtered_timings), 4), 0), arrayConcat(arraySlice(filtered_timings, 1, intDiv(4, 2)), [filtered_timings[plus(1, intDiv(4, 2))]], arraySlice(filtered_timings, multiply(-1, intDiv(4, 2)), intDiv(4, 2))), filtered_timings) AS limited_timings, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(uuid, start_target_index), uuid) AS start_filtered_uuid, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_uuid, end_target_index), start_filtered_uuid) AS filtered_uuid, + if(ifNull(greater(length(filtered_uuid), 4), 0), arrayConcat(arraySlice(filtered_uuid, 1, intDiv(4, 2)), [filtered_uuid[plus(1, intDiv(4, 2))]], arraySlice(filtered_uuid, multiply(-1, intDiv(4, 2)), intDiv(4, 2))), filtered_uuid) AS limited_uuid, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(timestamp, start_target_index), timestamp) AS start_filtered_timestamp, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_timestamp, end_target_index), start_filtered_timestamp) AS filtered_timestamp, + if(ifNull(greater(length(filtered_timestamp), 4), 0), arrayConcat(arraySlice(filtered_timestamp, 1, intDiv(4, 2)), [filtered_timestamp[plus(1, intDiv(4, 2))]], arraySlice(filtered_timestamp, multiply(-1, intDiv(4, 2)), intDiv(4, 2))), filtered_timestamp) AS limited_timestamp, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(session_id, start_target_index), session_id) AS start_filtered_session_id, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_session_id, end_target_index), start_filtered_session_id) AS filtered_session_id, + if(ifNull(greater(length(filtered_session_id), 4), 0), arrayConcat(arraySlice(filtered_session_id, 1, intDiv(4, 2)), [filtered_session_id[plus(1, intDiv(4, 2))]], arraySlice(filtered_session_id, multiply(-1, intDiv(4, 2)), intDiv(4, 2))), filtered_session_id) AS limited_session_id, + if(ifNull(greater(start_target_index, 0), 0), arraySlice(window_id, start_target_index), window_id) AS start_filtered_window_id, + if(ifNull(greater(end_target_index, 0), 0), arrayResize(start_filtered_window_id, end_target_index), start_filtered_window_id) AS filtered_window_id, + if(ifNull(greater(length(filtered_window_id), 4), 0), arrayConcat(arraySlice(filtered_window_id, 1, intDiv(4, 2)), [filtered_window_id[plus(1, intDiv(4, 2))]], arraySlice(filtered_window_id, multiply(-1, intDiv(4, 2)), intDiv(4, 2))), filtered_window_id) AS limited_window_id, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, + path_time_tuple.4 AS uuid_items, + path_time_tuple.5 AS timestamp_items, + path_time_tuple.6 AS session_id_items, + path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list, + groupArray(uuid) AS uuid_list, + groupArray(timestamp) AS timestamp_list, + groupArray(session_id) AS session_id_list, + groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(nullIf(nullIf(events.`mat_$current_url`, ''), 'null'), ''), '(.)/$', '\\1'), events.event), '') AS path_item_ungrouped, + events.uuid AS uuid, + toTimeZone(events.timestamp, 'UTC') AS timestamp, + ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, + ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC')))), equals(events.event, '$pageview')) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index + WHERE and(ifNull(greater(start_target_index, 0), 0), ifNull(greater(end_target_index, 0), 0)))) + WHERE and(ifNull(equals(last_path_key, '3_...'), 0), ifNull(equals(path_key, '4_/5'), 0)) + GROUP BY person_id) AS source ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_start_dropping_orphaned_edges + ''' + SELECT last_path_key AS source_event, + path_key AS target_event, + count(*) AS event_count, + avg(conversion_time) AS average_conversion_time + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, '/2') AS target_index, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 5) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 5) AS limited_timings, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list)) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event)), '') AS path_item_ungrouped, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-05-07 23:59:59', 6, 'UTC'))))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index + WHERE ifNull(greater(target_index, 0), 0))) + WHERE isNotNull(source_event) + GROUP BY source_event, + target_event + ORDER BY event_count DESC, + source_event ASC, + target_event ASC + LIMIT 6 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_step_conversion_times + ''' + SELECT last_path_key AS source_event, + path_key AS target_event, + count(*) AS event_count, + avg(conversion_time) AS average_conversion_time + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 4) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 4) AS limited_timings, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list)) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event), '') AS path_item_ungrouped, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-05-23 23:59:59', 6, 'UTC')))), equals(events.event, '$pageview')) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE isNotNull(source_event) + GROUP BY source_event, + target_event + ORDER BY event_count DESC, + source_event ASC, + target_event ASC + LIMIT 50 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_step_limit + ''' + SELECT last_path_key AS source_event, + path_key AS target_event, + count(*) AS event_count, + avg(conversion_time) AS average_conversion_time + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 2) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 2) AS limited_timings, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list)) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event)), '') AS path_item_ungrouped, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2011-12-31 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-07 23:59:59', 6, 'UTC'))))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE isNotNull(source_event) + GROUP BY source_event, + target_event + ORDER BY event_count DESC, + source_event ASC, + target_event ASC + LIMIT 50 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_step_limit.1 + ''' + SELECT persons.id AS id + FROM + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons + INNER JOIN + (SELECT person_id AS actor_id, + groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, + count(*) AS event_count + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key, + final_uuid AS uuid, + final_timestamp AS timestamp, + final_session_id AS session_id, + final_window_id AS window_id + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + joined_path_tuple.4 AS final_uuid, + joined_path_tuple.5 AS final_timestamp, + joined_path_tuple.6 AS final_session_id, + joined_path_tuple.7 AS final_window_id, + arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, + arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, + arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, + arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 2) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 2) AS limited_timings, + if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, + arraySlice(filtered_uuid, 1, 2) AS limited_uuid, + if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, + arraySlice(filtered_timestamp, 1, 2) AS limited_timestamp, + if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, + arraySlice(filtered_session_id, 1, 2) AS limited_session_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, + arraySlice(filtered_window_id, 1, 2) AS limited_window_id, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, + path_time_tuple.4 AS uuid_items, + path_time_tuple.5 AS timestamp_items, + path_time_tuple.6 AS session_id_items, + path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list, + groupArray(uuid) AS uuid_list, + groupArray(timestamp) AS timestamp_list, + groupArray(session_id) AS session_id_list, + groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event)), '') AS path_item_ungrouped, + events.uuid AS uuid, + toTimeZone(events.timestamp, 'UTC') AS timestamp, + ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, + ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2011-12-31 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-07 23:59:59', 6, 'UTC'))))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE and(ifNull(equals(last_path_key, '1_/1'), 0), ifNull(equals(path_key, '2_/2'), 0)) + GROUP BY person_id) AS source ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_step_limit.2 + ''' + SELECT persons.id AS id + FROM + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons + INNER JOIN + (SELECT person_id AS actor_id, + groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, + count(*) AS event_count + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key, + final_uuid AS uuid, + final_timestamp AS timestamp, + final_session_id AS session_id, + final_window_id AS window_id + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + joined_path_tuple.4 AS final_uuid, + joined_path_tuple.5 AS final_timestamp, + joined_path_tuple.6 AS final_session_id, + joined_path_tuple.7 AS final_window_id, + arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, + arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, + arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, + arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 2) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 2) AS limited_timings, + if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, + arraySlice(filtered_uuid, 1, 2) AS limited_uuid, + if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, + arraySlice(filtered_timestamp, 1, 2) AS limited_timestamp, + if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, + arraySlice(filtered_session_id, 1, 2) AS limited_session_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, + arraySlice(filtered_window_id, 1, 2) AS limited_window_id, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, + path_time_tuple.4 AS uuid_items, + path_time_tuple.5 AS timestamp_items, + path_time_tuple.6 AS session_id_items, + path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list, + groupArray(uuid) AS uuid_list, + groupArray(timestamp) AS timestamp_list, + groupArray(session_id) AS session_id_list, + groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event)), '') AS path_item_ungrouped, + events.uuid AS uuid, + toTimeZone(events.timestamp, 'UTC') AS timestamp, + ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, + ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2011-12-31 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-07 23:59:59', 6, 'UTC'))))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE and(ifNull(equals(last_path_key, '2_/2'), 0), ifNull(equals(path_key, '3_/3'), 0)) + GROUP BY person_id) AS source ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_step_limit.3 + ''' + SELECT last_path_key AS source_event, + path_key AS target_event, + count(*) AS event_count, + avg(conversion_time) AS average_conversion_time + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 3) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 3) AS limited_timings, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list)) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event)), '') AS path_item_ungrouped, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2011-12-31 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-07 23:59:59', 6, 'UTC'))))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE isNotNull(source_event) + GROUP BY source_event, + target_event + ORDER BY event_count DESC, + source_event ASC, + target_event ASC + LIMIT 50 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_step_limit.4 + ''' + SELECT persons.id AS id + FROM + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons + INNER JOIN + (SELECT person_id AS actor_id, + groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, + count(*) AS event_count + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key, + final_uuid AS uuid, + final_timestamp AS timestamp, + final_session_id AS session_id, + final_window_id AS window_id + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + joined_path_tuple.4 AS final_uuid, + joined_path_tuple.5 AS final_timestamp, + joined_path_tuple.6 AS final_session_id, + joined_path_tuple.7 AS final_window_id, + arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, + arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, + arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, + arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 3) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 3) AS limited_timings, + if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, + arraySlice(filtered_uuid, 1, 3) AS limited_uuid, + if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, + arraySlice(filtered_timestamp, 1, 3) AS limited_timestamp, + if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, + arraySlice(filtered_session_id, 1, 3) AS limited_session_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, + arraySlice(filtered_window_id, 1, 3) AS limited_window_id, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, + path_time_tuple.4 AS uuid_items, + path_time_tuple.5 AS timestamp_items, + path_time_tuple.6 AS session_id_items, + path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list, + groupArray(uuid) AS uuid_list, + groupArray(timestamp) AS timestamp_list, + groupArray(session_id) AS session_id_list, + groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event)), '') AS path_item_ungrouped, + events.uuid AS uuid, + toTimeZone(events.timestamp, 'UTC') AS timestamp, + ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, + ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2011-12-31 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-07 23:59:59', 6, 'UTC'))))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE and(ifNull(equals(last_path_key, '2_/2'), 0), ifNull(equals(path_key, '3_/3'), 0)) + GROUP BY person_id) AS source ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_step_limit.5 + ''' + SELECT last_path_key AS source_event, + path_key AS target_event, + count(*) AS event_count, + avg(conversion_time) AS average_conversion_time + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 4) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 4) AS limited_timings, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list)) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event)), '') AS path_item_ungrouped, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2011-12-31 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-07 23:59:59', 6, 'UTC'))))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE isNotNull(source_event) + GROUP BY source_event, + target_event + ORDER BY event_count DESC, + source_event ASC, + target_event ASC + LIMIT 50 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_step_limit.6 + ''' + SELECT persons.id AS id + FROM + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons + INNER JOIN + (SELECT person_id AS actor_id, + groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, + count(*) AS event_count + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key, + final_uuid AS uuid, + final_timestamp AS timestamp, + final_session_id AS session_id, + final_window_id AS window_id + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + joined_path_tuple.4 AS final_uuid, + joined_path_tuple.5 AS final_timestamp, + joined_path_tuple.6 AS final_session_id, + joined_path_tuple.7 AS final_window_id, + arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, + arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, + arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, + arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 4) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 4) AS limited_timings, + if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, + arraySlice(filtered_uuid, 1, 4) AS limited_uuid, + if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, + arraySlice(filtered_timestamp, 1, 4) AS limited_timestamp, + if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, + arraySlice(filtered_session_id, 1, 4) AS limited_session_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, + arraySlice(filtered_window_id, 1, 4) AS limited_window_id, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, + path_time_tuple.4 AS uuid_items, + path_time_tuple.5 AS timestamp_items, + path_time_tuple.6 AS session_id_items, + path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list, + groupArray(uuid) AS uuid_list, + groupArray(timestamp) AS timestamp_list, + groupArray(session_id) AS session_id_list, + groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event)), '') AS path_item_ungrouped, + events.uuid AS uuid, + toTimeZone(events.timestamp, 'UTC') AS timestamp, + ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, + ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2011-12-31 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-07 23:59:59', 6, 'UTC'))))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE and(ifNull(equals(last_path_key, '1_/1'), 0), ifNull(equals(path_key, '2_/2'), 0)) + GROUP BY person_id) AS source ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_step_limit.7 + ''' + SELECT persons.id AS id + FROM + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons + INNER JOIN + (SELECT person_id AS actor_id, + groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, + count(*) AS event_count + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key, + final_uuid AS uuid, + final_timestamp AS timestamp, + final_session_id AS session_id, + final_window_id AS window_id + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + joined_path_tuple.4 AS final_uuid, + joined_path_tuple.5 AS final_timestamp, + joined_path_tuple.6 AS final_session_id, + joined_path_tuple.7 AS final_window_id, + arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, + arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, + arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, + arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 4) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 4) AS limited_timings, + if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, + arraySlice(filtered_uuid, 1, 4) AS limited_uuid, + if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, + arraySlice(filtered_timestamp, 1, 4) AS limited_timestamp, + if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, + arraySlice(filtered_session_id, 1, 4) AS limited_session_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, + arraySlice(filtered_window_id, 1, 4) AS limited_window_id, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, + path_time_tuple.4 AS uuid_items, + path_time_tuple.5 AS timestamp_items, + path_time_tuple.6 AS session_id_items, + path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list, + groupArray(uuid) AS uuid_list, + groupArray(timestamp) AS timestamp_list, + groupArray(session_id) AS session_id_list, + groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event)), '') AS path_item_ungrouped, + events.uuid AS uuid, + toTimeZone(events.timestamp, 'UTC') AS timestamp, + ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, + ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2011-12-31 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-07 23:59:59', 6, 'UTC'))))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE and(ifNull(equals(last_path_key, '2_/2'), 0), ifNull(equals(path_key, '3_/3'), 0)) + GROUP BY person_id) AS source ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_step_limit.8 + ''' + SELECT persons.id AS id + FROM + (SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons + INNER JOIN + (SELECT person_id AS actor_id, + groupUniqArray(100)(tuple(timestamp, uuid, session_id, window_id)) AS matching_events, + count(*) AS event_count + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key, + final_uuid AS uuid, + final_timestamp AS timestamp, + final_session_id AS session_id, + final_window_id AS window_id + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + joined_path_tuple.4 AS final_uuid, + joined_path_tuple.5 AS final_timestamp, + joined_path_tuple.6 AS final_session_id, + joined_path_tuple.7 AS final_window_id, + arrayFilter((x, y) -> y, uuid_items, mapping) AS uuid, + arrayFilter((x, y) -> y, timestamp_items, mapping) AS timestamp, + arrayFilter((x, y) -> y, session_id_items, mapping) AS session_id, + arrayFilter((x, y) -> y, window_id_items, mapping) AS window_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 4) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 4) AS limited_timings, + if(ifNull(greater(target_index, 0), 0), arraySlice(uuid, target_index), uuid) AS filtered_uuid, + arraySlice(filtered_uuid, 1, 4) AS limited_uuid, + if(ifNull(greater(target_index, 0), 0), arraySlice(timestamp, target_index), timestamp) AS filtered_timestamp, + arraySlice(filtered_timestamp, 1, 4) AS limited_timestamp, + if(ifNull(greater(target_index, 0), 0), arraySlice(session_id, target_index), session_id) AS filtered_session_id, + arraySlice(filtered_session_id, 1, 4) AS limited_session_id, + if(ifNull(greater(target_index, 0), 0), arraySlice(window_id, target_index), window_id) AS filtered_window_id, + arraySlice(filtered_window_id, 1, 4) AS limited_window_id, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, '')), limited_uuid, limited_timestamp, limited_session_id, limited_window_id) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list), uuid_list, timestamp_list, session_id_list, window_id_list) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths, + path_time_tuple.4 AS uuid_items, + path_time_tuple.5 AS timestamp_items, + path_time_tuple.6 AS session_id_items, + path_time_tuple.7 AS window_id_items + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list, + groupArray(uuid) AS uuid_list, + groupArray(timestamp) AS timestamp_list, + groupArray(session_id) AS session_id_list, + groupArray(window_id) AS window_id_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$screen'), replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$screen_name'), ''), 'null'), '^"|"$', ''), if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event)), '') AS path_item_ungrouped, + events.uuid AS uuid, + toTimeZone(events.timestamp, 'UTC') AS timestamp, + ifNull(nullIf(nullIf(events.`$session_id`, ''), 'null'), '') AS session_id, + ifNull(nullIf(nullIf(events.`$window_id`, ''), 'null'), '') AS window_id, + NULL AS groupings, + multiMatchAnyIndex(path_item_ungrouped, NULL) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2011-12-31 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-07 23:59:59', 6, 'UTC'))))) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE and(ifNull(equals(last_path_key, '3_/3'), 0), ifNull(equals(path_key, '4_/4'), 0)) + GROUP BY person_id) AS source ON equals(persons.id, source.actor_id) + ORDER BY persons.id ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_wildcard_groups_across_people + ''' + SELECT last_path_key AS source_event, + path_key AS target_event, + count(*) AS event_count, + avg(conversion_time) AS average_conversion_time + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 4) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 4) AS limited_timings, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list)) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event), '') AS path_item_ungrouped, + ['/bar/*/foo'] AS groupings, + multiMatchAnyIndex(path_item_ungrouped, ['/bar/.*/foo']) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-05-23 23:59:59', 6, 'UTC')))), equals(events.event, '$pageview')) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE isNotNull(source_event) + GROUP BY source_event, + target_event + ORDER BY event_count DESC, + source_event ASC, + target_event ASC + LIMIT 50 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- +# name: TestClickhousePaths.test_wildcard_groups_evil_input + ''' + SELECT last_path_key AS source_event, + path_key AS target_event, + count(*) AS event_count, + avg(conversion_time) AS average_conversion_time + FROM + (SELECT person_id AS person_id, + path AS path, + conversion_time AS conversion_time, + event_in_session_index AS event_in_session_index, + concat(ifNull(toString(event_in_session_index), ''), '_', ifNull(toString(path), '')) AS path_key, + if(ifNull(greater(event_in_session_index, 1), 0), concat(ifNull(toString(minus(event_in_session_index, 1)), ''), '_', ifNull(toString(prev_path), '')), NULL) AS last_path_key, + path_dropoff_key AS path_dropoff_key + FROM + (SELECT person_id AS person_id, + joined_path_tuple.1 AS path, + joined_path_tuple.2 AS conversion_time, + joined_path_tuple.3 AS prev_path, + event_in_session_index, + session_index AS session_index, + arrayPopFront(arrayPushBack(path_basic, '')) AS path_basic_0, + arrayMap((x, y) -> if(ifNull(equals(x, y), isNull(x) + and isNull(y)), 0, 1), path_basic, path_basic_0) AS mapping, + arrayFilter((x, y) -> y, time, mapping) AS timings, + arrayFilter((x, y) -> y, path_basic, mapping) AS compact_path, + indexOf(compact_path, NULL) AS target_index, + if(ifNull(greater(target_index, 0), 0), arraySlice(compact_path, target_index), compact_path) AS filtered_path, + arraySlice(filtered_path, 1, 5) AS limited_path, + if(ifNull(greater(target_index, 0), 0), arraySlice(timings, target_index), timings) AS filtered_timings, + arraySlice(filtered_timings, 1, 5) AS limited_timings, + arrayDifference(limited_timings) AS timings_diff, + concat(ifNull(toString(length(limited_path)), ''), '_', ifNull(toString(limited_path[-1]), '')) AS path_dropoff_key, + arrayZip(limited_path, timings_diff, arrayPopBack(arrayPushFront(limited_path, ''))) AS limited_path_timings + FROM + (SELECT person_id AS person_id, + path_time_tuple.1 AS path_basic, + path_time_tuple.2 AS time, + session_index, + arrayZip(path_list, timing_list, arrayDifference(timing_list)) AS paths_tuple, + arraySplit(x -> if(ifNull(less(x.3, 1800), 0), 0, 1), paths_tuple) AS session_paths + FROM + (SELECT person_id AS person_id, + groupArray(timestamp) AS timing_list, + groupArray(path_item) AS path_list + FROM + (SELECT toTimeZone(events.timestamp, 'UTC') AS timestamp, + events__pdi.person_id AS person_id, + ifNull(if(equals(events.event, '$pageview'), replaceRegexpAll(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, '$current_url'), ''), 'null'), '^"|"$', ''), ''), '(.)/$', '\\1'), events.event), '') AS path_item_ungrouped, + ['(a+)+', + '[aaa|aaaa]+', + '1.*', + '.*', + '/3?q=1', + '/3*'] AS groupings, + multiMatchAnyIndex(path_item_ungrouped, ['\\(a\\+\\)\\+', '\\[aaa\\|aaaa\\]\\+', '1\\..*', '\\..*', '/3\\?q=1', '/3.*']) AS group_index, + (if(ifNull(greater(group_index, 0), 0), groupings[group_index], path_item_ungrouped) AS path_item) AS path_item + FROM events + INNER JOIN + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 2) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) + WHERE and(equals(events.team_id, 2), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2012-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-05-23 23:59:59', 6, 'UTC')))), equals(events.event, '$pageview')) + ORDER BY events__pdi.person_id ASC, toTimeZone(events.timestamp, 'UTC') ASC) + GROUP BY person_id) ARRAY + JOIN session_paths AS path_time_tuple, + arrayEnumerate(session_paths) AS session_index) ARRAY + JOIN limited_path_timings AS joined_path_tuple, + arrayEnumerate(limited_path_timings) AS event_in_session_index)) + WHERE isNotNull(source_event) + GROUP BY source_event, + target_event + ORDER BY event_count DESC, + source_event ASC, + target_event ASC + LIMIT 50 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ''' +# --- diff --git a/posthog/hogql_queries/insights/test/test_stickiness_query_runner.py b/posthog/hogql_queries/insights/test/test_stickiness_query_runner.py index c1d217872046b..0304184731c76 100644 --- a/posthog/hogql_queries/insights/test/test_stickiness_query_runner.py +++ b/posthog/hogql_queries/insights/test/test_stickiness_query_runner.py @@ -442,7 +442,7 @@ def test_any_event(self): def test_actions(self): self._create_test_events() - action = Action.objects.create(name="$pageview", team=self.team) + action = Action.objects.create(name="My Action", team=self.team) ActionStep.objects.create( action=action, event="$pageview", diff --git a/posthog/hogql_queries/utils/__init__.py b/posthog/hogql_queries/utils/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/posthog/hogql_queries/utils/recordings.py b/posthog/hogql_queries/utils/recordings.py new file mode 100644 index 0000000000000..0f0e3daa6c9b6 --- /dev/null +++ b/posthog/hogql_queries/utils/recordings.py @@ -0,0 +1,58 @@ +from collections import defaultdict + +from posthog.hogql import ast +from posthog.hogql.query import execute_hogql_query +from posthog.models import Team +from posthog.session_recordings.models.session_recording import SessionRecording + + +class RecordingsHelper: + def __init__(self, team: Team): + self.team = team + + def session_ids_all(self, session_ids) -> set[str]: + query = """ + SELECT DISTINCT session_id + FROM session_replay_events + WHERE session_id in {session_ids} + """ + + # TODO: Date filters, are they needed? + + response = execute_hogql_query( + query, + placeholders={"session_ids": ast.Array(exprs=[ast.Constant(value=s) for s in session_ids])}, + team=self.team, + ) + if not response.results: + return set() + + return {str(result[0]) for result in response.results} + + def session_ids_deleted(self, session_ids) -> set[str]: + return set( + SessionRecording.objects.filter(team_id=self.team.pk, session_id__in=session_ids, deleted=True).values_list( + "session_id", flat=True + ) + ) + + def get_recordings(self, matching_events) -> dict[str, list[dict]]: + mapped_events = defaultdict(list) + for event in matching_events: + mapped_events[event[2]].append(event) + + raw_session_ids = mapped_events.keys() + valid_session_ids = self.session_ids_all(raw_session_ids) - self.session_ids_deleted(raw_session_ids) + + return { + str(session_id): [ + { + "timestamp": event[0], + "uuid": event[1], + "window_id": event[3], + } + for event in events + ] + for session_id, events in mapped_events.items() + if session_id in valid_session_ids and len(events) > 0 + } diff --git a/posthog/schema.py b/posthog/schema.py index 037c48aab6e50..24ce85784c1a3 100644 --- a/posthog/schema.py +++ b/posthog/schema.py @@ -454,11 +454,11 @@ class PathsFilter(BaseModel): localPathCleaningFilters: Optional[List[PathCleaningFilter]] = None maxEdgeWeight: Optional[int] = None minEdgeWeight: Optional[int] = None - pathDropoffKey: Optional[str] = None - pathEndKey: Optional[str] = None + pathDropoffKey: Optional[str] = Field(default=None, description="Relevant only within actors query") + pathEndKey: Optional[str] = Field(default=None, description="Relevant only within actors query") pathGroupings: Optional[List[str]] = None pathReplacements: Optional[bool] = None - pathStartKey: Optional[str] = None + pathStartKey: Optional[str] = Field(default=None, description="Relevant only within actors query") pathsHogQLExpression: Optional[str] = None startPoint: Optional[str] = None stepLimit: Optional[int] = None diff --git a/posthog/tasks/exporter.py b/posthog/tasks/exporter.py index 1d01de9b76566..097010027717f 100644 --- a/posthog/tasks/exporter.py +++ b/posthog/tasks/exporter.py @@ -43,7 +43,7 @@ acks_late=True, ignore_result=False, time_limit=settings.ASSET_GENERATION_MAX_TIMEOUT_SECONDS, - queue=CeleryQueue.EXPORTS, + queue=CeleryQueue.EXPORTS.value, ) def export_asset(exported_asset_id: int, limit: Optional[int] = None) -> None: from posthog.tasks.exports import csv_exporter, image_exporter diff --git a/posthog/tasks/tasks.py b/posthog/tasks/tasks.py index 22ddbd71c9897..b213c8fdd9ce0 100644 --- a/posthog/tasks/tasks.py +++ b/posthog/tasks/tasks.py @@ -27,7 +27,7 @@ def redis_heartbeat() -> None: get_client().set("POSTHOG_HEARTBEAT", int(time.time())) -@shared_task(ignore_result=True, queue=CeleryQueue.ANALYTICS_QUERIES) +@shared_task(ignore_result=True, queue=CeleryQueue.ANALYTICS_QUERIES.value) def process_query_task( team_id: str, query_id: str, query_json: Any, limit_context: Any = None, refresh_requested: bool = False ) -> None: @@ -450,7 +450,7 @@ def clear_clickhouse_deleted_person() -> None: remove_deleted_person_data() -@shared_task(ignore_result=True, queue=CeleryQueue.STATS) +@shared_task(ignore_result=True, queue=CeleryQueue.STATS.value) def redis_celery_queue_depth() -> None: try: with pushed_metrics_registry("redis_celery_queue_depth_registry") as registry: @@ -460,8 +460,10 @@ def redis_celery_queue_depth() -> None: registry=registry, ) - llen = get_client().llen("celery") - celery_task_queue_depth_gauge.set(llen) + for queue in CeleryQueue: + llen = get_client().llen(queue.value) + celery_task_queue_depth_gauge.labels(queue_name=queue.value).set(llen) + except: # if we can't generate the metric don't complain about it. return diff --git a/posthog/tasks/utils.py b/posthog/tasks/utils.py index 3efd8290cceea..ecabc29adac79 100644 --- a/posthog/tasks/utils.py +++ b/posthog/tasks/utils.py @@ -22,7 +22,10 @@ # NOTE: Keep in sync with bin/celery-queues.env -class CeleryQueue: +from enum import Enum + + +class CeleryQueue(Enum): DEFAULT = "celery" STATS = "stats" EMAIL = "email"